Blog Archives
Map and Reduce Python Script Example
Below is a first example of a map script, a reduce script and a sample dataset.
Let’s look at the Mapper.py file:
"""Hadoop-streaming mapper: summarise the numbers read from stdin.

Writes one record to stdout, ``<count> <mean> <mean of squares>``, which the
reducer combines with the records of the other mappers.
"""
import sys

# numpy.mat was removed in NumPy 2.0; a plain array is all the maths needs.
from numpy import asarray, mean, power


def read_input(file):
    """Yield each line of *file* with trailing whitespace removed."""
    for line in file:
        yield line.rstrip()


def summarize(lines):
    """Return ``(count, mean, mean_of_squares)`` for the numbers in *lines*.

    Each line may hold one or more whitespace-separated numbers; blank lines
    are ignored. Returns ``None`` when there are no numbers at all, because
    the means are undefined in that case.

    Raises:
        ValueError: if a token cannot be parsed as a float.
    """
    values = []
    for line in lines:
        # split() copes with blank lines and with several values per line.
        values.extend(float(token) for token in line.split())
    if not values:
        return None
    vector = asarray(values, dtype=float)
    # Plain floats keep the emitted text independent of the numpy version.
    return len(values), float(mean(vector)), float(mean(power(vector, 2)))


def main():
    """Read samples from stdin and emit this mapper's partial summary."""
    summary = summarize(read_input(sys.stdin))
    # An empty split emits no record: "0 nan nan" would poison the
    # reducer's weighted sums.
    if summary is not None:
        sys.stdout.write("%s %s %s\n" % summary)
    # Progress message on stderr, kept separate from the data on stdout.
    sys.stderr.write("report: still alive\n")


if __name__ == '__main__':
    main()
Now for Reducer.py. Each line passed from the Mapper to the Reducer contains three elements:
numInputs, mean(input), mean(sqInput)
import sys
from numpy import mat, mean, power
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    *file* is any iterable of strings; the reducer passes ``sys.stdin``.
    """
    for line in file:
        yield line.rstrip()
def combine_partials(records):
    """Merge mapper records into overall statistics.

    Each record is a text line ``<count> <mean> <mean of squares>``.
    Returns ``(total_count, overall_mean, overall_mean_of_squares)``, or
    ``None`` when the records contain no samples.

    Raises:
        ValueError: if a field cannot be parsed as a float.
        IndexError: if a non-blank record has fewer than three fields.
    """
    total_n = 0.0
    total_sum = 0.0
    total_sum_sq = 0.0
    for record in records:
        fields = record.split()
        if not fields:
            continue  # blank line, nothing to merge
        count = float(fields[0])
        if count == 0:
            # A mapper that saw no samples has undefined (nan) means;
            # 0 * nan is nan and would corrupt the running sums.
            continue
        total_n += count
        # Weight each mean by its sample count to recover the raw sums.
        total_sum += count * float(fields[1])
        total_sum_sq += count * float(fields[2])
    if total_n == 0:
        return None  # no samples: avoid dividing by zero
    return total_n, total_sum / total_n, total_sum_sq / total_n


def main():
    """Read mapper records from stdin and emit the combined summary."""
    totals = combine_partials(read_input(sys.stdin))
    if totals is not None:
        # output size, mean, mean(square values)
        sys.stdout.write("%s %s %s\n" % totals)
    # Progress message on stderr, kept separate from the data on stdout.
    sys.stderr.write("report: still alive\n")


if __name__ == '__main__':
    main()
See the sample dataset:
0.865670009848 0.240464946103 0.38583753445 0.851896046359 0.56613365811 0.901353547484 0.47530934886 0.903698474043 0.690057722624 0.549349071622 0.374166366825 0.63335531551 0.607434274558 0.1626603772

