Map and Reduce Python Script Example
Below is a first example of a Map script, a Reduce script, and a sample dataset.
Let’s look at the Mapper.py file:
"""Map step: summarise the numbers read from standard input.

Reads one floating-point value per line and writes a single record
``count mean mean_of_squares`` to standard output, which the reducer
combines with the records produced by the other mappers.
"""
import sys

# ``mat`` was removed from the NumPy main namespace in NumPy 2.0, so a plain
# array is used instead; the statistics computed below are unaffected.
from numpy import asarray, mean, power


def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    *file* may be any iterable of strings (an open file, ``sys.stdin``,
    or a list of lines).
    """
    for line in file:
        yield line.rstrip()


def summarize(lines):
    """Return ``(count, mean, mean_of_squares)`` for the numbers in *lines*.

    Args:
        lines: iterable of strings, each holding one number. Blank lines
            (for example a trailing newline at the end of the data) are
            skipped instead of crashing ``float()``.

    Returns:
        A tuple ``(int, float, float)``. When there are no values the
        result is ``(0, 0.0, 0.0)``: the mean of an empty array is NaN,
        and a NaN multiplied by a zero count is still NaN, which would
        poison the weighted sums computed by the reducer.

    Raises:
        ValueError: if a non-blank line cannot be parsed as a float.
    """
    values = [float(line) for line in lines if line.strip()]
    if not values:
        return 0, 0.0, 0.0
    data = asarray(values)
    # Element-wise squares, needed downstream for the mean of squares.
    squares = power(data, 2)
    return len(values), float(mean(data)), float(mean(squares))


def main(stdin=None, stdout=None, stderr=None):
    """Run the mapper; the streams default to the process's standard streams."""
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr

    count, avg, avg_sq = summarize(read_input(stdin))
    # One space-separated record per mapper: size, mean, mean(square values).
    # ``.write`` works on both Python 2 and Python 3, unlike the print
    # statement, and ``%r`` keeps full floating-point precision.
    stdout.write("%d %r %r\n" % (count, avg, avg_sq))
    # Progress message on stderr, kept separate from the data on stdout.
    stderr.write("report: still alive\n")


if __name__ == '__main__':
    main()
Now for Reducer.py. The Mapper passes three elements per line to the Reducer:
numInputs, mean(input), mean(sqInput)
"""Reduce step: merge the per-mapper summaries into global statistics.

Each input line holds three whitespace-separated fields produced by a
mapper: ``count mean mean_of_squares``. The reducer writes one record
with the overall count, mean and mean of squares.
"""
import sys


def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    *file* may be any iterable of strings (an open file, ``sys.stdin``,
    or a list of lines).
    """
    for line in file:
        yield line.rstrip()


def combine_stats(records):
    """Combine mapper records into ``(total_count, mean, mean_of_squares)``.

    Args:
        records: iterable of strings, each ``"count mean mean_of_squares"``.
            Blank lines are skipped. Records whose count is zero are also
            skipped: they carry no samples, and a mapper that saw no data
            may report NaN means, which would turn every total into NaN.

    Returns:
        A tuple of three floats. When no samples were seen at all the
        result is ``(0.0, 0.0, 0.0)`` rather than a ZeroDivisionError.

    Raises:
        IndexError: if a non-blank record has fewer than three fields.
        ValueError: if a field cannot be parsed as a float.
    """
    total_n = 0.0
    total_sum = 0.0
    total_sum_sq = 0.0
    for record in records:
        fields = record.split()
        if not fields:
            continue
        n_j = float(fields[0])
        if n_j == 0:
            continue
        total_n += n_j
        # A mean times its sample count recovers that mapper's plain sum,
        # so the sums can be added across mappers of different sizes.
        total_sum += n_j * float(fields[1])
        total_sum_sq += n_j * float(fields[2])

    if total_n == 0:
        return 0.0, 0.0, 0.0
    return total_n, total_sum / total_n, total_sum_sq / total_n


def main(stdin=None, stdout=None, stderr=None):
    """Run the reducer; the streams default to the process's standard streams."""
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr

    total_n, overall_mean, overall_mean_sq = combine_stats(read_input(stdin))
    # Output size, mean, mean(square values). ``.write`` works on both
    # Python 2 and Python 3, and ``%r`` keeps full floating-point precision.
    stdout.write("%r %r %r\n" % (total_n, overall_mean, overall_mean_sq))
    # Progress message on stderr, kept separate from the data on stdout.
    stderr.write("report: still alive\n")


if __name__ == '__main__':
    main()
See the sample dataset:
0.865670009848
0.240464946103
0.38583753445
0.851896046359
0.56613365811
0.901353547484
0.47530934886
0.903698474043
0.690057722624
0.549349071622
0.374166366825
0.63335531551
0.607434274558
0.1626603772
Advertisement
Posted on January 27, 2012, in Amazon EC2 and tagged mapper, python, reducer, sample. Bookmark the permalink. Leave a Comment.


Leave a Comment
Comments (0)