Below is an example of a first mapper, a first reducer, and a sample data set.
Let’s look at the Mapper.py file:
import sys

# asarray replaces the `mat` alias, which NumPy 2.0 removed from the main
# namespace; a plain 1-D array yields the same scalar means.
from numpy import asarray, mean, power


def read_input(file):
    """Yield each line of *file* with trailing whitespace (newline included) removed."""
    for line in file:
        yield line.rstrip()


def summarize(values):
    """Return ``(count, mean, mean_of_squares)`` for *values*.

    Returns ``None`` when *values* is empty: the mean of nothing is NaN, and
    a ``0 nan nan`` record would turn the reducer's weighted sums into NaN.
    """
    count = len(values)
    if count == 0:
        return None
    data = asarray(values, dtype=float)
    return count, mean(data), mean(power(data, 2))


def run_mapper(infile, outfile, errfile):
    """Read one number per line from *infile* and write one summary record.

    The record written to *outfile* is ``count mean mean_of_squares``
    separated by single spaces, which is the three-field line the reducer
    splits on. Blank lines are skipped. Nothing is written to *outfile*
    when there are no numbers.
    """
    # Blank lines are dropped here because float('') raises ValueError.
    values = [float(line) for line in read_input(infile) if line]
    stats = summarize(values)
    if stats is not None:
        # %s formats with str(), matching what a print statement would emit.
        outfile.write("%s %s %s\n" % stats)
    # Progress message on the error stream (presumably a keep-alive signal
    # for the job runner -- confirm against the streaming setup).
    errfile.write("report: still alive\n")


if __name__ == '__main__':
    run_mapper(sys.stdin, sys.stdout, sys.stderr)
Now for the Reducer.py file. The Mapper passes three elements per line to the Reducer:
numInputs, mean(input), mean(sqInput)
import sys


def read_input(file):
    """Yield each line of *file* with trailing whitespace (newline included) removed."""
    for line in file:
        yield line.rstrip()


def combine(records):
    """Merge per-mapper summaries into one overall summary.

    Each record is a sequence whose first three items are the sample count,
    the mean, and the mean of squares for one mapper's share of the data
    (strings or numbers; they are cast to float). Every mean is weighted by
    its count, so the result equals the mean over all samples.

    Returns ``(total_count, mean, mean_of_squares)`` as floats, or ``None``
    when the records contain no samples at all.
    """
    total = 0.0
    weighted_sum = 0.0
    weighted_sum_sq = 0.0
    for record in records:
        count = float(record[0])
        if count == 0:
            # A zero-count record carries no data, and its means may be NaN;
            # 0 * NaN is NaN, which would poison both running sums.
            continue
        total = total + count
        weighted_sum = weighted_sum + count * float(record[1])
        weighted_sum_sq = weighted_sum_sq + count * float(record[2])
    if total == 0:
        # No samples: avoid dividing by zero.
        return None
    return total, weighted_sum / total, weighted_sum_sq / total


def run_reducer(infile, outfile, errfile):
    """Read mapper records from *infile* and write the combined summary.

    Each input line holds ``count mean mean_of_squares`` separated by
    whitespace. Blank lines are skipped. One line with the overall count,
    mean and mean of squares is written to *outfile*; nothing is written
    when there are no samples.
    """
    records = (line.split() for line in read_input(infile) if line)
    result = combine(records)
    if result is not None:
        # %s formats with str(), matching what a print statement would emit.
        outfile.write("%s %s %s\n" % result)
    # Progress message on the error stream (presumably a keep-alive signal
    # for the job runner -- confirm against the streaming setup).
    errfile.write("report: still alive\n")


if __name__ == '__main__':
    run_reducer(sys.stdin, sys.stdout, sys.stderr)
See the sample dataset:
0.865670009848 0.240464946103 0.38583753445 0.851896046359 0.56613365811 0.901353547484 0.47530934886 0.903698474043 0.690057722624 0.549349071622 0.374166366825 0.63335531551 0.607434274558 0.1626603772