Map and Reduce Python Script Example

Below is an example of a first Map script, a Reduce script and a sample dataset.

Let’s look at the Mapper.py file:

import sys
from numpy import mat, mean, power
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    *file* is any iterable of strings (the script passes ``sys.stdin``).
    The trailing newline is stripped along with any other trailing
    whitespace; leading whitespace is kept. Lines are produced lazily,
    one at a time.
    """
    for line in file:
        # rstrip() drops the trailing newline and any trailing blanks.
        yield line.rstrip()
# Mapper: summarise the numbers read from stdin as one line of the form
# "<count> <mean> <mean of squares>".

# Read stdin and cast every line to float. Blank lines are skipped so that a
# stray empty line does not make float() raise ValueError.
samples = [float(line) for line in read_input(sys.stdin) if line.strip()]

# Number of samples read; printed first so the consumer can weight the means.
numInputs = len(samples)

# Emit nothing when there is no data: the mean of an empty matrix is nan,
# which would contaminate any weighted sum computed downstream.
if numInputs:
    # Convert the list to a matrix.
    samples = mat(samples)

    # Element-wise squares of the samples.
    sqInput = power(samples, 2)

    # Output count, mean and mean of the squared values. Writing a
    # %s-formatted line gives the same space-separated text as the Python 2
    # print statement while also being valid syntax under Python 3.
    sys.stdout.write("%s %s %s\n" % (numInputs, mean(samples), mean(sqInput)))

# Liveness message on stderr (kept off stdout so it is not treated as data).
sys.stderr.write("report: still alive\n")

# No-op entry-point guard kept from the original script; all of the work
# happens at module level above.
if __name__ == '__main__':
    pass

Now for Reducer.py. Each line passed from the Mapper to the Reducer contains three elements:
numInputs, mean(input), mean(sqInput)

import sys
from numpy import mat, mean, power

def read_input(file):
    """Lazily yield every line of *file* without its trailing whitespace."""
    for raw_line in file:
        trimmed = raw_line.rstrip()
        yield trimmed

# Reducer: combine the "<count> <mean> <mean of squares>" lines read from
# stdin into a single overall line in the same format.

# Split every line into its three whitespace-separated fields. Blank lines
# are skipped so they do not raise IndexError in the loop below.
mapperOut = [
    instance.split()
    for instance in read_input(sys.stdin)
    if instance.strip()
]

# Running totals: number of samples (cumN), sum of values (cumVal) and sum of
# squared values (cumSumSq).
cumVal = 0.0
cumSumSq = 0.0
cumN = 0.0

for instance in mapperOut:
    # Sample count reported on this line.
    nj = float(instance[0])
    if nj == 0:
        # A zero-count record carries no data, and its means may be nan;
        # multiplying by nan would turn the totals into nan.
        continue
    cumN += nj
    # A mean times its count recovers the sum, so the totals are weighted
    # by the size of each record.
    cumVal += nj * float(instance[1])
    cumSumSq += nj * float(instance[2])

# Only report when at least one sample was seen; dividing by a zero count
# would raise ZeroDivisionError.
if cumN:
    # Named overallMean so the imported numpy `mean` is not rebound.
    overallMean = cumVal / cumN
    meanSq = cumSumSq / cumN

    # Output count, mean and mean of the squared values. Writing a
    # %s-formatted line gives the same space-separated text as the Python 2
    # print statement while also being valid syntax under Python 3.
    sys.stdout.write("%s %s %s\n" % (cumN, overallMean, meanSq))

# Liveness message on stderr (kept off stdout so it is not treated as data).
sys.stderr.write("report: still alive\n")

# No-op entry-point guard kept from the original script; all of the work
# happens at module level above.
if __name__ == '__main__':
    pass

See the sample dataset:

0.865670009848
0.240464946103
0.38583753445
0.851896046359
0.56613365811
0.901353547484
0.47530934886
0.903698474043
0.690057722624
0.549349071622
0.374166366825
0.63335531551
0.607434274558
0.1626603772
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: