使用 Python 基于 MapReduce(M/R)实现 WordCount
Map阶段
#!/usr/bin/python
"""Map phase of a streaming word count.

Reads text lines and emits one ``word<TAB>1`` record per
whitespace-separated word.
"""
import sys


def main(stdin=None, stdout=None):
    """Emit a ``word<TAB>1`` record for every word read from *stdin*.

    Args:
        stdin: Iterable of text lines; defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method; defaults to ``sys.stdout``.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    for line in stdin:
        # split() with no argument already discards leading/trailing
        # whitespace and yields nothing for blank lines.
        for word in line.split():
            # Key and count are tab-separated: the reduce phase splits
            # each record on '\t'.
            stdout.write("{}\t{}\n".format(word, 1))


if __name__ == "__main__":
    main()
Reduce阶段
#!/usr/bin/python
"""Reduce phase of a streaming word count.

Reads ``word<TAB>count`` records and prints one ``word total`` line per run
of consecutive records that share the same word.
"""
import sys


def _parse_record(line):
    """Return ``(word, count)`` parsed from one record, or None if malformed."""
    line = line.strip()
    if "\t" in line:
        word, count = line.split("\t", 1)
    else:
        # Tolerate mappers that separate key and count with plain whitespace.
        parts = line.rsplit(None, 1)
        if len(parts) != 2:
            return None
        word, count = parts
    try:
        return word, int(count)
    except ValueError:
        # A non-numeric count is ignored rather than aborting the job.
        return None


def main(stdin=None, stdout=None):
    """Sum the counts of consecutive records that share a word.

    Totals are accumulated only across adjacent records with the same word,
    so the input is expected to arrive grouped by word (e.g. sorted).
    Malformed records (no separator, non-numeric count, blank lines) are
    skipped.

    Args:
        stdin: Iterable of text lines; defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method; defaults to ``sys.stdout``.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    current_word = None
    current_count = 0
    for line in stdin:
        record = _parse_record(line)
        if record is None:
            continue
        word, count = record
        if word == current_word:
            current_count += count
        else:
            # A new word starts: flush the finished group first.
            if current_word is not None:
                stdout.write("{} {}\n".format(current_word, current_count))
            current_word, current_count = word, count

    # Flush the last group. Testing current_word (not the loop variable)
    # keeps this safe on empty input and when the final record was skipped.
    if current_word is not None:
        stdout.write("{} {}\n".format(current_word, current_count))


if __name__ == "__main__":
    main()

浙公网安备 33010602011771号