Part I:词频统计并返回topN
统计的文本数据:
what do you do
how do you do
how do you do
how are you
from operator import add
from pyspark import SparkContext
def sort_t():
sc = SparkContext(a...
#瓦尔登湖词频统计:
import string
path = 'D:/python3/Walden.txt'
with open(path,'r',encoding= 'utf-8') as text:
words = [raw_word.strip(string.punctuation).lower() for raw_word in text.read().s...