统计书中单词出现次数, 然后把数据生成pdf

3/8/2017来源:ASP.NET技巧人气:1699

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Count how often each word occurs in a text read from standard input.

Written for Python 3. The input is split into blank-line separated blocks,
each block is tallied with ``collections.Counter``, and the 100 most common
words are printed, one ``word count`` pair per line.
"""
import sys
from collections import Counter


# lines() and blocks() build text blocks in the style the original author
# took from a Python basics tutorial book.
def lines(filee):
    """Yield every line of *filee*, then one extra newline.

    The extra newline works as a sentinel: it lets blocks() flush the last
    block even when the input does not end with a blank line.
    """
    for line in filee:
        yield line
    yield '\n'


def blocks(filee):
    """Yield each run of consecutive non-blank lines as one stripped string."""
    block = []
    for line in lines(filee):
        if line.strip():
            block.append(line)
        elif block:
            # A blank line closes the block collected so far.
            yield ''.join(block).strip()
            block = []


def old_add_new(new_dict, old_dict):
    """Add the counts of *new_dict* onto *old_dict* in place.

    Keys missing from *old_dict* are created. Returns *old_dict*.
    """
    for key, count in new_dict.items():
        old_dict[key] = old_dict.get(key, 0) + count
    return old_dict


# Running totals; handler() accumulates into this module-level dict.
Total = {}


def handler(filee):
    """Tally the words of *filee* into Total and print the top 100.

    Words are whatever ``str.split()`` produces, so punctuation and letter
    case are kept as they appear in the text.
    """
    for block in blocks(filee):
        old_add_new(Counter(block.split()), Total)
    for word, number in Counter(Total).most_common(100):
        print(word, number)


if __name__ == "__main__":
    # Only consume stdin when run as a script, not when imported.
    handler(sys.stdin)

# 命令行输入(从 input-text.txt 读入, 结果写入 out-text.txt): $ python counter.py < input-text.txt > out-text.txt

"""-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------""" #把统计的数据生成pdf

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Render the word-count data of a book as a PDF table."""
import os

from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Table

# Container for the flowables that make up the document.
elements = []

# Stock sample style sheet; only the 'Title' style is used below.
styles = getSampleStyleSheet()

# NOTE: the argument is passed as the document's file name, so the PDF is
# written to a file literally called "Words Times" (no .pdf extension).
doc_title = SimpleDocTemplate('Words Times')

# Heading above the table, rendered in the 'Title' style.
elements.append(Paragraph('Number of times the word appears in the book',
                          styles['Title']))

# Build the two-dimensional table data from the word-count output file.
# open() does not expand "~" on its own, so resolve the home directory first.
filepath = os.path.expanduser('~/janeoutput.txt')
data = [['BookName', 'Jane Eyre']]
with open(filepath) as files:
    for line in files:
        row = line.split()
        if row:
            # Blank lines would otherwise become empty table rows.
            data.append(row)
data.append(['End!', 'End!'])
# print(data)

# Table style: alignment, fonts and rule lines.
ts = [
    ('ALIGN', (1, 1), (-1, -1), 'CENTER'),
    # First (header) row.
    ('LINEABOVE', (0, 0), (-1, 0), 1, colors.yellow),
    ('LINEBELOW', (0, 0), (-1, 0), 1, colors.green),
    ('FONT', (0, 0), (-1, 0), 'Times-Bold'),
    # Last (footer) row.
    ('LINEABOVE', (0, -1), (-1, -1), 1, colors.blue),
    ('LINEBELOW', (0, -1), (-1, -1), 0.5, colors.black, 1, None, None, 4, 1),
    ('LINEBELOW', (0, -1), (-1, -1), 1, colors.black),
    ('FONT', (0, -1), (-1, -1), 'Times-Bold'),
]

# Create the Table from the data and style, and queue it for rendering.
table = Table(data, style=ts)
elements.append(table)

# Write the PDF.
doc_title.build(elements)