有时候不方便用电脑看代码,正好有个 Kindle 在吃灰,于是把一些学习资料里的代码整理了一下,没事可以读读看。
工具部分源码来源于网络,经过自己整理适合自己用,相信有些功能大家可能也会用的到。里面具体使用方法我会说明下,还有问题大家可以联系我
文章包含以下内容,按可能的使用顺序介绍:
- 一 去重相同的代码文件的工具
- 二 ansi编码转换为utf8编码工具
- 三 code转换代码为pdf工具
- 四 code转换为word工具
一 去重工具代码。因为用得少,没有时间多做整理,用的时候大家可以自行调整其中的部分代码以适应自己的需要,能用就行。
-
这个工具根据文件的 MD5 值判定重复。使用场景:很多相同的文件分散在不同目录,直接做 code2pdf 或 code2word 的话重复内容太多,没法看。如果你的代码没有这样的问题,就不需要执行这个工具。
-
指定要处理的目录 root_path = r".\aa" 例如
-
suffix_list 定义的部分:大家需要看哪种代码,就把哪种代码的后缀放进去
-
os.system("rm " +fileMap[xmd5]) 部分,大家需要的话可以根据自己的系统调整,或者写成 os.remove(fileMap[xmd5]);请谨慎使用,默认是关闭(注释掉)的。我这里是把命令打印出来保存,自己确认后再手工执行:
print("\nrm " +fileMap[xmd5])
import hashlib
import os
# Root directory that is scanned for duplicate files.
root_path = r"e:\aa"
# Directories matched by one of these entries are skipped during the scan.
exclude_path_list = [r"./build"]
# Only files ending in one of these suffixes are hashed and compared.
# The list used to be assigned twice in a row, which made the first value
# ([".cpp", ".c", ".java"]) dead code; only the effective value is kept.
# Edit this list to match the kind of source files you want to de-duplicate.
suffix_list = [r".dart"]
# Number of files hashed so far (updated by main()).
count = 0
# Maps an MD5 hex digest to the path of a file with that content
# (filled by main()).
fileMap = {}
def check_file_suffix(file, suffix_list):
    """Return True when *file* ends with any suffix in *suffix_list*.

    Args:
        file: File name or path to test.
        suffix_list: Iterable of suffix strings such as ``".cpp"``.

    Returns:
        bool: True if at least one suffix matches, otherwise False
        (including when *suffix_list* is empty).
    """
    # str.endswith accepts a tuple of candidates, replacing the manual loop.
    return file.endswith(tuple(suffix_list))
def check_path_exclude(path, exclude_path_list):
    """Return True when *path* is an excluded directory or lies inside one.

    Both sides are normalized before comparing, so ``./build``, ``build`` and
    ``build/`` are treated as the same directory, and the match is made on a
    path-component boundary: excluding ``./build`` no longer excludes a
    sibling such as ``./builder``, which a raw prefix test did.

    Args:
        path: Directory path to test.
        exclude_path_list: Iterable of directory paths to exclude.

    Returns:
        bool: True if *path* equals or is nested under an excluded directory.
    """
    normalized = os.path.normcase(os.path.normpath(path))
    for excluded in exclude_path_list:
        prefix = os.path.normcase(os.path.normpath(excluded))
        # os.path.join(prefix, "") appends exactly one trailing separator,
        # which anchors the prefix test at a directory boundary.
        if normalized == prefix or normalized.startswith(os.path.join(prefix, "")):
            return True
    return False
def GetFileMd5(filename):
    """Return the MD5 hex digest of a file's contents.

    Args:
        filename: Path of the file to hash.

    Returns:
        str | None: The 32-character hexadecimal digest, or None when
        *filename* is not an existing regular file.
    """
    if not os.path.isfile(filename):
        return None
    digest = hashlib.md5()
    # The context manager closes the handle even if a read fails part-way.
    with open(filename, 'rb') as f:
        # Read in chunks so large files are never loaded into memory at once.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def main():
    """Walk root_path and print an ``rm`` line for every duplicated file.

    Files are compared by the MD5 digest of their contents. When a digest has
    been seen before, the previously recorded path is printed as
    ``rm <path>`` and the current file takes its place in ``fileMap``, so
    the last copy encountered is the one that is kept.

    Nothing is deleted here on purpose: the printed commands are meant to be
    reviewed and then executed by hand.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            # Nothing below an excluded directory needs to be visited.
            subdirs[:] = []
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            xmd5 = GetFileMd5(file)
            if xmd5 is None:
                # Not a regular file (GetFileMd5 returned nothing). Using
                # None as a key would make unrelated entries look identical.
                continue
            count += 1
            if xmd5 in fileMap:
                print("\nrm " + fileMap[xmd5])
            fileMap[xmd5] = file


if __name__ == '__main__':
    main()
执行结果如下
确认屏幕输出 然后保存为批处理文件,或者复制了输出,直接打开控制台粘贴回车即可删除里面重复的内容了。
二 ansi编码转换为utf8编码工具
有时候会遇到一些源文件的编码不是 UTF-8,直接转换出来是乱码。如果有需要,您可以使用这个工具。它跟上一个工具的结构差不多,配置部分也类似,参考上面的说明设置成适合你要求的即可。
代码如下
import os
import chardet
# Directory tree whose source files are converted in place.
root_path = "./dp"
# Directory path prefixes that are left untouched.
exclude_path_list = ["./build"]
# File suffixes selected for conversion.
suffix_list = [
    ".cpp",
    ".c",
    ".java",
    ".dart",
]
# Running total of converted files.
count = 0
def check_file_suffix(file, suffix_list):
    """Return True when *file* ends with any suffix in *suffix_list*.

    Args:
        file: File name or path to test.
        suffix_list: Iterable of suffix strings such as ``".cpp"``.

    Returns:
        bool: True if at least one suffix matches, otherwise False
        (including when *suffix_list* is empty).
    """
    # str.endswith accepts a tuple of candidates, replacing the manual loop.
    return file.endswith(tuple(suffix_list))
def check_path_exclude(path, exclude_path_list):
    """Return True when *path* is an excluded directory or lies inside one.

    Both sides are normalized before comparing, so ``./build``, ``build`` and
    ``build/`` are treated as the same directory, and the match is made on a
    path-component boundary: excluding ``./build`` no longer excludes a
    sibling such as ``./builder``, which a raw prefix test did.

    Args:
        path: Directory path to test.
        exclude_path_list: Iterable of directory paths to exclude.

    Returns:
        bool: True if *path* equals or is nested under an excluded directory.
    """
    normalized = os.path.normcase(os.path.normpath(path))
    for excluded in exclude_path_list:
        prefix = os.path.normcase(os.path.normpath(excluded))
        # os.path.join(prefix, "") appends exactly one trailing separator,
        # which anchors the prefix test at a directory boundary.
        if normalized == prefix or normalized.startswith(os.path.join(prefix, "")):
            return True
    return False
def main():
    """Re-encode every matching file under root_path as UTF-8 without BOM.

    For each file whose name passes check_file_suffix, the encoding is
    guessed with chardet from the raw bytes, the file is read as text with
    that encoding and then rewritten in place as UTF-8. Files whose encoding
    cannot be determined or that fail to decode are reported and left
    untouched instead of aborting the whole run.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            # Nothing below an excluded directory needs to be visited.
            subdirs[:] = []
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            print(file)
            with open(file, 'rb') as f:
                c = chardet.detect(f.read())
            detected = c['encoding']
            if not detected:
                # Without a detected encoding, open() would fall back to the
                # locale default and could silently corrupt the text.
                print("skip (unknown encoding):", file)
                continue
            try:
                # Decoding with a BOM-aware codec drops the BOM here.
                with open(file, mode='r', encoding=detected) as src:
                    s = src.read()
            except (UnicodeDecodeError, LookupError) as err:
                # Wrong guess or a codec Python does not know; the file has
                # not been opened for writing yet, so it is still intact.
                print("skip (cannot decode):", file, err)
                continue
            with open(file, mode='w', encoding='utf-8') as dst:
                dst.write(s)
            count += 1
    print("共", count, "个文件,转换完毕")


if __name__ == '__main__':
    main()
执行结果如下
三 code转换代码为pdf工具
如果代码没有上面提到的问题,想转成 PDF 的可以参考此工具
- 参数设置 -dst "./aa" -out codeaa.pdf dart
- 其中 ./aa 为代码目录, codeaa.pdf 为生成文件 dart是代码后缀
import argparse
import os
import sys
from glob import glob
import html
from xhtml2pdf import pisa # import python module
# Source lines longer than this many characters get a line break inserted.
BREAK_LIMIT = 110
# Default PDF file name.
# NOTE(review): the -out command-line option carries its own "out.pdf"
# default, so this constant looks unused in this script - confirm.
output_filename = "out.pdf"
# Page skeleton for the whole document; the %%%code%%% placeholder is
# replaced with the concatenated per-file sections before rendering.
# NOTE(review): "overflow: hide" is not a standard CSS value ("hidden" is);
# confirm how the PDF renderer treats it before changing the template.
MAIN_TEMPLATE = """
<html>
<style>
body{
font-family:STSong-Light;
}
.code {
border: 1px solid grey;
padding: 1px;
overflow: hide;
font-family:STSong-Light;
}
pre {
font-size:12px;
font-family:STSong-Light;
}
</style>
<body>
<h1>Code Listing</h1>
%%%code%%%
</body>
</html>"""
# Section emitted once per source file: %%%name%%% is replaced with the file
# path (with the source folder prefix removed) and %%%snippet%%% with the
# HTML-escaped file contents.
CODE_TEMPLATE = """
<hr><div>
<h1>%%%name%%%</h1>
<div class="code">
<pre>%%%snippet%%%</pre>
</div>
</div>"""
# ACCEPTED_EXTENSIONS
def convert_html_to_pdf(source_html, output_filename):
    """Render *source_html* into a PDF file at *output_filename*.

    Args:
        source_html: HTML markup handed to ``pisa.CreatePDF``.
        output_filename: Path of the PDF file to create or overwrite.

    Returns:
        The ``err`` attribute of the status object returned by
        ``pisa.CreatePDF``.
    """
    # The context manager closes the output file even if rendering raises.
    with open(output_filename, "w+b") as result_file:
        pisa_status = pisa.CreatePDF(
            source_html,       # the HTML to convert
            dest=result_file)  # file handle that receives the result
    return pisa_status.err
def _wrap_line(line, limit):
    """Return *line* with a newline inserted after every *limit* characters.

    The line terminator is not counted against the limit and is kept at the
    end, so a line of exactly *limit* characters is returned unchanged.
    """
    body = line.rstrip("\r\n")
    if len(body) <= limit:
        return line
    ending = line[len(body):]
    pieces = [body[i:i + limit] for i in range(0, len(body), limit)]
    return "\n".join(pieces) + ending


if __name__ == "__main__":
    # Command line: -dst <source folder> [-out <pdf name>] ext [ext ...]
    parser = argparse.ArgumentParser(
        description='Convert files in destination into a pdf')
    parser.add_argument('-dst',
                        help='folder containing your source files',
                        required=True)
    parser.add_argument('-out',
                        help='name of pdf to output',
                        default="out.pdf")
    parser.add_argument('ext',
                        help='list of file extensions to parse',
                        nargs="*")
    args = parser.parse_args()

    # Collect matching files recursively, one extension at a time.
    # os.path.join copes with a missing or platform-specific trailing
    # separator, and a leading dot on the extension is tolerated.
    files_grabbed = []
    for ext in args.ext:
        pattern = os.path.join(args.dst, "**", "*." + ext.lstrip("."))
        files_grabbed.extend(sorted(glob(pattern, recursive=True)))

    sections = []
    for source_path in files_grabbed:
        try:
            with open(source_path, 'r', encoding='UTF-8') as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as err:
            # One unreadable or non-UTF-8 file should not abort the export.
            print("skip " + source_path + ": " + str(err), file=sys.stderr)
            continue
        # Break over-long lines at every BREAK_LIMIT characters, not only
        # once, so very long lines cannot run off the page.
        contents = "".join(_wrap_line(line, BREAK_LIMIT) for line in lines)
        contents = contents.replace("\t", " ")
        # Both the displayed name and the code are escaped so characters
        # such as "<" or "&" cannot break the generated HTML.
        display_name = os.path.relpath(source_path, args.dst)
        sections.append(
            CODE_TEMPLATE
            .replace("%%%name%%%", html.escape(display_name))
            .replace("%%%snippet%%%", html.escape(contents)))

    pisa.showLogging()
    errors = convert_html_to_pdf(
        MAIN_TEMPLATE.replace("%%%code%%%", "".join(sections)), args.out)
    if errors:
        # Surface renderer errors through the exit status instead of
        # silently ignoring the return value.
        print("PDF generation reported errors", file=sys.stderr)
        sys.exit(1)
- 执行结果
4.生成文件
5.存在的问题和缺陷
生成的文件在电脑上看没什么问题,但你的电子书阅读器不一定支持其中的编码方式,可能仍然显示乱码。解决办法有以下几个:
- pdf编辑的工具打开,另存为图像格式的,缺点文件较大
- 用专业的pdf工具打开,调整编码相关的设置,具体请自行查询
- pdf编辑工具打开,ctrl a 然后ctrl c ,新建word文档 ctrl v 保存后,邮件发送到kindle。
四 code转换为word工具
如果不喜欢pdf格式的 可以直接转换到word文档,可以使用下面工具设置部分如下
-
命令参数 -s .\dp\* -m "deep" -t e:\testjava.docx -i "java|yml"
-
.\dp\* 代码路径(* 表示依次处理 .\dp 下的每个子目录),设置为你想要的
-
e:\testjava.docx 生成的文件
-
"java|yml" 代码的后缀
import os, sys, getopt
import codecs
from docx import Document
# Single in-memory Word document shared by convert(), walkflat() and
# walkdeep(); it is written to disk once at the end of the script.
doc = Document()
# Paths of files that could not be read or added to the document; the list
# is printed after the document has been saved.
errorlist = []
def convert(dir, mode='flat', title=None, include=None, exclude=None, encoding='utf-8'):
    """Append the source files found under *dir* to the shared document.

    Args:
        dir: Directory to scan.
        mode: ``'flat'`` delegates to walkflat(), ``'deep'`` delegates to
            walkdeep() starting at heading level 2; any other value only
            prints a message.
        title: Optional level-1 heading added before the files.
        include: ``'|'``-separated extensions to include, or None for all.
        exclude: ``'|'``-separated extensions to skip, or None for none.
        encoding: Text encoding used to read the files.
    """
    print('copy from diretory: ' + dir)
    if title is not None:
        doc.add_heading(title, 1)

    # Turn the "a|b|c" filter strings into lists; None means "no filter".
    inc = None if include is None else include.split('|')
    exc = None if exclude is None else exclude.split('|')

    if mode not in ('flat', 'deep'):
        print('mode is invaild')
        return
    if mode == 'flat':
        walkflat(dir, inc, exc, encoding)
    else:
        walkdeep(dir, 2, inc, exc, encoding)
def walkflat(dir, inc, exc, encoding):
    """Add every matching file under *dir* to the document, grouped by folder.

    The tree is walked bottom-up. Each time the containing folder changes, a
    level-2 heading with the folder path relative to *dir* is added; every
    file then gets a level-3 heading, its contents as one paragraph and a
    page break.

    Args:
        dir: Root directory to scan.
        inc: List of extensions (without dot) to include, or None for all.
        exc: List of extensions (without dot) to skip, or None for none.
        encoding: Text encoding used to read the files.
    """
    currentdir = ''
    for root, dirs, files in os.walk(dir, topdown=False):
        for file in files:
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            # Normalize Windows separators so printed paths are uniform.
            filepath = os.path.join(root, file).replace('\\', '/')
            try:
                with codecs.open(filepath, encoding=encoding) as f:
                    content = f.read()
                # relpath stays correct when *dir* ends with a separator,
                # which offset-based slicing of the path does not.
                thisdir = os.path.relpath(root, dir).replace('\\', '/')
                if thisdir == '.':
                    # Files directly inside *dir* get no folder heading.
                    thisdir = ''
                if currentdir != thisdir:
                    currentdir = thisdir
                    doc.add_heading(thisdir, 2)
                    print('into directory ' + thisdir)
                doc.add_heading(file, 3)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + file)
            except Exception as e:
                # Deliberately broad: any failure to read or insert one file
                # is recorded and reported, and the scan continues.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))
def walkdeep(root, level, inc, exc, encoding):
    """Recursively add *root* to the document, mirroring the folder nesting.

    Every matching file gets a heading at *level*, its contents as one
    paragraph and a page break. Every sub-directory gets a heading at *level*
    and is then processed recursively at ``level + 1``.

    Args:
        root: Directory to process.
        level: Heading level used for the entries of *root*.
        inc: List of extensions (without dot) to include, or None for all.
        exc: List of extensions (without dot) to skip, or None for none.
        encoding: Text encoding used to read the files.
    """
    # python-docx only accepts heading levels up to 9, so directories are
    # only descended into while their children still fit below that limit.
    max_level = 9
    # Sorted so the generated document has a stable, predictable order.
    for file in sorted(os.listdir(root)):
        filepath = os.path.join(root, file).replace('\\', '/')
        if os.path.isfile(filepath):
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            try:
                with codecs.open(filepath, encoding=encoding) as f:
                    content = f.read()
                doc.add_heading(file, level)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + file)
            except Exception as e:
                # Deliberately broad: any failure to read or insert one file
                # is recorded and reported, and the scan continues.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))
        elif not os.path.isdir(filepath):
            # Neither a regular file nor a directory (e.g. a broken link).
            continue
        elif level < max_level:
            doc.add_heading(file, level)
            print('into directory ' + file)
            walkdeep(filepath, level + 1, inc, exc, encoding)
        else:
            # Report the omission instead of dropping the subtree silently.
            print('skip directory ' + filepath + ' (nested too deeply)')
if __name__ == '__main__':
    # Defaults for the command-line options.
    src = None
    mode = 'flat'
    target = None
    include = None
    exclude = None
    encoding = 'utf-8'
    # Double quotes outside so the quoted 'flat|deep' no longer terminates
    # the string literal.
    myhelp = ("run.py -s <source directory path> -m 'flat|deep' "
              "-t <target docx file path> "
              "-i <include extension of scanned files> "
              "-e <exclude extension of scanned files> "
              "-c <encoding of the files>")
    argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(
            argv, 'hs:m:t:i:e:c:',
            ['help', 'source=', 'mode=', 'target=', 'include=', 'exclude=',
             'encoding='])
    except getopt.GetoptError as err:
        # Unknown option or missing argument: explain and show the usage.
        print(err)
        print(myhelp)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(myhelp)
            # Asking for help is not an error.
            sys.exit(0)
        elif opt in ('-s', '--source'):
            src = arg
        elif opt in ('-m', '--mode'):
            mode = arg
        elif opt in ('-t', '--target'):
            target = arg
        elif opt in ('-i', '--include'):
            include = arg
        elif opt in ('-e', '--exclude'):
            exclude = arg
        elif opt in ('-c', '--encoding'):
            encoding = arg
    if src is None or target is None:
        print('source and target is needed')
        sys.exit(2)

    pos = src.find('*')
    if pos == -1:
        convert(src, mode=mode, include=include, exclude=exclude, encoding=encoding)
    else:
        # A "*" in the source path is expanded to every entry of the
        # directory named by the text before it; each resulting directory is
        # converted under its own level-1 title.
        presrc = src[0:pos]
        rest = src[pos + 1:]
        for entry in sorted(os.listdir(presrc or '.')):
            candidate = presrc + entry + rest
            if not os.path.isdir(candidate):
                # Plain files (or entries without the trailing part of the
                # pattern) cannot be scanned as a source directory.
                continue
            convert(candidate, mode=mode, title=entry, include=include,
                    exclude=exclude, encoding=encoding)

    doc.save(target)
    print('\nfinish copying, your document is saved into "'+target+'" , thanks for your using!')
    if len(errorlist) != 0:
        print('\nerror file list:\n')
        for e in errorlist:
            print(e)
- 执行结果
6. 生成文件截图示例
7. 缺点是目前只适用于目录层级在10级以内的代码目录
以上程序运行环境为windows10 pycharm 社区版
以上是针对懂点python的同学,如果有需要pc版本的,可以私信我 如果需要的人多,可能可以考虑弄个玩玩。