有时候用电脑看代码不方便，正好有个 Kindle 在吃灰，于是把一些学习资料里的代码整理了一下放到 Kindle 上，没事可以读读看。

工具的部分源码来源于网络，经过自己整理后适合自己使用，相信有些功能大家也可能用得到。具体使用方法我会在下面说明，如果还有问题可以联系我。

文章包含以下四个工具，按可能的使用顺序介绍：

一、去重相同代码文件的工具
二、ansi 编码转换为 utf8 编码的工具
三、code 转换为 pdf 的工具
四、code 转换为 word 的工具

一、去重工具代码。因为用得少，没有时间对代码多做整理，使用时请大家自己调整其中的部分代码以适应自己的需要，能用就行。

这个去重工具判定的依据是文件的 md5 码。使用场景：很多相同的文件分散在不同目录，直接做 code2pdf 或者 code2word 的话重复内容太多，没法看。如果你的 code 没有这样的问题，就不需要执行这个工具。
指定要处理的目录，例如 root_path = r".\aa"
suffix_list 定义的部分，大家需要看哪种代码，就把哪种代码的后缀放在里面
os.system("rm " + fileMap[xmd5]) 部分，大家需要的话可以根据自己的系统调整，或者写成 os.remove(fileMap[xmd5])。请谨慎使用，默认是关闭的。我这里是把命令打印保存出来，自己确认之后再手工执行
```
print("\nrm " +fileMap[xmd5])
```

import hashlib
import os


# Root directory to scan.
root_path = r"e:\aa"
# Path prefixes to skip; each one is compared with str.startswith against the
# directory paths produced by os.walk(root_path).
# NOTE(review): with root_path = r"e:\aa" the walked paths begin with
# "e:\aa", so "./build" looks like it can never match — confirm.
exclude_path_list = [r"./build", ]
# File suffixes to process. The second assignment replaces the first, so only
# ".dart" files are examined as written.
suffix_list = [r".cpp", r".c", r".java",  ]
suffix_list = [r".dart", ]
# Running total of files that were hashed.
count = 0
# Maps an MD5 hex digest to the path of the most recent file seen with it.
fileMap={}

def check_file_suffix(file, suffix_list):
    """Tell whether ``file`` ends with at least one suffix from ``suffix_list``.

    Returns True on the first matching suffix and False when none match
    (including when ``suffix_list`` is empty).
    """
    return any(file.endswith(suffix) for suffix in suffix_list)

def check_path_exclude(path, exclude_path_list):
    """Tell whether ``path`` begins with any prefix in ``exclude_path_list``.

    Returns True on the first matching prefix and False when none match
    (including when ``exclude_path_list`` is empty).
    """
    return any(path.startswith(prefix) for prefix in exclude_path_list)

def GetFileMd5(filename):
    """Return the MD5 hex digest of a file's contents.

    Args:
        filename: Path of the file to hash.

    Returns:
        The 32-character hexadecimal digest, or None when ``filename`` is not
        an existing regular file.
    """
    if not os.path.isfile(filename):
        return None
    digest = hashlib.md5()
    # The context manager closes the handle even if a read fails part-way,
    # which the previous open()/close() pair did not guarantee.
    with open(filename, 'rb') as f:
        # Read in fixed-size chunks so large files are never fully in memory.
        for chunk in iter(lambda: f.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()

def main():
    """Walk ``root_path`` and print an ``rm`` line for each duplicate file.

    Files are compared by the MD5 digest of their contents. Directories
    matched by ``exclude_path_list`` are skipped and only files whose name
    ends with an entry of ``suffix_list`` are hashed. When a digest has been
    seen before, the previously recorded path is printed as ``rm <path>`` and
    the current file becomes the recorded one. Nothing is deleted here: the
    output is meant to be reviewed and executed by hand.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            xmd5 = GetFileMd5(file)
            if xmd5 is None:
                # GetFileMd5 returns None for anything that is not a regular
                # file; recording None as a key would make every such entry
                # look like a duplicate of the previous one.
                continue
            count += 1
            if xmd5 in fileMap:
                print("\nrm " +fileMap[xmd5])
            fileMap[xmd5] = file


if __name__ == '__main__':
    main()

执行结果如下

确认屏幕输出然后保存为批处理文件，或者复制了输出，直接打开控制台粘贴回车即可删除里面重复的内容了。

二 ansi编码转换为utf8编码工具

有时候会遇到一些 code 源文件的编码不是 utf8，直接转换出来是乱码。如果有需要，可以使用这个工具。它和上一个工具的结构差不多，配置部分也基本相同，参考上面的说明设置成适合你要求的即可。

代码如下

import os
import chardet

# Root directory to scan.
root_path = r"./dp"

# Path prefixes to skip; each one is compared with str.startswith against the
# directory paths produced by os.walk(root_path).
exclude_path_list = [r"./build", ]
# Suffixes of the files that will be re-encoded.
suffix_list = [r".cpp", r".c", r".java", r".dart", ]
# Running total of converted files.
count = 0
def check_file_suffix(file, suffix_list):
    """Report whether the name ``file`` ends with one of ``suffix_list``.

    The suffixes are tried in order; the result is False when none of them
    match or the list is empty.
    """
    return any(map(file.endswith, suffix_list))

def check_path_exclude(path, exclude_path_list):
    """Report whether ``path`` starts with one of ``exclude_path_list``.

    The prefixes are tried in order; the result is False when none of them
    match or the list is empty.
    """
    return any(map(path.startswith, exclude_path_list))

def main():
    """Re-encode every matching file under ``root_path`` as UTF-8, in place.

    Directories matched by ``exclude_path_list`` are skipped and only files
    whose name ends with an entry of ``suffix_list`` are touched. The source
    encoding is guessed with chardet from the raw bytes. Files whose encoding
    cannot be detected are reported and left unchanged instead of being
    decoded with the platform default encoding.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            print(file)
            # Close the binary handle before the file is reopened for writing.
            with open(file, 'rb') as f:
                detected = chardet.detect(f.read())
            source_encoding = detected['encoding']
            if source_encoding is None:
                # chardet could not identify the encoding; rewriting the file
                # with a guessed codec could corrupt it.
                print("跳过（无法识别编码）:", file)
                continue
            # The whole text is read before the file is opened for writing,
            # so a decode error leaves the original content untouched.
            with open(file, mode='r', encoding=source_encoding) as f:
                text = f.read()
            with open(file, mode='w', encoding='utf-8') as f:
                f.write(text)
            count += 1
    print("共", count, "个文件，转换完毕")


if __name__ == '__main__':
    main()

执行结果如下

三 code转换代码为pdf工具

如果代码没有上面提到的问题（重复文件、编码不是 utf8），想转成 pdf 的可以参考此工具。

参数设置 -dst "./aa" -out codeaa.pdf dart
其中 ./aa 为代码目录， codeaa.pdf 为生成文件 dart是代码后缀

import argparse
import os
import sys
from glob import glob
import html
from xhtml2pdf import pisa  # import python module

# Maximum number of characters per line; longer source lines are split when
# the listing is built.
BREAK_LIMIT = 110
# NOTE(review): not referenced by the code visible in this script (the -out
# option carries its own "out.pdf" default) — confirm before removing.
output_filename = "out.pdf"
# Page skeleton for the PDF; the %%%code%%% placeholder is replaced with the
# concatenated per-file sections.
# NOTE(review): "overflow: hide" is not a standard CSS value ("hidden" is) —
# confirm the intent before changing the template.
MAIN_TEMPLATE = """
<html>
<style>

body{
font-family:STSong-Light;
}
.code {
border: 1px solid grey;
padding: 1px;
overflow: hide;
font-family:STSong-Light;
}
pre {
font-size:12px;
font-family:STSong-Light;
}
</style>
<body>
<h1>Code Listing</h1>
%%%code%%%
</body>
</html>"""
# Per-file section; %%%name%%% receives the file name and %%%snippet%%% the
# HTML-escaped file contents.
CODE_TEMPLATE = """
<hr><div>
    <h1>%%%name%%%</h1>
    <div class="code">
        <pre>%%%snippet%%%</pre>
    </div>
</div>"""
# Accepted extensions are not hard-coded here; they come from the positional
# ``ext`` command-line arguments.



def convert_html_to_pdf(source_html, output_filename):
    """Render ``source_html`` into a PDF written to ``output_filename``.

    Args:
        source_html: The HTML markup to convert.
        output_filename: Path of the PDF file to create or overwrite.

    Returns:
        The ``err`` attribute of the status object returned by
        ``pisa.CreatePDF``; presumably truthy when the conversion reported
        errors — verify against the xhtml2pdf documentation.
    """
    # The context manager closes the output file even if CreatePDF raises.
    with open(output_filename, "w+b") as result_file:
        pisa_status = pisa.CreatePDF(
            source_html,  # the HTML to convert
            dest=result_file)  # file handle to receive the result
    return pisa_status.err


def split_long_line(line, limit):
    """Break ``line`` into pieces of at most ``limit`` characters.

    Args:
        line: One line of text, with or without its trailing newline.
        limit: Maximum number of characters per piece; must be positive.

    Returns:
        The pieces joined with newlines. The trailing newline of ``line`` is
        not counted toward the limit and is kept at the end, so a line that
        is exactly ``limit`` characters long is returned unchanged.
    """
    body = line.rstrip("\n")
    if len(body) <= limit:
        return line
    ending = line[len(body):]
    pieces = [body[i:i + limit] for i in range(0, len(body), limit)]
    return "\n".join(pieces) + ending


if __name__ == "__main__":
    # Command line: -dst <source folder> [-out <pdf name>] <ext> [<ext> ...]
    parser = argparse.ArgumentParser(
        description='Convert files in destination into a pdf')
    parser.add_argument('-dst',
                        help='folder containing your source files',
                        required=True)
    parser.add_argument('-out',
                        help='name of pdf to output',
                        default="out.pdf")
    parser.add_argument('ext',
                        help='list of file extensions to parse',
                        nargs="*")
    args = parser.parse_args()
    if not args.dst:
        # An empty folder would otherwise turn into "/" and scan the root.
        parser.error("-dst must not be empty")
    if not args.dst.endswith(("/", os.sep)):
        args.dst += "/"

    # Collect every file below dst whose extension was requested.
    files_grabbed = []
    for ext in args.ext:
        # Accept both "dart" and ".dart" on the command line.
        pattern = args.dst + "**/*." + ext.lstrip(".")
        files_grabbed.extend(glob(pattern, recursive=True))

    sections = []
    for file in files_grabbed:
        with open(file, 'r', encoding='UTF-8') as f:
            contents = "".join(
                split_long_line(line, BREAK_LIMIT) for line in f)
        # Halve four-space indents, then expand tabs to four spaces.
        contents = contents.replace("    ", "  ")
        contents = contents.replace("\t", "    ")
        # Strip only the leading dst prefix; str.replace would also remove
        # later occurrences of the same text inside the path.
        if file.startswith(args.dst):
            display_name = file[len(args.dst):]
        else:
            display_name = file
        sections.append(
            CODE_TEMPLATE
            .replace("%%%name%%%", html.escape(display_name))
            .replace("%%%snippet%%%", html.escape(contents)))

    pisa.showLogging()
    errors = convert_html_to_pdf(
        MAIN_TEMPLATE.replace("%%%code%%%", "".join(sections)), args.out)
    if errors:
        # Surface converter failures instead of exiting as if all went well.
        sys.exit("xhtml2pdf reported errors while writing " + args.out)

执行结果

4.生成文件

5.存在的问题和缺陷：生成的文件在电脑上看没什么问题，但你的电子书阅读器不一定支持其中的编码方式，可能仍然显示为乱码。解决的办法有以下几个：

用 pdf 编辑工具打开，另存为图像格式，缺点是文件较大
用专业的 pdf 工具打开，调整编码方面的设置，具体方法请自行查询
用 pdf 编辑工具打开，ctrl a 全选后 ctrl c 复制，新建 word 文档 ctrl v 粘贴，保存后用邮件发送到 kindle。

四 code转换为word工具

如果不喜欢pdf格式的可以直接转换到word文档，可以使用下面工具设置部分如下

命令参数：-s .\dp\* -m "deep" -t e:\testjava.docx -i "java|yml"
.\dp\* 代码路径（* 表示依次处理 .\dp 下的每个子目录），设置为你想要的路径
e:\testjava.docx 生成的文件
"java|yml" 需要包含的代码文件后缀，多个后缀用 | 分隔

import os, sys, getopt
import codecs
from docx import Document

# The single Word document that the functions below append to; it is saved
# once at the end of the script.
doc = Document()
# Paths of files that raised an error while being read or added.
errorlist = []


def convert(dir, mode='flat', title=None, include=None, exclude=None, encoding='utf-8'):
    """Append the source files found under ``dir`` to the shared document.

    Args:
        dir: Directory to scan.
        mode: 'flat' dispatches to walkflat, 'deep' to walkdeep (starting at
            heading level 2); any other value only prints a message.
        title: Optional level-1 heading added before the files.
        include: '|'-separated extensions to accept, or None for all.
        exclude: '|'-separated extensions to reject, or None for none.
        encoding: Text encoding used to read the files.
    """
    print('copy from diretory: ' + dir)

    if title is not None:
        doc.add_heading(title, 1)

    # Turn the '|'-separated option strings into lists; None means no filter.
    inc = None if include is None else include.split('|')
    exc = None if exclude is None else exclude.split('|')

    if mode == 'deep':
        walkdeep(dir, 2, inc, exc, encoding)
    elif mode == 'flat':
        walkflat(dir, inc, exc, encoding)
    else:
        print('mode is invaild')


def walkflat(dir, inc, exc, encoding):
    """Append every accepted file below ``dir`` to the shared document.

    The tree is walked bottom-up. A level-2 heading with the directory path
    relative to ``dir`` is added whenever that path differs from the previous
    file's, and each file gets a level-3 heading, its text as a paragraph and
    a page break.

    Args:
        dir: Root directory of the walk. The relative-path slice assumes it
            has no trailing separator.
        inc: Extensions (without the dot) to accept, or None for all.
        exc: Extensions (without the dot) to reject, or None for none.
        encoding: Text encoding used to read the files.

    Files that cannot be read or added are recorded in ``errorlist`` and
    reported; the walk then continues with the next file.
    """
    currentdir = ''
    for root, dirs, files in os.walk(dir, False):
        for file in files:
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            # Normalise Windows separators so the slicing below can rely on
            # '/' only.
            filepath = os.path.join(root, file).replace('\\', '/')
            filename = filepath[filepath.rfind('/') + 1:]
            try:
                with codecs.open(filepath, encoding=encoding) as f:
                    content = f.read()
                thisdir = filepath[len(dir) + 1:filepath.rfind('/')]
                if currentdir != thisdir:
                    currentdir = thisdir
                    doc.add_heading(thisdir, 2)
                    print('into directory ' + thisdir)
                doc.add_heading(filename, 3)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + filename)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the
                # whole export.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))


def walkdeep(root, level, inc, exc, encoding):
    """Recursively append the files below ``root``, nesting headings by depth.

    Each accepted file gets a heading at ``level``, its text as a paragraph
    and a page break. Each sub-directory gets a heading at ``level`` and is
    then walked at ``level + 1``.

    Args:
        root: Directory whose entries are processed.
        level: Heading level used for the entries of ``root``.
        inc: Extensions (without the dot) to accept, or None for all.
        exc: Extensions (without the dot) to reject, or None for none.
        encoding: Text encoding used to read the files.

    Files that cannot be read or added are recorded in ``errorlist`` and
    reported; processing then continues with the next entry.
    """
    for file in os.listdir(root):
        # Normalise Windows separators so the slicing below can rely on '/'.
        filepath = os.path.join(root, file).replace('\\', '/')
        if os.path.isfile(filepath):
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            filename = filepath[filepath.rfind('/') + 1:]
            try:
                with codecs.open(filepath, encoding=encoding) as f:
                    content = f.read()
                doc.add_heading(filename, level)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + filename)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the
                # whole export.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))
        elif os.path.isdir(filepath) and level < 9:
            # Entries that are neither files nor directories (for example
            # broken links) are skipped rather than passed to os.listdir.
            # Directories are not entered once level reaches 9, presumably
            # because python-docx only accepts heading levels 0-9 — verify.
            doc.add_heading(file, level)
            print('into directory ' + file)
            walkdeep(filepath, level + 1, inc, exc, encoding)

if __name__ == '__main__':
    # Command-line entry point: parse the options, export the source tree(s)
    # into the shared document, save it, and list the files that failed.
    src = None
    mode = 'flat'
    target = None
    include = None
    exclude = None
    encoding = 'utf-8'
    myhelp = ("run.py -s <source directory path> -m 'flat|deep' "
              "-t <target docx file path> "
              "-i <include extension of scanned files> "
              "-e <exclude extension of scanned files> "
              "-c <encoding of the files>")
    argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(
            argv, 'hs:m:t:i:e:c:',
            ['source=', 'mode=', 'target=', 'include=', 'exclude=', 'encoding='])
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        print(myhelp)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(myhelp)
            # Asking for help is not an error.
            sys.exit(0)
        elif opt in ('-s', '--source'):
            src = arg
        elif opt in ('-m', '--mode'):
            mode = arg
        elif opt in ('-t', '--target'):
            target = arg
        elif opt in ('-i', '--include'):
            include = arg
        elif opt in ('-e', '--exclude'):
            exclude = arg
        elif opt in ('-c', '--encoding'):
            encoding = arg

    if src is None or target is None:
        print('source and target is needed')
        sys.exit(2)

    pos = src.find('*')
    if pos == -1:
        convert(src, mode=mode, include=include, exclude=exclude, encoding=encoding)
    else:
        # A '*' in the source stands for "every directory found here": the
        # text before it is listed and each entry is substituted for the '*'.
        presrc = src[0:pos]
        rest = src[pos + 1:]
        # An empty prefix means the current directory.
        for entry in os.listdir(presrc or '.'):
            candidate = presrc + entry + rest
            if not os.path.isdir(candidate):
                # Plain files next to the project folders are not sources.
                continue
            convert(candidate, mode=mode, title=entry, include=include,
                    exclude=exclude, encoding=encoding)

    doc.save(target)
    print('\nfinish copying, your document is saved into "'+target+'" , thanks for your using!')
    if len(errorlist) != 0:
        print('\nerror file list:\n')
        for e in errorlist:
            print(e)