任务一
1.将文件解析为16进制格式
2.将解析文件移入自定义文件夹
3.对解析文件自定义均分
from ntpath import join
import os
import re
import shutil
from unicodedata import name
import pandas as pd
import sys
from tqdm import tqdm
'''
将原始.TXT文件解析为16进制
IN: 输入文件地址,输出文件地址
F: ord(c)转换为ASCII码
'''
def convert2hex(inputfile,outputfile):
f = open(inputfile, "rb")
outfile = open(outputfile, "w")
i = 0
while 1:
c = f.read(1)
i = i + 1
if not c:
break
else:
if ord(c) <= 15:
outfile.write(("0x0"+hex(ord(c))[2:])[2:]+" ")
else:
outfile.write((hex(ord(c)))[2:]+" ")
outfile.close()
f.close()
'''
将.log文件移入自定义文件夹(LOG)
'''
def movelog(absolutepath):
list = os.listdir(absolutepath)
for f in tqdm(list):
if f.find(".log") != -1:
dst_dir = absolutepath+ "LOG" +"\\"
oldname = absolutepath + f
newname = dst_dir + f
if os.path.exists(dst_dir):
os.rename(oldname, newname)
else:
os.mkdir(dst_dir)
os.rename(oldname, newname)
else:
pass
'''
将log按照每num个分成一份
IN: 文件路径,num
e.g:分割后的文件树
LOG(一级文件夹)
|20210418(15-18)(二级文件夹)
|1.log
|2.log
...
|num.log
|20210418(19-22)
|1.log
|2.log
...
|num.log
|20210418(23-02)
|1.log
|2.log
...
|num.log
'''
def splitlog(absolutepath,num):
file_path = absolutepath
new_file_path = absolutepath
list = os.listdir(absolutepath)
cnt = 0
num_file = int(len(list)/num)
for n in tqdm(range(1, num_file + 1)):
headname = list[num*cnt].split(".log")[0]
footname = list[num*(cnt+1)-1].split(".log")[0]
nametime = headname.split("-")[:3]
nametime = ''.join(nametime)
new_file = os.path.join(new_file_path + nametime +
'('+str(headname.split("-")[3])+"-"+
str(footname.split("-")[3])+')')
os.mkdir(new_file)
list_n = list[num * cnt:num * (cnt + 1)]
for m in list_n:
old_path = os.path.join(file_path, m)
new_path = os.path.join(new_file, m)
os.rename(old_path, new_path)
cnt += 1
'''
批量处理将.TXT转为.log
'''
def muliconverthex(o_dir):
dirs = os.listdir(o_dir)
for dir in tqdm(dirs):
if dir.find(".TXT") != -1:
name = o_dir+"\\"+dir.split(".TXT")[0]
convert2hex(name+".TXT",name+".log")
else:
pass
if __name__ == '__main__':
o_dir = sys.argv[1]
muliconverthex(o_dir)
movelog(o_dir)
splitlog(o_dir+"\\LOG\\",4)
任务二
1.解析文件内容自定义分割
2.分割后文件处理存为xlsx
3.xlsx文件合并为csv
import os
import re
import shutil
import pandas as pd
import sys
from tqdm import tqdm
'''
将16进制LOG文件转换为规定格式的TXT文件
IN:
search_text需要替换的文本
replace_text替换后的文本
dirfiles包含log文件的文件夹
absolutepath文件夹的绝对路径
F:
data.replace(search_text, replace_text)
e.g:
search_text = "85 00 00"
replace_text = "\n85 00 00"
'''
def LOG2TXT(search_text,replace_text,dirfiles,absolutepath):
for dir in tqdm(dirfiles):
if dir.find(".log") != -1:
name = absolutepath+"\\"+dir.split(".log")[0]
with open(name + ".log", 'r',encoding='UTF-8') as file:
data = file.read()
data = data.replace(search_text, replace_text)
file.close()
with open(name + ".txt", 'w',encoding='UTF-8') as fl:
fl.write(data)
fl.close()
else:
pass
'''
将txt文件转换为xlsx
IN:dirfiles包含txt文件的文件夹
absolutepath文件夹的绝对路径
F:
1.使用.read_csv()读取txt文件,跳过第一行和最后一行,保留3,4,5列,并命名为'KEY','RAW_H','RAW_L'
2.将'RAW_H','RAW_L'的10进制计算结果放在'RAW_HD','RAW_LD'列;将'RAW_HD','RAW_LD'的和放入RAW列
3.将上述文件存入xlsx的sheet1中
4.将'KEY'=i 对应的'RAW'值写入sheet2的第i列,并命名为RAWi;i为数字0到6
5.保存xlsx
'''
def TXT2XLSX(dirfiles,absolutepath):
for dir in tqdm(dirfiles):
if dir.find(".txt") != -1:
name = absolutepath+"\\"+dir.split(".txt")[0]
data = pd.read_csv(name + ".txt",
sep="\s+",
usecols=[3,4,5],
skiprows=1,
skipfooter=1,
header=None,
names=['KEY','RAW_H','RAW_L'],
engine='python')
data['RAW_HD'] = data['RAW_H'].str[:].apply(lambda x: int(x,16)*256)
data['RAW_LD'] = data['RAW_L'].str[:].apply(lambda x: int(x,16))
data.eval("RAW = RAW_HD + RAW_LD ",inplace = True)
writer = pd.ExcelWriter(name+'.xlsx')
data.to_excel(writer, sheet_name='Sheet1',
startrow=0, startcol=0, header=True, index=False)
for i in range(0,7):
DFK = data.loc[data['KEY'] == i, ['RAW']]
DFK.rename(columns = {'RAW':'RAW'+str(i)}, inplace = True)
DFK.to_excel(writer, sheet_name='Sheet2',
startrow=0, startcol=i, header=True, index=False)
writer.save()
else:
pass
'''
将所有xlsx中sheet2的值合并到csv中
IN:
i 文件夹名
absolutepath 文件夹所在的绝对地址
dirfiles 文件名
F: 使用.to_csv()写入,写入模式为"a"不覆盖写入
'''
def APPENDXLSX(i,dirfiles,absolutepath):
for dir in tqdm(dirfiles):
if dir.find(".xlsx") != -1:
name = absolutepath+"\\"+dir.split(".xlsx")[0]
print(name)
data = pd.read_excel(name + ".xlsx", sheet_name="Sheet2")
data.to_csv(absolutepath+"\\"+str(i)+"_"+'ALL.csv', mode='a', index=False, header=0)
else:
pass
'''
文件夹循环
IN: o_dir 一级文件夹
search_text,replace_text同函数LOG2TXT()
e.g:
DIR (一级文件夹)
|DIR1(二级文件夹)
|FILE11(文件)
|FILE12
...
|DIR2(二级文件夹)
|FILE21
|FILE22
...
|DIR3(二级文件夹)
|FILE31
|FILE32
...
'''
def loopdirs(o_dir,search_text,replace_text):
dirs = os.listdir(o_dir)
for i in tqdm(dirs):
absolutepath = o_dir + i+"\\"
dirfiles = os.listdir(absolutepath)
LOG2TXT(search_text,replace_text,dirfiles,absolutepath)
for i in tqdm(dirs):
absolutepath = o_dir + i+"\\"
dirfiles = os.listdir(absolutepath)
TXT2XLSX(dirfiles,absolutepath)
for i in tqdm(dirs):
absolutepath = o_dir + i+"\\"
dirfiles = os.listdir(absolutepath)
APPENDXLSX(i,dirfiles,absolutepath)
if __name__ == '__main__':
o_dir = sys.argv[1]
search_text = "85 00 00"
replace_text = "\n85 00 00"
loopdirs(o_dir,search_text,replace_text)
其他应用
正则表达式替换字符串
'''
处理文件结构
DIR (一级文件夹)
|DIR1(二级文件夹)
|FILE11(文件)
|FILE12
运行格式: python XXX.py "F:\\01data\\方位数据\\"
'''
import os
import re
import shutil
from pandas import DataFrame
import pandas as pd
import sys
from tqdm import tqdm
text1 = re.compile("\[2022-05-25 \d{2}:\d{2}:\d{2}.\d{3}\]# RECV HEX>")
o_dir = sys.argv[1]
dirs = os.listdir(o_dir)
for i in tqdm(dirs):
absolutepath = o_dir + i+"\\"
dirfiles = os.listdir(absolutepath)
for dir in tqdm(dirfiles):
if dir.find(".log") != -1:
name = absolutepath+"\\"+dir.split(".log")[0]
with open(name + ".log", 'r',encoding='UTF-8') as file:
data = file.read()
data = text1.sub('\n',data)
file.close()
with open(name + ".txt", 'w',encoding='UTF-8') as fl:
fl.write(data)
fl.close()
else:
pass