import xlrd
import xlwt
import difflib
# Not referenced anywhere in the visible code.
arrayNum = 6
# Module-level accumulator: read_excel() appends every row it reads to this list.
tables = []
# Not referenced anywhere in the visible code.
newTables = []
def read_excel():
    """Return the rows of ``test.xlsx`` whose ``Login_IP`` occurs more than once.

    The first worksheet is read and columns 0-4 of every row are mapped to
    the keys ``UID``, ``ID``, ``email``, ``IP`` and ``Login_IP`` (in that
    order). Row 0 is never returned, but its ``Login_IP`` cell still takes
    part in the occurrence count.

    Side effects:
        Prints the sheet name, row count and column count, and appends every
        row read (row 0 included) to the module-level ``tables`` list.

    Returns:
        list[dict]: the rows after row 0 whose ``Login_IP`` value appears at
        least twice in the sheet, in sheet order. Empty when the sheet has
        fewer than two rows.
    """
    from collections import Counter

    # NOTE(review): xlrd >= 2.0 only reads legacy .xls content; confirm the
    # installed xlrd version can open this workbook.
    workbook = xlrd.open_workbook(r'test.xlsx')
    sheet = workbook.sheet_by_index(0)
    print(sheet.name, sheet.nrows, sheet.ncols)

    columns = ('UID', 'ID', 'email', 'IP', 'Login_IP')
    records = [
        {key: sheet.cell_value(row_index, col) for col, key in enumerate(columns)}
        for row_index in range(sheet.nrows)
    ]
    # Keep feeding the module-level list, but detect duplicates on the rows of
    # THIS read only, so repeated calls do not re-report earlier rows.
    tables.extend(records)

    # Count every Login_IP once instead of calling list.count() per row.
    login_ip_counts = Counter(record['Login_IP'] for record in records)
    return [
        record
        for record in records[1:]
        if login_ip_counts[record['Login_IP']] > 1
    ]
def likeNumber():
    """Return the rows of ``aa.xlsx`` whose ``ID`` closely resembles another ``ID``.

    The first worksheet is read and columns 0-4 of every row are mapped to
    the keys ``UID``, ``ID``, ``email``, ``IP`` and ``Login_IP``; ``ID`` is
    converted with ``str()``. For every row after row 0 the ``ID`` is compared
    with the ``ID`` of all rows (row 0 included) using
    ``difflib.get_close_matches`` (at most 100 matches, cutoff 0.7). When more
    than one match is found and the best match is not the empty string, all
    matches are printed and remembered.

    Returns:
        list[dict]: the rows after row 0 whose ``ID`` is one of the remembered
        matches, in sheet order. Empty when the sheet has fewer than two rows.
    """
    workbook = xlrd.open_workbook(r'aa.xlsx')
    sheet = workbook.sheet_by_index(0)

    columns = ('UID', 'ID', 'email', 'IP', 'Login_IP')
    records = []
    for row_index in range(sheet.nrows):
        record = {
            key: sheet.cell_value(row_index, col)
            for col, key in enumerate(columns)
        }
        # Numeric cells would otherwise reach difflib as floats.
        record['ID'] = str(record['ID'])
        records.append(record)

    # Candidate pool deliberately includes row 0, as the comparison always did.
    all_ids = [record['ID'] for record in records]

    similar_ids = set()
    for record in records[1:]:
        matches = difflib.get_close_matches(record['ID'], all_ids, 100, cutoff=0.7)
        # A single match is normally just the row's own ID; an empty best match
        # means the row has no ID to compare.
        if len(matches) > 1 and matches[0] != "":
            for match in matches:
                print("读取用户名:", match)
                similar_ids.add(match)

    return [record for record in records[1:] if record['ID'] in similar_ids]
def likeEmail():
    """Return the rows of ``aa.xlsx`` whose ``email`` resembles another row's.

    The first worksheet is read and columns 0-4 of every row are mapped to
    the keys ``UID``, ``ID``, ``email``, ``IP`` and ``Login_IP``; ``ID`` and
    ``email`` are converted with ``str()``. For every row after row 0 the part
    of the email before the first ``@`` is compared with the full ``email``
    value of all rows (row 0 included) using ``difflib.get_close_matches``
    (at most 100 matches, cutoff 0.4). When more than one match is found and
    the best match is not the empty string, all matches are printed and
    remembered.

    Returns:
        list[dict]: the rows after row 0 whose ``email`` is one of the
        remembered matches, in sheet order. Empty when the sheet has fewer
        than two rows.
    """
    workbook = xlrd.open_workbook(r'aa.xlsx')
    sheet = workbook.sheet_by_index(0)

    columns = ('UID', 'ID', 'email', 'IP', 'Login_IP')
    records = []
    for row_index in range(sheet.nrows):
        record = {
            key: sheet.cell_value(row_index, col)
            for col, key in enumerate(columns)
        }
        record['ID'] = str(record['ID'])
        # Ensure .split() below always operates on a string.
        record['email'] = str(record['email'])
        records.append(record)

    # Candidate pool deliberately includes row 0, as the comparison always did.
    all_emails = [record['email'] for record in records]

    similar_emails = set()
    for record in records[1:]:
        local_part = record['email'].split("@")[0]
        # NOTE(review): the local part is compared against complete addresses
        # (domain included); kept as-is, confirm this asymmetry is intended.
        matches = difflib.get_close_matches(local_part, all_emails, 100, cutoff=0.4)
        if len(matches) > 1 and matches[0] != "":
            for match in matches:
                print("读取邮箱:", match)
                similar_emails.add(match)

    return [record for record in records[1:] if record['email'] in similar_emails]
class WriteExcel:
    """Small helper that builds a one-sheet workbook with ``xlwt`` and saves it.

    ``self.row`` tracks the next row index to write; ``add_title``,
    ``add_head`` and ``add_list`` advance it as they emit content.
    """

    def __init__(self, filename, sheet_name):
        """Create an empty workbook with one sheet.

        Args:
            filename: path passed to ``Workbook.save`` by :meth:`save`.
            sheet_name: name of the single worksheet that is created.
        """
        self.work_book = xlwt.Workbook(encoding="UTF-8")
        self.worksheet = self.work_book.add_sheet(sheet_name)
        self.filename = filename
        self.row = 0

    def save(self):
        """Write the workbook to ``self.filename``."""
        self.work_book.save(self.filename)

    def set_style(self, name, height, bold=False, format_str='', align='center'):
        """Build an ``xlwt.XFStyle`` with thin borders on all four sides.

        Args:
            name: font name.
            height: font height as expected by ``xlwt.Font.height``.
            bold: whether the font is bold.
            format_str: number format string for the style.
            align: ``'center'`` centres both ways, ``'left'`` aligns left and
                bottom; any other value aligns right and bottom.

        Returns:
            The configured ``xlwt.XFStyle``.
        """
        style = xlwt.XFStyle()

        font = xlwt.Font()
        font.name = name
        font.bold = bold
        font.height = height

        borders = xlwt.Borders()
        borders.left = 1
        borders.right = 1
        borders.top = 1
        borders.bottom = 1

        alignment = xlwt.Alignment()
        if align == 'center':
            alignment.horz = xlwt.Alignment.HORZ_CENTER
            alignment.vert = xlwt.Alignment.VERT_CENTER
        elif align == 'left':
            alignment.horz = xlwt.Alignment.HORZ_LEFT
            alignment.vert = xlwt.Alignment.VERT_BOTTOM
        else:
            alignment.horz = xlwt.Alignment.HORZ_RIGHT
            alignment.vert = xlwt.Alignment.VERT_BOTTOM

        style.font = font
        style.borders = borders
        style.num_format_str = format_str
        style.alignment = alignment
        return style

    def set_title_style(self):
        """Return the style used for the sheet title (bold, centred)."""
        return self.set_style('黑体', 300, bold=True, format_str='')

    def set_head_style(self):
        """Return the style used for table headers (bold, solid yellow fill)."""
        head_style = self.set_style('Times New Roman', 220, bold=True, format_str='')
        pattern = xlwt.Pattern()
        pattern.pattern = xlwt.Pattern.SOLID_PATTERN
        pattern.pattern_fore_colour = xlwt.Style.colour_map['yellow']
        head_style.pattern = pattern
        return head_style

    def set_default_style(self):
        """Return the default cell style (regular weight, right-aligned)."""
        return self.set_style('Times New Roman', 200, bold=False, format_str='', align='right')

    def add_title(self, title):
        """Write ``title`` into row 0, merged across columns 0-2, and advance the row."""
        self.worksheet.write_merge(0, 0, 0, 2, title, self.set_title_style())
        self.row += 1

    def add_head(self, key, value):
        """Write a ``key`` / ``value`` pair into columns 0 and 1 of the current row."""
        self.worksheet.write(self.row, 0, key)
        self.worksheet.write(self.row, 1, value)
        self.row += 1

    def add_list(self, table_head, table_detail):
        """Write a header row followed by one row per entry of ``table_detail``.

        Args:
            table_head: iterable of header labels, one per column.
            table_detail: iterable of mappings; each mapping's values are
                written left to right in the mapping's own iteration order.
        """
        # Skip one row before the header.
        self.row += 1
        head_style = None
        for col, label in enumerate(table_head):
            if head_style is None:
                # Build the header style once and reuse it for every cell.
                head_style = self.set_head_style()
            self.worksheet.write(self.row, col, label, head_style)
            self.worksheet.col(col).width = 150 * 30
        for rows in table_detail:
            self.row += 1
            for col, key in enumerate(rows):
                self.worksheet.write(self.row, col, rows[key])
if __name__ == '__main__':
    # Pass 1: rows of test.xlsx that share a Login_IP -> aa.xlsx.
    # NOTE(review): xlwt writes the legacy .xls format whatever the file
    # extension; confirm the ".xlsx" names are intended.
    list_detail = read_excel()
    print('读取成功')
    list_head = ["UID", "ID", "email", "IP", "Login_IP"]
    ip_writer = WriteExcel("aa.xlsx", "统计")
    ip_writer.add_title("IP统计表")
    ip_writer.add_list(list_head, list_detail)
    ip_writer.save()
    print('写入成功')
    print("=======================")

    # Pass 2: rows of aa.xlsx with similar user names.
    print("第二次读取")
    # Bind the result to its own name so the likeNumber function stays callable.
    similar_id_rows = likeNumber()
    print("用户名读取成功")
    id_writer = WriteExcel("用户名.xlsx", "用户名相似")
    id_writer.add_title("用户名相似")
    id_writer.add_list(list_head, similar_id_rows)
    id_writer.save()
    print("用户名写入成功")
    print("=======================")

    # Pass 3: rows of aa.xlsx with similar e-mail addresses.
    print("第三次读取")
    similar_email_rows = likeEmail()
    print("邮箱读取成功")
    email_writer = WriteExcel("邮箱.xlsx", "邮箱相似")
    email_writer.add_title("邮箱相似")
    email_writer.add_list(list_head, similar_email_rows)
    email_writer.save()
    # Report the write here; the read was already reported above.
    print("邮箱写入成功")