关于python正则的基础记录:
例1:匹配文中“mouxxx”
import re

# Plain literal pattern: findall returns every non-overlapping occurrence of "mouxxx".
text = "你好mouxxx,阿斯顿发mouxxxasd 阿士大夫能接受的mouxxxiff"
data_list = re.findall("mouxxx", text)
# Whitespace in the text does not stop the scan, so all three occurrences are
# found; len(data_list) therefore counts how often the substring appears.
print(data_list)  # ['mouxxx', 'mouxxx', 'mouxxx']
例2:[abc]匹配a或b或c字符
import re

# Character class: [abc] matches exactly one character that is a, b or c.
text = "你2b好mouxxx,阿斯顿发amouxxxasd 阿士大夫a能接受的wffbbupqaceiqiff"
abc_pattern = re.compile("[abc]")
data_list = abc_pattern.findall(text)
# One entry per matching character, in order of appearance.
print(data_list)  # ['b', 'a', 'a', 'a', 'b', 'b', 'a', 'c']
import re

# Literal "q" followed by one character out of a, b or c.
text = "你2b好mouxxx,阿斯顿发amouxxxasd 阿士大夫a能接受的wffbbupqaceiqiff"
q_pattern = re.compile("q[abc]")
data_list = q_pattern.findall(text)
# Only "qa" qualifies: the second "q" is followed by "i", which is not in the class.
print(data_list)  # ['qa']
例3:[^abc]匹配除了abc以外的其他字符
import re

# Negated character class: [^abc] matches any single character except a, b or c.
text = "你wffbbupceiqiff"
not_abc = re.compile("[^abc]")
data_list = not_abc.findall(text)
# The two "b"s and the "c" are skipped; every other character is returned.
print(data_list)  # ['你', 'w', 'f', 'f', 'u', 'p', 'e', 'i', 'q', 'i', 'f', 'f']
例4:[a-z]匹配a~z的任意字符([0-9]也可以)
import re

# Range inside a class: "t" followed by any one lowercase letter a-z.
text = "alexrootrootadmin"
t_then_letter = re.compile("t[a-z]")
data_list = t_then_letter.findall(text)
print(data_list)  # ['tr', 'ta']
例5:. 代指除换行符以外的任意字符
import re

# "." stands for exactly one arbitrary character (other than a newline).
text = "alexraotrootadmin"
r_any_o = re.compile("r.o")
data_list = r_any_o.findall(text)
print(data_list)  # ['rao', 'roo']
import re

# Quantifiers are greedy by default: ".+" grabs as much as possible, so the
# match runs from the first "r" to the last "o" in the text.
text = "alexraotrootadmin"
greedy = re.compile("r.+o")
data_list = greedy.findall(text)
print(data_list)  # ['raotroo']
import re

# Appending "?" makes the quantifier lazy: ".+?" stops at the nearest "o",
# so the text yields two short matches instead of one long one.
text = "alexraotrootadmin"
lazy = re.compile("r.+?o")
data_list = lazy.findall(text)
print(data_list)  # ['rao', 'roo']
例6:\w 代指字母、数字、下划线(Python 3 的 str 模式下也匹配汉字等 Unicode 文字)
import re

# \w matches one word character; for str patterns this includes CJK characters.
# The pattern is a raw string so that "\w" reaches the regex engine untouched
# and does not trigger Python's invalid-escape-sequence warning.
text = "北京高某某kotte 北京高某kotte某"
data_list = re.findall(r"高\w+e", text)
# \w+ is greedy but must still be followed by "e", so each match ends at the
# last "e" of its word.
print(data_list)  # ['高某某kotte', '高某kotte']
例7:\d代表数字
import re

# \d matches a single digit; raw string keeps the backslash for the regex engine.
text = "root-ad32min-add3-admd1in"
data_list = re.findall(r"d\d", text)
print(data_list)  # ['d3', 'd3', 'd1']
import re

# \d+ matches a run of one or more digits after the literal "d".
text = "root-ad32min-add3-admd1in"
data_list = re.findall(r"d\d+", text)
print(data_list)  # ['d32', 'd3', 'd1']
例8:\s代指任意的空白符,包括空格、制表符等
import re

# \s matches one whitespace character (space, tab, newline, ...).
# Pattern: a word starting with "a", one whitespace character, then another word.
text = "root admin add admin"
data_list = re.findall(r"a\w+\s\w+", text)
# "admin add" is consumed as ONE match; the trailing "admin" has no following
# whitespace + word, so it does not match on its own.
print(data_list)  # ['admin add']
例9:* 重复0次或更多次
import re

# "*" repeats the preceding item zero or more times, so "2*" also matches no "2".
# The pattern must not carry a trailing space: the text never has a space after
# "B", and a stray one makes findall return an empty list.
text = "他是大B个,确实是个大2B。"
data_list = re.findall("大2*B", text)
print(data_list)  # ['大B', '大2B']
例10:+ 重复1次或更多次
import re

# "+" repeats the preceding item one or more times, so at least one digit is
# required between "大" and "B" ("大B" is therefore not matched).
text = "他是大B个,确实是个大2B, 大3B,大6666B"
data_list = re.findall(r"大\d+B", text)
print(data_list)  # ['大2B', '大3B', '大6666B']
例11:? 重复0次或1次(注意与 +?、*? 中表示非贪婪匹配的 ? 相区分)
import re

# "?" makes the preceding item optional (zero or one occurrence), so at most
# one digit may sit between "大" and "B" ("大6666B" is therefore not matched).
text = "他是大B个,确实是个大2B, 大3B,大6666B"
data_list = re.findall(r"大\d?B", text)
print(data_list)  # ['大B', '大2B', '大3B']
例12:{n} 重复n次
import re

# {n} repeats the preceding item exactly n times: "185362" plus exactly 5 digits.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"185362\d{5}", text)
print(data_list)  # ['18536256985']
例13:{n,}重复n次或更多次
import re

# {n,} repeats the preceding item n or more times: digit runs of length >= 9.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"\d{9,}", text)
print(data_list)  # ['235698526', '18536256985']
例14:{n,m}重复n到m次
import re

# {n,m} repeats the preceding item between n and m times: digit runs of
# length 10 to 15, which excludes the 9-digit QQ number.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"\d{10,15}", text)
print(data_list)  # ['18536256985']
例15:用括号 () 分组来提取数据区域(findall 只返回分组内的内容;有多个分组时返回元组):
import re

# With exactly one capturing group, findall returns the group's text rather
# than the whole match: "18536" must be present but is not part of the result.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"18536(2\d{5})", text)
print(data_list)  # ['256985']
import re

# With several capturing groups, findall returns one tuple per match holding
# the groups in left-to-right order.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985,或者18536266666"
data_list = re.findall(r"18(53)6(2\d{5})", text)
print(data_list)  # [('53', '256985'), ('53', '266666')]
import re

# Nested groups are numbered by their opening parenthesis: the outer group
# (whole number) comes first, the inner group (last six digits) second.
text = "楼主太牛逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"(18536(2\d{5}))", text)
print(data_list)  # [('18536256985', '256985')]
import re

# "|" inside a group is alternation: after "18536" accept either "2" plus five
# digits, or "r" plus word characters ending in "太". "18536kotte" fits neither
# branch and is skipped.
text = "楼主18536root太牛18536kotte逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"18536(2\d{5}|r\w+太)", text)
print(data_list)  # ['root太', '256985']
import re

# Same alternation wrapped in an outer group: each result tuple holds the whole
# match first and the text of the chosen alternative second.
text = "楼主18536root太牛18536kotte逼了,在线要235698526@qq.com和xxxx@live.com,手机号也可18536256985"
data_list = re.findall(r"(18536(2\d{5}|r\w+太))", text)
print(data_list)  # [('18536root太', 'root太'), ('18536256985', '256985')]