第六课：正则表达式 1.匹配单个字符串 2.匹配多个字符串 3.正则表达式小案例。正则表达式因为比较繁琐，所以需要经常使用。

1.匹配单个字符串

import re

# 1. Match a literal string.
# re.match only tries the pattern at the very start of the text, so 'b'
# cannot match 'abc' and the call returns None instead of a Match object.
text = 'abc'
result = re.match('a', text)
print(result.group())       # a
result2 = re.match('b', text)
# Calling .group() on None raises AttributeError, so test the result first.
print(result2.group() if result2 else None)     # None

# 2. Dot (.): matches any single character except '\n'.
text = 'abc'
result = re.match('.', text)
print(result.group())       # a (still anchored at the first character)
text = '\nabc'
result2 = re.match('.', text)
# The first character is a newline, which '.' does not match by default,
# so there is no match object to call .group() on.
print(result2.group() if result2 else None)     # None


# 3. \d: matches any decimal digit (0, 1, 2, ..., 9).
# Patterns are written as raw strings (r'...') so that Python does not try
# to interpret sequences such as \d as string escapes before re sees them.
text = '1bc'
print(re.match(r'\d', text).group())     # 1

# \D: matches any character that is not a digit.
text = '+abc'
print(re.match(r'\D', text).group())     # +

# \s: matches a whitespace character (including \n, \t, \r and space).
text = '\nabc'
result = re.match(r'\s', text)
print(result.group())       # the matched newline, shown as a blank line

# \S: matches any non-whitespace character.
text = 'abc'
result = re.match(r'\S', text)
print(result.group())       # a

# \w: matches a word character: letters, digits and the underscore.
text = '_abc'
result = re.match(r'\w', text)
print(result.group())       # _

# \W: the opposite of \w, i.e. any non-word character.
text = '*abc'
result = re.match(r'\W', text)
print(result.group())       # *

# Character sets: [...] succeeds when the character is any one of the
# alternatives listed between the brackets.
text = 'abc'
result = re.compile('[1a]').match(text)
print(result[0])       # a

# The range [0-9] expresses the digit class \d as a character set.
text = '1abc'
result = re.compile('[0-9]').match(text)
print(result[0])     # 1

# [a-zA-Z0-9_] spells out \w as a character set.
text = 'abc'
result = re.compile('[a-zA-Z0-9_]').match(text)
print(result[0])       # a

2.匹配多个字符串

import re

# *: matches zero or more repetitions of the preceding item.
text = 'abc'
text2 = '+abc'
print(re.match(r'\w*', text).group())        # abc
# '+' is not a word character, but zero repetitions are allowed, so the
# match still succeeds and yields an empty string instead of failing.
print(re.match(r'\w*', text2).group())       # '' (empty string)

# +: matches one or more repetitions of the preceding item.
text = 'abc'
text2 = '*abc'
print(re.match(r'\w+', text).group())        # abc
# At least one word character is required, so this match fails and
# re.match returns None; guard before calling .group() to avoid
# an AttributeError.
no_match = re.match(r'\w+', text2)
print(no_match.group() if no_match else None)    # None

# ?: matches zero or one occurrence of the preceding item.
# Raw strings keep \w intact for the regex engine instead of relying on
# Python leaving an unknown string escape untouched.
text = '_abc'
text2 = '+abc'
print(re.match(r'\w?', text).group())        # _
# No error here: zero occurrences are allowed, so the result is ''.
print(re.match(r'\w?', text2).group())       # '' (empty string)

# {m}: matches exactly m repetitions.
text = '_abc'
print(re.match(r'\w{2}', text).group())      # _a

# {m,n}: matches between m and n repetitions (as many as possible).
text = '_abc'
print(re.match(r'\w{1,3}', text).group())      # _ab
text = 'a+bc'
print(re.match(r'\w{1,3}', text).group())      # a
text = 'ab+c'
print(re.match(r'\w{1,3}', text).group())      # ab

3.正则表达式小案例

import re

# Each validator is compiled once and applied with fullmatch(), so the
# whole input must satisfy the rule rather than just a prefix of it.

# 1. Mobile number: starts with 1, the second digit is one of 3/4/5/7/8,
#    and the remaining nine digits are arbitrary.
PHONE_RE = re.compile(r'1[34578]\d{9}')
text = '17883172560'
result = PHONE_RE.fullmatch(text)
print(result.group())

# 2. E-mail: the name consists of digits, letters and underscores, followed
#    by @ and a domain. The dot before the suffix is escaped so that it only
#    matches a literal '.' and not any character.
EMAIL_RE = re.compile(r'\w+@[a-z0-9]+\.[a-z]+')
text = "1194707433@qq.com"
result = EMAIL_RE.fullmatch(text)
print(result.group())

# 3. URL: http, https or ftp, then "://", then any run of non-whitespace
#    characters.
URL_RE = re.compile(r'(http|https|ftp)://\S+')
text = "https://edu.csdn.net/learn/24756/280708?spm=1002.2001.3001.4157"
result = URL_RE.fullmatch(text)
print(result.group())

# 4. ID card number: 18 characters in total; the first 17 are digits and the
#    last one is a digit, a lowercase x or an uppercase X.
ID_CARD_RE = re.compile(r'\d{17}[\dxX]')
text = "521423200008084064"
result = ID_CARD_RE.fullmatch(text)
print(result.group())

4.开始/结尾/贪婪/非贪婪

import re

import re

# ^: anchors the match at the start of the text.
text = 'hello world'
result01 = re.match('hello', text)    # hello; re.match already implies ^
result02 = re.search('world', text)   # world; re.search looks anywhere in the text
result03 = re.search('^hello', text)  # hello; ^ forces search to start at the beginning

# $: anchors the match at the end of the text.
text = 'hello world'
result01 = re.search('world$', text)    # world; the text ends with 'world'
result02 = re.search('hello$', text)    # None; search itself does not raise
print(result01.group())
# result02 is None here, so calling result02.group() would raise
# AttributeError; that is why it is not printed.
text = ''
result03 = re.search('^$', text)        # '^$' matches the empty string
print(result03.group())

# |: alternation between several strings or sub-expressions.
# The pattern is a raw string so that \S reaches the regex engine untouched.
text = 'https://www.baidu.com'
result = re.match(r'(https|http|ftp)://\S+', text)
print(result.group())

# Greedy versus non-greedy matching.
text = 'hello world'
# Greedy: \w+ consumes as many word characters as it can ('hello').
result01 = re.search(r'\w+', text)

# Non-greedy: a ? placed directly after a quantifier (*, +, ...) makes that
# quantifier match as few characters as possible. A ? placed directly after
# an item such as \w is the ordinary "zero or one" quantifier instead.
result02 = re.search(r'\w+?', text)
print(result02.group())     # h


# Case 1: extract an HTML tag.
text = '<h1>我是一级标题<h1>'
# Greedy: .+ runs to the last '>' in the text, so the whole string is matched.
result01 = re.search('<.+>', text)
print(result01.group())     # the entire text
# Non-greedy: .+? stops at the first '>' it can, giving just the first tag.
result02 = re.search('<.+?>', text)
print(result02.group())     # <h1>

# Case 2: check that a string is an integer between 0 and 100.
# The alternatives cover: 0, a one- or two-digit number that does not start
# with 0, and 100. Each alternative ends with $ so nothing may follow it.
text = '12'
result = re.match(r"0$|[1-9]\d?$|100$", text)
print(result.group())       # 12

5.转义字符串和原生字符串

import re

# Escape characters in Python string literals:
# 1. In a normal literal the backslash starts an escape, so \n is a newline.
text = 'hello\nworld'
print(text)                 # prints "hello" and "world" on two lines
# 2. An r prefix makes a raw literal: the backslash is kept as written.
text = r'hello\nworld'
print(text)                 # hello\nworld

# Escape characters in regular expressions:
# $ is the end-of-text anchor, so a literal dollar sign must be written \$.
# Without the backslash the pattern can never match and findall returns [].
text = 'apple price is $99, orange price is $100'
result = re.findall(r'\$\d+', text)      # ['$99', '$100']
print(result)

# Raw strings and regular expressions.
# A pattern written as a string literal is interpreted twice:
# 1. Python first processes the string literal (string escapes).
# 2. The resulting text is then parsed by the regular-expression engine.
# The regex engine rejects \c as an unknown escape, so to match a literal
# backslash it must receive two backslashes:
#   "\\\\c" =(Python literal)=> \\c =(regex)=> backslash followed by c
#   r"\\c"  =(regex)=> backslash followed by c
text = r"\cba c"
result = re.match(r"\\c", text)
print(result.group())       # \c

6.分组

import re

text = 'apple price is $99, orange price is $100'
# A literal dollar sign has to be escaped as \$; a bare $ is the end-of-text
# anchor, which makes the pattern unmatchable and re.match return None.
result = re.match(r'.+\$\d+.+\$\d+', text)
print(result.group())   # apple price is $99, orange price is $100

# Grouping: each pair of parentheses captures the text matched inside it.
result02 = re.match(r'.+(\$\d+).+(\$\d+)', text)
print(result02.group(0))    # apple price is $99, orange price is $100
print(result02.group(1))    # $99
print(result02.group(2))    # $100
print(result02.groups())    # ('$99', '$100')

7.re中的函数

import re

# findall: return every match of the pattern as a list.
# The dollar sign is escaped (\$) so it matches the character itself rather
# than acting as the end-of-text anchor.
text = "apple price is $99,orange price is $88"
result = re.findall(r'\$\d+', text)     # ['$99', '$88']
print(result)


# sub: replace the parts of a text that match a pattern.
text = "nihao zhongguo,hello world"
# The plain str.replace equivalent works on one fixed substring at a time.
new_text = text.replace(" ", "")     # nihaozhongguo,helloworld
# re.sub swaps every space or comma for a newline.
new_text = re.compile(r' |,').sub('\n', text)
# Printed result:
#   nihao
#   zhongguo
#   hello
#   world
print(new_text)

# Sample job-description markup used to demonstrate tag stripping.
html = """
<div class="job-detail">
    <p>1. 3年以上相关开发经验 ，全日制统招本科以上学历</p>
    <p>2. 精通一门或多门开发语言(Python,C,Java等)，其中至少有一门有3年以上使用经验</p>
    <p>3. 熟练使用ES/mysql/mongodb/redis等数据库；</p>
    <p>4. 熟练使用django、tornado等web框架，具备独立开发 Python/Java 后端开发经验；</p>
    <p>5. 熟悉 Linux / Unix 操作系统&nbsp;</p>
    <p>6. 熟悉 TCP/IP，http等网络协议</p>
    <p>福利：</p>
    <p>1、入职购买六险一金（一档医疗+公司全额购买商业险）+开门红+全额年终奖（1年13薪，一般会比一个月高）</p>
    <p>2、入职满一年有2次调薪调级机会</p>
    <p>3、项目稳定、团队稳定性高，团队氛围非常好（汇合员工占招行总员工比例接近50%）；</p>
    <p>4、有机会转为招商银行内部员工；</p>
    <p>5、团队每月有自己的活动经费，法定节假日放假安排；</p>
    <p>6、办公环境优良，加班有加班费（全额工资为计算基数，加班不超过晚上10点，平日加班为时薪1.5倍，周末加班为日薪2倍，周末加班也可优先选择调休，管理人性化）。</p>
</div>
"""
# The non-greedy <.+?> matches one tag at a time; replacing each with ''
# removes the <div>/<p> markup and leaves only the text of every line
# (leading indentation, line breaks and entities such as &nbsp; are kept).
new_html = re.compile(r'<.+?>').sub('', html)
print(new_html)


# split: cut the text apart wherever the pattern (a space or a comma) matches.
text = "nihao zhongguo,hello world"
result = re.compile(r' |,').split(text)     # ['nihao', 'zhongguo', 'hello', 'world']
print(result)

# compile: build a reusable pattern object. With re.VERBOSE, whitespace in
# the pattern is ignored and # starts a comment, so each part of the
# expression can be annotated on its own line.
text = "apple price is 34.56"
r = re.compile(r"""
\d+     # integer part
\.      # literal decimal point (an unescaped . would match any character)
\d+     # fractional part
""", re.VERBOSE)
result = r.search(text)     # 34.56
print(result.group())

第六课：正则表达式

1.匹配单个字符串

2.匹配多个字符串

3.正则表达式小案例

4.开始/结尾/贪婪/非贪婪

5.转义字符串和原生字符串

6.分组

7.re中的函数

8.