JS正则表达式(4)：正则的捕获方法本文已参与「新人创作礼」活动，一起开启掘金创作之路。正则的捕获方法正则捕获实现

本文已参与「新人创作礼」活动，一起开启掘金创作之路。

正则的捕获方法

正则捕获

实现正则捕获的方法 ( RegExp.prototype )
1. exec
2. test
字符串支持正则的方法 ( String.prototype )
1. split
2. replace
3. match
4. ......

正则的懒惰性

exec

let str = 'aaa111bbb222ccc333';

// Capturing is only possible when the pattern matches the string;
// when it does not match, exec() returns null.
let reg = /^\d+$/;		// => the whole string must consist of digits
console.log(reg.test(str));	// => false
console.log(reg.exec(str));	// => null

/**
 * Capturing with exec():
 *   1. The result is either an array (match found) or null (no match).
 *      On success:
 *        [0]:     the text matched by the whole pattern
 *        [1..n]:  the text captured by each group ()
 *        index:   position in the string where the match starts
 *        input:   the string that was searched
 *        length:  number of array items (1 + number of capture groups),
 *                 not the length of the matched text
 *   2. Without the g flag, every exec() call returns the first match only
 *      ("lazy" capturing): lastIndex stays 0, so each search restarts from
 *      the beginning of the string.
 *      Remedy: add the global flag g.
 */
reg = /\d+/;	// reassigned, not re-declared: `reg` already exists in this scope
console.log(reg.test(str));	// => true
console.log(reg.exec(str));
// => Array [ 0: "111", index: 3, input: "aaa111bbb222ccc333", length: 1 ]

exec 捕获字符串的原理 ( 懒惰性 )

let str = 'aaa111bbb222ccc333';
let reg = /\d+/;

/**
 * Every RegExp object has a lastIndex property whose initial value is 0:
 *   reg.lastIndex is the position at which the next search starts.
 * Without the g flag, exec() leaves it at 0.
 */
console.log(reg.lastIndex);		// => 0, the search starts at position 0
console.log(reg.exec(str));		// => Array [ 0: "111", index: 3, ... ]
console.log(reg.lastIndex);		// => 0, still 0 after the first capture
console.log(reg.exec(str));		// => Array [ 0: "111", index: 3, ... ]
console.log(reg.lastIndex);		// => 0, still 0 after the second capture
// ... and so on, however many times exec() is called ...
console.log(reg.lastIndex);		// => 0, still 0 after the n-th capture
// This is why exec() always returns the first substring that fits the pattern.

修饰符g进行全局匹配

let str = 'aaa111bbb222ccc333';
let reg = /\d+/g;			// g flag: global matching, exec()/test() now update lastIndex

console.log(reg.exec(str));	// => Array [ 0: "111", index: 3, ... ]
console.log(reg.lastIndex);	// => 6
console.log(reg.exec(str));	// => Array [ 0: "222", index: 9, ... ]
console.log(reg.lastIndex);	// => 12
console.log(reg.exec(str));	// => Array [ 0: "333", index: 15, ... ]
console.log(reg.lastIndex);	// => 18, the end of the string
console.log(reg.exec(str));	// => null, searching from position 18 finds nothing
console.log(reg.lastIndex);	// => 0, lastIndex is reset to 0 after a failed search
console.log(reg.exec(str));	// => Array [ 0: "111", index: 3, ... ]
console.log(reg.lastIndex);	// => 6
// ... and the cycle repeats

/****************** incorrect usage ********************/
// Note: once a pattern has the g flag, every call to test() or exec()
// moves reg.lastIndex.
reg.lastIndex = 0;	// start the demonstration from a known state
if (reg.test(str)) {
  // => the string matches, but test() has already consumed the first match
  console.log(reg.lastIndex);	// => 6, so the next exec() searches from 6
  console.log(reg.exec(str));	// => Array [ 0: "222", index: 9, ... ] ("111" is skipped)
}
/***********************************************/

解决方法

捕获所有符合结果的字符串

/**
 * RegExp.prototype.execAll(str)
 *
 * Collects every substring of `str` that matches this pattern, mirroring
 * what str.match(regex) returns for a global pattern.
 *
 * @param {string} str - the text to search.
 * @returns {?Array} For a pattern without the g flag: the plain result of
 *   exec(str) (match array or null). For a global pattern: an array holding
 *   the full text of every match, or null when nothing matches.
 */
RegExp.prototype.execAll = function execAll(str) {
  if (!this.global) {
    return this.exec(str);
  }

  const text = String(str);
  const matches = [];

  // A previous test()/exec() call may have left lastIndex in the middle of
  // the string; start from 0 so no match is skipped.
  this.lastIndex = 0;

  let found = this.exec(text);
  while (found) {
    matches.push(found[0]);
    if (found[0] === '') {
      // exec() does not advance lastIndex after a zero-length match; step
      // forward manually (by a whole code point in unicode mode), otherwise
      // the loop would never end.
      const codePoint = text.codePointAt(this.lastIndex);
      this.lastIndex += this.unicode && codePoint > 0xffff ? 2 : 1;
    }
    found = this.exec(text);
  }

  return matches.length === 0 ? null : matches;
};

let str = 'aaa123bbb222ccc333';
// Digits, so the output can be compared with str.match(/\d+/g) below
// (the string contains no whitespace, so /\s/g would only ever yield null).
let reg = /\d+/g;
console.log(reg.execAll(str));		// => Array(3) [ "123", "222", "333" ]


console.log('/************************/');
console.log(str.match(/\d+/g));		// => Array(3) [ "123", "222", "333" ]
console.log(str.match(/\d+/));		// => Array [ "123", index: 3, ... ]

分组的三大作用

分组捕获

捕获单次

// => Matching an ID-card number
let str = '22615819951204161X';
let reg = /^(\d{6})(\d{4})(\d{2})(\d{2})\d{2}(\d)(\d|X)$/;
console.log(reg.exec(str));
console.log(str.match(reg));
// Array(7) [ "22615819951204161X", "226158", "1995", "12", "04", "1", "X" ]
// [0]:    the text matched by the whole pattern
// [1..6]: the groups (\d{6}), (\d{4}), (\d{2}), (\d{2}), (\d), (\d|X)

// The (\d|X) item is not wanted in the result: putting ?: at the start of the
// group makes it match without capturing, keeping only its grouping effect.
reg = /^(\d{6})(\d{4})(\d{2})(\d{2})\d{2}(\d)(?:\d|X)$/;
console.log(str.match(reg));
// => Array(6) [ "22615819951204161X", "226158", "1995", "12", "04", "1" ]


// A capture group that did not take part in the match yields undefined
let re = /^(?:(\d+)|([a-zA-Z]+))$/;
str = '123456';
console.log(str.match(re));		// => Array(3) [ "123456", "123456", undefined ]

捕获多次

// => Goal: capture both the whole "<<letters>>" token and the letters inside it
let str = "<<aaa>>哈哈哈<<123>>!!!!<<bbb>><<ccc456>>";
let reg = /<<([a-zA-Z]+)>>/;

// Without the g flag, exec() and match() return the same result
console.log(reg.exec(str));
console.log(str.match(reg));
// Array [ "<<aaa>>", "aaa" ] Array [ "<<aaa>>", "aaa" ]
// Lazy: only the first match, together with its group content

// With the g flag, match() returns every whole match (without group contents),
// while exec() still returns one match per call, including its groups
reg = /<<([a-zA-Z]+)>>/g;
console.log(reg.exec(str));
console.log(str.match(reg));
// Array [ "<<aaa>>", "aaa" ]
// Array [ "<<aaa>>", "<<bbb>>" ]

/**
 * Hand-written "capture everything" loop: collects every whole match and
 * every group-1 capture into two parallel arrays.
 */
let str = "<<aaa>>哈哈哈<<123>>!!!!<<bbb>><<ccc456>>";
// The g flag is required: without it exec() returns the first match on every
// call and the loop below never terminates.
let reg = /<<([a-zA-Z]+)>>/g;
let bigArry = [];
let smallArry = [];
let val = reg.exec(str);
while (val) {
  let [big, small] = val;
  bigArry.push(big);
  smallArry.push(small);
  val = reg.exec(str);
}
console.log(bigArry, smallArry);
// Array [ "<<aaa>>", "<<bbb>>" ]
// Array [ "aaa", "bbb" ]

分组引用

// Third use of a group (): back-references. \1 must match the exact text that
// group 1 captured; writing \1\1 requires that text two more times in a row.
let reg = /^[a-zA-Z]([0-2])\1\1[a-zA-Z]$/;

[
  'a00b',	// => false
  'a111b',	// => true
  'a222b',	// => true
  'a333b',	// => false
  'a0011b',	// => false
  'a555b',	// => false
].forEach((candidate) => {
  console.log(reg.test(candidate));
});

正则捕获的贪婪性

let str = 'aaa111bbb222ccc';
let reg1 = /\d+/g;
let reg2 = /\d+?/g;
let reg3 = /\d*?/g;

// Greediness: by default a quantifier matches as much text as it can.
// Adding ? after the quantifier makes it match as little as possible:
// +? settles for 1 character, *? settles for 0 characters.
//   reg1 => [ "111", "222" ]
//   reg2 => Array(6) [ "1", "1", "1", "2", "2", "2" ]
//   reg3 => Array(16) of "" (one empty match at every position)
[reg1, reg2, reg3].forEach((pattern) => {
  console.log(str.match(pattern));
});

问号的五大作用

? 左边是非量词符 ( 匹配字符0 | 1 次 )
? 左边是量词符 ( 取消贪婪匹配 )
? 左边是 (?:) ( 只匹配不捕获 )
? 左边是 (?=) ( 正向预查 )
? 左边是 (?!) ( 负向预查 )

其他正则的捕获方法

test捕获

let str = "<name><age><sex>";
let reg = /<([a-zA-Z]+)>/g;

// RegExp.$1 ~ RegExp.$9 expose groups 1-9 of the most recent match.
// Because of the g flag each test() call continues from lastIndex, so the
// five rounds print:
//   true  / name
//   true  / age
//   true  / sex
//   false / sex   (no match left; $1 keeps its previous value, a new cycle starts)
//   true  / name
for (let round = 0; round < 5; round += 1) {
  console.log(reg.test(str));
  console.log(RegExp.$1);
}

replace 字符串中用来实现替换的方法 ( 一般伴随正则一起使用 )

let str = "今天天气真好, fuck, 尼玛的";

// => Mask every sensitive word (fuck, 尼玛) with two asterisks
str = str.replace(/(fuck|尼玛)/g, () => '**');

console.log(str);
// => 今天天气真好, **, **的

replace的分组替换功能 ( replace的功能之一 )

let str = '2020-1-19 14:19 2020-1-19 14:19';

// $1..$5 in the replacement string stand for the text captured by groups 1..5.
let reg = /(\d{4})-(\d{1,2})-(\d{2}) (\d{2}):(\d{2})/;	// no g flag: only the first match is replaced
console.log(str.replace(reg, "$1年$2月$3日 $4时$5分"));
// => 2020年1月19日 14时19分 2020-1-19 14:19

// Same input; `reg` is reassigned rather than re-declared.
reg = /(\d{4})-(\d{1,2})-(\d{2}) (\d{2}):(\d{2})/g;	// g flag: every match is replaced
console.log(str.replace(reg, "$1年$2月$3日 $4时$5分"));
// => 2020年1月19日 14时19分 2020年1月19日 14时19分

replace方法的使用:

/** 
 *		str.replace([RegExp], [function])
 *			1. replace方法每次用正则的规则在字符串中匹配到一次, 就执行一次函数
 *						let str = '123456';
 *						let reg = /\d/g;
 *						str.replace(reg, () => { console.log('我被执行'); return 11 })
 *			2. 函数默认接收大正则匹配的结果, $1 $2 $3小分组结果, 匹配下标, 原始字符串
 *			3. 函数的返回值就是用来替换匹配到的字符串的值
 */

// 1. No capture groups: the callback receives the matched text, the match
//    index and the original string.
let str = '1 2 3 4 5 6';
let reg = /\d/g;
// replace() returns a new string, so the result has to be assigned back.
str = str.replace(reg, (...args) => {
  console.log(args);	// first call => Array(3) [ "1", 0, "1 2 3 4 5 6" ]
  return '哈哈';
});
// str => 哈哈 哈哈 哈哈 哈哈 哈哈 哈哈

// 2. With a capture group: the callback receives the matched text, the group
//    content, the match index and the original string.
str = '1 2 3 4 5 6';
reg = /(\d)/g;
str = str.replace(reg, (...args) => {
  console.log(args);	// first call => Array(4) [ "1", "1", 0, "1 2 3 4 5 6" ]
  return '嘻嘻';
});
// str => 嘻嘻 嘻嘻 嘻嘻 嘻嘻 嘻嘻 嘻嘻

// Example: find dates and zero-pad month/day, e.g. 2019年1月20日 => 2019-01-20
let str = '2019年12月1日, 还是2019年2月4日';
let reg = /(\d{4})年(\d{1,2})月(\d{1,2})日/g;
str = str.replace(reg, (...args) => {
  console.log(args);
  // args: whole match, year, month, day, match index, original string
  const twoDigits = (part) => (part.length === 1 ? `0${part}` : part);
  const year = args[1];
  return [year, twoDigits(args[2]), twoDigits(args[3])].join('-');
});
console.log(str);
// => 2019-12-01, 还是2019-02-04