Path-to-RegExp 模块学习笔记

一个对URL进行解析的模块，支持正则表达式验证、匹配、反向解析，可用于增加 koa 框架对于 koa-router 对 path 的解析工作。

包安装

npm install path-to-regexp --save

加载方法

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');

// pathToRegexp(path, keys?, options?)
// match(path)
// parse(path)
// compile(path)

// 也可以直接加载包:
const PathToRegexp = require('path-to-regexp');

pathToRegexp 方法

创建一个正则表达式匹配对象

const { pathToRegexp } = require('path-to-regexp');

const keys = [];

const regexp = pathToRegexp('/foo/:bar', keys);

console.log(regexp);  //    /^/foo(?:/([^/#?]+?))[/#?]?$/i

console.log(keys)
/*
[
  {
    name: 'bar',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  }
]
*/

创建的正则表达式:  
/^/foo(?:/([^/#?]+?))[/#?]?$/i
1、/.../i   不区分大小写搜索。
2、^...$    从字符串开头匹配到字符串结束
3、/foo(?:...)[/#?]?  匹配 /foo 后面是(?:...) 非捕获组  然后匹配 [/#?]?，即 /、#、? 中的任意一个字符出现一次或不出现
4、\/(...) 非捕获组中的内容  \/()  表示匹配 特殊字符 /  后面是个捕获组
5、捕获组中的内容：[^/#?]+?  非 /、#、? 的字符  +?非贪婪匹配 
创建的数组:
[
  {
    name: 'bar',   捕获组名称
    prefix: '/',   前缀
    suffix: '',    后缀
    pattern: '[^\\/#\\?]+?',   匹配表达式   [^\/#\?]+?
    modifier: ''   修饰符
  }
]

pathToRegexp 函数创建了一个对 url /foo/:bar 的正则表达式匹配对象, 并对使用 :冒号标识的URL命名参数创建了一个命名参数对象 添加到 keys(参数2) 数组中。

由上例可知：

path-to-regexp 函数所创建的正则表达式，功能仅能匹配一个 url路径、包含主机名称的url路径。其中的 \/规则、 [^/#?] 及匹配规则，都是为解析url路径所定制功能，不能匹配任意的 非url字符串或 json。

使用 pathToRegexp 函数创建对象时，如果要匹配?key1=value&key2=value... GET 请求参数，需要对?进行转义处理。

执行URL匹配测试

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');

const keys = [];

const regexp = pathToRegexp('/foo/:bar', keys);

console.log(regexp.exec('/foo/test_a'));
/*
[
  '/foo/test_a',      匹配的字符串
  'test_a',           捕获组中子匹配
  index: 0,           匹配位置
  input: '/foo/test_a',   进行匹配的字符串
  groups: undefined   正则表达式中的 捕获组 是个非命名捕获组所以没有 groups 对象
]
 */

注意： 命名URL参数 名称仅能使用单词字符，也就是[A-Za-z0-9_].

自定义匹配参数

命名URL参数可以有一个自定义的正则表达式，它会覆盖默认匹配:([^\/#\?]+?)：

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const regexp_number = pathToRegexp('/icon-:foo(\\d+).png', keys);
// 注意：字符串表示正则字符: \d 必须对 \ 进行转义: \\d 
console.log(regexp_number);    // /^/icon-(\d+).png[/#?]?$/i
console.log(keys);
/*
[
  {
    name: 'foo',
    prefix: '',
    suffix: '',
    pattern: '\\d+',    // 默认的 [^\\/#\\?]+? 被替换掉了
    modifier: ''
  }
]
 */

javascript 中 \符号具有特殊意义，所以在字符串中添加\d必须对\进行转义: \\d

测试匹配:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const regexp_number = pathToRegexp('/icon-:foo(\\d+).png', keys);

console.log(regexp_number.exec('/icon-123.png'));
/*
[
  '/icon-123.png',
  '123',    // 子组捕获 123  匹配 \d+
  index: 0,
  input: '/icon-123.png',
  groups: undefined
]
 */
console.log(regexp_number.exec('/icon-abc.png')); // abc 不匹配 \d+ 所以返回 null
// null

匹配字符串的例子:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const regexp_word = pathToRegexp('/(user|u)', keys);
console.log(regexp_word.exec('/u'));    // [ '/u', 'u', index: 0, input: '/u', groups: undefined ]
console.log(regexp_word.exec('/user')); // [ '/user', 'user', index: 0, input: '/user', groups: undefined ]

自定义匹配参数添加修饰符

自定义参数的重复，可以添加正则表达式量词修饰符:

例如匹配: http://127.0.0.1/test/test/test/test   中的 test
URL中多个参数的重复

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];

const repeat_param1 = pathToRegexp('/:num(\\d)?');
const repeat_param2 = pathToRegexp('/:num(\\d)*');
const repeat_param3 = pathToRegexp('/:num(\\d)+');
console.log(repeat_param1);                 // /^(?:/(\d))?[/#?]?$/i
console.log(repeat_param2);                 // /^(?:/((?:\d)(?:/(?:\d))*))?[/#?]?$/i
console.log(repeat_param3);                 // /^(?:/((?:\d)(?:/(?:\d))*))[/#?]?$/i
const url_0 = '/';
const url_1 = '/1';
const url_2 = '/1/2';
const url_3 = '/1/2/3/4/5';
console.log(repeat_param1.exec(url_0));    // [ '/', undefined, index: 0, input: '/', groups: undefined ]
console.log(repeat_param1.exec(url_1));    // [ '/1', '1', index: 0, input: '/1', groups: undefined ]
console.log(repeat_param1.exec(url_2));    // null
console.log(repeat_param1.exec(url_3));    // null

console.log(repeat_param2.exec(url_0));    // [ '/', undefined, index: 0, input: '/', groups: undefined ]
console.log(repeat_param2.exec(url_1));    // [ '/1', '1', index: 0, input: '/1', groups: undefined ]
console.log(repeat_param2.exec(url_2));    // [ '/1/2', '1/2', index: 0, input: '/1/2', groups: undefined ]
console.log(repeat_param2.exec(url_3));    // ['/1/2/3/4/5', '1/2/3/4/5', index: 0, input: '/1/2/3/4/5', groups: undefined]

console.log(repeat_param3.exec(url_0));    // null
console.log(repeat_param3.exec(url_1));    // [ '/1', '1', index: 0, input: '/1', groups: undefined ]
console.log(repeat_param3.exec(url_2));    // [ '/1/2', '1/2', index: 0, input: '/1/2', groups: undefined ]
console.log(repeat_param3.exec(url_3));    // ['/1/2/3/4/5', '1/2/3/4/5', index: 0, input: '/1/2/3/4/5', groups: undefined]

给自定义匹配参数添加修饰符 可以使 URL的匹配变的更加灵活，除添加修饰符?外,还支持 * 表示参数可以出现零次或多次, +表示参数至少出现一次或多次。

自定义匹配参数前缀、后缀

使用 {} 包装参数，可以创建自定义的 前缀、后缀，假设有以下需求:

http://127.0.0.1/image/1000.png         请求正常尺寸图像
http://127.0.0.1/image/1000.png@small   请求小尺寸图像
http://127.0.0.1/image/1000.png@large   请求大尺寸图像

定义: /image/:id(\d+).png@:size(small|large)? 将导致 @符产生硬性匹配:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];

const image_regexp = pathToRegexp('/image/:id(\\d+).png@:size(small|large)?', keys);
console.log(image_regexp);  // /^\/image(?:\/(\d+))\.png@(small|large)?[\/#\?]?$/i
console.log(keys);
// [
//     {
//         name: 'id',
//         prefix: '/',
//         suffix: '',
//         pattern: '\d+',
//         modifier: ''
//     },
//     {
//         name: 'size',
//         prefix: '',
//         suffix: '',
//         pattern: 'small|large',
//         modifier: '?'
//     }
// ]
console.log(image_regexp.exec('/image/1000.png'));  // null
console.log(image_regexp.exec('/image/1000.png@small'));
// [
//     '/image/1000.png@small',
//     '1000',
//     'small',
//     index: 0,
//     input: '/image/1000.png@small',
//     groups: undefined
// ]
console.log(image_regexp.exec('/image/1000.png@large'));
// [
//     '/image/1000.png@large',
//     '1000',
//     'large',
//     index: 0,
//     input: '/image/1000.png@large',
//     groups: undefined
// ]

为自定义参数添加前缀:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];

const image_regexp = pathToRegexp('/image/:id(\\d+).png{@:size(small|large)}?', keys);
console.log(image_regexp);  // /^/image(?:/(\d+)).png(?:@(small|large))?[/#?]?$/i
// 重点在: (?:@(small|large))? 将 @:size(small|large) 放在一个非捕获组中，并添加 修饰符 ?
// 可出现，可不出现
console.log(keys);
// [
//     {
//         name: 'id',
//         prefix: '/',
//         suffix: '',
//         pattern: '\d+',
//         modifier: ''
//     },
//     {
//         name: 'size',
//         pattern: 'small|large',
//         prefix: '@',  // 子捕获组添加前缀
//         suffix: '',
//         modifier: '?'  
//     }
// ]
//     [
//     '/image/1000.png',
//         '1000',
//         undefined,
//         index: 0,
//     input: '/image/1000.png',
//     groups: undefined
// ]
// [
//     '/image/1000.png@small',
//     '1000',
//     'small',
//     index: 0,
//     input: '/image/1000.png@small',
//     groups: undefined
// ]
// [
//     '/image/1000.png@large',
//     '1000',
//     'large',
//     index: 0,
//     input: '/image/1000.png@large',
//     groups: undefined
// ]
console.log(image_regexp.exec('/image/1000.png'));   // 成功匹配
// [
//     '/image/1000.png',
//     '1000',
//     undefined,
//     index: 0,
//     input: '/image/1000.png',
//     groups: undefined
// ]
console.log(image_regexp.exec('/image/1000.png@small'));
// [
//     '/image/1000.png@small',
//     '1000',
//     'small',   // 子捕获组 匹配正确
//     index: 0,
//     input: '/image/1000.png@small',
//     groups: undefined
// ]
console.log(image_regexp.exec('/image/1000.png@large'));
// [
//     '/image/1000.png@large',
//     '1000',
//     'large',
//     index: 0,
//     input: '/image/1000.png@large',
//     groups: undefined
// ]

处理查询字符串时，如果要包含GET请求参数 ? 需要对其进行转义处理:

如下例，请求中带有GET发送的code码:
http://127.0.0.1/email-activation?code=EAD5-DER4-EFZ2-21DE-565A  
code格式:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];

const url = '/email-activation?code=EAD5-DER4-EFZ2-21DE-565A';
const code_regexp1 = '(?:[A-Z0-9]{4}-){4}[A-Z0-9]{4}';
const email_activation_regexp = pathToRegexp(`/email-activation{\\?code=:code(${code_regexp1})}?`, keys);
// url中出现?匹配 必须对其进行转义
console.log(email_activation_regexp); // /^/email-activation?code=((?:[A-Z0-9]{4}-){4}[A-Z0-9]{4})[/#?]?$/i
console.log(keys);
// [
//     {
//         name: 'code',
//         prefix: '',
//         suffix: '',
//         pattern: '(?:[A-Z0-9]{4}-){4}[A-Z0-9]{4}', // 子捕获组中不允许再次使用 捕获组，只能使用非捕获组
//         modifier: ''
//     }
// ]
console.log(email_activation_regexp.exec(url));
// [
//     '/email-activation?code=EAD5-DER4-EFZ2-21DE-565A',
//     'EAD5-DER4-EFZ2-21DE-565A',
//     index: 0,
//     input: '/email-activation?code=EAD5-DER4-EFZ2-21DE-565A',
//     groups: undefined
// ]
//
console.log(email_activation_regexp.exec('/email-activation'));
// [
//     '/email-activation',
//     undefined,
//     index: 0,
//     input: '/email-activation',
//     groups: undefined
// ]

注意: 在自定义匹配参数的捕获组中不允许再次出现()捕获组，应使用(?:)非捕获组添加量词修饰符。

未命名参数

可以编写仅由正则表达式组成的未命名参数。它与命名参数的工作方式相同，除了它将被数字索引：

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const regexp = pathToRegexp("/:foo/(.*)", keys);

console.log(regexp);   // /^(?:/([^/#?]+?))(?:/(.*))[/#?]?$/i
console.log(keys);
// [
//     {
//         name: 'foo',
//         prefix: '/',
//         suffix: '',
//         pattern: '[^\/#\?]+?',
//         modifier: ''
//     },
//     { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }  
//     非命名捕获组所捕获的对象 name 为数字索引
// ]
console.log(regexp.exec("/test/route"));
// [
//     '/test/route',
//     'test',
//     'route',
//     index: 0,
//     input: '/test/route',
//     groups: undefined
// ]

正则对象可选项

path 字符串、字符串数组或正则表达式
keys 用路径中找到的键填充的数组。
options 配置选项
- sensitive 正则表达式是否区分大小写默认为 false
- strict 为 true 时，正则表达式不允许匹配结尾处的可选分隔符，默认为 false
- end 正则表达式将匹配到字符串的末尾，默认为 true
- start 正则表达式将从字符串开头开始匹配默认为 true
- delimiter 定义URL段的默认分隔符，默认为 [/#?]
- endsWith 定义URL结尾断言
- encode 一个函数，用于在插入之前对RegExp字符串进行编码，默认为 `x => x`
- prefixes 解析时自动考虑前缀的字符列表。默认为 ./

默认的标志位配置:

const keys = [];
const regexp = pathToRegexp("/:foo/(.*)", keys);
console.log(regexp);
console.log(keys);

/^(?:\/([^\/#\?]+?))(?:\/(.*))[\/#\?]?$/i    <<< 存在 i 标志位 默认不区分大小写匹配
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

sensitive 正则是否区分大小写:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    sensitive: true
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^(?:\/([^\/#\?]+?))(?:\/(.*))[\/#\?]?$/   << 默认的标志位 i 不见了
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

strict 为 true 时，正则表达式不允许匹配结尾处的可选分隔符:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    strict: true
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^(?:\/([^\/#\?]+?))(?:\/(.*))$/i   << 默认的尾分隔符是 [\/#\?]?
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

end 正则表达式将匹配到字符串的末尾：

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    end: false
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^(?:\/([^\/#\?]+?))(?:\/(.*))(?:[\/#\?](?=[]|$))?(?=[\/#\?]|[]|$)/i   <<< 结尾的$/i 中的 $ 去掉了
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

start 正则表达式将从字符串开头开始匹配 :

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    start: false
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/(?:\/([^\/#\?]+?))(?:\/(.*))[\/#\?]?$/i   <<< 开头部分的 /^  中的 ^ 被去掉了
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

delimiter 定义URL段的默认分隔符:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    delimiter: '[@%]'
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^(?:\/([^\[@%\]]+?))(?:\/(.*))[\[@%\]]?$/i  
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\[@%\\]]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]
影响三处位置:
1、捕获组中的 非 @% 分隔符
2、捕获组外分隔 @% 匹配  
3、默认子组中的正则匹配表达式

endsWith 定义URL尾部断言:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    endsWith: '@%'
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^(?:\/([^\/#\?]+?))(?:\/(.*))[\/#\?]?(?=[@%]|$)/i << 添加先行断言 (?=[@%]|$) 尾部匹配/、#、?且后面必须紧跟 @、% 符号 或者直接$ 结尾。
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
]

encode 一个函数，用于在插入之前对RegExp字符串进行编码：

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    encode: x => {console.log(x, '只有硬性的匹配字符可以被函数编码'); return x}
}
const regexp = pathToRegexp("/test/:foo", keys, options);

console.log(regexp);
console.log(keys);

/test 只有硬性的匹配字符可以被函数编码
/ 只有硬性的匹配字符可以被函数编码
 只有硬性的匹配字符可以被函数编码   <<< 自定义URL参数 均被忽略为 空 字符串
/^\/test(?:\/([^\/#\?]+?))[\/#\?]?$/i
[
  {
    name: 'foo',
    prefix: '/',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  }
]

prefixes 解析时自动考虑前缀的字符列表:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const keys = [];
const options = {
    prefixes: '\\'
}
const regexp = pathToRegexp("/:foo/(.*)", keys, options);

console.log(regexp);
console.log(keys);

/^\/([^\/#\?]+?)\/(.*)[\/#\?]?$/i   <<< 无论设置什么字符串，最终都是把  每段URL中的匹配 (?:\/(...)) 外的 (?:) 非捕获组去掉
[
  {
    name: 'foo',
    prefix: '',
    suffix: '',
    pattern: '[^\\/#\\?]+?',
    modifier: ''
  },
  { name: 0, prefix: '', suffix: '', pattern: '.*', modifier: '' }
]

URL中的默认分隔符就是 /，不能通过 prefixes 修改；修改 prefixes 只是去掉了每段外层的 (?:) 非捕获组。

parse 解析函数

parse函数将从路径字符串返回字符串和键的列表:

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');

const tokens = parse("/route/:foo/(.*)");

console.log(tokens[0]);   
//=> "/route"   
// 硬性匹配字符 /route

console.log(tokens[1]);
//=> { name: 'foo', prefix: '/', suffix: '', pattern: '[^\/#\?]+?', modifier: '' }
// 自定义URL参数 foo
console.log(tokens[2]);
//=> { name: 0, prefix: '/', suffix: '', pattern: '.*', modifier: '' }
// 未命名URL参数

Compile(反向解析 pathToRegExp 对象)

该compile函数将返回一个用于将参数转换为有效路径的函数.

encode函数的反向解析

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');

// 确保对路径段进行一致的编码。
const  toPath  =  compile ( "/user/:id" ,  {  encode : encodeURIComponent  } );
// 传递参数为一个字典，字典中的字符 通过 options 中的 encode 进行转码，最终合成 url。
console.log(toPath({id: '123'}));
// "/user/123"
console.log(encodeURIComponent('123'));
console.log(toPath({id: 'café'}));
// "/user/caf%C3%A9"
console.log(encodeURIComponent('café'));
// caf%C3%A9
console.log(toPath({id: '/'}));
// "/user/%2F"
console.log(encodeURIComponent('/'));
// %2F
console.log(toPath({id: ':/'}));
// "/user/%3A%2F"
console.log(encodeURIComponent(':/'));
// %3A%2F
// 如果没有 `encode`，您需要确保输入正确编码。
const  toPathRaw  =  compile ( "/user/:id" ) ;
// 没有任何转码函数的 pathToRegexp 对象, 直接设置转码后的字符串
toPathRaw ({id : "%3A%2F"}) ;
//  "/user/%3A%2F"

量词匹配的反向解析

const {pathToRegexp, match, parse, compile } = require('path-to-regexp');
const toPathRepeated = compile("/:segment+");
// 正则表达式中涉及多次匹配
console.log(toPathRepeated({segment: 'foo'}));
// /foo
console.log(toPathRepeated({segment: ['a', 'b', 'c']}));
// /a/b/c
// 对正则表达式匹配多次的，可以通过数组传递参数

反向解析中的正则验证

const toPathRegexp = compile("/user/:id(\\d+)");
// 传递不正确的匹配:
console.log(toPathRegexp({id: 123}));
// /user/123
// 传递类型非字符串,自动调用 toString 转码后匹配

console.log(toPathRegexp({id: "123"}));
// /user/123

// console.log(toPathRegexp({id: 'abc'}));   // 报错

const toPathRegexpValidate = compile("/user/:id(\\d+)", {validate: false});
// 不进行正则验证的反向解析
console.log(toPathRegexpValidate({id: 'abc'}));   // /user/abc（不报错）
// 'abc' 并不匹配 :id(\d+),但也被解析了.

Path-to-RegExp