// 规则为:
// 1. 标题不能嵌套;
// 2. 标题 id 生成规则为:h[1-5]Id_*。
// Sample HTML fixture: a run of h1-h6 headings carrying generated ids, mixed
// with empty paragraphs. It is logged here so the raw input can be inspected.
const htmlCon = '<header><h2 id="h2Id_2222">st<strong>2222</strong></h2><p></p><p></p><p></p><p></p><h3 id="h3Id_3333"><strong>3333</strong></h3><h4 id="h4Id_444444"><strong>444444</strong></h4><h3 id="h3Id_35333"><strong>35333</strong></h3><h4 id="h4Id_4444444"><strong>444444</strong></h4><h5 id="h5Id_5555"><strong>5555</strong></h5><h6 id="h6Id6666"><strong>6666</strong></h6><h1 id="h1Id_111112">11111</h1><h2 id="h2Id_111113">11111</h2><h1 id="h1Id_111113">11111</h1><p></p><p></p><p><strong><br /><br /><br /><br /></strong></p><p></p>';
console.log('htmlCon', htmlCon);
/**
 * Derives the heading level encoded in a generated heading id.
 *
 * Ids are expected to look like `h<level>Id_<suffix>` with a level of 1-5
 * (for example `h3Id_3333`). Only the prefix at the very start of the id is
 * inspected, so a suffix that happens to contain text such as `h5Id` cannot
 * change the result.
 *
 * @param id heading id to inspect
 * @returns the level (1-5), or 0 when the id does not start with an
 *          `h[1-5]Id` prefix
 */
function getTitleIndex(id: string): number {
  // Anchor at the start: a plain substring search would let the suffix of
  // "h1Id_h5Id" override the real level.
  const prefix = /^h([1-5])Id/.exec(id);
  return prefix ? Number(prefix[1]) : 0;
}
/**
 * Looks up the parent id for a heading among the entries collected so far.
 *
 * The parent is the last entry in `data` whose `index` is lower than the
 * level that getTitleIndex derives from `id`.
 *
 * @param data entries collected so far; each one is read for `index` and `id`
 * @param id heading id whose parent is wanted
 * @returns the parent's id, or '' when `data` is empty or no entry has a
 *          lower index
 */
function getPId(data: Array<any>, id: string) {
  let parentId = '';
  if (data.length === 0) {
    return parentId;
  }
  // Level of the heading being placed.
  const level = getTitleIndex(id);
  // Walk forward and keep overwriting: the last lower-level entry wins.
  data.forEach(entry => {
    if (entry.index < level) {
      parentId = entry.id;
    }
  });
  return parentId;
}
/**
 * Extracts the plain-text title of the heading that carries the given id.
 *
 * Searches for the first `<hN id="...">...</hN>` element, where N is `hIndex`,
 * the id is the first attribute and is written in double quotes; matching is
 * case-insensitive. Every tag inside the matched element is removed and the
 * remaining text is returned. HTML entities are left as they are.
 *
 * @param htmlContent HTML text to search
 * @param id heading id; matched literally, so regex metacharacters are safe
 * @param hIndex heading level used to build the tag name
 * @returns the heading's text content, or '' when no such heading is found
 */
function getTitle(htmlContent: string, id: string, hIndex: number): string {
  // Escape regex metacharacters: an id such as "h1Id_C++" would otherwise make
  // the RegExp constructor throw, and "h1Id_a.b" could match another heading.
  const literalId = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // [\s\S] instead of "." so a heading whose content spans lines still matches.
  const headingReg = new RegExp(
    `<h${hIndex} id="${literalId}"[\\s\\S]*?<\\/h${hIndex}>`,
    'i'
  );
  const found = headingReg.exec(htmlContent);
  if (!found) {
    return '';
  }
  // Drop every tag, keeping only the text between them.
  return found[0].replace(/<[^>]*>/g, '');
}
/**
 * Builds the flat heading list used for a title navigation bar.
 *
 * Scans the HTML for opening h1-h5 tags, reads each tag's id, and records one
 * entry per heading in document order. The level comes from getTitleIndex,
 * the parent id from getPId, and the text from getTitle. A heading with no id,
 * or whose id yields level 0 from getTitleIndex, is skipped: an entry with
 * index 0 would be picked by getPId as the parent of every later heading.
 *
 * @param htmlContent HTML string to scan
 * @returns entries of the form `{ pId, id, index, value }`; `pId` is '' for
 *          headings without a parent
 */
function getTitleTrees(htmlContent: string) {
  // Opening tags of h1-h5 (g: find every match, i: ignore case).
  const hReg = /<h[1-5].*?(?:>|\/>)/gi;
  // Captures the value of the id attribute.
  const idReg = /id=['"]?([^'"]*)['"]?/i;
  // Collected heading entries, in document order.
  const hSrc: Array<{ pId: string; id: string; index: number; value: string }> = [];
  for (const openTag of htmlContent.match(hReg) || []) {
    const idMatch = openTag.match(idReg);
    const currentId = idMatch ? idMatch[1] : '';
    if (!currentId) {
      continue;
    }
    const hIndex = getTitleIndex(currentId);
    if (hIndex === 0) {
      // The id does not encode a heading level, so it cannot be placed.
      continue;
    }
    const pId = getPId(hSrc, currentId);
    const hTitle = getTitle(htmlContent, currentId, hIndex);
    hSrc.push({
      pId: pId,
      id: currentId,
      index: hIndex,
      value: hTitle
    });
  }
  return hSrc;
}
// Build the heading list for the sample HTML and log it for inspection.
const titleData = getTitleTrees(htmlCon);
console.log('titleData', titleData);