最近在 Node 端开发商品的站点地图(sitemap)服务,sku 大约 1000 万个。站点地图协议要求单个文件不超过 50000 个 URL,这里按每 5000 个 sku 生成一个文件,因此总共约 2000 个文件。开发中考虑到服务器性能问题,采用分批生成的方式,频率为每 10 秒生成 10 个文件。用到的 Node 端的包是 sitemap。
用到的方法主要有两个:
//生成站点地图文件
import { SitemapStream, SitemapIndexStream } from 'sitemap';
import { createWriteStream } from 'fs';
import moment from 'moment-timezone';
/**
 * Writes one sitemap XML file containing a URL entry for every item in `list`.
 *
 * The returned promise settles only after the file stream has emitted
 * `finish`, so callers that batch files can safely await each one. It rejects
 * if either the sitemap stream or the file stream reports an error.
 *
 * @param list - Items to emit; each item's `skuId` and `enName` are read.
 * @param pagePath - Destination path of the XML file.
 * @param hostname - Hostname passed to `SitemapStream`.
 * @param routerType - First path segment of every URL (defaults to `product`).
 */
export async function createSitemap({
  list,
  pagePath,
  hostname,
  routerType = 'product',
}: {
  list: any;
  pagePath: string;
  hostname: string;
  routerType?: string;
}): Promise<void> {
  const sitemap = new SitemapStream({ hostname, lastmodDateOnly: true });
  const writeStream = createWriteStream(pagePath);

  // Register completion/error listeners before any data flows so that no
  // event can be missed.
  const written = new Promise<void>((resolve, reject) => {
    writeStream.once('finish', () => resolve());
    writeStream.once('error', reject);
    sitemap.once('error', (err: Error) => {
      // pipe() does not close the destination when the source fails.
      writeStream.destroy();
      reject(err);
    });
  });

  sitemap.pipe(writeStream);

  // The date is the same for every entry of this file; format it once.
  const lastmod = moment().format('YYYY-MM-DD');

  for (let index = 0; index < list.length; index++) {
    const slug = getSeoSkuUrl(list[index].skuId, list[index].enName);
    if (!slug) {
      // Never publish a literal "/<routerType>/undefined" URL.
      continue;
    }
    sitemap.write({
      url: `/${routerType}/${slug}`,
      changefreq: 'daily',
      priority: 1.0,
      lastmod,
    });
  }

  sitemap.end();
  await written;
}
//生成索引文件
/**
 * Writes the sitemap index file that lists the numbered sitemap files
 * `sitemap-<routerType>s-1.xml` through `sitemap-<routerType>s-<num>.xml`.
 *
 * The returned promise settles only after the file stream has emitted
 * `finish`; it rejects if either stream reports an error.
 *
 * @param num - Number of sitemap files to reference (1-based, inclusive).
 * @param routerType - Name used inside each file name (defaults to `product`).
 * @param hostname - Prefix used to build every sitemap file URL.
 * @param pagePath - Destination path of the index XML file.
 */
export async function sitemapIndex({
  num,
  routerType = 'product',
  hostname,
  pagePath,
}: {
  num: number;
  pagePath: string;
  hostname: string;
  routerType?: string;
}): Promise<void> {
  const smis = new SitemapIndexStream();
  const writeStream = createWriteStream(pagePath);

  // Register completion/error listeners before any data flows so that no
  // event can be missed.
  const written = new Promise<void>((resolve, reject) => {
    writeStream.once('finish', () => resolve());
    writeStream.once('error', reject);
    smis.once('error', (err: Error) => {
      // pipe() does not close the destination when the source fails.
      writeStream.destroy();
      reject(err);
    });
  });

  smis.pipe(writeStream);

  // The date is the same for every entry of this index; format it once.
  const lastmod = moment().format('YYYY-MM-DD');

  // NOTE(review): this lists the bare hostname as an index entry, although it
  // is not itself a sitemap file. Kept as-is; confirm that it is intended.
  smis.write({ url: hostname });

  for (let index = 1; index <= num; index++) {
    smis.write({
      url: `${hostname}/sitemap-${routerType}s-${index}.xml`,
      changefreq: 'daily',
      lastmod,
    });
  }

  smis.end();
  await written;
}
/** Runs of '+', '/' or whitespace in a SKU name; each run becomes a single '_'. */
const SKU_NAME_SEPARATORS = /\++|\/+|\s+/g;

/**
 * Tokens of the normalized SKU name that are kept in the URL slug.
 *
 * NOTE(review): the pattern is kept byte-identical so that generated URLs do
 * not change. As written, `.{1}` matches ANY single character (so "12_Pro"
 * is one token), and `\\s` matches a literal backslash followed by "s" rather
 * than whitespace. Confirm whether either was intended before changing it.
 */
const SKU_NAME_TOKENS = /(([0-9]+.{1}[0-9a-zA-Z]+)|([0-9a-zA-Z]+)|(\\s))/g;

/**
 * Builds the SEO-friendly file name of a product page:
 * `<token>-<token>-..._<skuId>.html`.
 *
 * A missing (`null`/`undefined`) name is treated as empty, so the function
 * always returns a string and callers never interpolate `undefined` into a URL.
 *
 * @param skuId - SKU identifier appended after the name tokens.
 * @param skuName - Product name the tokens are extracted from.
 * @returns The page file name, e.g. `Red-Shoes_123.html`.
 */
function getSeoSkuUrl(skuId: string, skuName: string): string {
  const name = skuName == null ? '' : String(skuName);
  const normalized = name.replace(SKU_NAME_SEPARATORS, '_');
  // With a global pattern, match() returns every full match, or null if none.
  const tokens = normalized.match(SKU_NAME_TOKENS) ?? [];
  return `${tokens.join('-')}_${skuId}.html`;
}
需要注意的是,用这个包的 SitemapIndexStream 生成索引文件时,无法设置日期(lastmod)的输出格式,介意的话可以考虑使用其他的包。