html转md: turndown.js

1,269 阅读1分钟

快速上手

安装

npm install turndown

使用

// For Node.js
const TurndownService = require('turndown')

// Create a converter (no options passed) and convert an HTML string to Markdown.
const turndownService = new TurndownService()
const html = '<h1>Hello world!</h1>'
const markdown = turndownService.turndown(html)

调整配置

const TurndownService = require('turndown')

// Conversion options handed to the TurndownService constructor.
const option = {
    headingStyle: 'atx',
    hr: '___',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
    // Must be the boolean false: the string 'false' is a non-empty string,
    // which is truthy, so it would switch this option on instead of off.
    preformattedCode: false,
}
const turndownService = new TurndownService(option)

// Sample input: a heading followed by an unordered list.
const html = `
<h1>Hello world!</h1>
<ul>
    <li>one</li>
    <li>two</li>
    <li>three</li>
</ul>
`
const markdown = turndownService.turndown(html)
console.debug(markdown)
# Hello world!

-   one
-   two
-   three

自定义规则

可以通过 addRule(key, rule) 方法添加自定义规则; 默认规则定义在源码的 commonmark-rules.js 文件里。

生成的 Markdown 里, 列表标记 - 后面有三个空格, 而这里希望只保留一个空格。查看源码发现空格数量是写死的, 无法通过配置修改, 所以把 listItem 规则的整段代码复制出来, 把前缀改为一个空格, 再添加为自定义规则 (通过 addRule 添加的规则优先于默认规则):

// Custom list-item rule: a copy of the library's own list-item replacement,
// changed so that exactly one space follows the list marker.
turndownService.addRule("listItem", {
    filter: 'li',
    /**
     * Render one <li> element as a Markdown list item.
     *
     * @param {string} content - Already-converted Markdown of the item's children.
     * @param {Node} node - The <li> DOM node being converted.
     * @param {object} options - Converter options; only `bulletListMarker` is read.
     * @returns {string} The list marker followed by the indented item content.
     */
    replacement(content, node, options) {
        const body = content
            .replace(/^\n+/, '') // drop leading newlines
            .replace(/\n+$/, '\n') // collapse trailing newlines into a single one
            .replace(/\n/gm, '\n    ') // indent continuation lines by four spaces

        const list = node.parentNode
        // Unordered items: the configured bullet plus a single space.
        let marker = options.bulletListMarker + ' '
        if (list.nodeName === 'OL') {
            // Ordered items: number by position among the siblings, offset by
            // the list's `start` attribute when it is present.
            const start = list.getAttribute('start')
            const position = Array.prototype.indexOf.call(list.children, node)
            const number = start ? Number(start) + position : position + 1
            marker = number + '. '
        }

        // Separate this item from a following sibling with a newline unless
        // the content already ends with one.
        const needsNewline = node.nextSibling && !/\n$/.test(body)
        return marker + body + (needsNewline ? '\n' : '')
    }
})

github地址

mixmark-io/turndown: 🛏 An HTML to Markdown converter written in JavaScript