替换 HTML 标签并保留空格与换行:之前有过这样一个需求,通过百度找到了一个很好用的方法,在此记录下来。
/**
 * Strips all HTML tags from the given markup and returns the remaining text.
 *
 * <p>A placeholder (a literal backslash followed by {@code n}) is added after
 * every {@code <br>} and at the start and end of every {@code <p>}; once the
 * document has been serialized, each placeholder is turned into a real line
 * break. Pretty-printing is disabled for both the serialization and the
 * clean-up pass so that jsoup does not reformat the text. The cleaned text is
 * trimmed and its HTML entities are unescaped before it is returned.
 *
 * <p>NOTE(review): because the placeholder is plain text, a literal
 * backslash-n sequence that is already present in the input is converted to a
 * line break as well.
 *
 * @param html the HTML markup to convert; may be {@code null} or empty
 * @return the text without tags, or an empty string when
 *         {@code StringUtils.isEmpty(html)} is true
 */
public static String convert(String html) {
    if (StringUtils.isEmpty(html)) {
        return "";
    }

    // One settings object is shared by the parsed document and the clean-up pass.
    Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
    Document doc = Jsoup.parse(html);
    doc.outputSettings(rawOutput);

    // Mark the places where a line break belongs with a textual placeholder.
    doc.select("br").append("\\n");
    doc.select("p").prepend("\\n").append("\\n");

    // Swap each placeholder (backslash + 'n') for an actual newline character.
    String withLineBreaks = doc.html().replace("\\n", "\n");

    // Whitelist.none() permits no tags, so only text is left after cleaning.
    String stripped = Jsoup.clean(withLineBreaks, "", Whitelist.none(), rawOutput);
    return StringEscapeUtils.unescapeHtml(stripped.trim());
}
需要在 pom.xml 中引入以下两个依赖(jsoup 和 commons-lang):
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
</dependencies>