自己的项目中使用正则表达式解析 HTML,为此实现了对字符串的正则分割,在这里记录一下。
/// One piece produced by splitting a string with a regular expression.
///
/// A split yields an ordered list of these values: the pieces the pattern
/// matched, interleaved with the text that lies between the matches.
struct SplitedResult {
    /// The text of this piece.
    let fragment: String

    /// `true` when `fragment` is text matched by the regular expression,
    /// `false` when it is text lying before, between, or after matches.
    let isMatched: Bool

    /// The capture groups of a matched piece, in group order (group 1 first).
    /// An entry is `nil` when no text is available for that group.
    /// Empty for unmatched pieces.
    let captures: [String?]
}
extension String {
    /// Splits the string at every match of a regular expression, keeping both
    /// the matched pieces and the text between them.
    ///
    /// The result preserves the original order. Text that precedes a match,
    /// lies between two matches, or follows the last match is returned as an
    /// unmatched piece (`isMatched == false`, no captures); empty gaps are
    /// skipped. Every match is returned as a matched piece (`isMatched == true`)
    /// together with the contents of its capture groups.
    ///
    /// - Parameters:
    ///   - pattern: The regular expression pattern to split on.
    ///   - options: Options used to compile the pattern. Defaults to
    ///     `.dotMatchesLineSeparators`.
    /// - Returns: The pieces of the string in order of appearance. A capture
    ///   entry is `nil` when the corresponding group did not take part in the
    ///   match.
    /// - Important: Traps with `fatalError` when `pattern` cannot be compiled.
    func split(
        usingRegex pattern: String,
        options: NSRegularExpression.Options = .dotMatchesLineSeparators
    ) -> [SplitedResult] {
        let regex: NSRegularExpression
        do {
            regex = try NSRegularExpression(pattern: pattern, options: options)
        } catch {
            fatalError("正则表达式有误,请更正后再试!")
        }

        // NSRegularExpression reports ranges in UTF-16 code units. Slicing is
        // done in that same space through NSString, because converting an
        // NSRange to Range<String.Index> is failable: it can yield nil when a
        // match boundary does not correspond to a valid String.Index (for
        // example between a base letter and a following combining mark), and
        // force-unwrapping that conversion would crash.
        let text = NSString(string: self)
        let matches = regex.matches(
            in: self,
            options: [],
            range: NSRange(location: 0, length: text.length)
        )

        var results: [SplitedResult] = []
        // UTF-16 offset of the first code unit not yet emitted.
        var cursor = 0

        for match in matches {
            let matchRange = match.range

            // Emit the unmatched text between the previous match and this one.
            if matchRange.location > cursor {
                let gap = NSRange(location: cursor, length: matchRange.location - cursor)
                results.append(
                    SplitedResult(fragment: text.substring(with: gap), isMatched: false, captures: [])
                )
            }

            // Group 0 is the whole match; real capture groups start at 1.
            // A group that did not participate has location NSNotFound.
            let captures: [String?] = stride(from: 1, to: match.numberOfRanges, by: 1).map { group -> String? in
                let groupRange = match.range(at: group)
                return groupRange.location == NSNotFound ? nil : text.substring(with: groupRange)
            }

            results.append(
                SplitedResult(fragment: text.substring(with: matchRange), isMatched: true, captures: captures)
            )
            cursor = matchRange.location + matchRange.length
        }

        // Emit whatever follows the last match (or the whole string when
        // nothing matched).
        if text.length > cursor {
            let tail = NSRange(location: cursor, length: text.length - cursor)
            results.append(
                SplitedResult(fragment: text.substring(with: tail), isMatched: false, captures: [])
            )
        }
        return results
    }
}