用 JavaScript 以 `.`、`…`、`!`、`?`、`;` 作为切分点切分一段英文,但不切分英文中的小数(如 2.3),并且像 A.M. 和 P.M. 这样的缩写也不进行切分
<template>
<div>
<!-- Source text to be split; two-way bound to `value`. -->
<Input v-model="value" type="textarea" placeholder="Enter something..." />
<!-- Clicking the button invokes the component's `handle` method. -->
<Button type="primary" @click="handle">文字切分</Button>
<!-- Output fields, bound to `value1` .. `value5` (the first is a single-line input, the rest are textareas). -->
<Input v-model="value1" />
<Input v-model="value2" type="textarea" />
<Input v-model="value3" type="textarea" />
<Input v-model="value4" type="textarea" />
<Input v-model="value5" type="textarea" />
</div>
</template>
<script>
/**
 * Demo component: splits English text into sentences at `.`, `…`, `!`, `?`
 * and `;` while leaving decimals (e.g. 2.3) and the abbreviations listed in
 * `mapObj` (e.g. A.M.) intact. The delimiters themselves are dropped from the
 * output.
 */
const SentenceSplitDemo = {
  data() {
    return {
      // Sample input shown in the source textarea.
      value: `
2.3 I have 2.3 中文20.3aaaa[~amam~] apple.
I have A.M.
A.M. pen;
en!
china....
apple pen.
`,
      // Output slots rendered by the template; handle() fills them in order.
      value1: '',
      value2: '',
      value3: '',
      value4: '',
      value5: '',
      // Abbreviation -> dot-free placeholder that is swapped in before
      // splitting so the dots inside the abbreviation are not split points.
      mapObj: {
        'P.E.': '[~PEPE~]',
        'A.M.': '[~AMAM~]',
        'P.M.': '[~PMPM~]',
        'a.m.': '[~amam~]',
        'p.m.': '[~pmpm~]',
        'U.K.': '[~UKUK~]',
        'U.S.': '[~USUS~]',
        'No.': '[~NoNo~]',
      },
    };
  },
  methods: {
    /**
     * Splits `this.value` into sentences and writes them to
     * `value1` .. `value5`.
     *
     * Slots without a matching sentence are reset to '' so a previous run
     * never leaks into the current one. Sentences beyond the fifth are not
     * written because there is no output slot for them.
     */
    handle() {
      const OUTPUT_SLOT_COUNT = 5;
      const sentences = this.splitString(this.getNewValue())
        .map((part) => part.trim())
        .filter((part) => part !== '')
        .map((sentence) =>
          sentence
            // The capturing group keeps the whitespace runs in the token
            // list, so joining with '' reproduces the sentence exactly.
            .split(/(\s+)/)
            // Only a token that is exactly a placeholder is restored; a
            // placeholder embedded in a longer token is user text.
            .map((token) => this.getKeyByValue(this.mapObj, token) || token)
            .join('')
        );
      for (let slot = 0; slot < OUTPUT_SLOT_COUNT; slot += 1) {
        this[`value${slot + 1}`] = slot < sentences.length ? sentences[slot] : '';
      }
    },
    /**
     * Splits `str` at every run of `.`, `…`, `!`, `?` or `;`, except for a dot
     * that sits inside a `digits.digits`, `letters.digits` or `digits.letters`
     * token.
     *
     * @param {string} str - Text to split.
     * @returns {string[]} The pieces between delimiter runs, untrimmed and
     *   without the delimiters. Like String.prototype.split, a leading or
     *   trailing delimiter yields an empty first or last piece.
     */
    splitString(str) {
      // Group 1 is a protected token; the second alternative is a delimiter
      // run. Scanning both in one pass avoids textual placeholders, which
      // could collide with the input. A literal `|` is not a delimiter.
      const tokenPattern = /([a-zA-Z]+\.\d+|\d+\.[a-zA-Z]+|\d+\.\d+)|[.…!?;]+/g;
      const parts = [];
      let current = '';
      let cursor = 0;
      let match = tokenPattern.exec(str);
      while (match !== null) {
        current += str.slice(cursor, match.index);
        if (match[1] !== undefined) {
          // Protected token: copy it through verbatim.
          current += match[1];
        } else {
          // Delimiter run: close the current piece.
          parts.push(current);
          current = '';
        }
        cursor = tokenPattern.lastIndex;
        match = tokenPattern.exec(str);
      }
      parts.push(current + str.slice(cursor));
      return parts;
    },
    /**
     * Returns the trimmed `this.value` with every whitespace-delimited token
     * that is exactly a key of `mapObj` (e.g. `A.M.`) replaced by its
     * placeholder (e.g. `[~AMAM~]`). All whitespace, including newlines, is
     * preserved.
     *
     * @returns {string} The text with abbreviations masked.
     */
    getNewValue() {
      const { value, mapObj } = this;
      const abbreviations = Object.keys(mapObj);
      return value
        .trim()
        // Tokenise on any whitespace so an abbreviation next to a newline
        // is recognised as well.
        .split(/(\s+)/)
        .map((token) => (abbreviations.includes(token) ? mapObj[token] : token))
        .join('');
    },
    /**
     * Reverse lookup used to turn a placeholder back into its abbreviation.
     *
     * @param {Object} object - Map to search.
     * @param {*} value - Value to look for (compared with ===).
     * @returns {string|undefined} The first key whose value matches, or
     *   undefined when there is none.
     */
    getKeyByValue(object, value) {
      return Object.keys(object).find((key) => object[key] === value);
    },
  },
  created() {
    // Run once so the outputs are populated from the sample text.
    this.handle();
  },
  components: {},
};

export default SentenceSplitDemo;
</script>
<style lang="less" >
// The style tag carries no `scoped` attribute.
.ivu-input-wrapper {
// Default height of a textarea inside an input wrapper.
textarea.ivu-input {
height: 100px;
}
// A wrapper matching :first-of-type gets a taller textarea.
&:first-of-type textarea.ivu-input {
height: 500px;
}
}
</style>
将一段字符串进行切割,需要满足以下要求:
- 切分点:英文的. ? ! ; ...
- 遇到以下缩写时,不进行切分:①P.E. ②A.M. ③P.M. ④a.m. ⑤p.m. ⑥U.K. ⑦U.S. ⑧No.
- 遇到小数,也不进行切分
- 切分后的结果需要把标点符号带上
思路:将 '. ' 转为 '. ## '(? ! ; 同理;代码中的切割标记是 `[~splitDot~]` 而不是 `##`),然后用该标记进行切割,这样就可以保留标点了;小数中的 . 后面不是空白字符,所以也不会被切分。在切分前需要将 A.M. 这类缩写转为 `[~AMAM~]` 这种不含 . 的占位符,切分后再转回来
<template>
<div>
<!-- Source text to be split; two-way bound to `value`. -->
<Input v-model="value" type="textarea" placeholder="Enter something..." />
<!-- Clicking the button invokes the component's `handle` method. -->
<Button type="primary" @click="handle">文字切分</Button>
<!-- One textarea per entry of `textareaList`, keyed by its index. -->
<Input v-for="(item,index) of textareaList" :value="item" :key="index" type="textarea" />
</div>
</template>
<script>
/**
 * Demo component: splits English text into sentences after `.`, `?`, `!` or
 * `;` when followed by whitespace, keeping the punctuation on each sentence.
 * Decimals (e.g. 2.3) and the abbreviations listed in `mapObj` (e.g. A.M.)
 * are not split.
 */
const PunctuatedSplitDemo = {
  data() {
    return {
      // Sample input shown in the source textarea.
      value: `
2.3 I have 2.3 中文20.3aaaa[~amam~] apple.
I have A.M.
A.M. pen;
en!
china....
apple pen.
`,
      // Sentences produced by handle(); the template renders one textarea each.
      textareaList: [],
      // Abbreviation -> dot-free placeholder that is swapped in before
      // splitting so the dots inside the abbreviation are not split points.
      mapObj: {
        'P.E.': '[~PEPE~]',
        'A.M.': '[~AMAM~]',
        'P.M.': '[~PMPM~]',
        'a.m.': '[~amam~]',
        'p.m.': '[~pmpm~]',
        'U.K.': '[~UKUK~]',
        'U.S.': '[~USUS~]',
        'No.': '[~NoNo~]',
      },
      // Marker inserted at every split point before the text is cut.
      splitDot: '[~splitDot~]',
    };
  },
  methods: {
    /**
     * Splits `this.value` into sentences and stores them in `textareaList`.
     *
     * A split point is one of `.`, `?`, `!`, `;` immediately followed by a
     * whitespace character; the punctuation stays on the sentence and the
     * whitespace is dropped. A dot inside a decimal is followed by a digit,
     * so it never qualifies. Empty segments are skipped.
     */
    handle() {
      const { splitDot, mapObj } = this;
      // Methods are called through `this` so they keep their receiver.
      // The callback form inserts `splitDot` literally, even if it were to
      // contain `$` sequences.
      const marked = this.getNewValue().replace(
        /([.?!;])\s/g,
        (whole, mark) => `${mark}${splitDot}`
      );
      this.textareaList = marked
        .split(splitDot)
        .map((segment) => segment.trim())
        .filter((segment) => segment !== '')
        .map((segment) =>
          segment
            // The capturing group keeps the whitespace runs in the token
            // list, so joining with '' reproduces the segment exactly.
            .split(/(\s+)/)
            // Only a token that is exactly a placeholder is restored; a
            // placeholder embedded in a longer token is user text.
            .map((token) => this.getKeyByValue(mapObj, token) || token)
            .join('')
        );
    },
    /**
     * Returns the trimmed `this.value` with every whitespace-delimited token
     * that is exactly a key of `mapObj` (e.g. `A.M.`) replaced by its
     * placeholder (e.g. `[~AMAM~]`). All whitespace, including newlines, is
     * preserved.
     *
     * @returns {string} The text with abbreviations masked.
     */
    getNewValue() {
      const { value, mapObj } = this;
      const abbreviations = Object.keys(mapObj);
      return value
        .trim()
        // Tokenise on any whitespace so an abbreviation next to a newline
        // is recognised as well.
        .split(/(\s+)/)
        .map((token) => (abbreviations.includes(token) ? mapObj[token] : token))
        .join('');
    },
    /**
     * Reverse lookup used to turn a placeholder back into its abbreviation.
     *
     * @param {Object} object - Map to search.
     * @param {*} value - Value to look for (compared with ===).
     * @returns {string|undefined} The first key whose value matches, or
     *   undefined when there is none.
     */
    getKeyByValue(object, value) {
      return Object.keys(object).find((key) => object[key] === value);
    },
  },
  created() {
    // Run once so the outputs are populated from the sample text.
    this.handle();
  },
  components: {},
};

export default PunctuatedSplitDemo;
</script>
<style lang="less" >
// The style tag carries no `scoped` attribute.
.ivu-input-wrapper {
// Default height of a textarea inside an input wrapper.
textarea.ivu-input {
height: 100px;
}
// A wrapper matching :first-of-type gets a taller textarea.
&:first-of-type textarea.ivu-input {
height: 300px;
}
}
</style>