切分字符串

145 阅读1分钟

用 JS 按 `.`、`…`、`!`、`?`、`;` 这几种标点切分一段英文,但不切分其中的小数,也不切分 A.M.、P.M. 这样的缩写(上面列表中的 `|` 只是分隔符,本身不是切分点)

<template>
  <div>
    <!-- Source text to split; two-way bound to `value` -->
    <Input v-model="value" type="textarea" placeholder="Enter something..." />
    <!-- Clicking the button calls the `handle` method -->
    <Button type="primary" @click="handle">文字切分</Button>
    <!-- Output fields, one per data property value1..value5 (only five are declared) -->
    <Input v-model="value1" />
    <Input v-model="value2" type="textarea" />
    <Input v-model="value3" type="textarea" />
    <Input v-model="value4" type="textarea" />
    <Input v-model="value5" type="textarea" />
  </div>
</template>
<script>
// Data fields that display the split result; the template binds exactly these five.
const OUTPUT_FIELDS = ['value1', 'value2', 'value3', 'value4', 'value5']

/**
 * Finds the first own enumerable key of `object` whose value is strictly equal to `value`.
 * @param {Object} object - lookup table
 * @param {*} value - value to search for
 * @returns {string|undefined} the matching key, or undefined when nothing matches
 */
function findKeyByValue(object, value) {
  return Object.keys(object).find(key => object[key] === value)
}

/**
 * Applies `swap` to every whitespace-delimited token of `text`, leaving the whitespace untouched.
 * @param {string} text
 * @param {function(string): string} swap - returns the replacement for one token
 * @returns {string}
 */
function mapTokens(text, swap) {
  // The capture group keeps each whitespace run in the array, so join('') restores the
  // original spacing. Splitting on any whitespace (not only ' ') lets a token that touches
  // a newline or tab still be recognised.
  return text.split(/(\s+)/).map(token => swap(token)).join('')
}

/**
 * Trims `text` and replaces every token that is a key of `mapObj` (e.g. "A.M.") with its
 * placeholder (e.g. "[~AMAM~]") so that the dots inside it are not treated as sentence ends.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @returns {string}
 */
function protectAbbreviations(text, mapObj) {
  const source = text == null ? '' : String(text)
  return mapTokens(source.trim(), token =>
    Object.prototype.hasOwnProperty.call(mapObj, token) ? mapObj[token] : token)
}

/**
 * Reverse of protectAbbreviations: turns placeholder tokens back into their abbreviation.
 * Only whole tokens are restored, so a placeholder embedded in a longer word is left alone.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @returns {string}
 */
function restoreAbbreviations(text, mapObj) {
  return mapTokens(text, token => findKeyByValue(mapObj, token) || token)
}

/**
 * Splits `str` on runs of . … ! ? ; while keeping decimal-like tokens ("3.14", "a.1", "1.a")
 * intact. Terminators are dropped; parts are returned untrimmed and may be empty, exactly
 * like String.prototype.split would return them.
 *
 * A plain `split(/[.…!?;]+/)` is not enough because it also cuts decimals. This scans the
 * text once instead of masking decimals with "[n]" placeholders, so text that already
 * contains something like "[0]" cannot be corrupted.
 * @param {string} str
 * @returns {string[]}
 */
function splitOutsideDecimals(str) {
  // Group 1 = decimal-like token (kept), otherwise the match is a terminator run (split point).
  const scanner = /([a-zA-Z]+\.\d+|\d+\.[a-zA-Z]+|\d+\.\d+)|[.…!?;]+/g
  const parts = []
  let current = ''
  let cursor = 0
  let found
  while ((found = scanner.exec(str)) !== null) {
    current += str.slice(cursor, found.index)
    if (found[1] !== undefined) {
      current += found[1]
    } else {
      parts.push(current)
      current = ''
    }
    cursor = scanner.lastIndex
  }
  parts.push(current + str.slice(cursor))
  return parts
}

/**
 * Full pipeline: protect abbreviations, split into sentences, restore abbreviations,
 * trim each sentence and drop the empty ones.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @returns {string[]} sentences without their terminating punctuation
 */
function splitIntoSentences(text, mapObj) {
  return splitOutsideDecimals(protectAbbreviations(text, mapObj))
    .map(part => restoreAbbreviations(part, mapObj).trim())
    .filter(Boolean)
}

export default {
  data() {
    return {
      // Sample text shown on load
      value: `
         2.3 I have 2.3 中文20.3aaaa[~amam~] apple.
        I have  A.M. 
        A.M. pen;
        en!
        china....
        apple pen.
      `,
      value1: '',
      value2: '',
      value3: '',
      value4: '',
      value5: '',
      // Abbreviations that must not be split, mapped to the dot-free placeholder
      // that stands in for them while splitting
      mapObj: {
        'P.E.': '[~PEPE~]',
        'A.M.': '[~AMAM~]',
        'P.M.': '[~PMPM~]',
        'a.m.': '[~amam~]',
        'p.m.': '[~pmpm~]',
        'U.K.': '[~UKUK~]',
        'U.S.': '[~USUS~]',
        'No.': '[~NoNo~]',
      }
    }
  },
  methods: {
    /**
     * Splits `value` into sentences and shows them in value1..value5.
     * Every output field is rewritten on each run so results from a previous run never
     * linger; sentences beyond the fifth have no field to go to and are not displayed.
     */
    handle() {
      const sentences = splitIntoSentences(this.value, this.mapObj)
      OUTPUT_FIELDS.forEach((field, index) => {
        this[field] = sentences[index] || ''
      })
    },
    /**
     * Splits on . … ! ? ; without splitting decimals.
     * @param {string} str
     * @returns {string[]} raw parts (untrimmed, possibly empty)
     */
    splitString(str) {
      return splitOutsideDecimals(str)
    },
    /**
     * Returns the trimmed `value` with abbreviations such as A.M. replaced by [~AMAM~].
     * @returns {string}
     */
    getNewValue() {
      return protectAbbreviations(this.value, this.mapObj)
    },
    /**
     * Finds the key for a value; used to turn [~AMAM~] back into A.M.
     * @param {Object} object
     * @param {*} value
     * @returns {string|undefined}
     */
    getKeyByValue(object, value) {
      return findKeyByValue(object, value)
    }
  },
  created() {
    // Split the sample text once so the outputs are populated on first render
    this.handle()
  },
  components: {}
}
</script>
<style lang="less" >
/* The enclosing <style> tag has no `scoped` attribute, so these rules are not limited to this component. */
.ivu-input-wrapper {
  /* Default height for a textarea inside an input wrapper */
  textarea.ivu-input {
    height: 100px;
  }
  /* A wrapper that is the first element of its type among its siblings gets a taller textarea
     (presumably the source-text input at the top of the template; confirm against the rendered markup) */
  &:first-of-type textarea.ivu-input {
    height: 500px;
  }
}
</style>

将一段字符串进行切割,需要满足以下要求:

  • 切分点:英文的. ? ! ; ...
  • 遇到这几种特殊字符,不进行切分①P.E.   ②A.M.   ③P.M.   ④a.m.   ⑤p.m.   ⑥U.K.   ⑦U.S.   ⑧No.
  • 遇到小数,也不进行切分
  • 切分后的结果需要把标点符号带上

思路:在句末标点(. ? ! ;)与其后的空白之间插入一个切割标记(代码中为 `[~splitDot~]`,即把 `". "` 变成 `".[~splitDot~] "`),然后按该标记切割。这样标点会保留在句子末尾;小数里的 `.` 后面不是空白,所以不会被切开。切分前需要先把 A.M. 这类缩写替换成 `[~AMAM~]` 这样的占位符,切分后再还原。

<template>
  <div>
    <!-- Source text to split; two-way bound to `value` -->
    <Input v-model="value" type="textarea" placeholder="Enter something..." />
    <!-- Clicking the button calls the `handle` method -->
    <Button type="primary" @click="handle">文字切分</Button>
    <!-- One textarea per entry of `textareaList`, keyed by array index -->
    <Input v-for="(item,index) of textareaList" :value="item" :key="index" type="textarea" />
  </div>
</template>
<script>
/**
 * Finds the first own enumerable key of `object` whose value is strictly equal to `value`.
 * @param {Object} object - lookup table
 * @param {*} value - value to search for
 * @returns {string|undefined} the matching key, or undefined when nothing matches
 */
function findKeyByValue(object, value) {
  return Object.keys(object).find(key => object[key] === value)
}

/**
 * Applies `swap` to every whitespace-delimited token of `text`, leaving the whitespace untouched.
 * @param {string} text
 * @param {function(string): string} swap - returns the replacement for one token
 * @returns {string}
 */
function mapTokens(text, swap) {
  // The capture group keeps each whitespace run in the array, so join('') restores the
  // original spacing. Splitting on any whitespace (not only ' ') lets a token that touches
  // a newline or tab still be recognised.
  return text.split(/(\s+)/).map(token => swap(token)).join('')
}

/**
 * Trims `text` and replaces every token that is a key of `mapObj` (e.g. "A.M.") with its
 * placeholder (e.g. "[~AMAM~]") so that the dots inside it are not treated as sentence ends.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @returns {string}
 */
function protectAbbreviations(text, mapObj) {
  const source = text == null ? '' : String(text)
  return mapTokens(source.trim(), token =>
    Object.prototype.hasOwnProperty.call(mapObj, token) ? mapObj[token] : token)
}

/**
 * Reverse of protectAbbreviations: turns placeholder tokens back into their abbreviation.
 * Only whole tokens are restored, so a placeholder embedded in a longer word is left alone.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @returns {string}
 */
function restoreAbbreviations(text, mapObj) {
  return mapTokens(text, token => findKeyByValue(mapObj, token) || token)
}

/**
 * Splits `text` into sentences, keeping the terminating punctuation on each sentence.
 *
 * A sentence ends at one of . … ? ! ; that is followed by whitespace. The marker `splitDot`
 * is inserted between that punctuation and the whitespace, and the text is then split on the
 * marker. A decimal such as 2.3 is never cut because its dot is followed by a digit, and
 * abbreviations listed in `mapObj` are hidden behind placeholders while splitting.
 * @param {string} text
 * @param {Object<string, string>} mapObj - abbreviation -> placeholder
 * @param {string} splitDot - marker string inserted at every split point
 * @returns {string[]} trimmed, non-empty sentences
 */
function splitKeepingPunctuation(text, mapObj, splitDot) {
  // A replacer function is used so that a `$` inside the marker is never read as a
  // replacement pattern.
  const marked = protectAbbreviations(text, mapObj)
    .replace(/([.…?!;])\s/g, (whole, mark) => `${mark}${splitDot} `)
  return marked.split(splitDot)
    .map(piece => restoreAbbreviations(piece, mapObj).trim())
    .filter(Boolean)
}

export default {
  data() {
    return {
      // Sample text shown on load
      value: `
         2.3 I have 2.3 中文20.3aaaa[~amam~] apple.
        I have  A.M. 
        A.M. pen;
        en!
        china....
        apple pen.
      `,
      // Split result, one entry per rendered textarea
      textareaList: [],
      // Abbreviations that must not be split, mapped to the dot-free placeholder
      // that stands in for them while splitting
      mapObj: {
        'P.E.': '[~PEPE~]',
        'A.M.': '[~AMAM~]',
        'P.M.': '[~PMPM~]',
        'a.m.': '[~amam~]',
        'p.m.': '[~pmpm~]',
        'U.K.': '[~UKUK~]',
        'U.S.': '[~USUS~]',
        'No.': '[~NoNo~]',
      },
      splitDot: '[~splitDot~]' // marker inserted at every split point
    }
  },
  methods: {
    /**
     * Splits `value` into sentences (punctuation kept) and stores them in `textareaList`.
     */
    handle() {
      // `value` is read from the component here; the previous version referenced an
      // undeclared `value` variable and threw a ReferenceError on every call.
      const { value, mapObj, splitDot } = this
      this.textareaList = splitKeepingPunctuation(value, mapObj, splitDot)
    },
    /**
     * Returns the trimmed `value` with abbreviations such as A.M. replaced by [~AMAM~].
     * @returns {string}
     */
    getNewValue() {
      return protectAbbreviations(this.value, this.mapObj)
    },
    /**
     * Finds the key for a value; used to turn [~AMAM~] back into A.M.
     * @param {Object} object
     * @param {*} value
     * @returns {string|undefined}
     */
    getKeyByValue(object, value) {
      return findKeyByValue(object, value)
    }
  },
  created() {
    // Split the sample text once so the list is populated on first render
    this.handle()
  },
  components: {}
}
</script>
<style lang="less" >
/* The enclosing <style> tag has no `scoped` attribute, so these rules are not limited to this component. */
.ivu-input-wrapper {
  /* Default height for a textarea inside an input wrapper */
  textarea.ivu-input {
    height: 100px;
  }
  /* A wrapper that is the first element of its type among its siblings gets a taller textarea
     (presumably the source-text input at the top of the template; confirm against the rendered markup) */
  &:first-of-type textarea.ivu-input {
    height: 300px;
  }
}
</style>