本篇分享一个 HanLP 分词工具的应用案例:简单来说就是做一个图库,让商家能够轻松方便地配置商品的图片,最好是可以一键完成配置。
1、图库建设:至少要有图片吧,图片肯定要有关联的商品名称、商品类别、商品规格、关键字等信息。
2、商品分词算法:由于商品名称是商家自己设置的,不是规范的,所以不可能完全匹配,要有好的分词库来找出关键字。还有一点,分词库要能够自定义词库,最好能动态添加。如果读者不知道什么是分词,请自行百度,本文不普及这个。
3、推荐匹配度算法:肯定要最匹配的放在前面,而且要有匹配度分数。商家肯定有图库没有的商品,自动匹配的时候,不能随便配置不相关的图片。
-- Image gallery table searched by the product/image matching queries.
-- NOTE(review): the original per-column COMMENT '...' clauses and the table
-- COMMENT were truncated in this copy (unterminated string literals), which
-- made the statement unparsable. They are replaced by line comments below.
-- Descriptions marked "presumably" are inferred from the column name only
-- and should be confirmed against the real schema.
CREATE TABLE `wj_tbl_gallery` (
  `gallery_id` int(11) NOT NULL AUTO_INCREMENT,       -- surrogate primary key
  `fileid` int(11) NOT NULL,                          -- id of the stored image file (indexed by idx_fileid)
  `ptype` tinyint(4) NOT NULL DEFAULT '0',            -- presumably a picture/product type code; TODO confirm
  `materialsort` varchar(50) DEFAULT NULL,            -- presumably the product category; TODO confirm
  `materialbrand` varchar(50) DEFAULT NULL,           -- presumably the product brand; TODO confirm
  `materialname` varchar(100) NOT NULL,               -- product name; used as the tie-break sort key in searches
  `material_spec` varchar(50) DEFAULT NULL,           -- presumably the product specification; TODO confirm
  `material_allname` varchar(200) DEFAULT NULL,       -- text column the search queries match against
  `status` tinyint(4) NOT NULL DEFAULT '0',           -- search queries only return rows with status = 0
  `updatedatetime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- last modification time
  `keyword` varchar(200) DEFAULT NULL,                -- comma-separated keywords loaded into the custom segmentation dictionary
  `bstorage` tinyint(4) NOT NULL DEFAULT '0',         -- dictionary loader only reads rows with bstorage = 0; meaning TODO confirm
  PRIMARY KEY (`gallery_id`),
  KEY `idx_fileid` (`fileid`)
) ENGINE=InnoDB AUTO_INCREMENT=435 DEFAULT CHARSET=utf8;
简单说一下
material_allname
/**
 * Incrementally loads gallery keywords into the segmenter's custom dictionary.
 *
 * Reads the highest qualifying gallery id from the repository; if it is
 * greater than the last id already processed (SegmentUtils.CACHE_GALLERY_ID),
 * fetches the keyword strings for the ids in (last, max], splits each one on
 * ",", trims and de-duplicates the pieces and passes them to
 * SegmentUtils.insertCustomDictory.
 *
 * The watermark SegmentUtils.CACHE_GALLERY_ID is advanced only after the
 * fetch and the dictionary insert have completed. Previously it was advanced
 * first, so an exception from either step caused that id range to be skipped
 * on every later call.
 */
private void addCustomerDictory(){
    Integer max = galleryRepository.getMaxGallery();
    if(!CommonUtils.isNotEmpty(max) || max <= 0 || max <= SegmentUtils.CACHE_GALLERY_ID){
        // Nothing new since the last run.
        return;
    }
    int oldid = SegmentUtils.CACHE_GALLERY_ID;
    List<String> gallery = galleryRepository.getGallery(oldid,max);

    // Map used as a set so that repeated keywords are inserted only once.
    Map<String,Boolean> dicMap = new HashMap<>();
    if(CommonUtils.isNotEmpty(gallery)){
        for(String w : gallery){
            if(!CommonUtils.isNotEmpty(w)){
                continue;
            }
            for(String item : w.split(",")){
                String value = item.trim();
                if(CommonUtils.isNotEmpty(value)){
                    dicMap.put(value, true);
                }
            }
        }
    }

    Set<String> keys = dicMap.keySet();
    if(CommonUtils.isNotEmpty(keys)){
        SegmentUtils.insertCustomDictory(keys);
    }
    // Advance the watermark last: a failure above leaves it untouched so the
    // same id range is retried on the next call.
    SegmentUtils.CACHE_GALLERY_ID = max;
}
/**
 * Returns the keyword strings of the gallery rows whose id lies in the range
 * (bgalleryId, egalleryId] - lower bound exclusive, upper bound inclusive.
 * Only rows with bstorage = 0 and a keyword different from the empty string
 * are selected; rows whose keyword is NULL do not satisfy the != '' test and
 * are therefore excluded as well.
 *
 * @author deng
 * @date 2019-03-13
 * @param bgalleryId exclusive lower bound on galleryId
 * @param egalleryId inclusive upper bound on galleryId
 * @return the keyword value of every matching row
 */
@Query("select keyword from Gallery a where galleryId > ?1 and galleryId<=?2 and a.keyword !='' and bstorage=0")
public List<String> getGallery(int bgalleryId, int egalleryId);
/**
 * Returns the largest gallery_id among rows that have a non-empty keyword and
 * bstorage = 0 (native query: ordered by gallery_id descending, first row).
 *
 * NOTE(review): the result is cached via @Cacheable; how and when the
 * CACHE_GALLERY cache is evicted is not visible here. Until it is evicted,
 * callers will presumably keep seeing the previously cached maximum - confirm
 * that this matches the intended refresh interval of the custom dictionary.
 *
 * @return the maximum qualifying gallery_id; presumably null when no row
 *         qualifies - TODO confirm
 */
@Cacheable(value = CacheConstants.CACHE_GALLERY, keyGenerator = CacheConstants.KEY_GENERATOR_METHOD)
@Query(value = "select gallery_id from wj_tbl_gallery a where a.keyword !='' and bstorage=0 order by gallery_id desc limit 1", nativeQuery = true)
public Integer getMaxGallery();
-- Example of the weighted search statement produced for three search terms.
-- Each term found in material_allname contributes its weight to `score`;
-- `rate` is the score as a percentage of the total weight (4 here).
-- NOTE(review): the literal search terms and per-term weights of the original
-- example were truncated in this copy. The :term1..:term3 placeholders and the
-- weights 2/1/1 (chosen only so that they sum to the divisor 4) are
-- illustrative - replace them with the real values.
SELECT gallery_id, fileid, materialname, material_allname, score
    , ROUND(score / 4 * 100, 0) AS rate
FROM (
    SELECT a.gallery_id, a.fileid, materialname, material_allname
        , IF(LOCATE(:term1, a.material_allname), 2, 0)
          + IF(LOCATE(:term2, a.material_allname), 1, 0)
          + IF(LOCATE(:term3, a.material_allname), 1, 0) AS score
    FROM wj_tbl_gallery a
    WHERE a.STATUS = 0
        AND (a.material_allname LIKE CONCAT('%', :term1, '%')
            OR a.material_allname LIKE CONCAT('%', :term2, '%')
            OR a.material_allname LIKE CONCAT('%', :term3, '%'))
) b
ORDER BY score DESC, materialname
LIMIT 0, 8;
/**
 * Searches the gallery for images matching a product name, trying
 * progressively looser strategies until one returns rows:
 * 1. substring match on the raw search string (rows get rate 100);
 * 2. substring match after removing whitespace and the characters matched
 *    by regEx (rows get rate 90);
 * 3. weighted per-term matching via queryListTerm when segmentList is
 *    non-empty;
 * 4. otherwise, when segmentList is empty, a lookup by materialsortname
 *    (rows get rate 10).
 *
 * @param searchstr        product name to search for
 * @param pagenumber       zero-based page index (the offset is computed as
 *                         pagenumber * pagesize by the private overload)
 * @param pagesize         maximum number of rows to return
 * @param materialsortname fallback text; also passed to queryListTerm
 * @param segmentList      segmented terms of the product name
 * @return the rows of the first strategy that produced a non-empty result,
 *         or the result of the last strategy attempted
 */
public List<Map<String, Object>> queryList(String searchstr, int pagenumber, int pagesize, String materialsortname,
List<Term> segmentList) {
String name = "%" + searchstr + "%";
// Strategy 1: substring match on the search string exactly as given.
List<Map<String, Object>> list = queryList(name, pagenumber, pagesize, 100);
if (CommonUtils.isEmpty(list)) {
searchstr = searchstr.replaceAll("\\s", "");
// NOTE(review): the regex literal on the next line is truncated in this copy
// (unterminated string); restore the original pattern before compiling.
String regEx = "(
searchstr = searchstr.replaceAll(regEx, "");
if (CommonUtils.isNotEmpty(searchstr)) {
name = "%" + searchstr + "%";
// Strategy 2: substring match on the cleaned-up search string.
list = queryList(name, pagenumber, pagesize, 90);
}
// Strategy 3: weighted matching on the individual segmented terms.
if (CommonUtils.isEmpty(list)) {
if (CommonUtils.isNotEmpty(segmentList)) {
list = queryListTerm(pagenumber, pagesize, segmentList, materialsortname);
}
// Strategy 4: only reached when there are no segmented terms.
// NOTE(review): materialsortname is passed without surrounding '%', so the
// LIKE in the private overload matches only rows whose material_allname
// equals it (unless it already contains wildcards) - confirm this is intended.
else if (CommonUtils.isNotEmpty(materialsortname))
list = queryList(materialsortname, pagenumber, pagesize, 10);
}
}
return list;
}
private List<Map<String, Object>> queryList(String name, int pagenumber, int pagesize, int rate) {
String sql = "SELECT\n" + " a.gallery_id,\n" + " a.fileid,a.material_allname,a.materialname \n, " + rate
+ " rate FROM\n" + " wj_tbl_gallery a\n" + "WHERE\n"
+ " a.material_allname LIKE :searchstr and a.status = 0 order by length(materialname) LIMIT :pagenumber,:pagesize ";
Dto param = new BaseDto();
param.put("searchstr", name).put("pagenumber", pagenumber * pagesize).put("pagesize", pagesize);
return namedParameterJdbcTemplate.queryForList(sql, param);
private List<Map<String, Object>> queryListTerm(int pagenumber, int pagesize, List<Term> segmentList,
String materialsortname) {
Dto param = new BaseDto();
StringBuffer sb = new StringBuffer();
StringBuffer wsb = new StringBuffer(" (");
//
int tw = 0;
if (CommonUtils.isNotEmpty(segmentList)) {
for (int i = 0; i < segmentList.size(); i++) {
String str = segmentList.get(i).word;
int w = SegmentUtils.calculateWeight(segmentList.get(i));
str = StringUtils.escapeMysqlSpecialChar(str);
tw += w;
sb.append("if(LOCATE('").append(str).append("', a.material_allname),").append(w).append(",0) ");
wsb.append(" a.material_allname like '%").append(str).append("%' ");
if (i < segmentList.size() - 1) {
sb.append(" + ");
wsb.append(" or ");
}
}
//
//
int emptylen = 3;
if (CommonUtils.isNotEmpty(materialsortname)) {
if (sb.length() > emptylen) {
sb.append(" + ");
wsb.append(" or ");
}
tw += SegmentUtils.DWEIGHT;
materialsortname = StringUtils.escapeMysqlSpecialChar(materialsortname);
sb.append(" if(LOCATE('").append(materialsortname).append("', a.material_allname),")
.append(SegmentUtils.DWEIGHT).append(",0) ");
wsb.append(" a.material_allname like '%").append(materialsortname)
.append("%' ");
}
if (sb.length() > emptylen) {
sb.append(" as score ");
wsb.append(") ");
String scoreSelect = sb.toString();
String scorewhere = wsb.toString();
String sql = "select gallery_id,fileid,materialname,material_allname,score,ROUND(score/" + tw
+ "*100, 0) rate from (SELECT " + " a.gallery_id, "
+ " a.fileid,materialname,material_allname, " + scoreSelect + " FROM "
+ " wj_tbl_gallery a " + "WHERE " + " a.status = 0 and " + scorewhere
+ " ) b order by score desc ,materialname LIMIT " + pagenumber * pagesize + "," + pagesize;
param.put("pagenumber", pagenumber * pagesize).put("pagesize", pagesize);
logger.debug("
List<Map<String, Object>> list = namedParameterJdbcTemplate.queryForList(sql, param);
if (CommonUtils.isNotEmpty(list)) {
return list;
}
}
}
/**
 * Computes the scoring weight of a segmented term.
 *
 * Terms with fewer than three counted Chinese characters get DWEIGHT;
 * longer terms get 2 plus one extra point for every two characters beyond
 * the third. A term tagged Nature.nz whose weight would not exceed DWEIGHT
 * is raised to DWEIGHT + 1.
 *
 * @author deng
 * @date 2019
 * @param term the segmented term; its word and nature fields are read
 * @return the weight of the term
 */
public static int calculateWeight(Term term) {
    // Number of Chinese characters in the term, as counted by countChinese.
    final int chineseCount = countChinese(term.word);

    // Length-based weight.
    int weight;
    if (chineseCount < 3) {
        weight = DWEIGHT;
    } else {
        weight = 2 + (chineseCount - 3) / 2;
    }

    // Terms tagged nz always rank above the default weight.
    final boolean taggedNz = term.nature == Nature.nz;
    if (taggedNz && weight <= DWEIGHT) {
        return DWEIGHT + 1;
    }
    return weight;
}