Lucene:全文检索引擎

61 阅读2分钟

1.导入jar包

image.png

2.写一个实体类,存储文章的编号(aid)、标题和内容,并实现序列化接口(Serializable)

package entity;

import java.io.Serializable;

public class Article implements Serializable {
    /**
     * 
     */
    private static final long serialVersionUID = -360638738368632278L;
    private long aid;
    private String Title;
    private String Content;
    public long getAid() {
        return aid;
    }
    public void setAid(long aid) {
        this.aid = aid;
    }
    public String getTitle() {
        return Title;
    }
    public void setTitle(String title) {
        Title = title;
    }
    public String getContent() {
        return Content;
    }
    public void setContent(String content) {
        Content = content;
    }
    public Article(long aid, String title, String content) {
        super();
        this.aid = aid;
        Title = title;
        Content = content;
    }
    public Article() {
        super();
    }
    @Override
    public String toString() {
        return "Article [aid=" + aid + ", Title=" + Title + ", Content="
                + Content + "]";
    }

}

3.存储和检索代码

package lucenetest;


import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import entity.Article;



/**
 * Minimal Lucene walkthrough:
 * 1. put an article object into the index;
 * 2. retrieve the article object back from the index.
 *
 * @author likang
 * @date   2017-8-17 11:13:13 PM
 */
public class helloworld {

    /** Location of the index directory used by both tests. */
    private static final String INDEX_PATH = "./Dirindex";

    /**
     * Writes one article into the index.
     *
     * <p>Steps: create an article, open an IndexWriter, convert the article into a
     * document, add the document to the index, release the resources.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testCreateIndex() throws Exception {
        Article article = new Article();
        article.setAid(1L);
        article.setTitle("lucene是一个全文检索引擎");
        article.setContent("百度,谷歌都是很好的全文检索引擎");

        // IndexWriter argument 1: where the index lives.
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            // IndexWriter argument 2: the analyzer (tokenizer).
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            // IndexWriter argument 3: limit on the size of a field in the index.
            IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
            try {
                indexWriter.addDocument(toDocument(article));
            } finally {
                // Always close the writer, even when adding the document fails;
                // otherwise the writer (and the index lock it holds) stays open.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Looks articles up in the index by keyword and prints them.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void testSearchIndex() throws Exception {
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                // First argument: version; second argument: the field to search in.
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
                // Alternative that searches several fields at once (note that "title" is
                // written with Index.NO by testCreateIndex, so it would not match there):
                // QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30,new String[]{"title","content"},analyzer);

                // The keyword to search for.
                Query query = queryParser.parse("百度");
                // Second argument: how many of the top hits to return (TopDocs = Top Documents).
                // topDocs.totalHits holds the total number of records matching the keyword.
                TopDocs topDocs = indexSearcher.search(query, 1);

                List<Article> articles = new ArrayList<Article>();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    // scoreDoc.doc is the document number of the hit; use it to load the document.
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    articles.add(toArticle(document));
                }

                for (Article article : articles) {
                    System.out.println(article.getAid());
                    System.out.println(article.getTitle());
                    System.out.println(article.getContent());
                }
            } finally {
                // The original never closed the searcher.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Converts an article into a Lucene document.
     *
     * <p>Field arguments: the name stored in the index, the value stored in the index,
     * then the store and index options:
     * <ul>
     *   <li>Store.YES: value is saved; can be queried and printed</li>
     *   <li>Store.NO: value is not saved; can be queried but not printed, which saves space</li>
     *   <li>Index.NOT_ANALYZED: indexed directly without tokenizing</li>
     *   <li>Index.ANALYZED: tokenized, then indexed</li>
     *   <li>Index.NO: not indexed at all, so it can never be found by a search</li>
     *   <li>Index.NOT_ANALYZED_NO_NORMS: like Index.NOT_ANALYZED but without norms;
     *       saves memory but does not support boosting; very commonly used</li>
     *   <li>Index.ANALYZED_NO_NORMS: like Index.ANALYZED but without norms;
     *       saves memory but does not support boosting</li>
     * </ul>
     */
    private static Document toDocument(Article article) {
        Document document = new Document();
        document.add(new Field("aid", Long.toString(article.getAid()), Store.YES, Index.NOT_ANALYZED));
        // NOTE: the title is stored but not indexed, so it is returned with hits
        // but cannot itself be searched.
        document.add(new Field("title", article.getTitle(), Store.YES, Index.NO));
        document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        return document;
    }

    /** Converts a stored Lucene document back into an article. */
    private static Article toArticle(Document document) {
        Article article = new Article();
        article.setAid(Long.parseLong(document.get("aid")));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }
}