Lucene

163 阅读1分钟

Java使用Lucene

package Baseline;

import CommonUtil.ThreeTuple;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

/**
 * Small helper around an in-memory Lucene index holding (title, content) documents.
 *
 * <p>Usage as the code is written: add documents with {@link #addDoc(String, String)},
 * call {@link #closeIndexWriter()}, then query with {@link #getRelatedDocs(ArrayList)}.
 * NOTE(review): Lucene readers only see committed segments, so querying before the writer
 * has been closed (or committed) is expected to fail or to miss documents — confirm against
 * the Lucene version in use.
 */
public class LuceneTool {

    /** Maximum number of hits requested from the searcher for each query string. */
    private static final int HITS_PER_PAGE = 1000;

    private final StandardAnalyzer analyzer = new StandardAnalyzer();
    private final Directory index = new RAMDirectory();

    private final IndexWriterConfig config = new IndexWriterConfig(analyzer);

    // Created in a field initializer; the IOException it may throw surfaces from the constructor.
    private final IndexWriter w = new IndexWriter(index, config);

    /**
     * Creates the tool together with its in-memory index and index writer.
     *
     * @throws IOException if the index writer cannot be created
     */
    public LuceneTool() throws IOException {
    }

    /**
     * Adds one document to the index.
     *
     * @param title text stored in the {@code "title"} field as a {@link TextField}; this is the
     *              field that {@link #getRelatedDocs(ArrayList)} searches
     * @param isbn  value stored in the {@code "content"} field as a {@link StringField}
     *              (despite the parameter name, the field is called {@code "content"})
     * @throws IOException if the writer fails to add the document
     */
    public void addDoc(String title, String isbn) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("title", title, Field.Store.YES));
        doc.add(new StringField("content", isbn, Field.Store.YES));
        w.addDocument(doc);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void closeIndexWriter() throws IOException {
        w.close();
    }

    /**
     * Runs every string in {@code stringArrayList} as a query against the {@code "title"} field
     * and collects the stored title/content of each hit.
     *
     * <p>Each hit's {@code "content"} value is also printed to standard output. The third tuple
     * component (intended as a similarity value) is always {@code null}.
     *
     * @param stringArrayList query strings, each parsed with the classic {@link QueryParser}
     * @return set of tuples (title, content, {@code null}) for all hits of all queries;
     *         empty when no query strings are given
     * @throws ParseException if a query string cannot be parsed
     * @throws IOException    if the index cannot be opened or read
     */
    public Set<ThreeTuple<String, String, Double>> getRelatedDocs(ArrayList<String> stringArrayList) throws ParseException, IOException {
        // first: title  second: content  third: similarity
        Set<ThreeTuple<String, String, Double>> relatedDocsSet = new HashSet<>();

        // Nothing to search for: return without touching the index at all.
        if (stringArrayList.isEmpty()) {
            return relatedDocsSet;
        }

        // Open the reader once for all queries and guarantee it is closed afterwards;
        // opening one reader per query without closing it leaks resources.
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("title", analyzer);

            for (String queryWord : stringArrayList) {
                Query q = parser.parse(queryWord);
                TopDocs docs = searcher.search(q, HITS_PER_PAGE);

                for (ScoreDoc hit : docs.scoreDocs) {
                    Document d = searcher.doc(hit.doc);
                    relatedDocsSet.add(new ThreeTuple<>(d.get("title"), d.get("content"), null));
                    System.out.println(d.get("content"));
                }
            }
        }
        return relatedDocsSet;
    }
}