本地文件搜索系统搭建

29 阅读3分钟

参考:

目的: 搭建一个本地文本搜索引擎,希望能够快速搜索到本地的文本文件,并且提供初步预览。搜索到文件实际路径,方便后续文本文件修改。

步骤:

    1. elasticsearch安装
      为了方便(减少linux环境不一致可能带来的问题),直接使用docker安装,以后再想办法源码安装elasticsearch到特定路径

    • 首先确定安装路径 mkdir ~/Install/Disk_essearch

    • 编辑docker-compose文件
      路径~/Install/Disk_essearch/Es7/docker-compose.yml

# Single-node Elasticsearch 7.1.1 used as the backend of the local file search.
version: '3'
services:
  elasticsearch:
    image: elasticsearch:7.1.1
    environment:
      # List-form entries are KEY=VALUE with no spaces around '=' and no inner
      # quotes; otherwise the quotes/spaces become part of the variable itself
      # and the JVM heap options are not applied.
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
      - discovery.type=single-node
    restart: always
    ports:
      - "9200:9200"
    volumes:
      # {prefix} is a placeholder: replace it with the absolute path of your
      # install directory before running `docker-compose up -d`.
      - {prefix}/Es7/es_data:/usr/share/elasticsearch/data
volumes:
  elasticsearch:

文本保存之后进入Es7目录运行docker-compose up -d启动。然后测试elasticsearch是否成功启动curl http://localhost:9200

    2. FSCrawler安装
      代码:
# Download the FSCrawler 2.10 snapshot build for Elasticsearch 7.
wget -O fscrawler-es7-2.10-20220110.183202-1.zip "https://s01.oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es7/2.10-SNAPSHOT/fscrawler-es7-2.10-20220110.183202-1.zip"

# -o (overwrite without prompting) must come before the archive name:
# arguments after the archive are treated as names of members to extract.
unzip -o fscrawler-es7-2.10-20220110.183202-1.zip

# Copy the extracted tree to the install location.
cp -frT fscrawler-es7-2.10-SNAPSHOT ~/Install/Disk_essearch/fscrawler-es7-2.10

# First run: stops early, but generates file_job/_settings.yaml under the config dir.
~/Install/Disk_essearch/fscrawler-es7-2.10/bin/fscrawler file_job --config_dir ~/Install/fscrawler/fscrawler-es7-2.10/data
#edit ~/Install/fscrawler/fscrawler-es7-2.10/data/file_job/_settings.yaml
# Second run: starts the crawler with the edited settings.
~/Install/Disk_essearch/fscrawler-es7-2.10/bin/fscrawler file_job --config_dir ~/Install/fscrawler/fscrawler-es7-2.10/data

其中,第一次运行fscrawler会中断,主要是为了产生_settings.yaml配置文件,编辑好配置文件之后,再运行fscrawler就会顺利启动。测试fscrawler是否连接上了elasticsearch:curl http://localhost:9200/file_job/_count

改好的配置文件_settings.yaml内容:

# FSCrawler settings for the job named "file_job".
# Values marked "customise" are the ones that must be adapted to your machine.
name: "file_job"
fs:
  # Customise: directory to crawl.
  url: "/e/Temp/"
  update_rate: "15m"
  # Customise: glob patterns of the files to index.
  includes:
  - "**/*.page"
  - "**/*.md"
  excludes:
  - "*/~*"
  json_support: false
  filename_as_id: false
  add_filesize: true
  remove_deleted: true
  add_as_inner_object: false
  store_source: false
  index_content: true
  attributes_support: false
  raw_metadata: false
  xml_support: false
  index_folders: true
  lang_detect: true
  continue_on_error: false
  ocr:
    language: "eng"
    enabled: true
    pdf_strategy: "ocr_and_text"
  follow_symlinks: false
elasticsearch:
  nodes:
  # Customise: address of the Elasticsearch node the crawler writes to.
  - url: "http://127.0.0.1:9200"
  bulk_size: 100
  flush_interval: "5s"
  byte_size: "10mb"
  ssl_verification: true

其中里面两个url(fs.url 和 elasticsearch.nodes 下的 url),以及一个includes,需要更改成你自己的路径和文件类型

    3. 对elasticsearch进行查询
      原文是使用searchui:github.com/elastic/search-ui,但是我看不懂,只能自己写个客户端,下面是kotlin代码

依赖:

@file:Repository("http://maven.aliyun.com/nexus/content/groups/public")

@file:DependsOn("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.4.3")
@file:DependsOn("org.openjfx:javafx-controls:15")
@file:DependsOn("com.google.guava:guava:30.1-jre")
@file:DependsOn("org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.9")
@file:DependsOn("com.alibaba:fastjson:1.2.3")

File_essearch_client.kt代码:

import org.apache.http.HttpHost
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.action.search.SearchRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.index.query.QueryBuilders
import org.elasticsearch.search.builder.SearchSourceBuilder
import javafx.application.Application
import javafx.scene.Scene

import javafx.stage.Stage
import com.alibaba.fastjson.JSONObject

/**
 * JavaFX application class of the search client.
 *
 * Owns the worker thread pool and connects the [MyClientGameViewer] window to the
 * event bus shared through the companion object.
 */
class MyClientController : Application() {
    lateinit var my_exec: java.util.concurrent.ExecutorService
    lateinit var my_viewer: MyClientGameViewer

    // Guards my_shutdown() so the close handler and stop() can both call it safely.
    private var my_closed = false

    companion object {
        var my_eventBus = com.google.common.eventbus.EventBus()
        var MY_GAMEVIEWER_WIDTH: Double = 500.0
        var MY_GAMEVIEWER_HEIGHT: Double = 20.0

        /**
         * Starts the JavaFX toolkit and runs [appClass] on it without going through
         * `Application.launch`.
         *
         * @param appClass application class; must have a no-argument constructor.
         * @param args accepted for call-site compatibility; currently not used.
         */
        @JvmStatic
        fun <T : Application> launchApp(appClass: Class<T>, vararg args: String) {
            // Implicit exit is disabled, so the window's close handler has to
            // terminate the platform explicitly (see start()).
            // NOTE(review): PlatformImpl is internal JavaFX API.
            com.sun.javafx.application.PlatformImpl.setImplicitExit(false)
            com.sun.javafx.application.PlatformImpl.startup {
                try {
                    // Class.newInstance() is deprecated; go through the constructor.
                    val app: Application = appClass.getDeclaredConstructor().newInstance()
                    app.init()

                    val primaryStage = Stage()
                    Application.setUserAgentStylesheet("")

                    app.start(primaryStage)
                } catch (ex: Exception) {
                    ex.printStackTrace()
                }
            }
        }
    }

    init {
        my_exec = java.util.concurrent.Executors.newCachedThreadPool()
    }

    /**
     * Builds the viewer on [primaryStage] and installs the close handler.
     *
     * The viewer is wired to this instance; no second controller is created.
     */
    override fun start(primaryStage: Stage) {
        my_initViewer(primaryStage)

        // Window close listener: release resources, then end the JavaFX runtime,
        // because implicit exit was switched off in launchApp().
        primaryStage.setOnCloseRequest {
            System.out.println("Window is closing...")
            my_shutdown()
            javafx.application.Platform.exit()
        }
    }

    /** Releases the controller's resources when the application is stopped. */
    override fun stop() {
        super.stop()
        my_shutdown()
    }

    /**
     * Creates the viewer for [stage], links it to this controller and registers
     * both objects on the shared event bus.
     *
     * @return the newly created viewer (also stored in [my_viewer]).
     */
    fun my_initViewer(stage: javafx.stage.Stage): MyClientGameViewer {
        my_viewer = MyClientGameViewer(stage)
        my_viewer.my_controller = this
        my_eventBus.register(my_viewer)
        my_eventBus.register(this)
        return my_viewer
    }

    /** Unregisters from the event bus and shuts the thread pool down; runs at most once. */
    private fun my_shutdown() {
        if (my_closed) return
        my_closed = true
        // Viewer and controller are registered together in my_initViewer().
        if (::my_viewer.isInitialized) {
            my_eventBus.unregister(my_viewer)
            my_eventBus.unregister(this)
        }
        my_exec.shutdown()
    }
}

/**
 * Main window of the search client: a host field, a keyword field and a search button.
 *
 * Pressing the button runs [my_search], which queries the `file_job` index and writes
 * the hits to `result.html` in the working directory.
 */
class MyClientGameViewer(stage: Stage) {
    lateinit var my_controller: MyClientController
    var my_world_pane: javafx.scene.layout.Pane = javafx.scene.layout.Pane()
    var my_iptextField = javafx.scene.control.TextField()
    var my_keywordField = javafx.scene.control.TextField()
    var my_borderPane = javafx.scene.layout.BorderPane()
    var my_searchButton: javafx.scene.control.Button = javafx.scene.control.Button("搜索")

    // Compiled once instead of on every hit.
    private val my_driveE = Regex("///e/")
    private val my_driveD = Regex("///d/")

    init {
        val fieldStyle = "-fx-border-color: black; -fx-border-width: 1px;"
        my_iptextField.style = fieldStyle
        my_keywordField.style = fieldStyle
        my_searchButton.setOnAction {
            println("connectButton")
            my_search()
        }

        // Input row at the bottom, result pane in the centre.
        val inputRow = javafx.scene.layout.HBox(10.0)
        inputRow.children.addAll(my_iptextField, my_keywordField, my_searchButton)
        my_borderPane.bottom = inputRow
        my_borderPane.center = my_world_pane

        stage.title = "Disk Search Client"
        stage.scene = Scene(
            my_borderPane,
            MyClientController.MY_GAMEVIEWER_WIDTH,
            MyClientController.MY_GAMEVIEWER_HEIGHT,
        )
        stage.show()
    }

    /**
     * Escapes the five XML/HTML special characters (`& < > " '`) in [string].
     *
     * @return the escaped text, safe to place in element content or a quoted attribute.
     */
    fun escapeXml(string: String): String = buildString(string.length) {
        for (ch in string) {
            when (ch) {
                '&' -> append("&amp;")
                '<' -> append("&lt;")
                '>' -> append("&gt;")
                '"' -> append("&quot;")
                '\'' -> append("&apos;")
                else -> append(ch)
            }
        }
    }

    /**
     * Searches the `file_job` index for the keyword field's text (match query on
     * `content`, at most 50 hits) and writes the result list to `result.html`.
     *
     * The host comes from the host field; a blank field falls back to `127.0.0.1`.
     * The call is synchronous: it blocks the caller until the search returns and
     * the file is written. Exceptions from the client propagate to the caller.
     */
    fun my_search() {
        val host = my_iptextField.text.orEmpty().trim().ifBlank { "127.0.0.1" }
        val keyword = my_keywordField.text.orEmpty()

        val searchRequest = SearchRequest("file_job").source(
            SearchSourceBuilder()
                .size(50)
                .query(QueryBuilders.matchQuery("content", keyword))
        )

        // `use` closes the client even when the search throws.
        val searchHits = RestHighLevelClient(
            RestClient.builder(HttpHost(host, 9200, "http"))
        ).use { client ->
            client.search(searchRequest, RequestOptions.DEFAULT).hits.hits
        }
        println("size:${searchHits.size}")

        val xml = buildString {
            searchHits.forEachIndexed { index, hit ->
                println(index + 1)
                println(hit.id)
                append(my_renderHit(hit.id.orEmpty(), hit.sourceAsString))
            }
        }

        val script = """
<script>
var change_display = function(id){
    var attr = document.getElementById(id).getAttribute("style")
    if(attr == "display: none"){
        document.getElementById(id).setAttribute("style", "display: block")
    }else{
        document.getElementById(id).setAttribute("style", "display: none")
    }
}
</script>
        """
        // The keyword is user input, so it is escaped before being embedded.
        val html = """
<html>
    <head>
        <meta charset='utf-8'/>
        ${script}
    </head>
    <body>
        <h4><pre>${escapeXml(keyword)}</pre></h4>
        ${xml}
    </body>
</html> 
        """
        java.io.File("result.html").printWriter(Charsets.UTF_8).use { writer ->
            writer.println(html)
        }
    }

    /**
     * Renders one search hit as an HTML list item: a link to the file, a fold toggle
     * and a hidden `<pre>` with the escaped content.
     *
     * @param id document id of the hit, used to build the `<pre>` element id.
     * @param sourceJson the hit's `_source` as JSON; a missing source, `file.url` or
     *   `content` is rendered as empty text instead of failing.
     */
    private fun my_renderHit(id: String, sourceJson: String?): String {
        val doc: JSONObject? = sourceJson?.let { com.alibaba.fastjson.JSONObject.parseObject(it) }
        val rawPath = doc?.getJSONObject("file")?.getString("url").orEmpty()
        val content = doc?.getString("content").orEmpty()

        // Turn "/e/..." and "/d/..." style paths into drive-letter form ("e:/", "d:/").
        var filePath = my_driveE.replaceFirst(rawPath, "///e:/")
        filePath = my_driveD.replaceFirst(filePath, "///d:/")

        val safeId = escapeXml(id)
        val safePath = escapeXml(filePath)
        val safeContent = escapeXml(content)
        // The element id is passed to change_display as a quoted JS string in both links.
        return """
            <li>
                <a href="${safePath}" onclick="change_display('${safeId}_pre')">${safePath}</a>
                <a href="#" onclick="change_display('${safeId}_pre')">fold</a>
                <pre id="${safeId}_pre" style="display: none">${safeContent}</pre>
            </li>
"""
    }

}
/** Script entry point: starts the JavaFX client, passing a single empty argument. */
fun main() {
    val launchArgs = arrayOf("")
    MyClientController.launchApp(MyClientController::class.java, *launchArgs)
}