参考:
目的: 搭建一个本地文本搜索引擎,希望能够快速搜索到本地的文本文件,并且提供初步预览。搜索到文件实际路径,方便后续文本文件修改。
步骤:
-
-
elasticsearch安装
为了方便(减少linux环境不一致可能带来的问题),直接使用docker安装,以后再想办法源码安装elasticsearch到特定路径
-
首先确定安装路径
mkdir ~/Install/Disk_essearch
-
编辑docker-compose文件
路径~/Install/Disk_essearch/Es7/docker-compose.yml
:
-
version: '3'
services:
  elasticsearch:
    image: elasticsearch:7.1.1
    environment:
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.type=single-node
    restart: always
    ports:
      - "9200:9200"
    volumes:
      - {prefix}/Es7/es_data:/usr/share/elasticsearch/data
volumes:
  elasticsearch:
文件保存之后,进入 Es7 目录,运行 docker-compose up -d 启动。
然后测试 elasticsearch 是否成功启动:curl http://localhost:9200
-
- FSCrawler安装
代码:
- FSCrawler安装
wget -O fscrawler-es7-2.10-20220110.183202-1.zip "https://s01.oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es7/2.10-SNAPSHOT/fscrawler-es7-2.10-20220110.183202-1.zip"
unzip fscrawler-es7-2.10-20220110.183202-1.zip -o
cp -frT fscrawler-es7-2.10-SNAPSHOT ~/Install/Disk_essearch/fscrawler-es7-2.10
~/Install/Disk_essearch/fscrawler-es7-2.10/bin/fscrawler file_job --config_dir ~/Install/fscrawler/fscrawler-es7-2.10/data
#edit ~/Install/fscrawler/fscrawler-es7-2.10/data/file_job/_settings.yaml
~/Install/Disk_essearch/fscrawler-es7-2.10/bin/fscrawler file_job --config_dir ~/Install/fscrawler/fscrawler-es7-2.10/data
其中,第一次运行fscrawler会中断,主要是为了产生 _settings.yaml 配置文件;编辑好配置文件之后,再运行fscrawler就会顺利启动。
测试fscrawler是否连接上了elasticsearch:curl http://localhost:9200/file_job/_count
改好的配置文件_settings.yaml
内容:
name: "file_job"
fs:
  url: "/e/Temp/"
  update_rate: "15m"
  includes:
  - "**/*.page"
  - "**/*.md"
  excludes:
  - "*/~*"
  json_support: false
  filename_as_id: false
  add_filesize: true
  remove_deleted: true
  add_as_inner_object: false
  store_source: false
  index_content: true
  attributes_support: false
  raw_metadata: false
  xml_support: false
  index_folders: true
  lang_detect: true
  continue_on_error: false
  ocr:
    language: "eng"
    enabled: true
    pdf_strategy: "ocr_and_text"
  follow_symlinks: false
elasticsearch:
  nodes:
  - url: "http://127.0.0.1:9200"
  bulk_size: 100
  flush_interval: "5s"
  byte_size: "10mb"
  ssl_verification: true
其中的两个url(fs 下的 url 和 elasticsearch.nodes 下的 url)以及 includes 列表,需要更改成你自己的路径和文件匹配规则
-
- 对elasticsearch进行查询
原文是使用searchui:github.com/elastic/search-ui,但是我看不懂,只能自己写个客户端,下面是kotlin代码
- 对elasticsearch进行查询
依赖:
@file:Repository("http://maven.aliyun.com/nexus/content/groups/public")
@file:DependsOn("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.4.3")
@file:DependsOn("org.openjfx:javafx-controls:15")
@file:DependsOn("com.google.guava:guava:30.1-jre")
@file:DependsOn("org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.9")
@file:DependsOn("com.alibaba:fastjson:1.2.3")
File_essearch_client.kt
代码:
import org.apache.http.HttpHost
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.action.search.SearchRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.index.query.QueryBuilders
import org.elasticsearch.search.builder.SearchSourceBuilder
import javafx.application.Application
import javafx.scene.Scene
import javafx.stage.Stage
import com.alibaba.fastjson.JSONObject
/**
 * JavaFX application object for the disk-search client.
 *
 * It owns the search window ([my_viewer]) and a thread pool ([my_exec]), and
 * registers both itself and the viewer on the shared [my_eventBus].
 *
 * Instances are normally created by [launchApp], which calls only `init()` and
 * `start()`. Nothing in this file calls [stop], so whoever manages the
 * application lifetime has to invoke it to release the thread pool.
 */
class MyClientController : Application() {
    /** Thread pool owned by this controller; released in [stop]. */
    var my_exec: java.util.concurrent.ExecutorService =
        java.util.concurrent.Executors.newCachedThreadPool()

    /** The search window; assigned by [my_initViewer]. */
    lateinit var my_viewer: MyClientGameViewer

    /**
     * Builds the search window on [primaryStage] and installs a close handler
     * that removes this controller and its viewer from the event bus.
     */
    override fun start(primaryStage: Stage) {
        // Wire the viewer to THIS instance. Creating another controller here
        // would allocate a second thread pool and register an object on the
        // event bus that nobody ever unregisters.
        my_initViewer(primaryStage)
        primaryStage.setOnCloseRequest {
            println("Window is closing...")
            my_eventBus.unregister(my_viewer)
            my_eventBus.unregister(this)
            // Custom close logic can be added here.
        }
    }

    /** Shuts down the thread pool before delegating to the base implementation. */
    override fun stop() {
        my_exec.shutdown()
        super.stop()
    }

    /**
     * Creates the viewer for [stage], links it back to this controller, and
     * registers both objects on [my_eventBus].
     *
     * @return the newly created viewer (also stored in [my_viewer]).
     */
    fun my_initViewer(stage: javafx.stage.Stage): MyClientGameViewer {
        val viewer = MyClientGameViewer(stage)
        viewer.my_controller = this
        my_viewer = viewer
        my_eventBus.register(viewer)
        my_eventBus.register(this)
        return viewer
    }

    companion object {
        /** Event bus shared by controllers and viewers. */
        var my_eventBus = com.google.common.eventbus.EventBus()

        /** Initial scene width passed to the viewer's `Scene`. */
        var MY_GAMEVIEWER_WIDTH: Double = 500.0

        /** Initial scene height passed to the viewer's `Scene`. */
        var MY_GAMEVIEWER_HEIGHT: Double = 20.0

        /**
         * Starts the JavaFX toolkit and runs [appClass] on it without going
         * through `Application.launch`.
         *
         * Implicit exit is disabled, so closing the last window does not shut
         * the toolkit down. Any exception thrown while creating or starting the
         * application is printed, not rethrown.
         *
         * @param appClass application class; must have a no-argument constructor.
         * @param args accepted for call-site compatibility; currently unused.
         */
        @JvmStatic
        fun <T : Application> launchApp(appClass: Class<T>, vararg args: String) {
            com.sun.javafx.application.PlatformImpl.setImplicitExit(false)
            com.sun.javafx.application.PlatformImpl.startup {
                try {
                    // Class.newInstance() is deprecated; go through the constructor.
                    val app: Application = appClass.getDeclaredConstructor().newInstance()
                    app.init()
                    val primaryStage = Stage()
                    Application.setUserAgentStylesheet("")
                    app.start(primaryStage)
                } catch (ex: Exception) {
                    ex.printStackTrace()
                }
            }
        }
    }
}
/**
 * Search window: a host field, a keyword field and a search button laid out at
 * the bottom of a [javafx.scene.layout.BorderPane].
 *
 * Pressing the button runs [my_search], which queries the `file_job` index and
 * writes the hits to `result.html` in the working directory.
 *
 * @param stage the stage that receives the scene; it is shown by the constructor.
 */
class MyClientGameViewer(stage: Stage) {
    /** Back-reference to the owning controller; set by the controller after construction. */
    lateinit var my_controller: MyClientController
    var my_world_pane: javafx.scene.layout.Pane = javafx.scene.layout.Pane()
    var my_iptextField = javafx.scene.control.TextField()
    var my_keywordField = javafx.scene.control.TextField()
    var my_borderPane = javafx.scene.layout.BorderPane()
    var my_searchButton: javafx.scene.control.Button = javafx.scene.control.Button("搜索")

    init {
        val fieldStyle = "-fx-border-color: black; -fx-border-width: 1px;"
        my_iptextField.style = fieldStyle
        my_keywordField.style = fieldStyle
        my_searchButton.setOnAction {
            println("connectButton")
            my_search()
        }

        val inputRow = javafx.scene.layout.HBox(10.0)
        inputRow.children.addAll(my_iptextField, my_keywordField, my_searchButton)
        my_borderPane.bottom = inputRow
        my_borderPane.center = my_world_pane

        stage.title = "Disk Search Client"
        stage.scene = Scene(
            my_borderPane,
            MyClientController.MY_GAMEVIEWER_WIDTH,
            MyClientController.MY_GAMEVIEWER_HEIGHT
        )
        stage.show()
    }

    /**
     * Escapes the five characters that are special in XML/HTML text and
     * attribute values (`&`, `<`, `>`, `"`, `'`).
     *
     * The input is scanned once, so an ampersand produced by one replacement is
     * never escaped a second time.
     *
     * @param string raw text.
     * @return the text with those characters replaced by entities.
     */
    fun escapeXml(string: String): String = buildString(string.length) {
        for (ch in string) {
            when (ch) {
                '&' -> append("&amp;")
                '<' -> append("&lt;")
                '>' -> append("&gt;")
                '"' -> append("&quot;")
                '\'' -> append("&#39;")
                else -> append(ch)
            }
        }
    }

    /**
     * Runs a match query for the keyword field against the `content` field of
     * the `file_job` index on port 9200 of the host in the host field, then
     * writes up to 50 hits as an HTML list to `result.html`.
     *
     * A blank host field falls back to `localhost`. The call blocks the calling
     * thread until the search and the file write have finished; exceptions from
     * the client or the file write propagate to the caller.
     */
    fun my_search() {
        val host = my_iptextField.text.orEmpty().trim().ifEmpty { "localhost" }
        val keyword = my_keywordField.text.orEmpty()

        val searchRequest = SearchRequest("file_job").source(
            SearchSourceBuilder()
                .size(50)
                .query(QueryBuilders.matchQuery("content", keyword))
        )

        // `use` closes the client even when the search throws.
        val searchHits = RestHighLevelClient(
            RestClient.builder(HttpHost(host, 9200, "http"))
        ).use { client ->
            client.search(searchRequest, RequestOptions.DEFAULT).hits.hits
        }
        println("size:${searchHits.size}")

        val items = buildString {
            searchHits.forEachIndexed { index, hit ->
                println(index + 1)
                println(hit.id)
                append(my_renderHit(hit))
            }
        }

        val script = """
<script>
var change_display = function(id){
    var attr = document.getElementById(id).getAttribute("style")
    if(attr == "display: none"){
        document.getElementById(id).setAttribute("style", "display: block")
    }else{
        document.getElementById(id).setAttribute("style", "display: none")
    }
}
</script>
"""
        // The keyword is user input, so it is escaped before being embedded.
        val html = """
<html>
<head>
<meta charset='utf-8'/>
${script}
</head>
<body>
<h4><pre>${escapeXml(keyword)}</pre></h4>
${items}
</body>
</html>
"""
        java.io.File("result.html").writeText(html, Charsets.UTF_8)
    }

    /**
     * Renders one search hit as an `<li>` with a link to the file, a "fold"
     * toggle, and the escaped file content in an initially hidden `<pre>`.
     *
     * Returns an empty string when the hit carries no source document.
     */
    private fun my_renderHit(hit: org.elasticsearch.search.SearchHit): String {
        val doc = JSONObject.parseObject(hit.sourceAsString) ?: return ""
        val rawUrl = doc.getJSONObject("file")?.getString("url").orEmpty()
        val content = escapeXml(doc.getString("content").orEmpty())

        // Turn the crawler's /e/... and /d/... mount paths into drive-letter URLs.
        val fileUrl = escapeXml(
            rawUrl.replaceFirst("///e/", "///e:/").replaceFirst("///d/", "///d:/")
        )

        // The element id must be passed to change_display as a quoted JS string.
        val preId = "${hit.id}_pre"
        return """
<li>
<a href="${fileUrl}" onclick="change_display('${preId}')">${fileUrl}</a>
<a href="#" onclick="change_display('${preId}')">fold</a>
<pre id="${preId}" style="display: none">${content}</pre>
</li>
"""
    }
}
/**
 * Program entry point: hands [MyClientController] to its own `launchApp`
 * helper together with a single empty-string argument.
 */
fun main() {
    val appClass = MyClientController::class.java
    MyClientController.launchApp(appClass, "")
}