在Whoosh搜索引擎和jieba分词的基础上使用Django Haystack实现网络搜索引擎
配置要求:
pip install django-haystack
pip install whoosh
pip install jieba
settings.py
# Haystack search engine configuration.
HAYSTACK_CONNECTIONS = {
    'default': {
        # Dotted path of the search engine class to use.
        'ENGINE': 'index.whoosh_cn_backend.WhooshEngine',
        # Index location: <BASE_DIR>/whoosh_index.
        'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
        'INCLUDE_SPELLING': True,
    },
}

# Number of results shown per page.
HAYSTACK_SEARCH_RESULTS_PER_PAGE = 4

# Update the index automatically whenever the database changes.
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
搜索引擎对应WhooshEngine,但Haystack自带的Whoosh后端(WhooshSearchBackend)不提供中文搜索;因此自定义类继承WhooshSearchBackend,利用jieba的ChineseAnalyzer重写build_schema方法,使其支持中文搜索,再由自定义的WhooshEngine引用该后端
#whoosh_cn_backend.py(文件名须与settings.py中ENGINE配置的模块路径index.whoosh_cn_backend保持一致)
from haystack.backends.whoosh_backend import *
from jieba.analyse import ChineseAnalyzer
class MyWhooshSearchBackend(WhooshSearchBackend):
    """Whoosh search backend whose default text fields use jieba's ChineseAnalyzer."""

    def build_schema(self, fields):
        """Build the Whoosh schema for the given search fields.

        Args:
            fields: mapping of field name to search field object; only the
                field objects (the mapping values) are inspected.

        Returns:
            A ``(content_field_name, Schema)`` tuple, where
            ``content_field_name`` is the index name of the field flagged
            with ``document is True`` (empty string if none is flagged).

        Raises:
            SearchBackendError: if ``fields`` contributes no schema entries
                beyond the three built-in ID columns.
        """
        # Columns that are always present, regardless of the indexed fields.
        schema_fields = {
            ID: WHOOSH_ID(stored=True, unique=True),
            DJANGO_CT: WHOOSH_ID(stored=True),
            DJANGO_ID: WHOOSH_ID(stored=True),
        }
        initial_key_count = len(schema_fields)
        content_field_name = ''

        for field in fields.values():
            key = field.index_fieldname

            if field.is_multivalued:
                if field.indexed is False:
                    column = IDLIST(stored=True, field_boost=field.boost)
                else:
                    column = KEYWORD(
                        stored=True,
                        commas=True,
                        scorable=True,
                        field_boost=field.boost,
                    )
            elif field.field_type in ('date', 'datetime'):
                column = DATETIME(stored=field.stored, sortable=True)
            elif field.field_type == 'integer':
                column = NUMERIC(
                    stored=field.stored,
                    numtype=int,
                    field_boost=field.boost,
                )
            elif field.field_type == 'float':
                column = NUMERIC(
                    stored=field.stored,
                    numtype=float,
                    field_boost=field.boost,
                )
            elif field.field_type == 'boolean':
                # Field boost isn't supported on BOOLEAN as of 1.8.2.
                column = BOOLEAN(stored=field.stored)
            elif field.field_type == 'ngram':
                column = NGRAM(
                    minsize=3,
                    maxsize=15,
                    stored=field.stored,
                    field_boost=field.boost,
                )
            elif field.field_type == 'edge_ngram':
                column = NGRAMWORDS(
                    minsize=2,
                    maxsize=15,
                    at='start',
                    stored=field.stored,
                    field_boost=field.boost,
                )
            else:
                # Every other field type is indexed as text analyzed by
                # jieba's ChineseAnalyzer.
                column = TEXT(
                    stored=True,
                    analyzer=ChineseAnalyzer(),
                    field_boost=field.boost,
                    sortable=True,
                )

            schema_fields[key] = column

            if field.document is True:
                # Remember the document field and turn on spelling for it.
                content_field_name = key
                schema_fields[key].spelling = True

        # Fail more gracefully than relying on the backend to die if no
        # fields are found.
        if len(schema_fields) <= initial_key_count:
            raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")

        return content_field_name, Schema(**schema_fields)
class WhooshEngine(BaseEngine):
    """Engine that pairs the custom Chinese-aware backend with WhooshSearchQuery."""

    # Backend class whose build_schema uses ChineseAnalyzer for text fields.
    backend = MyWhooshSearchBackend
    # Query class taken as-is from the imported Whoosh backend module.
    query = WhooshSearchQuery
建立例子模型Product
from django.db import models
class Product(models.Model):
    """Example model: a product with a name, a weight and a description."""

    id = models.AutoField(verbose_name='序号', primary_key=True)
    name = models.CharField(verbose_name='名称', max_length=50)
    weight = models.CharField(verbose_name='重量', max_length=20)
    describe = models.CharField(verbose_name='描述', max_length=500)

    def __str__(self):
        """Return the product name as the display string."""
        return self.name
利用搜索引擎创建索引
search_indexes.py名称不可修改
from haystack import indexes
from .models import Product
# Index class for the Product model.
# The class is named after the model with an "Index" suffix.
class ProductIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for Product with a single template-rendered document field."""

    # Document field; use_template=True means its content comes from a template.
    text = indexes.CharField(document=True, use_template=True)

    def get_model(self):
        """Return the model class this index covers."""
        return Product

    def index_queryset(self, using=None):
        """Return the records to index: every object of the model."""
        model = self.get_model()
        return model.objects.all()
在templates/search/indexes/index/下建product_text.txt,此文件中定义引擎的索引字段
{{ object.name }}
{{ object.describe }}
创建完后利用python manage.py rebuild_index完成索引文件的创建
完成创建后在views中定义响应方法类,该类继承SearchView类
from django.shortcuts import render
from django.core.paginator import *
from django.conf import settings
from .models import *
from haystack.views import SearchView
# Search view implemented on top of Haystack's SearchView.
class MySearchView(SearchView):
    """Search view that lists every product when no query is supplied."""

    # Template used by this view.
    template = 'search.html'

    def create_response(self):
        """Build the response for the current request.

        With a non-empty ``q`` parameter the response comes from
        ``SearchView.create_response``. Otherwise all products, ordered by
        id, are paginated with ``HAYSTACK_SEARCH_RESULTS_PER_PAGE`` items per
        page and rendered with ``self.template``.

        A non-numeric ``page`` parameter falls back to page 1; a page number
        outside the valid range falls back to the last page.
        """
        if self.request.GET.get('q', ''):
            # Non-empty query: defer to the parent implementation.
            return super(MySearchView, self).create_response()

        show_all = True
        product = Product.objects.all().order_by('id')
        per = settings.HAYSTACK_SEARCH_RESULTS_PER_PAGE
        p = Paginator(product, per)

        # int() raises ValueError, not PageNotAnInteger, on input such as
        # "?page=abc"; handle it here so the request does not fail.
        try:
            num = int(self.request.GET.get('page', 1))
        except ValueError:
            num = 1

        try:
            page = p.page(num)
        except EmptyPage:
            page = p.page(p.num_pages)

        # Explicit context instead of locals(); the keys are the same data
        # names the template could previously read.
        context = {
            'show_all': show_all,
            'product': product,
            'per': per,
            'p': p,
            'num': num,
            'page': page,
        }
        return render(self.request, self.template, context)