Lucene.Net 全文检索:创建索引、查询、分页
jopen
10年前
#region Create / update the index

/// <summary>
/// Creates the Lucene index stored under <c>path</c>, or refreshes it when it already exists.
/// </summary>
/// <remarks>
/// Every record returned by <c>LuceneBll.Instance.Get()</c> is written with
/// <c>UpdateDocument</c>, keyed on its un-analyzed "Id" term, so running this method
/// again replaces the previously indexed copy of a record instead of appending a duplicate.
/// Records that are no longer returned by the data layer are not removed from the index here.
/// The writer and the directory are closed even when indexing fails, so the write lock
/// is not left behind by an exception.
/// </remarks>
private void CreateIndexData()
{
    // Folder that holds the index files.
    FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
    try
    {
        // Does an index already exist in that folder?
        bool has = IndexReader.IndexExists(dir);

        // Force-release a lock left on an existing index.
        // NOTE(review): this also steals the lock from a writer that is still alive in
        // another process/thread — confirm only one indexer can run at a time.
        if (has && IndexWriter.IsLocked(dir))
        {
            IndexWriter.Unlock(dir);
        }

        // Third argument is "create": build a fresh index only when none exists yet.
        IndexWriter iw = new IndexWriter(dir, new PanGuAnalyzer(), !has, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            List<LuceneModel> list = LuceneBll.Instance.Get();
            if (list != null)
            {
                foreach (var o in list)
                {
                    string id = o.Id.ToString();

                    // Field.Store.YES:      store the original (un-analyzed) value
                    // Field.Store.NO:       do not store; storing is independent of indexing
                    // Field.Store.COMPRESS: store compressed (long text / binary), at a performance cost
                    // Field.Index.ANALYZED:                 tokenize and index
                    // Field.Index.ANALYZED_NO_NORMS:        tokenize and index without norms
                    // Field.Index.NO:                       do not index
                    // Field.Index.NOT_ANALYZED:             index as a single token
                    // Field.Index.NOT_ANALYZED_NO_NORMS:    index as a single token without norms
                    // Field.TermVector.NO:                      no term vector
                    // Field.TermVector.WITH_OFFSETS:            store offsets
                    // Field.TermVector.WITH_POSITIONS:          store positions
                    // Field.TermVector.WITH_POSITIONS_OFFSETS:  store positions and offsets
                    // Field.TermVector.YES:                     store the field's term vector for each document
                    Document d = new Document();
                    d.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    // The Field constructor rejects null values, so index missing text as empty.
                    d.Add(new Field("Title", o.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    d.Add(new Field("Message", o.Message ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                    // Delete-then-add on the exact Id term: a no-op delete on a new index,
                    // a replacement on an existing one.
                    iw.UpdateDocument(new Term("Id", id), d);
                }
            }

            iw.Optimize();
        }
        finally
        {
            // Release the index files and the write lock.
            iw.Close();
        }
    }
    finally
    {
        dir.Close();
    }
}

#endregion
#region Search

/// <summary>
/// Runs a paged phrase search for <paramref name="str"/> against the "Message" field.
/// </summary>
/// <param name="str">Keyword text to search for; it is split into terms with the PanGu segmenter.</param>
/// <param name="index">Page number, starting at 1.</param>
/// <param name="count">Number of hits per page.</param>
/// <returns>
/// The hits on the requested page, or <c>null</c> when there are none. <c>null</c> is also
/// returned when <paramref name="str"/> is null/empty or when <paramref name="index"/> or
/// <paramref name="count"/> is less than 1.
/// </returns>
private List<LuceneModel> Search(string str, int index = 1, int count = 3)
{
    // Nothing meaningful can be queried; report it the same way as "no hits".
    if (string.IsNullOrEmpty(str) || index < 1 || count < 1)
    {
        return null;
    }

    // Number of hits that belong to earlier pages.
    int skip = count * (index - 1);

    // Folder that holds the index files.
    FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
    try
    {
        // Build the index on first use.
        if (!IndexReader.IndexExists(dir))
        {
            CreateIndexData();
        }

        // Read-only reader over the index.
        IndexReader ir = IndexReader.Open(dir, true);
        try
        {
            IndexSearcher searcher = new IndexSearcher(ir);
            try
            {
                // One phrase term per segmented word, all against "Message".
                PhraseQuery query = new PhraseQuery();
                Segment segment = new Segment();
                ICollection<WordInfo> words = segment.DoSegment(str);
                foreach (var word in words)
                {
                    query.Add(new Term("Message", word.ToString()));
                }

                // Maximum distance allowed between the phrase terms.
                query.SetSlop(100);

                // Collect enough top hits to cover every page up to the requested one.
                TopScoreDocCollector collector = TopScoreDocCollector.create(skip + count, false);
                searcher.Search(query, null, collector);

                // TopDocs takes (start, howMany): pass the page size, not the end offset.
                ScoreDoc[] docs = collector.TopDocs(skip, count).scoreDocs;
                if (docs.Length == 0)
                {
                    return null;
                }

                List<LuceneModel> list = new List<LuceneModel>(docs.Length);
                foreach (var hit in docs)
                {
                    Document document = searcher.Doc(hit.doc);
                    LuceneModel m = new LuceneModel();
                    m.Id = Convert.ToInt32(document.Get("Id"));
                    m.Title = document.Get("Title");
                    m.Message = document.Get("Message");
                    list.Add(m);
                }

                return list;
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            ir.Close();
        }
    }
    finally
    {
        // Release the index files even when the search throws.
        dir.Close();
    }
}

#endregion

// Source: http://blog.csdn.net/pigkeli/article/details/34848141