lucene再查询分页搜索和lucene的搜索_基于searchAfter的实现
jopen
9年前
package com.dhb.search; import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.commons.io.FileUtils; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; public class FileIndexUtils { private static Directory directory = null; static { try { directory = FSDirectory.open(new File("D:/luceneData/files/")); } catch (IOException e) { e.printStackTrace(); } } public static Directory getDirectory() { return directory; } public static void index(boolean hasNew) { IndexWriter writer = null; try { IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)); writer = new IndexWriter(directory, iwc); //是否新建索引 if(hasNew) { writer.deleteAll(); } Document doc = null; File f = new File("D:/luceneData/example"); for (File file : f.listFiles()) { doc = new Document(); doc.add(new Field("content", new FileReader(file))); doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified())); doc.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length()/1024))); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if(writer!=null) try { writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } } [java] view plaincopy /** * 分页查询 */ public IndexSearcher getSearcher(Directory dir) { try { if(reader==null) { reader = IndexReader.open(dir); } else { IndexReader tr = IndexReader.openIfChanged(reader); if(tr!=null) { reader.close(); //关闭原来的reader reader = tr; } } return new IndexSearcher(reader); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } public void searchPage(String query, int pageIndex, int pageSize) { Directory directory = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(directory); QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); try { Query q = parser.parse(query); TopDocs tds = searcher.search(q, 500); ScoreDoc[] sds = tds.scoreDocs; int start = (pageIndex-1)*pageSize; int end = pageIndex*pageSize; //这个for循环是一个bug for (int i = start; i < end; i++) { Document doc = searcher.doc(sds[i].doc); System.out.println(i+": "+doc.get("path")+"-->"+doc.get("filename")+"---"+sds[i].doc); } } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void searchNoPage(String query) { Directory directory = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(directory); QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); try { Query q = parser.parse(query); TopDocs tds = searcher.search(q, 500); ScoreDoc[] sds = tds.scoreDocs; for (int i = 0; i < sds.length; i++) { Document doc = searcher.doc(sds[i].doc); System.out.println(i+": "+doc.get("path")+"-->"+doc.get("filename")); } } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void searchPageByAfter(String query, int pageIndex, int pageSize) { Directory directory = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(directory); QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); try { Query q = parser.parse(query); TopDocs tds = searcher.search(q, 500); int last = (pageIndex-1)*pageSize-1; ScoreDoc[] sds = tds.scoreDocs; //int start = (pageIndex-1)*pageSize; //int end = pageIndex*pageSize; tds = searcher.searchAfter(sds[last], q, 10); for (ScoreDoc sd : tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("path")+"-->"+doc.get("filename")+"---"+sd.doc); } } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * 根据页码和分页大小获取上一次的最后一个ScoreDoc * */ private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher searcher) throws IOException { if(pageIndex == 1) return null; int num = (pageIndex-1)*pageSize; TopDocs tds = searcher.search(query, num); return tds.scoreDocs[num-1]; } public void searchPageByAfter_2(String query, int pageIndex, int pageSize) { Directory directory = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(directory); QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); try { Query q = parser.parse(query); //先获取上一页的最后一个元素 ScoreDoc lastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher); //通过最后一个元素搜索下一页的pageSize元素 TopDocs tds = searcher.searchAfter(lastSd, q, pageSize); for (ScoreDoc sd : tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("path")+"-->"+doc.get("filename")+"---"+sd.doc); } } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }