操作lucene索引的工具类
jopen
10年前
public class OperatorIndex { public static final String INDEX_PATH = "D:/indexDir"; // 创建分词器 private Analyzer analyzer = null; // 索引保存目录 private File indexFile = null; // 目录对象 private Directory directory = null; // 创建indexWriterConfig 只能实例化一次用一次 private IndexWriterConfig indexWriterConfig = null; SimpleDateFormat simpleDateFormat; private IndexSearcher indexSearcher; public void init() throws IOException { analyzer = new IKAnalyzer(); indexFile = new File(INDEX_PATH); directory = FSDirectory.open(indexFile); simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); System.out.println("##初始化完成##"); } // 数据库中需要建立索引的比表封装成Document对象 public Document createDocument(Article article) { Document document = new Document(); document.add(new TextField("id", article.getId() + "", Field.Store.YES)); document.add(new TextField("title", article.getTitle(), Field.Store.YES)); document.add(new TextField("content", article.getContent(), Field.Store.YES)); return document; } // 获得日期 public String getDate() { return simpleDateFormat.format(new Date()); } // 查询所有索引 public void openIndexFile() throws IOException { System.out.println("读取索引开始..."); IndexReader indexReader = IndexReader.open(directory); // 获取索引个数 int maxDoc = indexReader.maxDoc(); System.out.println("maxDoc:" + maxDoc); Article article = null; for (int i = 0; i < maxDoc; i++) { Document document = indexReader.document(i); article = new Article(); if (document.get("id") == null) { System.out.println("id 为空"); } article.setId(Integer.parseInt(document.get("id"))); article.setTitle(document.get("title")); article.setContent(document.get("content")); System.out.println(article); } indexReader.close(); System.out.println("读取索引结束"); } // 创建索引 public void createIndex(Article article) throws IOException { indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); // 创建索引前删除以前的索引 // indexWriter.deleteAll(); Document createDocument = createDocument(article); indexWriter.addDocument(createDocument); // indexWriter.commit(); indexWriter.close(); System.out.println("[" + getDate() + "]" + "lucene写入索引到" + "[" + indexFile.getAbsolutePath() + "]" + "成功"); } // 批量创建索引 public void createIndexes(List<Article> articles) throws IOException { // indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer); // IndexWriter indexWriter = new IndexWriter(directory, // indexWriterConfig); // indexWriter.deleteAll(); for (Article article : articles) { createIndex(article); } } // 删除索引 public void deleteIndex(int id) throws IOException { if (indexFile.exists()) { IndexWriterConfig indexWriterConfig = new IndexWriterConfig( Version.LATEST, analyzer); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); indexWriter.deleteDocuments(new Term("id", String.valueOf(id))); System.out.println("[" + getDate() + "]" + "lucene删除索引成功"); indexWriter.close(); } else { System.out.println("删除索引失败"); } } // 批量删除索引 public void deleteIndexes(List<Article> articles) { if (articles == null || articles.size() == 0) { return; } for (Article article : articles) { try { deleteIndex(article.getId()); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("删除索引失败"); e.printStackTrace(); } } } // 更新索引 先删除索引在添加索引 public void updateIndex(Article article) throws IOException { deleteIndex(article.getId()); createIndex(article); } //查询索引 public void searchIndex(String keyword) { IndexReader indexReader = null; try { indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); QueryParser queryParser = new QueryParser(Version.LATEST, "title", new IKAnalyzer(false)); Query query = queryParser.parse(keyword.trim()); TopDocs topDocs = indexSearcher.search(query, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs == null || scoreDocs.length == 0) { System.out.println("很遗憾!没有找到!"); } for (ScoreDoc scoreDoc : scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); System.out.println(document.toString()); System.out.println("[title:" + document.get("title") + ",content:" + document.get("content") + "]"); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } //分页查询 public void paginationQuery(String keyword, int pageSize, int currentPage) { String[] fields = { "title", "content" }; IndexReader indexReader = null; try { MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser( Version.LATEST, fields, new IKAnalyzer(false)); Query query = multiFieldQueryParser.parse(keyword.trim()); indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); // 搜索返回的结果并取前100的结果 TopDocs topDocs = indexSearcher.search(query, 100); TopDocs allDocs = indexSearcher .search(new MatchAllDocsQuery(), 100); int totalHits = topDocs.totalHits; System.out.println("总数:" + totalHits); // 搜索返回的结果集合 ScoreDoc[] scoreDocs = topDocs.scoreDocs; int begin = (currentPage - 1) * pageSize; int end = Math.min(begin + pageSize, scoreDocs.length); for (int i = begin; i < end; i++) { Document document = indexSearcher.doc(scoreDocs[i].doc); System.out.println("[title:" + document.get("title") + "]"); } } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } //查询高亮显示 public void highlighterSearch() { IndexReader indexReader = null; try { indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); Term term = new Term("content", "中国"); TermQuery termQuery = new TermQuery(term); TopDocs topDocs = indexSearcher .search(termQuery, Integer.MAX_VALUE); System.out.println("查询结果数:" + topDocs.totalHits); System.out.println("最大的评分" + topDocs.getMaxScore()); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); System.out.println("检索条件:" + term.toString()); String content = document.get("content"); System.out.println("content:" + document.get("content")); // 高亮展示 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "【", "】"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(termQuery)); highlighter.setTextFragmenter(new SimpleFragmenter(content .length())); if (!"".equals(content)) { TokenStream tokenStream = new IKAnalyzer().tokenStream( content, new StringReader(content)); String bestFragment = highlighter.getBestFragment( tokenStream, content); System.out.println("高亮显示:" + "检索结果如下所示:"); System.out.println(bestFragment); // 结束关键字高亮 System.out.println("文件内容:" + content); // 匹配相关度 System.out.println(scoreDoc.score); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } // 释放资源 public void destory() throws IOException { analyzer.close(); directory.close(); System.out.println("销毁成功"); } public static void main(String[] args) throws IOException { OperatorIndex index = new OperatorIndex(); index.init(); // Article article = new Article(); // article.setId(1); // article.setTitle("hello"); // article.setContent("hello world!"); // // index.createIndex(article); // index.openIndexFile(); // index.deleteIndex(1); // index.openIndexFile(); // article.setContent("hello"); // index.updateIndex(article); // index.openIndexFile(); List<Article> articles = new ArrayList<Article>(); Article article = new Article(1, "中国", "11111gdfjs中国"); Article article1 = new Article(2, "我爱你中国", "11111gdfjs我爱你中国"); Article article2 = new Article(3, "国中之国", "fdsab;1gdfjs国中之国"); Article article3 = new Article(4, "44", "111gdsa11gdfjs中国将成为世界上最强大的国家"); Article article4 = new Article(5, "55", "111gdas11gdfjs"); articles.add(article); articles.add(article1); articles.add(article2); articles.add(article3); articles.add(article4); index.deleteIndexes(articles); index.createIndexes(articles); index.openIndexFile(); index.searchIndex("中国"); index.paginationQuery("中国", 1, 1); index.highlighterSearch(); // index.destory(); } } Article.java实体 public class Article { private int id; private String title; private String content; public int getId() { return id; } public void setId(int id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public Article(int id, String title, String content) { super(); this.id = id; this.title = title; this.content = content; } public Article() { // TODO Auto-generated constructor stub } @Override public String toString() { // TODO Auto-generated method stub return "article[id:" + id + ",title:" + title + ",content:" + content + "]"; } }