把其他格式轉化為lucene需要的Document格式
Document doc = new
Document(); //每一個doc相當于數據庫的一條記錄
doc.add(new Field("uid", line.getUid().toString(),
Store.YES,Index.NO)); //每一個field,相當于數據庫的字段
doc.add(new Field("title", line.getTitle(),
Store.NO,Index.ANALYZED));
doc.add(new Field("content", line.getContent(),Store.NO,
Index.ANALYZED));
Document doc = new Document(); //每一個doc相當于數據庫的一條記錄
doc.add(new Field("uid", line.getUid().toString(),
Store.YES,Index.NO)); //每一個field,相當于數據庫的字段
doc.add(new Field("title",
line.getTitle(), Store.NO,Index.ANALYZED));
doc.add(new Field("content", line.getContent(),Store.NO,
Index.ANALYZED));
向IndexWriter添加doc,可以插入多條doc
writer.addDocument(doc);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc);
writer.addDocument(doc2);
writer.addDocument(doc3);
開始寫入(close的時候為實際寫入過程)
writer.close();
writer = null;
writer.close();
writer = null;
讀取寫入的索引數(shù)
writer.numDocs()
writer.maxDoc()
writer.numDocs()
writer.maxDoc()
在close之前可以進行優化(不建議在建立索引時候使用)
writer.optimize()
2、清空索引
Directory directory = new SimpleFSDirectory(new File(path),new
SimpleFSLockFactory());
IndexWriter.unlock(directory); //關鍵是這一步要進行目錄解鎖,這里解的是write.lock鎖
IndexWriter writer = new IndexWriter(directory,new WhitespaceAnalyzer(),
false,IndexWriter.MaxFieldLength.LIMITED);
writer.deleteAll(); //標識刪除全部
writer.optimize(); //這個步驟才是實際刪除的過程
writer.close();
Directory directory = new SimpleFSDirectory(new File(path),new
SimpleFSLockFactory());
IndexWriter.unlock(directory); //關鍵是這一步要進行目錄解鎖,這里解的是write.lock鎖
IndexWriter writer = new IndexWriter(directory,new WhitespaceAnalyzer(),
false,IndexWriter.MaxFieldLength.LIMITED);
writer.deleteAll(); //標識刪除全部
writer.optimize(); //這個步驟才是實際刪除的過程
writer.close();
3、刪除指定索引(和清空差不多)
writer.deleteDocuments(new Term("uri", uri)); //這里是刪除term滿足條件的一條或多條
writer.deleteDocuments(query); //這里是刪除一個查詢出來的內容
writer.deleteDocuments(new Term("uri", uri)); //這里是刪除term滿足條件的一條或多條
writer.deleteDocuments(query); //這里是刪除一個查詢出來的內容
4、更新索引
就是先刪除再添加的過程,沒有原地修改已有doc的辦法;IndexWriter.updateDocument(Term, Document)也只是把「先按term刪除、再添加新doc」合成一步
5、讀取建立的索引分詞
TermEnum terms = indexReader.terms(new Term(index, ""));
Term term = terms.term(); //獲取一條索引
term.field(); //獲取索引的field(字段名)
term.text(); //獲取索引的值
TermEnum terms = indexReader.terms(new Term(index, ""));
Term term = terms.term(); //獲取一條索引
term.field(); //獲取索引的field(字段名)
term.text(); //獲取索引的值
6、搜索
最新2.9的IndexSearcher 建立方式:
Directory directory = new
SimpleFSDirectory(new File(path),new SimpleFSLockFactory());
IndexSearcher indexSearcher = new IndexSearcher(directory, true);
Directory directory = new SimpleFSDirectory(new File(path),new
SimpleFSLockFactory());
IndexSearcher indexSearcher = new IndexSearcher(directory, true);
創建查詢條件(這里建一個最複雜的,根據多個限定條件查找,并
且有的限定條件放在多個field中查找,有精確限定和范圍限定)
BooleanQuery bQuery = new BooleanQuery();
Query query1 = null, query2 = null, query3 = null;
BooleanClause.Occur[] flags = new BooleanClause.Occur[]
{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
query1 = MultiFieldQueryParser.parse(params.get("keywords"),new String[]
{ "title", "content" }, flags, new WhitespaceAnalyzer());
bQuery.add(query1, Occur.MUST); //query1是把關鍵字分別在title和content中匹配!
query2 = new TermQuery(new Term("startgui", params.get("startgui")));
bQuery.add(query2, Occur.MUST); //query2是精確匹配
Long minPriceLong = Long.parseLong(params.get("minPrice"));
Long maxPriceLong = Long.parseLong(params.get("maxPrice"));
query3 = NumericRangeQuery.newLongRange("price", minPriceLong,
maxPriceLong, true, true);
bQuery.add(query3, Occur.MUST); //query3是按范圍匹配
BooleanQuery bQuery = new BooleanQuery();
Query query1 = null, query2 = null, query3 = null;
BooleanClause.Occur[] flags = new BooleanClause.Occur[]
{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
query1 = MultiFieldQueryParser.parse(params.get("keywords"),new String[]
{ "title", "content" }, flags, new WhitespaceAnalyzer());
bQuery.add(query1, Occur.MUST); //query1是把關鍵字分別在title和content中匹配!
query2 = new TermQuery(new Term("startgui", params.get("startgui")));
bQuery.add(query2, Occur.MUST); //query2是精確匹配
Long minPriceLong = Long.parseLong(params.get("minPrice"));
Long maxPriceLong = Long.parseLong(params.get("maxPrice"));
query3 = NumericRangeQuery.newLongRange("price", minPriceLong,
maxPriceLong, true, true);
bQuery.add(query3, Occur.MUST); //query3是按范圍匹配
排序情況
SortField[] sortField = new
SortField[] { SortField.FIELD_SCORE,new SortField(null, SortField.DOC,
true) }; // 默認排序
SortField sortPriceField = new SortField("sortPrice",SortField.LONG,
sortPrice);
sortField = new SortField[] { sortPriceField,SortField.FIELD_SCORE,new
SortField(null, SortField.DOC, true) }; //按自定義價格排序
SortField[] sortField = new SortField[] { SortField.FIELD_SCORE,new
SortField(null, SortField.DOC, true) }; // 默認排序
SortField sortPriceField = new SortField("sortPrice",SortField.LONG,
sortPrice);
sortField = new SortField[] { sortPriceField,SortField.FIELD_SCORE,new
SortField(null, SortField.DOC, true) }; //按自定義價格排序
2.9最新查詢方式,只是獲取id
TopFieldDocs docs =
indexSearcher.search(query, null, indexSearcher.maxDoc(), new
Sort(sortField));
ScoreDoc[] scoreDocs = docs.scoreDocs;
docCount = scoreDocs.length;
TopFieldDocs docs = indexSearcher.search(query, null,
indexSearcher.maxDoc(), new Sort(sortField));
ScoreDoc[] scoreDocs = docs.scoreDocs;
docCount = scoreDocs.length;
加入分頁
List<Document> docList =
new ArrayList<Document>();
int max = ((startIndex + pageSize) >= docCount) ? docCount :
(startIndex + pageSize); // max防止arrayindexoutofbounds
for (int i = startIndex; i < max; i++) {
ScoreDoc scoredoc = scoreDocs[i];
Document doc = indexSearcher.doc(scoredoc.doc); // 新的使用方法
docList.add(doc);
}
List<Document> docList = new ArrayList<Document>();
int max = ((startIndex + pageSize) >= docCount) ? docCount :
(startIndex + pageSize); // max防止arrayindexoutofbounds
for (int i = startIndex; i < max; i++) {
ScoreDoc scoredoc = scoreDocs[i];
Document doc = indexSearcher.doc(scoredoc.doc); // 新的使用方法
docList.add(doc);
}
循環解析docList中的Document,取所需要的值
doc.get("title");
...
7、關(guān)于分詞
注意建立索引和搜索時候的analyzer必須一致,而且建立索引和搜索時候目錄也要保持一致
lucene自帶的一些分詞器
StandardAnalyzer()
會按空格和標點符號劃分
WhitespaceAnalyzer() 會按空格劃分
中文分詞這里使用的是paoding的中文分詞
是先按詞庫劃分,當詞庫中不存在時按二分法進行劃分