Lucene.Net支援 Regular Expression Search,可說大大提高搜尋的彈性
最近我有一個需求,我需要搜尋全文檢索資料庫某一欄位字元長度小於9或包含a-z任一字元,
面對這需求我一開始就想要使用正則表示式來處理(須注意效能),下面直接看Code:
/// <summary>
/// Runs a regular-expression query against the "FileName" field of the index in
/// <c>_cacheDirectory</c> and converts every collected hit into a <c>LuceneDocument</c>.
/// </summary>
/// <param name="pattern">Regular expression used as the term text of the <c>RegexQuery</c> on "FileName".</param>
/// <returns>
/// The documents that were collected, or <c>null</c> when the collector holds no hits
/// (kept as <c>null</c> rather than an empty list so existing callers keep working).
/// </returns>
public List<LuceneDocument> RegexSelect(string pattern)
{
    // Both searchers are disposed when the method exits so the resources opened for
    // this search are released instead of leaking on every call.
    // NOTE(review): relies on Searcher implementing IDisposable (Lucene.Net 3.x) - confirm the library version.
    using (var searcher = new IndexSearcher(_cacheDirectory, true))
    using (var parallelSearcher = new ParallelMultiSearcher(new IndexSearcher[] { searcher }))
    {
        RegexQuery query = new RegexQuery(new Term("FileName", pattern)); // Regular Expression Search
        var collector = new IntegralCollector();
        parallelSearcher.Search(query, collector);

        if (collector.Docs.Count <= 0)
        {
            return null;
        }

        List<LuceneDocument> results = new List<LuceneDocument>(collector.Docs.Count);
        foreach (int docId in collector.Docs)
        {
            Document doc = parallelSearcher.Doc(docId, null);
            if (doc == null)
            {
                // Skip ids for which no stored document comes back.
                continue;
            }

            results.Add(new LuceneDocument
            {
                DocID = docId,
                FileExtension = doc.Get("FileExtension"),
                FileName = doc.Get("FileName"),
                Content = doc.Get("Content"),
                CreateDate = doc.Get("CreateDate")
            });
        }

        return results;
    }
}
// FileName contains at least one character in the range a to z.
var letterMatches = RegexSelect(".*[a-z].*");
// FileName consists only of the digits 0 to 9 and is 2 to 8 characters long.
var digitMatches = RegexSelect("^[0-9]{2,8}$");