[Lucene.Net]正則表示式搜尋

  • 738
  • 0
  • C#
  • 2017-01-10

Lucene.Net支援 Regular Expression Search,可說大大提高搜尋的彈性

最近我有一個需求,我需要搜尋全文檢索資料庫中某一欄位字元長度小於9或包含a-z任一字元的資料。

面對這需求我一開始就想要使用正則表示式來處理(須注意效能),下面直接看Code:

/// <summary>
/// Runs a regular-expression query against the "FileName" field of the index
/// opened from <c>_cacheDirectory</c> and maps every collected hit to a
/// <c>LuceneDocument</c>.
/// </summary>
/// <param name="pattern">Regular expression passed to <c>RegexQuery</c> as the term text.</param>
/// <returns>
/// The matching documents, or <c>null</c> when the collector reports no hits.
/// The <c>null</c> result is kept for backward compatibility, so callers must null-check.
/// </returns>
public List<LuceneDocument> RegexSelect(string pattern)
{
    RegexQuery query = new RegexQuery(new Term("FileName", pattern)); // Regular Expression Search

    // Both searchers hold index resources; dispose them deterministically
    // instead of leaking a reader on every call.
    using (var searcher = new IndexSearcher(_cacheDirectory, true))
    using (var multiSearcher = new ParallelMultiSearcher(new IndexSearcher[] { searcher }))
    {
        var collector = new IntegralCollector();
        multiSearcher.Search(query, collector);

        var hits = collector.Docs;
        if (hits == null || hits.Count <= 0)
        {
            return null;
        }

        List<LuceneDocument> results = new List<LuceneDocument>(hits.Count);
        foreach (int docId in hits)
        {
            Document doc = multiSearcher.Doc(docId, null);
            if (doc == null)
            {
                continue;
            }

            results.Add(new LuceneDocument
            {
                DocID = docId,
                FileExtension = doc.Get("FileExtension"),
                FileName = doc.Get("FileName"),
                Content = doc.Get("Content"),
                CreateDate = doc.Get("CreateDate")
            });
        }

        return results;
    }
}

var lettersData = RegexSelect(".*[a-z].*"); // contains any character from a to z

var digitsData = RegexSelect("^[0-9]{2,8}$"); // digits 0-9 only, with a length between 2 and 8