/// <summary>
/// Determines whether the given word is present in the index.
/// </summary>
/// <param name="word">The word to look up.</param>
/// <returns><c>true</c> iff the word exists in the index (its document frequency is greater than zero).</returns>
/// <exception cref="System.IO.IOException">May be raised by the underlying index lookup.</exception>
public virtual bool Exist(System.String word)
{
    // ObtainSearcher calls EnsureOpen
    IndexSearcher searcher = ObtainSearcher();
    try
    {
        // A word is considered present when at least one document carries its term.
        int documentFrequency = searcher.DocFreq(F_WORD_TERM.CreateTerm(word));
        return documentFrequency > 0;
    }
    finally
    {
        // Always hand the searcher back, even if the lookup throws.
        ReleaseSearcher(searcher);
    }
}
/// <summary>
/// Rewrites this query. When the term is flagged as a prefix term, the result is a
/// <c>PrefixQuery</c> over the term text that precedes the first <c>'*'</c>, carrying over
/// this query's <c>Boost</c> and <c>RewriteMethod</c>. Otherwise the base rewrite is used.
/// </summary>
/// <param name="reader">The index reader passed through to the base rewrite.</param>
/// <returns>The rewritten query.</returns>
public override Query Rewrite(IndexReader reader)
{
    if (!_termIsPrefix)
    {
        return base.Rewrite(reader);
    }

    // Keep only the part of the term text in front of the first wildcard character.
    var termText = internalTerm.Text;
    var prefixText = termText.Substring(0, termText.IndexOf('*'));

    MultiTermQuery prefixQuery = new PrefixQuery(internalTerm.CreateTerm(prefixText));
    prefixQuery.Boost = Boost;
    prefixQuery.RewriteMethod = RewriteMethod;
    return prefixQuery;
}
/// <summary>
/// Indexes the documents of <paramref name="batch"/> inside a <c>Write</c> callback.
/// A first pass walks every document, rejects those without a document id and removes
/// the existing index entry for each distinct id (unless the document asks to skip the
/// delete). A second pass logs, adds and commits each document. Afterwards an "Index"
/// performance stat is recorded and a debug message is logged.
/// </summary>
/// <param name="batch">The batch whose <c>Documents</c> are indexed.</param>
/// <exception cref="ArgumentException">A document in the batch has a null document id.</exception>
public override void IndexDocuments(IndexingBatch batch)
{
    var indexedCount = 0;
    var sourceCount = 0;
    var stopwatch = Stopwatch.StartNew();
    var startedAt = DateTime.UtcNow;

    Write((indexWriter, analyzer) =>
    {
        // TODO (from the original author): this would be a perfect candidate for a
        // TPL DataFlow implementation, which was not available on .NET 3.5.
        var seenDocumentIds = new HashSet<string>();
        var documentIdTerm = new Lucene.Net.Index.Term(Constants.DocumentIdFieldName);

        // Pass 1: validate and issue deletes. ToList() forces the whole pass to run
        // before any document is added below.
        var pendingDocuments = batch.Documents
            .Select((doc, _) =>
            {
                Interlocked.Increment(ref sourceCount);

                if (doc.DocumentId == null)
                {
                    var message = string.Format(
                        "Cannot index something which doesn't have a document id, but got: '{0}'",
                        doc);
                    throw new ArgumentException(message);
                }

                var documentId = doc.DocumentId.ToString(CultureInfo.InvariantCulture);

                // Only the first occurrence of an id triggers a delete; repeated ids
                // are passed through untouched.
                var isFirstOccurrence = seenDocumentIds.Add(documentId);
                if (isFirstOccurrence && doc.SkipDeleteFromIndex == false)
                {
                    // The delete term uses the lower-cased id.
                    var deleteTerm = documentIdTerm.CreateTerm(documentId.ToLowerInvariant());
                    indexWriter.DeleteDocuments(deleteTerm);
                }

                return doc;
            })
            .ToList();

        // Pass 2: log, add and commit every document, in order.
        foreach (var pending in pendingDocuments)
        {
            Interlocked.Increment(ref indexedCount);
            LogIndexedDocument(pending.DocumentId, pending.Document);
            AddDocumentToIndex(indexWriter, pending.Document, analyzer);

            // NOTE(review): Commit runs once per document; confirm this is intended
            // rather than a single commit after the loop.
            indexWriter.Commit();
        }

        return sourceCount;
    });

    var performanceStats = new IndexingPerformanceStats
    {
        OutputCount = indexedCount,
        InputCount = sourceCount,
        Duration = stopwatch.Elapsed,
        Operation = "Index",
        Started = startedAt
    };
    AddindexingPerformanceStat(performanceStats);

    LogIndexing.Debug("Indexed {0} documents for {1}", indexedCount, Name);
}