/// <summary>
/// Computes the per-document bit mask for the revision range held in
/// <c>_revFirst</c> / <c>_revLast</c>.
/// </summary>
/// <param name="reader">Index reader whose terms and postings are scanned.</param>
/// <returns>
/// A <see cref="BitArray"/> of length <c>reader.MaxDoc()</c>. A bit is set when the document
/// carries a <c>FieldName.RevisionLast</c> term at or after <c>_revFirst</c> and (unless
/// <c>_revLast</c> is <c>Head</c>) carries no "rev_first" term at or after <c>_revLast + 1</c>.
/// </returns>
public override BitArray Bits(IndexReader reader)
{
    // NOTE: reader.GetVersion() could be used to cache the result per reader.

    if (_revFirst == All || _revLast == All) // optimization
    {
        return new BitArray(reader.MaxDoc(), true);
    }

    BitArray last_bits = new BitArray(reader.MaxDoc(), false);
    TermDocs d = reader.TermDocs();
    try
    {
        // Positioning through reader.Terms(term) is used instead of TermEnum.SkipTo,
        // which was found to be extremely slow here.
        TermEnum t = reader.Terms(new Term(FieldName.RevisionLast, _revFirst.ToString(RevFormat)));
        try
        {
            while (t.Term() != null && t.Term().Field() == FieldName.RevisionLast)
            {
                d.Seek(t);
                while (d.Next())
                {
                    last_bits[d.Doc()] = true;
                }
                if (!t.Next())
                {
                    break;
                }
            }
        }
        finally
        {
            t.Close();
        }

        // optimization, skip the second pass if we are just using the head revision
        if (_revLast == Head)
        {
            return last_bits;
        }

        BitArray first_bits = new BitArray(reader.MaxDoc(), true);
        t = reader.Terms(new Term("rev_first", (_revLast + 1).ToString(RevFormat)));
        try
        {
            while (t.Term() != null && t.Term().Field() == "rev_first")
            {
                d.Seek(t);
                while (d.Next())
                {
                    first_bits[d.Doc()] = false;
                }
                if (!t.Next())
                {
                    break;
                }
            }
        }
        finally
        {
            t.Close();
        }

        // BitArray.And updates last_bits in place and returns it.
        return last_bits.And(first_bits);
    }
    finally
    {
        d.Close();
    }
}
/// <summary>
/// Indexes a single file, or reconciles it against the existing uid terms,
/// depending on <paramref name="operation"/>.
/// </summary>
/// <param name="writer">Writer that receives added documents and stale-document deletions.</param>
/// <param name="uidIter">
/// Enumerator over the existing "uid" terms. Must be non-null for
/// <c>IncrementalReindex</c> and <c>RemoveStale</c>, and null otherwise.
/// </param>
/// <param name="file">Candidate file; only ".html", ".htm" and ".txt" files are processed.</param>
/// <param name="operation">The indexing mode being executed.</param>
private static void IndexFile(IndexWriter writer, TermEnum uidIter, FileInfo file, Operation operation)
{
    // File extensions are identifiers, not linguistic text: compare ordinally so the
    // result does not depend on the current culture.
    string fullName = file.FullName;
    bool indexable = fullName.EndsWith(".html", StringComparison.Ordinal)
                     || fullName.EndsWith(".htm", StringComparison.Ordinal)
                     || fullName.EndsWith(".txt", StringComparison.Ordinal);
    if (!indexable)
    {
        return;
    }

    // We've found a file we should index.
    if (operation == Operation.IncrementalReindex || operation == Operation.RemoveStale)
    {
        // We should only get here with an open uidIter.
        Debug.Assert(uidIter != null, "Expected uidIter != null for operation " + operation);

        var uid = HTMLDocument.Uid(file); // construct uid for doc

        // Skip (and, when removing stale entries, delete) every uid that sorts before this file's uid.
        while (uidIter.Term != null && uidIter.Term.Field == "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) < 0)
        {
            if (operation == Operation.RemoveStale)
            {
                Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
                writer.DeleteDocuments(uidIter.Term);
            }

            uidIter.Next();
        }

        // The uidIter TermEnum should now be pointing at either
        //  1) a null term, meaning there are no more uids to check.
        //  2) a term matching the current file.
        //  3) a term not matching us.
        if (uidIter.Term != null && uidIter.Term.Field == "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) == 0)
        {
            // uidIter points to the current document, we should move one
            // step ahead to keep state consistent, and carry on.
            uidIter.Next();
        }
        else if (operation == Operation.IncrementalReindex)
        {
            // uidIter does not point to the current document, and we're
            // currently indexing documents.
            var doc = HTMLDocument.Document(file);
            Console.Out.WriteLine("adding " + doc.Get("path"));
            writer.AddDocument(doc);
        }
    }
    else
    {
        // We're doing a complete reindexing. We aren't using uidIter,
        // but for completeness we assert that it's null (as expected).
        Debug.Assert(uidIter == null, "Expected uidIter == null for operation == " + operation);

        var doc = HTMLDocument.Document(file);
        Console.Out.WriteLine("adding " + doc.Get("path"));
        writer.AddDocument(doc);
    }
}
//public IEnumerable<Article> LeaderResults(int DocID, List<DocumentClusters> Clusters)
//{
//    IEnumerable<Article> Articles = ArticleReader.ReadArticles(@"Data\cacm.all");
//    List<Article> Results = new List<Article>();
////    foreach (DocumentClusters cluster in Clusters)
////    {
////        if (cluster.LeaderDocumentID == DocID)
////        {
////            foreach (DocumentRank ClusteredDoc in cluster)
////            {
////                foreach (Article article in Articles)
////                {
////                    if (ClusteredDoc.DocumentID == article.Id)
////                    {
////                        Results.Add(article);
////                    }
////                }
////                //return Results;
////            }
////        }
////    }
//    return Results;
//}

/// <summary>
/// Reads every term of the index at <c>indexPath</c> together with its document frequency.
/// </summary>
/// <returns>One <c>TermData</c> per term returned by the reader's term enumeration.</returns>
/// <exception cref="NullReferenceException">The directory at <c>indexPath</c> does not exist.</exception>
public IEnumerable<TermData> GetTermFrequency()
{
    if (!System.IO.Directory.Exists(indexPath))
    {
        // NOTE(review): NullReferenceException is an odd choice for a missing directory;
        // the type is kept unchanged because existing callers may catch it.
        throw new NullReferenceException("Index does not exist.");
    }

    List<TermData> termlist = new List<TermData>();
    luceneIndexDirectory = FSDirectory.Open(indexPath);

    // 'using' guarantees the reader and the enumerator are released even if enumeration throws.
    using (IndexReader reader = IndexReader.Open(luceneIndexDirectory, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            Term term = terms.Term;
            TermData td = new TermData();
            td.TermText = term.Text;
            td.TermFrequency = reader.DocFreq(term);
            termlist.Add(td);
        }
    }

    return termlist;
}
/// <summary>
/// Advances to the next term accepted by <c>TermCompare</c>.
/// Returns true when such a term was found and stored in <c>currentTerm</c>.
/// </summary>
public override bool Next()
{
    // Nothing to advance when the wrapped enumerator has not been initialized.
    if (actualEnum == null)
    {
        return false;
    }

    currentTerm = null;
    while (currentTerm == null)
    {
        // Stop as soon as the filter signals the end or the wrapped enumerator is exhausted.
        if (EndEnum() || !actualEnum.Next())
        {
            return false;
        }

        Term candidate = actualEnum.Term;
        if (TermCompare(candidate))
        {
            currentTerm = candidate;
            return true;
        }
    }

    currentTerm = null;
    return false;
}
/// <summary>
/// Adds one object using a <c>KeywordAnalyzer</c> and verifies that its "String" field
/// was indexed as exactly one term holding the whole value.
/// </summary>
public void AddWithAnalyzerSuccess()
{
    TestObject t = new TestObject()
    {
        Number = 9876,
        String = "Test Object 9876",
    };

    Assert.AreEqual(0, writer.NumDocs());

    writer.Add(t, new KeywordAnalyzer());
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    int nTerms = 0;

    // Dispose the reader and the enumerator so the test does not leak index handles.
    using (IndexReader reader = IndexReader.Open(dir, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            if (String.Equals("String", terms.Term.Field))
            {
                Assert.AreEqual("Test Object 9876", terms.Term.Text);
                nTerms++;
            }
        }
    }

    Assert.AreEqual(1, nTerms);
}
/// <summary>
/// Initializes the search, overview and documents tabs and the status bar
/// from the currently opened index.
/// </summary>
internal void InitOverview()
{
    // populate analyzers
    searchTabPage.Init();

    string indexName = indexPath + (_readOnly ? " (R)" : "");
    statusBar.Panels[0].Text = resources.GetString("StatusIndexName") + indexName;

    List<string> fieldNames = new List<string>(indexReader.GetFieldNames(IndexReader.FieldOption.ALL));
    SetFieldNames(fieldNames);
    searchTabPage.SetFieldnames(fieldNames);

    overviewTabPage.Init(indexName);

    // Count all terms in the index. //TODO: Duplicated
    int i = 0;
    TermEnum termsEnum = indexReader.Terms();
    try
    {
        while (termsEnum.Next())
        {
            i++;
        }
    }
    finally
    {
        // Close the enumerator even if iteration throws.
        termsEnum.Close();
    }

    documentsTabPage.Init(i, indexReader.NumDocs());
}
/// <summary>
/// Finds the specified number of terms.
/// </summary>
/// <remarks>
/// Only terms of <c>currentField</c> are considered when it is non-null. A term is queued
/// only when its document frequency exceeds the current threshold; the threshold is raised
/// to the <c>Count</c> of the queue's top element each time the queue fills up.
/// NOTE(review): presumably <c>TermInfoQueue</c> keeps the lowest count on top, so the
/// result holds the most frequent terms in descending order — confirm against TermInfoQueue.
/// </remarks>
/// <param name="num">Maximum number of terms to return.</param>
/// <returns>The collected terms, in the reverse of the order in which the queue pops them.</returns>
public TermModel[] FindTerms(int num)
{
    // One extra slot: the queue is trimmed back as soon as it reaches this size.
    num++;
    TermInfoQueue queue = new TermInfoQueue(num);
    TermEnum enum2 = open.Reader.Terms();
    try
    {
        int count = 0;
        while (enum2.Next())
        {
            string str = enum2.Term().Field();
            if ((currentField != null) && (!str.Equals(currentField)))
            {
                continue;
            }

            if (enum2.DocFreq() > count)
            {
                queue.Put(new TermModel(enum2.Term(), enum2.DocFreq()));
                if (queue.Size() < num)
                {
                    continue;
                }

                queue.Pop();
                count = ((TermModel)queue.Top()).Count;
            }
        }
    }
    finally
    {
        // Close the enumerator even if iteration throws.
        enum2.Close();
    }

    TermModel[] modleArray = new TermModel[queue.Size()];
    for (int i = 0; i < modleArray.Length; i++)
    {
        modleArray[(modleArray.Length - i) - 1] = (TermModel)queue.Pop();
    }

    return modleArray;
}
/// <summary>
/// Builds the per-document string array for the field named by <paramref name="entryKey"/>.
/// </summary>
/// <param name="reader">Reader whose terms and postings are scanned.</param>
/// <param name="entryKey">Cache entry carrying the field name.</param>
/// <returns>
/// A <c>string[]</c> of length <c>reader.MaxDoc</c>; each slot holds the text of the last
/// enumerated term of the field that contains that document, or null when there is none.
/// </returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = StringHelper.Intern(entryKey.field);
    System.String[] values = new System.String[reader.MaxDoc];

    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        Term current = termEnum.Term;

        // Field names are compared by reference, which relies on both sides being interned.
        while (current != null && System.Object.ReferenceEquals(current.Field, field))
        {
            System.String text = current.Text;
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                values[termDocs.Doc] = text;
            }

            if (!termEnum.Next())
            {
                break;
            }

            current = termEnum.Term;
        }
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }

    return values;
}
/// <summary>
/// Adds one object with the writer's default analysis and verifies that its "String"
/// field produced exactly the terms "test", "object" and "1234".
/// </summary>
public void AddSuccess()
{
    TestObject t = new TestObject()
    {
        Number = 1234,
        String = "Test Object 1234",
    };

    Assert.AreEqual(0, writer.NumDocs());

    writer.Add(t);
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    HashSet<string> expectedTerms = new HashSet<string>(new string[] { "test", "object", "1234" });

    // Dispose the reader and the enumerator so the test does not leak index handles.
    using (IndexReader reader = IndexReader.Open(dir, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            if (String.Equals("String", terms.Term.Field))
            {
                Assert.True(expectedTerms.Contains(terms.Term.Text));
                expectedTerms.Remove(terms.Term.Text);
            }
        }
    }

    // Every expected term must have been seen.
    Assert.AreEqual(0, expectedTerms.Count);
}
/// <summary>
/// Lazily enumerates every term of the opened index with its field and document frequency.
/// </summary>
/// <remarks>
/// A read-only reader is opened when enumeration starts and is closed when the enumeration
/// finishes or the enumerator is disposed.
/// </remarks>
/// <returns>One <c>TermInfo</c> per term, in the reader's term order.</returns>
public IEnumerable<TermInfo> GetTerms()
{
    var directory = _openIndexModel.Directory;
    IndexReader indexReader = null;
    TermEnum terms = null;

    try
    {
        indexReader = IndexReader.Open(directory, true); // ToDo should i open this only once
        terms = indexReader.Terms();

        while (terms.Next())
        {
            // NOTE(review): deliberate 2 ms pause per term, presumably to throttle the
            // consumer — kept as in the original; confirm it is still wanted.
            System.Threading.Thread.Sleep(2);

            var term = terms.Term();
            yield return new TermInfo { Term = term.Text(), Field = term.Field(), Frequency = terms.DocFreq() };
        }
    }
    finally
    {
        // Release the enumerator before the reader it was created from, and make sure
        // the reader is still closed if closing the enumerator throws.
        try
        {
            if (terms != null)
            {
                terms.Close();
            }
        }
        finally
        {
            if (indexReader != null)
            {
                indexReader.Close();
            }
        }
    }
}
/// <summary>
/// Checks that every enumerated term text occurs in the value recorded for its field in
/// <c>DocHelper.NameValues</c>, and that postings and positions can be sought for known terms.
/// </summary>
public virtual void TestTerms()
{
    // Every term must belong to a known field and appear inside that field's text.
    TermEnum termEnum = _reader.Terms();
    Assert.IsTrue(termEnum != null);
    while (termEnum.Next())
    {
        Term current = termEnum.Term;
        Assert.IsTrue(current != null);
        //System.out.println("Term: " + term);
        System.String expectedText = (System.String)DocHelper.NameValues[current.Field];
        Assert.IsTrue(expectedText.IndexOf(current.Text) != -1);
    }

    // Postings must exist for a term of the text field and for the no-norms field.
    TermDocs postings = _reader.TermDocs();
    Assert.IsTrue(postings != null);

    postings.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(postings.Next());

    postings.Seek(new Term(DocHelper.NoNormsKey, DocHelper.NoNormsText));
    Assert.IsTrue(postings.Next());

    // Positions for the same text-field term.
    TermPositions termPositions = _reader.TermPositions();
    termPositions.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(termPositions != null);
    Assert.IsTrue(termPositions.Doc == 0);
    Assert.IsTrue(termPositions.NextPosition() >= 0);
}
/// <summary>
/// Builds the per-document float array for the field named by <paramref name="entryKey"/>.
/// </summary>
/// <remarks>
/// When the entry carries no parser, the request is delegated to <c>wrapper.GetFloats</c>
/// with the default parser, falling back to the numeric-utils parser on a
/// <see cref="System.FormatException"/>.
/// </remarks>
/// <param name="reader">Reader whose terms and postings are scanned.</param>
/// <param name="entryKey">Cache entry carrying the field name and an optional <c>FloatParser</c>.</param>
/// <returns>A <c>float[]</c> of length <c>reader.MaxDoc</c> (or the delegated result).</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = entryKey.field;
    FloatParser parser = (FloatParser)entryKey.custom;

    if (parser == null)
    {
        try
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER);
        }
        catch (System.FormatException)
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER);
        }
    }

    float[] values = null;
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        Term current = termEnum.Term;

        // Field names are compared by reference, exactly as the cast-to-object comparison did.
        while (current != null && System.Object.ReferenceEquals(current.Field, field))
        {
            float parsed = parser.ParseFloat(current.Text);

            // late init: allocate only once the first term has parsed successfully
            if (values == null)
            {
                values = new float[reader.MaxDoc];
            }

            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                values[termDocs.Doc] = parsed;
            }

            if (!termEnum.Next())
            {
                break;
            }

            current = termEnum.Term;
        }
    }
    catch (StopFillCacheException)
    {
        // The parser asked to stop filling; keep whatever has been collected so far.
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }

    // no values: hand back an all-zero array
    return values ?? new float[reader.MaxDoc];
}
/// <summary>
/// Collects, into a bit set, every document that contains at least one term
/// produced by the wrapped query's term enumeration.
/// </summary>
/// <param name="reader">Reader to enumerate terms and postings from.</param>
/// <returns>
/// <c>DocIdSet.EMPTY_DOCIDSET</c> when the enumeration starts on a null term; otherwise
/// an <c>OpenBitSet</c> sized to <c>reader.MaxDoc</c>.
/// </returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    TermEnum enumerator = query.GetEnum(reader);
    try
    {
        // A null current term means the enumeration is empty -> shortcut.
        if (enumerator.Term == null)
        {
            return DocIdSet.EMPTY_DOCIDSET;
        }

        // Otherwise gather all matching documents into an OpenBitSet.
        OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc);
        int[] docs = new int[32];
        int[] freqs = new int[32];

        TermDocs termDocs = reader.TermDocs();
        try
        {
            int termCount = 0;
            do
            {
                Term term = enumerator.Term;
                if (term == null)
                {
                    break;
                }

                termCount++;
                termDocs.Seek(term);

                // Read postings in batches until a batch comes back empty.
                int count;
                while ((count = termDocs.Read(docs, freqs)) != 0)
                {
                    for (int i = 0; i < count; i++)
                    {
                        bitSet.Set(docs[i]);
                    }
                }
            }
            while (enumerator.Next());

            query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right?
        }
        finally
        {
            termDocs.Close();
        }

        return bitSet;
    }
    finally
    {
        enumerator.Close();
    }
}
/// <summary>
/// Exercises <c>writer.Update</c> with both <c>DocumentObjectTypeKind</c> values and checks
/// the resulting document counts and the terms left in the "String" field.
/// </summary>
public void UpdateWithKindSuccess()
{
    TestObject t = new TestObject()
    {
        Number = 1234,
        String = "Test Object 1234",
    };

    Assert.AreEqual(0, writer.NumDocs());

    writer.Add<object>(t);
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    TestObject t2 = new TestObject()
    {
        Number = 2345,
        String = "Something Else 2345",
    };

    writer.Update(t2, DocumentObjectTypeKind.Static, new TermQuery(new Term("String", "1234")));
    writer.Commit();

    Assert.AreEqual(2, writer.NumDocs());

    writer.DeleteDocuments<object>(new MatchAllDocsQuery());
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    TestObject t3 = new TestObject()
    {
        Number = 3456,
        String = "Completely Different 3456",
    };

    writer.Update(t3, DocumentObjectTypeKind.Actual, new TermQuery(new Term("String", "2345")));
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    HashSet<string> expectedTerms = new HashSet<string>(new string[] { "completely", "different", "3456" });

    // Dispose the reader and the enumerator so the test does not leak index handles.
    using (IndexReader reader = IndexReader.Open(dir, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            if (String.Equals("String", terms.Term.Field))
            {
                // Remove is a no-op for terms that are not expected.
                expectedTerms.Remove(terms.Term.Text);
            }
        }
    }

    // Every expected term must have been seen.
    Assert.AreEqual(0, expectedTerms.Count);
}
/// <summary>
/// Exercises <c>writer.Update</c> with both <c>DocumentObjectTypeKind</c> values using a
/// <c>KeywordAnalyzer</c>, and checks the document counts and the final "String" term.
/// </summary>
public void UpdateWithKindWithAnalyzerSuccess()
{
    TestObject t = new TestObject()
    {
        Number = 1234,
        String = "Test Object 1234",
    };

    Assert.AreEqual(0, writer.NumDocs());

    writer.Add<object>(t, new KeywordAnalyzer());
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    TestObject t2 = new TestObject()
    {
        Number = 2345,
        String = "Something Else 2345",
    };

    writer.Update(t2, DocumentObjectTypeKind.Static, new TermQuery(new Term("String", "Test Object 1234")), new KeywordAnalyzer());
    writer.Commit();

    Assert.AreEqual(2, writer.NumDocs());

    writer.DeleteDocuments<object>(new MatchAllDocsQuery());
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    TestObject t3 = new TestObject()
    {
        Number = 3456,
        String = "Completely Different 3456",
    };

    writer.Update(t3, DocumentObjectTypeKind.Actual, new TermQuery(new Term("String", "Something Else 2345")), new KeywordAnalyzer());
    writer.Commit();

    Assert.AreEqual(1, writer.NumDocs());

    int nTerms = 0;

    // Dispose the reader and the enumerator so the test does not leak index handles.
    using (IndexReader reader = IndexReader.Open(dir, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            if (String.Equals("String", terms.Term.Field)
                && String.Equals("Completely Different 3456", terms.Term.Text))
            {
                nTerms++;
            }
        }
    }

    Assert.AreEqual(1, nTerms);
}
/// <summary>
/// Writes every term of the <c>Book</c> index (field and text) to the debug log.
/// </summary>
public void DisplayInternalIndex()
{
    Directory mainIndexDir = SearchFactory.GetSearchFactory(sessions).GetDirectoryProvider(typeof(Book)).Directory;
    IndexReader reader = IndexReader.Open(mainIndexDir);
    try
    {
        TermEnum terms = reader.Terms();
        try
        {
            while (terms.Next())
            {
                Term term = terms.Term();
                log.Debug("In " + term.Field() + ": " + term.Text());
            }
        }
        finally
        {
            terms.Close();
        }
    }
    finally
    {
        // The reader was opened here, so it is released here as well.
        reader.Close();
    }
}
/// <summary>
/// Returns the next term, or null once the underlying enumerator is exhausted.
/// </summary>
/// <remarks>
/// In Lucene 3 a call to reader.terms(term) is already pre-positioned, so the very first
/// call returns the current term without advancing.
/// </remarks>
public Term Next()
{
    if (!initialState)
    {
        return termEnum.Next() ? Term() : null;
    }

    // First call: consume the pre-positioned term.
    initialState = false;
    return Term();
}
/// <summary>
/// Advances the wrapped enumerator and caches its current term in <c>term</c>
/// (null when the enumerator could not advance).
/// </summary>
/// <returns>True when the enumerator advanced to another term.</returns>
internal bool Next()
{
    bool advanced = termEnum.Next();
    term = advanced ? termEnum.Term() : null;
    return advanced;
}
/// <summary>
/// Asserts that every <c>FieldName.RevisionLast</c> term in the index, starting from "0",
/// has the text <c>Revision.HeadString</c>.
/// </summary>
/// <param name="searcher">Searcher whose reader is inspected.</param>
static void CheckIsHeadOnly(IndexSearcher searcher)
{
    TermEnum t = searcher.Reader.Terms(new Term(FieldName.RevisionLast, "0"));
    Assert.IsNotNull(t);
    try
    {
        // There must be at least one RevisionLast term to check.
        Assert.IsNotNull(t.Term());
        Assert.AreEqual(FieldName.RevisionLast, t.Term().Field());

        // Stop when the enumeration is exhausted or leaves the field. The original loop had no
        // exit for an exhausted enumerator and went on to dereference t.Term() again.
        do
        {
            Assert.AreEqual(Revision.HeadString, t.Term().Text());
        }
        while (t.Next() && t.Term() != null && t.Term().Field() == FieldName.RevisionLast);
    }
    finally
    {
        t.Close();
    }
}
/// <summary>
/// Exposes a TermEnum as a lazy sequence of Term instances.
/// The enumerator is disposed once the sequence has been consumed or abandoned.
/// </summary>
/// <param name="termEnum">TermEnum to iterate; a null value yields an empty sequence.</param>
/// <returns>The non-null terms of the enumerator, starting with its current term.</returns>
public static IEnumerable<Term> ToEnumerable(this TermEnum termEnum)
{
    if (termEnum == null)
    {
        yield break;
    }

    using (termEnum)
    {
        do
        {
            // The current position may hold no term; skip it rather than yield null.
            Term current = termEnum.Term;
            if (current != null)
            {
                yield return current;
            }
        }
        while (termEnum.Next());
    }
}
/// <summary>
/// Collects the distinct term texts of the "LineText" field from the index in <c>DirectoryFs</c>.
/// </summary>
/// <returns>A set with the text of every "LineText" term.</returns>
public HashSet<String> GetListOfItemsNameFromIndex()
{
    HashSet<String> uniqueTerms = new HashSet<String>();

    // Dispose the reader and the enumerator; the original left both open.
    using (IndexReader reader = IndexReader.Open(DirectoryFs, true))
    using (TermEnum terms = reader.Terms())
    {
        while (terms.Next())
        {
            Term term = terms.Term;
            if (term.Field.Equals("LineText"))
            {
                uniqueTerms.Add(term.Text);
            }
        }
    }

    return uniqueTerms;
}
/// <summary>
/// Counts the terms of <paramref name="field"/>, starting from the empty term text.
/// </summary>
/// <param name="reader">Reader to enumerate terms from.</param>
/// <param name="field">Field whose terms are counted (compared ordinally).</param>
/// <returns>The number of consecutive terms belonging to the field.</returns>
protected int GetDictValueCount(IndexReader reader, string field)
{
    int count = 0;
    using (TermEnum termEnum = reader.Terms(new Term(field, "")))
    {
        Term current = termEnum.Term;

        // Walk forward until the enumeration ends or moves on to another field.
        while (current != null && string.CompareOrdinal(current.Field, field) == 0)
        {
            count++;
            if (!termEnum.Next())
            {
                break;
            }

            current = termEnum.Term;
        }
    }

    return count;
}
/// <summary>
/// Finds a term of the specified field.
/// </summary>
/// <param name="field">Field name of the term to position on.</param>
/// <param name="text">Term text to position on.</param>
/// <param name="current">
/// True to return the term the enumerator lands on after skipping to (field, text);
/// false to return the term after it, provided that term belongs to <paramref name="field"/>.
/// </param>
/// <returns>
/// The matching term with its document frequency, or null when the skip fails or,
/// for <paramref name="current"/> = false, there is no following term in the same field.
/// </returns>
public TermModel FindTerm(string field, string text, bool current)
{
    TermEnum enum2 = open.Reader.Terms();
    try
    {
        if (!enum2.SkipTo(new Term(field, text)))
        {
            return null;
        }

        // When 'current' is false, advance once and require the next term to be in the same field.
        if (current || (enum2.Next() && field.Equals(enum2.Term().Field())))
        {
            return new TermModel(enum2.Term(), enum2.DocFreq());
        }

        return null;
    }
    finally
    {
        // Close on every path; the original leaked the enumerator when SkipTo failed.
        enum2.Close();
    }
}
/// <summary>
/// Exposes a TermEnum as a lazy sequence of Term instances, ending the sequence at the
/// first term that does not belong to the specified field.
/// </summary>
/// <remarks>This overload does not dispose <paramref name="termEnum"/>.</remarks>
/// <param name="termEnum">TermEnum to iterate; a null value yields an empty sequence.</param>
/// <param name="field">Field whose terms should be enumerated.</param>
/// <returns>The terms of the field, starting with the enumerator's current term.</returns>
public static IEnumerable<Term> ToEnumerable(this TermEnum termEnum, string field)
{
    if (termEnum == null)
    {
        yield break;
    }

    do
    {
        Term current = termEnum.Term;
        if (current == null)
        {
            // No term at this position; try the next one.
            continue;
        }

        if (current.Field != field)
        {
            yield break;
        }

        yield return current;
    }
    while (termEnum.Next());
}
/// <summary>
/// Builds the bit set of documents to keep: starts with every document set and clears the
/// duplicates for each term of <c>fieldName</c> that occurs in more than one document.
/// </summary>
/// <param name="reader">Reader whose terms and postings are scanned.</param>
/// <returns>A bit set sized to <c>reader.MaxDoc</c>.</returns>
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc);
    bits.Set(0, reader.MaxDoc); //assume all are valid

    Term startTerm = new Term(fieldName);

    // 'using' tolerates a null resource, so the null check below is still meaningful.
    using (TermEnum te = reader.Terms(startTerm))
    {
        if (te != null)
        {
            Term currTerm = te.Term;
            while ((currTerm != null) && (currTerm.Field == startTerm.Field)) //term fieldnames are interned
            {
                if (te.DocFreq() > 1)
                {
                    int lastDoc = -1;

                    //unset potential duplicates
                    // Each per-term TermDocs is disposed; the original never released them.
                    using (TermDocs td = reader.TermDocs(currTerm))
                    {
                        td.Next();
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            // Step past the first occurrence so that it stays set.
                            td.Next();
                        }

                        do
                        {
                            lastDoc = td.Doc;
                            bits.Clear(lastDoc);
                        }
                        while (td.Next());
                    }

                    if (keepMode == KM_USE_LAST_OCCURRENCE)
                    {
                        //restore the last bit
                        bits.Set(lastDoc);
                    }
                }

                if (!te.Next())
                {
                    break;
                }

                currTerm = te.Term;
            }
        }
    }

    return bits;
}
/// <summary>
/// Handles the "first term" button: shows the first term of the opened index,
/// or a status message when no index is open or an error occurs.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void buttonFirstTerm_Click(object sender, System.EventArgs e)
{
    if (_luke.IndexReader == null)
    {
        _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
        return;
    }

    try
    {
        TermEnum te = _luke.IndexReader.Terms();
        try
        {
            // Advance to the first term; the return value is ignored as in the original,
            // so _ShowTerm receives whatever Term() reports for an empty index.
            te.Next();
            Term t = te.Term();
            _ShowTerm(t);
        }
        finally
        {
            // The enumerator is only needed to fetch the term; release it afterwards.
            te.Close();
        }
    }
    catch (Exception exc)
    {
        _luke.ShowStatus(exc.Message);
    }
}
/// <summary>
/// Lazily enumerates the term texts of <paramref name="field"/>.
/// </summary>
/// <param name="field">Field whose terms are returned.</param>
/// <returns>
/// The text of each term of the field; empty when the index storage does not exist
/// or the field has no terms.
/// </returns>
public IEnumerable<string> Terms(string field)
{
    if (!index.Storage.Exists)
    {
        yield break;
    }

    // NOTE(review): the reader returned by OpenReader() is never released here. Ownership
    // cannot be determined from this method (it may be shared) — confirm and dispose if owned.
    IndexReader reader = index.Storage.OpenReader();

    using (TermEnum terms = reader.Terms(new Term(field)))
    {
        do
        {
            // The current term is null when the enumeration is exhausted; treat that as
            // the end of the field instead of dereferencing it.
            Term current = terms.Term;
            if (current == null || current.Field != field)
            {
                yield break;
            }

            yield return current.Text;
        }
        while (terms.Next());
    }
}
/// <summary>
/// Builds the per-document short array for the field named by <paramref name="entryKey"/>.
/// </summary>
/// <remarks>
/// When the entry carries no parser, the request is delegated to <c>wrapper.GetShorts</c>
/// with the default short parser.
/// </remarks>
/// <param name="reader">Reader whose terms and postings are scanned.</param>
/// <param name="entryKey">Cache entry carrying the field name and an optional <c>ShortParser</c>.</param>
/// <returns>A <c>short[]</c> of length <c>reader.MaxDoc</c> (or the delegated result).</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = entryKey.field;
    ShortParser parser = (ShortParser)entryKey.custom;

    if (parser == null)
    {
        return wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER);
    }

    short[] values = new short[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        Term current = termEnum.Term;

        // Field names are compared by reference, exactly as the cast-to-object comparison did.
        while (current != null && System.Object.ReferenceEquals(current.Field, field))
        {
            short parsed = parser.ParseShort(current.Text);
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                values[termDocs.Doc] = parsed;
            }

            if (!termEnum.Next())
            {
                break;
            }

            current = termEnum.Term;
        }
    }
    catch (StopFillCacheException)
    {
        // The parser asked to stop filling; keep whatever has been collected so far.
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }

    return values;
}
/// <summary>
/// Fills the overview properties (term count, index name and version, deletions flag,
/// document count, last-modified stamp) from the opened index and shows the top terms.
/// </summary>
/// <param name="indexName">Display name of the index.</param>
internal void Init(string indexName)
{
    // Count all terms in the index. //TODO: Duplicated
    int i = 0;
    TermEnum termsEnum = _luke.IndexReader.Terms();
    try
    {
        while (termsEnum.Next())
        {
            i++;
        }
    }
    finally
    {
        // Close the enumerator even if iteration throws.
        termsEnum.Close();
    }

    TermsNumber = i;
    IndexName = indexName;
    IndexVersion = IndexReader.GetCurrentVersion(_luke.Directory).ToString();
    HasDeletions = _luke.IndexReader.HasDeletions().ToString();
    DocumentsNumber = _luke.IndexReader.NumDocs();
    LastModified = IndexReader.LastModified(_luke.Directory);

    ShowTopTerms();
}
/// <summary>
/// Reads the text of every term in the index at <c>indexPath</c>.
/// </summary>
/// <returns>A list with one entry per enumerated term, in the reader's term order.</returns>
private IEnumerable<string> GetIndexTerms()
{
    List<string> termsList = new List<string>();

    using (Directory luceneIndexDirectory = FSDirectory.Open(indexPath))
    using (IndexReader reader = IndexReader.Open(luceneIndexDirectory, true))
    using (TermEnum terms = reader.Terms()) // the enumerator is disposed too; the original left it open
    {
        while (terms.Next())
        {
            termsList.Add(terms.Term.Text);
        }
    }

    return termsList;
}