// Tokenizes the fields of a document into Postings. private void InvertDocument(Document doc) { foreach (Field field in doc.Fields()) { System.String fieldName = field.Name(); int fieldNumber = fieldInfos.FieldNumber(fieldName); int length = fieldLengths[fieldNumber]; // length of Field int position = fieldPositions[fieldNumber]; // position in Field if (field.IsIndexed()) { if (!field.IsTokenized()) { // un-tokenized Field AddPosition(fieldName, field.StringValue(), position++); length++; } else { System.IO.TextReader reader; // find or make Reader if (field.ReaderValue() != null) { reader = field.ReaderValue(); } else if (field.StringValue() != null) { reader = new System.IO.StringReader(field.StringValue()); } else { throw new System.ArgumentException("Field must have either String or Reader value"); } // Tokenize Field and add to postingTable TokenStream stream = analyzer.TokenStream(fieldName, reader); try { for (Token t = stream.Next(); t != null; t = stream.Next()) { position += (t.GetPositionIncrement() - 1); AddPosition(fieldName, t.TermText(), position++); if (++length > maxFieldLength) { break; } } } finally { stream.Close(); } } fieldLengths[fieldNumber] = length; // save Field length fieldPositions[fieldNumber] = position; // save Field position fieldBoosts[fieldNumber] *= field.GetBoost(); } } }
public QueryTermVector(System.String queryString, Analyzer analyzer) { if (analyzer != null) { TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString)); if (stream != null) { Token next = null; System.Collections.ArrayList terms = new System.Collections.ArrayList(); try { while ((next = stream.Next()) != null) { terms.Add(next.TermText()); } ProcessTerms((System.String[]) terms.ToArray(typeof(System.String))); } catch (System.IO.IOException) { } } } }
public QueryTermVector(System.String queryString, Analyzer analyzer) { if (analyzer != null) { TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString)); if (stream != null) { Token next = null; System.Collections.ArrayList terms = new System.Collections.ArrayList(); try { while ((next = stream.Next()) != null) { terms.Add(next.TermText()); } ProcessTerms((System.String[])terms.ToArray(typeof(System.String))); } catch (System.IO.IOException) { } } } }