public virtual void TestLazyFields()
{
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);

    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);

    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector, null);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
    System.String value_Renamed = field.StringValue(null);
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "field is lazy and it should not be");

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "field is lazy and it should not be");
    Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == true, "field is not lazy and it should be");
    Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

    field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.StringValue(null) == null, "stringValue isn't null for lazy binary field");

    byte[] bytes = field.GetBinaryValue(null);
    Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
    Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "binary length mismatch for lazy binary field");
    for (int i = 0; i < bytes.Length; i++)
    {
        Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
    }
}
internal void WriteField(FieldInfo fi, IFieldable field, IState state)
{
    fieldsStream.WriteVInt(fi.number);

    byte bits = 0;
    if (field.IsTokenized)
    {
        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary)
    {
        bits |= FieldsWriter.FIELD_IS_BINARY;
    }
    fieldsStream.WriteByte(bits);

    // compression is disabled for the current field
    if (field.IsBinary)
    {
        byte[] data = field.GetBinaryValue(state);
        int len = field.BinaryLength;
        int offset = field.BinaryOffset;

        fieldsStream.WriteVInt(len);
        fieldsStream.WriteBytes(data, offset, len);
    }
    else
    {
        fieldsStream.WriteString(field.StringValue(state));
    }
}
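WriteField above spells out the per-field record for one stored field: a VInt field number, a flags byte, then either a VInt length plus raw bytes (binary fields) or a string value (text fields). The following is a minimal, self-contained sketch of a decoder for that layout, not the library's actual FieldsReader; the flag values (1 = tokenized, 2 = binary), the ReadVInt helper, and the StoredFieldRecordSketch name are assumptions made for illustration, and the string branch is left undecoded because its exact encoding is an IndexOutput.WriteString implementation detail.

// Minimal sketch (not Lucene.NET code): decoding the per-field record that
// WriteField produces. Flag values mirror the writer's bits and are assumed
// here (1 = tokenized, 2 = binary) purely for illustration.
using System;
using System.IO;

static class StoredFieldRecordSketch
{
    const byte FieldIsTokenized = 0x1;
    const byte FieldIsBinary = 0x2;

    // Lucene-style VInt: 7 payload bits per byte, high bit = "more bytes follow".
    static int ReadVInt(Stream s)
    {
        int b = s.ReadByte();
        int value = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = s.ReadByte();
            value |= (b & 0x7F) << shift;
        }
        return value;
    }

    public static void ReadOneField(Stream s)
    {
        int fieldNumber = ReadVInt(s);   // which FieldInfo this value belongs to
        int bits = s.ReadByte();         // the flags byte written by WriteField

        if ((bits & FieldIsBinary) != 0)
        {
            int len = ReadVInt(s);       // VInt byte length, then the raw payload
            var data = new byte[len];
            s.Read(data, 0, len);
            Console.WriteLine($"field #{fieldNumber}: {len} binary bytes");
        }
        else
        {
            // Text fields follow as a length-prefixed string; decoding that is
            // omitted here because the encoding belongs to IndexOutput.WriteString.
            Console.WriteLine($"field #{fieldNumber}: string value follows (tokenized={(bits & FieldIsTokenized) != 0})");
        }
    }
}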
private static object ConvertType(JsonOperationContext context, IFieldable field, FieldType fieldType, IState state)
{
    if (field.IsBinary)
    {
        ThrowBinaryValuesNotSupported();
    }

    var stringValue = field.StringValue(state);
    if (stringValue == Constants.Documents.Indexing.Fields.NullValue || stringValue == null)
    {
        return null;
    }

    if (stringValue == Constants.Documents.Indexing.Fields.EmptyString || stringValue == string.Empty)
    {
        return string.Empty;
    }

    if (fieldType.IsJson == false)
    {
        return stringValue;
    }

    var bytes = Encodings.Utf8.GetBytes(stringValue);
    var ms = new MemoryStream(bytes);
    return context.ReadForMemory(ms, field.Name);
}
public virtual void TestLazyFieldsAfterClose()
{
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);

    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);

    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector, null);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");

    reader.Dispose();

    // A lazily-loaded value must fail once the underlying FieldsReader is disposed.
    Assert.Throws<AlreadyClosedException>(() => { var value = field.StringValue(null); },
        "did not hit AlreadyClosedException as expected");
}
internal virtual void ValidateField(IFieldable f)
{
    System.String val = f.StringValue(null);
    if (!val.StartsWith("^") || !val.EndsWith("$"))
    {
        throw new System.SystemException("Invalid field:" + f.ToString() + " val=" + val);
    }
}
public virtual void TestLoadSize()
{
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
    Document doc;

    doc = reader.Doc(0, new AnonymousClassFieldSelector(this), null);
    IFieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    IFieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
    IFieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);

    // Fields returned under FieldSelectorResult.SIZE come back as a binary size marker,
    // so f1 and fb are binary while the fully loaded f3 keeps its string value.
    Assert.IsTrue(f1.IsBinary);
    Assert.IsTrue(!f3.IsBinary);
    Assert.IsTrue(fb.IsBinary);
    AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.GetBinaryValue(null));
    Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue(null));
    AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.GetBinaryValue(null));

    reader.Dispose();
}
private static object ConvertType(JsonOperationContext context, IFieldable field, FieldType fieldType, IState state)
{
    if (field.IsBinary)
    {
        ThrowBinaryValuesNotSupported();
    }

    var stringValue = field.StringValue(state);
    if (stringValue == null)
    {
        return null;
    }

    if (stringValue == string.Empty)
    {
        return string.Empty;
    }

    if (field.IsTokenized == false)
    {
        // NULL_VALUE and EMPTY_STRING fields aren't tokenized;
        // this will prevent converting fields with a "NULL_VALUE" string to null
        switch (stringValue)
        {
            case Constants.Documents.Indexing.Fields.NullValue:
                return null;

            case Constants.Documents.Indexing.Fields.EmptyString:
                return string.Empty;
        }
    }

    if (fieldType.IsJson == false)
    {
        return stringValue;
    }

    return context.ReadForMemory(stringValue, field.Name);
}
public virtual void DoTest(int[] docs)
{
    if (dataset.Count == 0)
    {
        for (int i = 0; i < data.Length; i++)
        {
            dataset.Add(data[i], data[i]);
        }
    }

    Directory dir = MakeIndex();
    IndexReader reader = IndexReader.Open(dir, true, null);
    for (int i = 0; i < docs.Length; i++)
    {
        Document d = reader.Document(docs[i], SELECTOR, null);
        d.Get(MAGIC_FIELD, null);

        var fields = d.GetFields();
        for (System.Collections.IEnumerator fi = fields.GetEnumerator(); fi.MoveNext();)
        {
            IFieldable f = null;
            try
            {
                f = (IFieldable)fi.Current;
                System.String fname = f.Name;
                System.String fval = f.StringValue(null);
                Assert.IsNotNull(fval, docs[i] + " FIELD: " + fname);
                System.String[] vals = fval.Split('#');
                Assert.IsTrue(dataset.Contains(vals[0]) || dataset.Contains(vals[1]), "FIELD:" + fname + ",VAL:" + fval);
            }
            catch (System.Exception e)
            {
                throw new Exception(docs[i] + " WTF: " + f.Name, e);
            }
        }
    }
    reader.Close();
}
public static void VerifyEquals(Document d1, Document d2)
{
    var ff1 = d1.GetFields().OrderBy(x => x.Name).ToList();
    var ff2 = d2.GetFields().OrderBy(x => x.Name).ToList();

    if (ff1.Count != ff2.Count)
    {
        System.Console.Out.WriteLine("[" + String.Join(",", ff1.Select(x => x.ToString()).ToArray()) + "]");
        System.Console.Out.WriteLine("[" + String.Join(",", ff2.Select(x => x.ToString()).ToArray()) + "]");
        Assert.AreEqual(ff1.Count, ff2.Count);
    }

    for (int i = 0; i < ff1.Count; i++)
    {
        IFieldable f1 = (IFieldable)ff1[i];
        IFieldable f2 = (IFieldable)ff2[i];
        if (f1.IsBinary)
        {
            System.Diagnostics.Debug.Assert(f2.IsBinary);
            //TODO
        }
        else
        {
            System.String s1 = f1.StringValue(null);
            System.String s2 = f2.StringValue(null);
            if (!s1.Equals(s2))
            {
                // print out whole doc on error
                System.Console.Out.WriteLine("[" + String.Join(",", ff1.Select(x => x.ToString()).ToArray()) + "]");
                System.Console.Out.WriteLine("[" + String.Join(",", ff2.Select(x => x.ToString()).ToArray()) + "]");
                Assert.AreEqual(s1, s2);
            }
        }
    }
}
private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed)
{
    int count = 0;
    int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.Length * 2;
    // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
    int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

    IndexReader reader = IndexReader.Open(dir, true, null);
    try
    {
        // look into sub readers and check if raw merge is on/off
        var readers = new System.Collections.Generic.List<IndexReader>();
        ReaderUtil.GatherSubReaders(readers, reader);
        foreach (IndexReader ir in readers)
        {
            FieldsReader fr = ((SegmentReader)ir).GetFieldsReader(null);
            Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(),
                "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index");
        }

        // test that decompression works correctly
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            if (!reader.IsDeleted(i))
            {
                Document d = reader.Document(i, null);
                if (d.Get("content3", null) != null)
                {
                    continue;
                }
                count++;
                IFieldable compressed = d.GetFieldable("compressed");
                if (int.Parse(d.Get("id", null)) % 2 == 0)
                {
                    Assert.IsFalse(compressed.IsBinary);
                    Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue(null), "incorrectly decompressed string");
                }
                else
                {
                    Assert.IsTrue(compressed.IsBinary);
                    Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue(null)), "incorrectly decompressed binary");
                }
            }
        }

        // check if field was decompressed after optimize
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            if (!reader.IsDeleted(i))
            {
                Document d = reader.Document(i, new AnonymousFieldSelector(), null);
                if (d.Get("content3", null) != null)
                {
                    continue;
                }
                count++;
                // read the size from the binary value using BinaryReader (this prevents us from doing the shift ops ourselves):
                // ugh, Java uses Big-Endian streams, so we need to do it manually.
                byte[] encodedSize = d.GetFieldable("compressed").GetBinaryValue(null).Take(4).Reverse().ToArray();
                int actualSize = BitConverter.ToInt32(encodedSize, 0);
                int compressedSize = int.Parse(d.Get("compressedSize", null));
                bool binary = int.Parse(d.Get("id", null)) % 2 > 0;
                int shouldSize = shouldStillBeCompressed ? compressedSize : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                if (!shouldStillBeCompressed)
                {
                    Assert.IsFalse(compressedSize == actualSize, "uncompressed field should have another size than recorded in index");
                }
            }
        }
        Assert.AreEqual(34 * 2, count, "correct number of tests");
    }
    finally
    {
        reader.Dispose();
    }
}
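The second loop decodes the stored size prefix by hand because the bytes were written by a big-endian (Java-style) stream, while BitConverter follows the host's byte order. Below is a small standalone sketch of that decode; the names (BigEndianSizeSketch, ReadBigEndianInt32) and the sample bytes are hypothetical, chosen only to illustrate the equivalence with the Take(4).Reverse() approach used in the test.

// Standalone illustration (not from the test): reading a big-endian Int32 size prefix.
using System;
using System.Linq;

static class BigEndianSizeSketch
{
    public static int ReadBigEndianInt32(byte[] buffer)
    {
        // Explicit shifts avoid depending on BitConverter.IsLittleEndian.
        return (buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3];
    }

    public static void Demo()
    {
        byte[] javaEncoded = { 0x00, 0x00, 0x01, 0x2C };      // 300 written by a Java (big-endian) stream
        Console.WriteLine(ReadBigEndianInt32(javaEncoded));   // 300

        // Same result as the approach used in the test, on little-endian hosts:
        Console.WriteLine(BitConverter.ToInt32(javaEncoded.Take(4).Reverse().ToArray(), 0)); // 300
    }
}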
public virtual void TestLazyPerformance()
{
    System.String tmpIODir = AppSettings.Get("tempDir", Path.GetTempPath());
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + Guid.NewGuid();
    System.IO.DirectoryInfo file = new System.IO.DirectoryInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.Open(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.UseCompoundFile = false;
    writer.AddDocument(testDoc, null);
    writer.Close();

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;

    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Support.Compatibility.SetFactory.CreateHashSet<string>(), lazyFieldNames);

    for (int i = 0; i < length; i++)
    {
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos, null);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Document doc;
        doc = reader.Doc(0, null, null); // load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        IFieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == false, "field is lazy");
        System.String value_Renamed;
        long start;
        long finish;
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this was always 0ms.
        value_Renamed = field.StringValue(null);
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);
        reader.Dispose();
        reader = null;
        doc = null;

        // Hmmm, are we still in cache???
        System.GC.Collect();
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos, null);
        doc = reader.Doc(0, fieldSelector, null);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == true, "field is not lazy");
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue(null);
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);
        reader.Dispose();
    }

    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
public override void ProcessFields(IFieldable[] fields, int count, IState state)
{
    fieldState.Reset(docState.doc.Boost);
    int maxFieldLength = docState.maxFieldLength;
    bool doInvert = consumer.Start(fields, count);

    for (int i = 0; i < count; i++)
    {
        IFieldable field = fields[i];

        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (field.IsIndexed && doInvert)
        {
            bool anyToken;

            if (fieldState.length > 0)
            {
                fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
            }

            if (!field.IsTokenized)
            {
                // un-tokenized field
                System.String stringValue = field.StringValue(state);
                int valueLength = stringValue.Length;
                perThread.singleToken.Reinit(stringValue, 0, valueLength);
                fieldState.attributeSource = perThread.singleToken;
                consumer.Start(field);

                bool success = false;
                try
                {
                    consumer.Add();
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        docState.docWriter.SetAborting();
                    }
                }
                fieldState.offset += valueLength;
                fieldState.length++;
                fieldState.position++;
                anyToken = valueLength > 0;
            }
            else
            {
                // tokenized field
                TokenStream stream;
                TokenStream streamValue = field.TokenStreamValue;

                if (streamValue != null)
                {
                    stream = streamValue;
                }
                else
                {
                    // the field does not have a TokenStream,
                    // so we have to obtain one from the analyzer
                    System.IO.TextReader reader; // find or make Reader
                    System.IO.TextReader readerValue = field.ReaderValue;

                    if (readerValue != null)
                    {
                        reader = readerValue;
                    }
                    else
                    {
                        System.String stringValue = field.StringValue(state);
                        if (stringValue == null)
                        {
                            throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                        }
                        perThread.stringReader.Init(stringValue);
                        reader = perThread.stringReader;
                    }

                    // Tokenize field and add to postingTable
                    stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                }

                // reset the TokenStream to the first token
                stream.Reset();

                int startLength = fieldState.length;

                try
                {
                    int offsetEnd = fieldState.offset - 1;

                    bool hasMoreTokens = stream.IncrementToken();

                    fieldState.attributeSource = stream;

                    IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
                    IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();

                    consumer.Start(field);

                    for (; ;)
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID
                        if (!hasMoreTokens)
                        {
                            break;
                        }

                        int posIncr = posIncrAttribute.PositionIncrement;
                        fieldState.position += posIncr;
                        if (fieldState.position > 0)
                        {
                            fieldState.position--;
                        }

                        if (posIncr == 0)
                        {
                            fieldState.numOverlap++;
                        }

                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.position++;
                        offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
                        if (++fieldState.length >= maxFieldLength)
                        {
                            if (docState.infoStream != null)
                            {
                                docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                            }
                            break;
                        }

                        hasMoreTokens = stream.IncrementToken();
                    }

                    // trigger streams to perform end-of-stream operations
                    stream.End();

                    fieldState.offset += offsetAttribute.EndOffset;
                    anyToken = fieldState.length > startLength;
                }
                finally
                {
                    stream.Close();
                }
            }

            if (anyToken)
            {
                fieldState.offset += docState.analyzer.GetOffsetGap(field);
            }
            fieldState.boost *= field.Boost;
        }

        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }

    consumer.Finish();
    endConsumer.Finish();
}
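The tokenized branch of ProcessFields is essentially the standard IncrementToken/attribute loop. Below is a hedged, standalone sketch of that same pattern with a stock analyzer, outside the indexing chain. It assumes the plain Lucene.Net 3.0.3 API (WhitespaceAnalyzer, ITermAttribute, IOffsetAttribute, IPositionIncrementAttribute); the field name "body", the sample text, and the class name are arbitrary illustration choices, and in the fork these sources come from some methods additionally take an IState argument.

// Minimal sketch: walking a TokenStream the same way ProcessFields does,
// tracking positions via the increment attribute and offsets via the offset attribute.
using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

class TokenStreamWalkSketch
{
    static void Main()
    {
        Analyzer analyzer = new WhitespaceAnalyzer();
        TokenStream stream = analyzer.TokenStream("body", new StringReader("lazy fields are loaded on demand"));

        ITermAttribute term = stream.AddAttribute<ITermAttribute>();
        IOffsetAttribute offset = stream.AddAttribute<IOffsetAttribute>();
        IPositionIncrementAttribute posIncr = stream.AddAttribute<IPositionIncrementAttribute>();

        stream.Reset();
        int position = -1;
        while (stream.IncrementToken())
        {
            // Same bookkeeping idea as ProcessFields: the position advances by the
            // increment attribute (0 means an overlapping token).
            position += posIncr.PositionIncrement;
            Console.WriteLine("pos={0} [{1},{2}) term={3}",
                position, offset.StartOffset, offset.EndOffset, term.Term);
        }
        stream.End();   // let the stream finalize its end-of-stream offsets
        stream.Close();
    }
}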