Beispiel #1
0
        /// <summary>
        /// Loads a document through a SetBasedFieldSelector and verifies that
        /// fields named in the lazy set come back lazy (string and binary),
        /// while eagerly-loaded fields are materialized immediately.
        /// </summary>
        public virtual void  TestLazyFields()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);

            Assert.IsTrue(reader != null);
            Assert.IsTrue(reader.Size() == 1);

            // Fields to load eagerly.
            ISet <string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet <string>();
            loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
            loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

            // Fields to defer until their value is first requested.
            ISet <string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet <string>();
            lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
            lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
            lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);

            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
            Document doc = reader.Doc(0, fieldSelector, null);
            Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

            // Lazy string field: must be lazy and still resolve to the stored text.
            IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
            System.String value_Renamed = field.StringValue(null);
            Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
            Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT), value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

            // Eagerly-loaded fields must not be lazy.
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");

            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
            Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF1_TEXT), field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

            // UTF-2 field is in the lazy set, so it must be lazy.
            // BUG FIX: the failure message previously said "Field is lazy and it
            // should not be", the opposite of what this assertion checks.
            field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
            Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF2_TEXT), field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

            // Lazy binary field: no string view, but the bytes must round-trip.
            field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
            Assert.IsTrue(field != null, "field is null and it shouldn't be");
            Assert.IsTrue(field.StringValue(null) == null, "stringValue isn't null for lazy binary field");

            byte[] bytes = field.GetBinaryValue(null);
            Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
            Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "");
            for (int i = 0; i < bytes.Length; i++)
            {
                Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Serializes one stored field to fieldsStream: the field number as a VInt,
        /// a flags byte (tokenized/binary), then either the raw bytes (length-prefixed)
        /// or the string value.
        /// </summary>
        internal void  WriteField(FieldInfo fi, IFieldable field, IState state)
        {
            fieldsStream.WriteVInt(fi.number);

            // Build the per-field flags byte.
            byte flags = 0;
            if (field.IsTokenized)
            {
                flags |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary)
            {
                flags |= FieldsWriter.FIELD_IS_BINARY;
            }
            fieldsStream.WriteByte(flags);

            // compression is disabled for the current field
            if (!field.IsBinary)
            {
                fieldsStream.WriteString(field.StringValue(state));
                return;
            }

            byte[] payload = field.GetBinaryValue(state);
            int    length  = field.BinaryLength;
            int    start   = field.BinaryOffset;

            fieldsStream.WriteVInt(length);
            fieldsStream.WriteBytes(payload, start, length);
        }
Beispiel #3
0
        /// <summary>
        /// Converts a stored field's string value into an object: the null/empty
        /// sentinel constants (and actual null/empty strings) map to null and
        /// string.Empty, JSON-typed fields are parsed, anything else is returned
        /// as the raw string. Binary fields are rejected.
        /// </summary>
        private static object ConvertType(JsonOperationContext context, IFieldable field, FieldType fieldType, IState state)
        {
            if (field.IsBinary)
            {
                ThrowBinaryValuesNotSupported();
            }

            var raw = field.StringValue(state);

            if (raw == null || raw == Constants.Documents.Indexing.Fields.NullValue)
            {
                return null;
            }

            if (raw == string.Empty || raw == Constants.Documents.Indexing.Fields.EmptyString)
            {
                return string.Empty;
            }

            if (fieldType.IsJson == false)
            {
                return raw;
            }

            // JSON field: parse the UTF-8 payload through the context.
            var payload = Encodings.Utf8.GetBytes(raw);
            var stream  = new MemoryStream(payload);

            return context.ReadForMemory(stream, field.Name);
        }
Beispiel #4
0
        /// <summary>
        /// Verifies that resolving a lazy field's string value after the reader
        /// has been disposed throws AlreadyClosedException.
        /// </summary>
        public virtual void  TestLazyFieldsAfterClose()
        {
            Assert.IsTrue(dir != null);
            Assert.IsTrue(fieldInfos != null);
            FieldsReader fieldsReader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);

            Assert.IsTrue(fieldsReader != null);
            Assert.IsTrue(fieldsReader.Size() == 1);

            // Fields to load eagerly.
            ISet <string> eagerNames = Support.Compatibility.SetFactory.CreateHashSet <string>();
            eagerNames.Add(DocHelper.TEXT_FIELD_1_KEY);
            eagerNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);

            // Fields to load lazily.
            ISet <string> lazyNames = Support.Compatibility.SetFactory.CreateHashSet <string>();
            lazyNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            lazyNames.Add(DocHelper.LAZY_FIELD_KEY);
            lazyNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
            lazyNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);

            SetBasedFieldSelector selector = new SetBasedFieldSelector(eagerNames, lazyNames);
            Document document = fieldsReader.Doc(0, selector, null);
            Assert.IsTrue(document != null, "doc is null and it shouldn't be");

            IFieldable lazyField = document.GetFieldable(DocHelper.LAZY_FIELD_KEY);
            Assert.IsTrue(lazyField != null, "field is null and it shouldn't be");
            Assert.IsTrue(lazyField.IsLazy, "field is not lazy and it should be");

            // Dispose before the lazy value has ever been loaded.
            fieldsReader.Dispose();

            Assert.Throws <AlreadyClosedException>(() => { var value = lazyField.StringValue(null); },
                                                   "did not hit AlreadyClosedException as expected");
        }
Beispiel #5
0
 /// <summary>
 /// Asserts that a field's string value is wrapped in the "^"..."$" sentinels;
 /// throws SystemException otherwise.
 /// </summary>
 internal virtual void  ValidateField(IFieldable f)
 {
     System.String content = f.StringValue(null);
     bool wellFormed = content.StartsWith("^") && content.EndsWith("$");
     if (!wellFormed)
     {
         throw new System.SystemException("Invalid field:" + f.ToString() + " val=" + content);
     }
 }
Beispiel #6
0
        /// <summary>
        /// Loads a document through the size-reporting field selector and checks
        /// that string fields report 2 * char-count while binary fields report
        /// their actual byte length.
        /// </summary>
        public virtual void  TestLoadSize()
        {
            FieldsReader fieldsReader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
            Document     loaded       = fieldsReader.Doc(0, new AnonymousClassFieldSelector(this), null);

            IFieldable sizedText  = loaded.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
            IFieldable plainText  = loaded.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
            IFieldable lazyBinary = loaded.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);

            // The selector turns the sized text field into a binary size marker.
            Assert.IsTrue(sizedText.IsBinary);
            Assert.IsTrue(!plainText.IsBinary);
            Assert.IsTrue(lazyBinary.IsBinary);

            AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, sizedText.GetBinaryValue(null));
            Assert.AreEqual(DocHelper.FIELD_3_TEXT, plainText.StringValue(null));
            AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, lazyBinary.GetBinaryValue(null));

            fieldsReader.Dispose();
        }
        /// <summary>
        /// Converts a stored field's string value into an object: actual null/empty
        /// strings map to null and string.Empty; for non-tokenized fields the
        /// NULL_VALUE/EMPTY_STRING sentinel constants do as well; JSON-typed fields
        /// are parsed; anything else is returned as the raw string. Binary fields
        /// are rejected.
        /// </summary>
        private static object ConvertType(JsonOperationContext context, IFieldable field, FieldType fieldType, IState state)
        {
            if (field.IsBinary)
            {
                ThrowBinaryValuesNotSupported();
            }

            var raw = field.StringValue(state);

            if (raw == null)
            {
                return null;
            }

            if (raw.Length == 0)
            {
                return string.Empty;
            }

            if (field.IsTokenized == false)
            {
                // NULL_VALUE and EMPTY_STRING fields aren't tokenized
                // this will prevent converting fields with a "NULL_VALUE" string to null
                if (raw == Constants.Documents.Indexing.Fields.NullValue)
                {
                    return null;
                }
                if (raw == Constants.Documents.Indexing.Fields.EmptyString)
                {
                    return string.Empty;
                }
            }

            if (fieldType.IsJson == false)
            {
                return raw;
            }

            return context.ReadForMemory(raw, field.Name);
        }
Beispiel #8
0
        /// <summary>
        /// Opens the test index and, for each requested doc id, loads the document
        /// through SELECTOR and verifies every stored field value round-trips:
        /// each value, split on '#', must contain a token present in the dataset.
        /// </summary>
        /// <param name="docs">Document ids to load and validate.</param>
        public virtual void  DoTest(int[] docs)
        {
            // Lazily populate the lookup table on first use.
            if (dataset.Count == 0)
            {
                for (int i = 0; i < data.Length; i++)
                {
                    dataset.Add(data[i], data[i]);
                }
            }

            Directory   dir    = MakeIndex();
            IndexReader reader = IndexReader.Open(dir, true, null);

            // Close the reader even when an assertion fails mid-loop
            // (previously the reader leaked on any test failure).
            try
            {
                for (int i = 0; i < docs.Length; i++)
                {
                    Document d = reader.Document(docs[i], SELECTOR, null);
                    d.Get(MAGIC_FIELD, null);

                    var fields = d.GetFields();
                    for (System.Collections.IEnumerator fi = fields.GetEnumerator(); fi.MoveNext();)
                    {
                        IFieldable f = null;
                        try
                        {
                            f = (IFieldable)fi.Current;
                            System.String fname = f.Name;
                            System.String fval  = f.StringValue(null);
                            // BUG FIX: value and message were swapped, so the assert
                            // checked the (always non-null) message string instead of fval.
                            Assert.IsNotNull(fval, docs[i] + " FIELD: " + fname);
                            System.String[] vals = fval.Split('#');
                            Assert.IsTrue(dataset.Contains(vals[0]) || dataset.Contains(vals[1]), "FIELD:" + fname + ",VAL:" + fval);
                        }
                        catch (System.Exception e)
                        {
                            throw new Exception(docs[i] + " WTF: " + f.Name, e);
                        }
                    }
                }
            }
            finally
            {
                reader.Close();
            }
        }
        /// <summary>
        /// Asserts that two documents carry the same stored fields (compared in
        /// name order) with equal string values; binary fields are only checked
        /// for matching kind. On mismatch both field lists are printed first.
        /// </summary>
        public static void  VerifyEquals(Document d1, Document d2)
        {
            var sortedFields1 = d1.GetFields().OrderBy(f => f.Name).ToList();
            var sortedFields2 = d2.GetFields().OrderBy(f => f.Name).ToList();

            if (sortedFields1.Count != sortedFields2.Count)
            {
                // Dump both documents so the missing/extra field is visible.
                System.Console.Out.WriteLine("[" + String.Join(",", sortedFields1.Select(x => x.ToString()).ToArray()) + "]");
                System.Console.Out.WriteLine("[" + String.Join(",", sortedFields2.Select(x => x.ToString()).ToArray()) + "]");
                Assert.AreEqual(sortedFields1.Count, sortedFields2.Count);
            }

            for (int idx = 0; idx < sortedFields1.Count; idx++)
            {
                IFieldable left  = (IFieldable)sortedFields1[idx];
                IFieldable right = (IFieldable)sortedFields2[idx];

                if (left.IsBinary)
                {
                    System.Diagnostics.Debug.Assert(right.IsBinary);
                    //TODO
                    continue;
                }

                System.String leftValue  = left.StringValue(null);
                System.String rightValue = right.StringValue(null);
                if (leftValue.Equals(rightValue))
                {
                    continue;
                }

                // print out whole doc on error
                System.Console.Out.WriteLine("[" + String.Join(",", sortedFields1.Select(x => x.ToString()).ToArray()) + "]");
                System.Console.Out.WriteLine("[" + String.Join(",", sortedFields2.Select(x => x.ToString()).ToArray()) + "]");
                Assert.AreEqual(leftValue, rightValue);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Scans every live document of a Lucene 2.9-era index and checks the
        /// "compressed" field in two passes: pass one verifies the decompressed
        /// value (string for even ids, binary for odd ids); pass two reads the
        /// size recorded via a field selector and compares it against the expected
        /// compressed or plain size depending on <paramref name="shouldStillBeCompressed"/>.
        /// </summary>
        /// <param name="dir">Directory holding the index under test.</param>
        /// <param name="shouldStillBeCompressed">
        /// True when the index has not been optimized yet, i.e. stored values are
        /// expected to still be compressed on disk.
        /// </param>
        private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed)
        {
            int count             = 0;
            int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.Length * 2;
            // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
            int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

            IndexReader reader = IndexReader.Open(dir, true, null);

            try
            {
                // look into sub readers and check if raw merge is on/off
                var readers = new System.Collections.Generic.List <IndexReader>();
                ReaderUtil.GatherSubReaders(readers, reader);
                foreach (IndexReader ir in readers)
                {
                    FieldsReader fr = ((SegmentReader)ir).GetFieldsReader(null);
                    Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(),
                                  "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index");
                }

                // test that decompression works correctly
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i, null);
                        // Docs with a "content3" field are not part of this check.
                        if (d.Get("content3", null) != null)
                        {
                            continue;
                        }
                        count++;
                        IFieldable compressed = d.GetFieldable("compressed");
                        // Even ids store text, odd ids store binary (see the size
                        // pass below, which uses the same parity rule).
                        if (int.Parse(d.Get("id", null)) % 2 == 0)
                        {
                            Assert.IsFalse(compressed.IsBinary);
                            Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue(null),
                                            "incorrectly decompressed string");
                        }
                        else
                        {
                            Assert.IsTrue(compressed.IsBinary);
                            Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue(null)),
                                          "incorrectly decompressed binary");
                        }
                    }
                }

                //check if field was decompressed after optimize
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    if (!reader.IsDeleted(i))
                    {
                        Document d = reader.Document(i, new AnonymousFieldSelector(), null);
                        if (d.Get("content3", null) != null)
                        {
                            continue;
                        }
                        count++;
                        // read the size from the binary value using BinaryReader (this prevents us from doing the shift ops ourselves):
                        // ugh, Java uses Big-Endian streams, so we need to do it manually.
                        byte[] encodedSize    = d.GetFieldable("compressed").GetBinaryValue(null).Take(4).Reverse().ToArray();
                        int    actualSize     = BitConverter.ToInt32(encodedSize, 0);
                        int    compressedSize = int.Parse(d.Get("compressedSize", null));
                        bool   binary         = int.Parse(d.Get("id", null)) % 2 > 0;
                        int    shouldSize     = shouldStillBeCompressed
                                             ? compressedSize
                                             : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                        Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                        if (!shouldStillBeCompressed)
                        {
                            Assert.IsFalse(compressedSize == actualSize,
                                           "uncompressed field should have another size than recorded in index");
                        }
                    }
                }
                // count is incremented once per matching doc in each of the two
                // passes; presumably the fixture index holds 34 such docs — TODO
                // confirm against the index-building code.
                Assert.AreEqual(34 * 2, count, "correct number of tests");
            }
            finally
            {
                reader.Dispose();
            }
        }
Beispiel #11
0
        /// <summary>
        /// Rough benchmark: writes testDoc to a temp FSDirectory, then over 50
        /// iterations measures how long it takes to read the large field's value
        /// when loaded eagerly versus lazily, printing the average of each.
        /// NOTE(review): timing-based; results are machine-dependent and only
        /// printed, not asserted.
        /// </summary>
        public virtual void  TestLazyPerformance()
        {
            System.String           tmpIODir = AppSettings.Get("tempDir", Path.GetTempPath());
            System.String           path     = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + Guid.NewGuid();
            System.IO.DirectoryInfo file     = new System.IO.DirectoryInfo(path);
            // Start from a clean directory; Guid suffix avoids collisions between runs.
            _TestUtil.RmDir(file);
            FSDirectory tmpDir = FSDirectory.Open(file);

            Assert.IsTrue(tmpDir != null);

            IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            // Non-compound file format so FieldsReader can open the segment directly.
            writer.UseCompoundFile = false;
            writer.AddDocument(testDoc, null);
            writer.Close();

            Assert.IsTrue(fieldInfos != null);
            FieldsReader  reader;
            long          lazyTime       = 0;
            long          regularTime    = 0;
            int           length         = 50;
            ISet <string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet <string>();

            // Only the large field is lazy; the eager set is empty.
            lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Support.Compatibility.SetFactory.CreateHashSet <string>(), lazyFieldNames);

            for (int i = 0; i < length; i++)
            {
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos, null);
                Assert.IsTrue(reader != null);
                Assert.IsTrue(reader.Size() == 1);

                // Eager pass: no selector, all fields are loaded up front.
                Document doc;
                doc = reader.Doc(0, null, null);                 //Load all of them
                Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
                IFieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy == false, "field is lazy");
                System.String value_Renamed;
                long          start;
                long          finish;
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this was always 0ms.
                value_Renamed = field.StringValue(null);
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                Assert.IsTrue(field != null, "field is null and it shouldn't be");
                regularTime += (finish - start);
                reader.Dispose();
                reader = null;
                doc    = null;
                //Hmmm, are we still in cache???
                System.GC.Collect();
                // Lazy pass: the selector defers the large field until StringValue.
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos, null);
                doc    = reader.Doc(0, fieldSelector, null);
                field  = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy == true, "field is not lazy");
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this took around 50 - 70ms
                value_Renamed = field.StringValue(null);
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                lazyTime += (finish - start);
                reader.Dispose();
            }
            System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
            System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
        }
        /// <summary>
        /// Inverts up to <paramref name="count"/> values of one document field:
        /// resets the per-field state with the document boost, asks the consumer
        /// whether inversion is wanted, then for each indexed field either emits
        /// a single token (un-tokenized) or runs a TokenStream (from the field
        /// itself, a Reader, or the analyzer) feeding each token to the consumer
        /// while tracking position, offset, overlap and boost. Aborts the doc
        /// writer if the consumer fails mid-field.
        /// </summary>
        /// <param name="fields">Field instances to process; entries are nulled as they are consumed.</param>
        /// <param name="count">Number of valid entries in <paramref name="fields"/>.</param>
        /// <param name="state">Opaque reader state passed through to StringValue.</param>
        public override void  ProcessFields(IFieldable[] fields, int count, IState state)
        {
            // Fresh per-field accumulators, seeded with the document boost.
            fieldState.Reset(docState.doc.Boost);

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IFieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed && doInvert)
                {
                    bool anyToken;

                    // Multi-valued field: insert the analyzer's position gap
                    // between successive values of the same field.
                    if (fieldState.length > 0)
                    {
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
                    }

                    if (!field.IsTokenized)
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue(state);
                        int           valueLength = stringValue.Length;
                        // Reuse the per-thread single-token source for the whole value.
                        perThread.singleToken.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleToken;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            // A consumer failure may leave buffered state corrupt;
                            // mark the writer as aborting.
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue;

                        if (streamValue != null)
                        {
                            stream = streamValue;
                        }
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader;                             // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue;

                            if (readerValue != null)
                            {
                                reader = readerValue;
                            }
                            else
                            {
                                System.String stringValue = field.StringValue(state);
                                if (stringValue == null)
                                {
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                }
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            IOffsetAttribute            offsetAttribute  = fieldState.attributeSource.AddAttribute <IOffsetAttribute>();
                            IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute <IPositionIncrementAttribute>();

                            consumer.Start(field);

                            for (; ;)
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                {
                                    break;
                                }

                                int posIncr = posIncrAttribute.PositionIncrement;
                                fieldState.position += posIncr;
                                if (fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }

                                // Zero increment means this token overlaps the previous one.
                                if (posIncr == 0)
                                {
                                    fieldState.numOverlap++;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                    {
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    }
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();

                            fieldState.offset += offsetAttribute.EndOffset;
                            anyToken           = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    // Only add the analyzer's offset gap when this value produced tokens.
                    if (anyToken)
                    {
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    }
                    fieldState.boost *= field.Boost;
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }