Ejemplo n.º 1
0
 public static void AfterClassCountingFacetsAggregatorTest()
 {
     IOUtils.Close(indexDir, taxoDir);
 }
        public virtual void TestConcurrency()
        {
            int                 ncats   = AtLeast(100000); // add many categories
            int                 range   = ncats * 3;       // affects the categories selection
            AtomicInteger       numCats = new AtomicInteger(ncats);
            Directory           dir     = NewDirectory();
            var                 values  = new ConcurrentDictionary <string, string>();
            double              d       = Random().NextDouble();
            TaxonomyWriterCache cache;

            if (d < 0.7)
            {
                // this is the fastest, yet most memory consuming
                cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
            }
            else if (TEST_NIGHTLY && d > 0.98)
            {
                // this is the slowest, but tests the writer concurrency when no caching is done.
                // only pick it during NIGHTLY tests, and even then, with very low chances.
                cache = NO_OP_CACHE;
            }
            else
            {
                // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
                cache = new LruTaxonomyWriterCache(ncats / 10);
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: use cache=" + cache);
            }
            var tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);

            ThreadClass[] addThreads = new ThreadClass[AtLeast(4)];
            for (int z = 0; z < addThreads.Length; z++)
            {
                addThreads[z] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, tw);
            }

            foreach (var t in addThreads)
            {
                t.Start();
            }
            foreach (var t in addThreads)
            {
                t.Join();
            }
            tw.Dispose();

            DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);

            // +1 for root category
            if (values.Count + 1 != dtr.Size)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (dtr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }

            int[] parents = dtr.ParallelTaxonomyArrays.Parents();
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(dtr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = new FacetLabel();
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = dtr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(dtr, dir);
        }
Ejemplo n.º 3
0
        public override void Build(InputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            File tempInput  = File.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.defaultTempDir());
            File tempSorted = File.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.defaultTempDir());

            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter sorter = null;

            // Push floats up front before sequences to sort them. For now, assume they are non-negative.
            // If negative floats are allowed some trickery needs to be done to find their byte order.
            bool success = false;

            count = 0;
            try
            {
                sbyte[]             buffer = new sbyte[0];
                ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
                BytesRef            spare;
                while ((spare = iterator.Next()) != null)
                {
                    if (spare.Length + 4 >= buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
                    }

                    output.Reset(buffer);
                    output.WriteInt(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
                tempInput.Delete();
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);
                long line                = 0;
                int  previousBucket      = 0;
                int  previousScore       = 0;
                ByteArrayDataInput input = new ByteArrayDataInput();
                BytesRef           tmp1  = new BytesRef();
                BytesRef           tmp2  = new BytesRef();
                while (reader.Read(tmp1))
                {
                    input.Reset(tmp1.Bytes);
                    int currentScore = input.ReadInt();

                    int bucket;
                    if (line > 0 && currentScore == previousScore)
                    {
                        bucket = previousBucket;
                    }
                    else
                    {
                        bucket = (int)(line * buckets / inputLines);
                    }
                    previousScore  = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    tmp2.Bytes  = tmp1.Bytes;
                    tmp2.Offset = input.Position;
                    tmp2.Length = tmp1.Length - input.Position;
                    builder.Add(tmp2, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion        = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader, writer, sorter);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Build the suggest index, using up to the specified
        ///  amount of temporary RAM while building.  Note that
        ///  the weights for the suggestions are ignored.
        /// </summary>
        public virtual void Build(InputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads())
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts())
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();
            // TODO: messy ... java7 has Files.createTempDirectory
            // ... but 4.x is java6:
            File   tempIndexPath = null;
            Random random        = new Random();

            while (true)
            {
                tempIndexPath = new File(directory, prefix + ".index." + random.Next(int.MaxValue));
                if (tempIndexPath.mkdir())
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer);

            iwc.OpenMode        = IndexWriterConfig.OpenMode.CREATE;
            iwc.RAMBufferSizeMB = ramBufferSizeMB;
            IndexWriter writer = new IndexWriter(dir, iwc);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: if only we had IndexOptions.TERMS_ONLY...
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            ft.Freeze();

            Document doc   = new Document();
            Field    field = new Field("body", "", ft);

            doc.Add(field);

            totTokens = 0;
            IndexReader reader = null;

            bool success = false;

            count = 0;
            try
            {
                while (true)
                {
                    BytesRef surfaceForm = iterator.Next();
                    if (surfaceForm == null)
                    {
                        break;
                    }
                    field.StringValue = surfaceForm.Utf8ToString();
                    writer.AddDocument(doc);
                    count++;
                }
                reader = DirectoryReader.Open(writer, false);

                Terms terms = MultiFields.GetTerms(reader, "body");
                if (terms == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }

                // Move all ngrams into an FST:
                TermsEnum termsEnum = terms.Iterator(null);

                Outputs <long?> outputs = PositiveIntOutputs.Singleton;
                Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

                IntsRef scratchInts = new IntsRef();
                while (true)
                {
                    BytesRef term = termsEnum.next();
                    if (term == null)
                    {
                        break;
                    }
                    int ngramCount = countGrams(term);
                    if (ngramCount > grams)
                    {
                        throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                    }
                    if (ngramCount == 1)
                    {
                        totTokens += termsEnum.TotalTermFreq();
                    }

                    builder.Add(Util.ToIntsRef(term, scratchInts), encodeWeight(termsEnum.TotalTermFreq()));
                }

                fst = builder.Finish();
                if (fst == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }
                //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                /*
                 * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                 * Util.toDot(fst, pw, true, true);
                 * pw.close();
                 */

                success = true;
            }
            finally
            {
                try
                {
                    if (success)
                    {
                        IOUtils.Close(writer, reader);
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(writer, reader);
                    }
                }
                finally
                {
                    foreach (string file in dir.ListAll())
                    {
                        File path = new File(tempIndexPath, file);
                        if (path.Delete() == false)
                        {
                            throw new InvalidOperationException("failed to remove " + path);
                        }
                    }

                    if (tempIndexPath.Delete() == false)
                    {
                        throw new InvalidOperationException("failed to remove " + tempIndexPath);
                    }

                    dir.Dispose();
                }
            }
        }
        /// <summary>
        /// expert: instantiates a new reader </summary>
        protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);

            this.MaxDoc = state.SegmentInfo.DocCount;
            bool success = false;

            try
            {
                Version    = CodecUtil.CheckHeader(@in, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                Numerics   = new Dictionary <int, NumericEntry>();
                Ords       = new Dictionary <int, NumericEntry>();
                OrdIndexes = new Dictionary <int, NumericEntry>();
                Binaries   = new Dictionary <int, BinaryEntry>();
                SortedSets = new Dictionary <int, SortedSetEntry>();
                ReadFields(@in, state.FieldInfos);

                if (Version >= Lucene45DocValuesFormat.VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
                    CodecUtil.CheckEOF(@in);
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                Data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(Data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                if (Version != version2)
                {
                    throw new Exception("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.Data);
                }
            }

            RamBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
        }
        internal MemoryDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension,
                                         string metaCodec, string metaExtension)
        {
            maxDoc = state.SegmentInfo.DocCount;
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
            bool success           = false;

            try
            {
                version  = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
                numerics = new Dictionary <>();
                binaries = new Dictionary <>();
                fsts     = new Dictionary <>();
                ReadFields(@in, state.FieldInfos);
                if (version >= VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
                    CodecUtil.CheckEOF(@in);
                }
                ramBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
                                                                 dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);

                int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
                if (version != version2)
                {
                    throw new CorruptIndexException("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.data);
                }
            }
        }