Exemple #1
0
 /// <summary>
 /// Verifies that a taxonomy writer was supplied before facet fields are indexed.
 /// </summary>
 /// <param name="taxoWriter">The taxonomy writer to validate.</param>
 /// <exception cref="ThreadStateException">
 /// Thrown when <paramref name="taxoWriter"/> is <c>null</c>.
 /// </exception>
 private static void CheckTaxoWriter(ITaxonomyWriter taxoWriter) // LUCENENET: CA1822: Mark members as static
 {
     if (taxoWriter != null)
     {
         return;
     }

     // NOTE(review): ThreadStateException is an unusual type for a missing-argument check;
     // it is kept as-is because callers may depend on it.
     throw new ThreadStateException("a non-null ITaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
 }
Exemple #2
0
 /// <summary>
 /// Ensures the caller passed a taxonomy writer; fails fast otherwise.
 /// </summary>
 /// <param name="taxoWriter">The taxonomy writer that must not be <c>null</c>.</param>
 /// <exception cref="ThreadStateException">
 /// Thrown when <paramref name="taxoWriter"/> is <c>null</c>.
 /// </exception>
 private void CheckTaxoWriter(ITaxonomyWriter taxoWriter)
 {
     bool writerMissing = taxoWriter == null;
     if (!writerMissing)
     {
         return;
     }

     throw new ThreadStateException("a non-null ITaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
 }
        /// <summary>
        /// Creates a document carrying a single facet in dimension "A" whose value is
        /// <paramref name="id"/> formatted with the "X" format string, then passes it through
        /// <c>config.Build</c>.
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer handed to <c>config.Build</c>.</param>
        /// <param name="id">Number used to derive the facet value.</param>
        /// <returns>The document returned by <c>config.Build</c>.</returns>
        private Document NewDocument(ITaxonomyWriter taxoWriter, int id)
        {
            Document source = new Document();
            string facetValue = id.ToString("X");
            source.Add(new FacetField("A", facetValue));

            Document built = config.Build(taxoWriter, source);
            return built;
        }
Exemple #4
0
        /// <summary>
        /// Creates a document with the single facet "A"/"1" and builds it with a fresh
        /// <see cref="FacetsConfig"/>.
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer handed to <c>FacetsConfig.Build</c>.</param>
        /// <returns>The document returned by <c>FacetsConfig.Build</c>.</returns>
        private Document NewDocument(ITaxonomyWriter taxoWriter)
        {
            FacetsConfig facetsConfig = new FacetsConfig();

            Document source = new Document();
            source.Add(new FacetField("A", "1"));

            Document built = facetsConfig.Build(taxoWriter, source);
            return built;
        }
 /// <summary>
 /// Creates the indexer thread and stores the supplied collaborators on the instance.
 /// </summary>
 /// <param name="w">Index writer kept in <c>this.w</c>.</param>
 /// <param name="config">Facets configuration kept in <c>this.config</c>.</param>
 /// <param name="tw">Taxonomy writer kept in <c>this.tw</c>.</param>
 /// <param name="mgr">Searcher/taxonomy reference manager kept in <c>this.mgr</c>.</param>
 /// <param name="ordLimit">Ordinal limit kept in <c>this.ordLimit</c>.</param>
 /// <param name="stop">Stop flag kept in <c>this.stop</c>.</param>
 public IndexerThread(IndexWriter w, FacetsConfig config, ITaxonomyWriter tw, ReferenceManager <SearcherAndTaxonomy> mgr, int ordLimit, AtomicBoolean stop)
 {
     // Writers and configuration.
     this.w = w;
     this.config = config;
     this.tw = tw;

     // Shared reader manager, limit and stop flag.
     this.mgr = mgr;
     this.ordLimit = ordLimit;
     this.stop = stop;
 }
Exemple #6
0
 /// <summary>
 /// Adds one document per entry of <c>CATEGORIES</c>; each document holds that facet field
 /// plus a stored "content" text field with the value "alpha".
 /// </summary>
 /// <param name="tw">Taxonomy writer passed to <c>config.Build</c>.</param>
 /// <param name="iw">Index writer that receives the built documents.</param>
 /// <param name="config">Facets configuration used to build each document.</param>
 private static void seedIndex(ITaxonomyWriter tw, RandomIndexWriter iw, FacetsConfig config)
 {
     foreach (FacetField category in CATEGORIES)
     {
         Document seed = new Document();
         seed.Add(category);
         seed.Add(new TextField("content", "alpha", Field.Store.YES));

         Document built = config.Build(tw, seed);
         iw.AddDocument(built);
     }
 }
Exemple #7
0
        /// <summary>
        /// Repeatedly recreates a taxonomy in the same directory (OpenMode.CREATE) with a random
        /// number of categories and checks that a reader over it reports the expected count.
        /// </summary>
        /// <param name="random">Source of the random number of categories added per round.</param>
        /// <param name="closeReader">
        /// When <c>true</c> the reader is disposed and reopened from the directory after each
        /// recreation; otherwise it is refreshed through <c>TaxonomyReader.OpenIfChanged</c>.
        /// </param>
        private void doTestReadRecreatedTaxonomy(Random random, bool closeReader)
        {
            Directory directory = null;
            ITaxonomyWriter writer = null;
            TaxonomyReader reader = null;

            // prepare a few categories
            int rounds = 10;
            FacetLabel[] labels = new FacetLabel[rounds];
            for (int idx = 0; idx < rounds; idx++)
            {
                string leaf = Convert.ToString(idx, CultureInfo.InvariantCulture);
                labels[idx] = new FacetLabel("a", leaf);
            }

            try
            {
                directory = NewDirectory();

                // Seed the taxonomy with the shared parent category only.
                writer = new DirectoryTaxonomyWriter(directory);
                writer.AddCategory(new FacetLabel("a"));
                writer.Dispose();

                reader = new DirectoryTaxonomyReader(directory);
                int baseline = reader.Count;

                for (int round = 0; round < rounds; round++)
                {
                    int lastIndex = random.Next(rounds);

                    // Recreate the taxonomy from scratch with labels[0..lastIndex].
                    writer = new DirectoryTaxonomyWriter(directory, OpenMode.CREATE);
                    for (int pos = 0; pos <= lastIndex; pos++)
                    {
                        writer.AddCategory(labels[pos]);
                    }
                    writer.Dispose();

                    if (!closeReader)
                    {
                        var refreshed = TaxonomyReader.OpenIfChanged(reader);
                        Assert.IsNotNull(refreshed);
                        reader.Dispose();
                        reader = refreshed;
                    }
                    else
                    {
                        reader.Dispose();
                        reader = new DirectoryTaxonomyReader(directory);
                    }

                    Assert.AreEqual(baseline + 1 + lastIndex, reader.Count, "Wrong #categories in taxonomy (i=" + round + ", k=" + lastIndex + ")");
                }
            }
            finally
            {
                IOUtils.Dispose(reader, writer, directory);
            }
        }
Exemple #8
0
        /// <summary>
        /// Indexes at least two documents that carry facets only (no content field), then commits.
        /// </summary>
        /// <param name="indexWriter">Writer receiving the built documents.</param>
        /// <param name="taxoWriter">Taxonomy writer passed to <c>FacetsConfig.Build</c>.</param>
        /// <param name="expectedCounts">Not read by this method.</param>
        private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary <string, int?> expectedCounts)
        {
            Random rng = Random;
            int docCount = AtLeast(rng, 2);
            FacetsConfig facetsConfig = Config;

            for (int remaining = docCount; remaining > 0; remaining--)
            {
                Document facetOnly = new Document();
                AddFacets(facetOnly, facetsConfig, false);

                Document built = facetsConfig.Build(taxoWriter, facetOnly);
                indexWriter.AddDocument(built);
            }

            // flush a segment
            indexWriter.Commit();
        }
Exemple #9
0
        /// <summary>
        /// Commits the taxonomy writer held by <c>RunData</c>.
        /// </summary>
        /// <returns>Always <c>1</c>.</returns>
        /// <remarks>
        /// Throws the exception produced by <c>IllegalStateException.Create</c> when
        /// <c>RunData.TaxonomyWriter</c> is <c>null</c>.
        /// </remarks>
        public override int DoLogic()
        {
            ITaxonomyWriter writer = RunData.TaxonomyWriter;
            if (writer == null)
            {
                throw IllegalStateException.Create("TaxonomyWriter is not currently open");
            }

            writer.Commit();
            return 1;
        }
Exemple #10
0
        /// <summary>
        /// Commits the taxonomy writer held by <c>RunData</c>.
        /// </summary>
        /// <returns>Always <c>1</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>RunData.TaxonomyWriter</c> is <c>null</c>.
        /// </exception>
        public override int DoLogic()
        {
            ITaxonomyWriter openWriter = RunData.TaxonomyWriter;
            bool isOpen = openWriter != null;
            if (!isOpen)
            {
                throw new InvalidOperationException("TaxonomyWriter is not currently open");
            }

            openWriter.Commit();
            return 1;
        }
Exemple #11
0
        /// <summary>
        /// Indexes at least two documents with facets; each document randomly also gets a
        /// content field (via <c>AddField</c>). Commits when done.
        /// </summary>
        /// <param name="indexWriter">Writer receiving the built documents.</param>
        /// <param name="taxoWriter">Taxonomy writer passed to <c>FacetsConfig.Build</c>.</param>
        /// <param name="expectedCounts">Not read by this method.</param>
        private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary <string, int?> expectedCounts)
        {
            Random rng = Random;
            int docCount = AtLeast(rng, 2);
            FacetsConfig facetsConfig = Config;

            for (int remaining = docCount; remaining > 0; remaining--)
            {
                Document current = new Document();

                bool withContent = rng.NextBoolean();
                if (withContent)
                {
                    AddField(current);
                }
                AddFacets(current, facetsConfig, withContent);

                Document built = facetsConfig.Build(taxoWriter, current);
                indexWriter.AddDocument(built);
            }

            // flush a segment
            indexWriter.Commit();
        }
        /// <summary>
        /// Adds two documents to <paramref name="indexWriter"/> and commits.
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer passed to <c>config.Build</c> when a config is given.</param>
        /// <param name="indexWriter">Writer receiving the documents.</param>
        /// <param name="config">
        /// When non-null, each document gets a facet "A" with the document number as value and is
        /// built through the config; when <c>null</c> the plain document is added.
        /// </param>
        /// <param name="withContent">When <c>true</c>, each document also gets the field "f" = "a".</param>
        private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
        {
            for (int docNum = 0; docNum < 2; docNum++)
            {
                Document current = new Document();
                if (withContent)
                {
                    current.Add(new StringField("f", "a", Field.Store.NO));
                }

                if (config == null)
                {
                    // No facets requested: index the document unchanged.
                    indexWriter.AddDocument(current);
                    continue;
                }

                current.Add(new FacetField("A", Convert.ToString(docNum)));
                indexWriter.AddDocument(config.Build(taxoWriter, current));
            }

            indexWriter.Commit();
        }
Exemple #13
0
        /// <summary>
        /// For every index field in <paramref name="byField"/>, packs the association facets into
        /// one byte buffer and adds it to <paramref name="doc"/> as a <c>BinaryDocValuesField</c>,
        /// together with drill-down <c>StringField</c> terms for each label.
        /// </summary>
        /// <remarks>
        /// Each facet contributes a 4-byte big-endian category ordinal followed by the facet's
        /// association bytes (<c>field.Assoc</c>).
        /// </remarks>
        /// <param name="taxoWriter">Taxonomy writer used to resolve each label to an ordinal; validated by <c>CheckTaxoWriter</c>.</param>
        /// <param name="byField">Association facet fields grouped by index field name.</param>
        /// <param name="doc">Document that receives the generated fields.</param>
        private void ProcessAssocFacetFields(ITaxonomyWriter taxoWriter, IDictionary <string, IList <AssociationFacetField> > byField, Document doc)
        {
            foreach (KeyValuePair <string, IList <AssociationFacetField> > ent in byField)
            {
                // Output buffer for this index field; grown on demand below.
                byte[] bytes          = new byte[16];
                // Number of bytes written to the buffer so far.
                int    upto           = 0;
                string indexFieldName = ent.Key;
                foreach (AssociationFacetField field in ent.Value)
                {
                    // NOTE: we don't add parents for associations
                    CheckTaxoWriter(taxoWriter);
                    FacetLabel label   = new FacetLabel(field.Dim, field.Path);
                    int        ordinal = taxoWriter.AddCategory(label);
                    // Make room for the 4 ordinal bytes.
                    if (upto + 4 > bytes.Length)
                    {
                        bytes = ArrayUtil.Grow(bytes, upto + 4);
                    }
                    // big-endian:
                    bytes[upto++] = (byte)(ordinal >> 24);
                    bytes[upto++] = (byte)(ordinal >> 16);
                    bytes[upto++] = (byte)(ordinal >> 8);
                    bytes[upto++] = (byte)ordinal;
                    // Make room for the association payload, then append it.
                    if (upto + field.Assoc.Length > bytes.Length)
                    {
                        bytes = ArrayUtil.Grow(bytes, upto + field.Assoc.Length);
                    }
                    Array.Copy(field.Assoc.Bytes, field.Assoc.Offset, bytes, upto, field.Assoc.Length);
                    upto += field.Assoc.Length;

                    // Drill down:
                    // one term per prefix length 1..label.Length (presumably the first i components — see PathToString)
                    for (int i = 1; i <= label.Length; i++)
                    {
                        doc.Add(new StringField(indexFieldName, PathToString(label.Components, i), Field.Store.NO));
                    }
                }
                // Only the first 'upto' bytes of the buffer are meaningful.
                doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto)));
            }
        }
Exemple #14
0
        /// <summary>
        /// Disposes the current index and taxonomy resources, recreates both directories,
        /// resets the inputs and restarts the clock.
        /// </summary>
        /// <param name="eraseIndex">Forwarded to <c>CreateDirectory</c> for both the index and the taxonomy directory.</param>
        public virtual void Reinit(bool eraseIndex)
        {
            // cleanup index
            IOUtils.Dispose(indexWriter, indexReader, directory);
            indexWriter = null;
            indexReader = null;

            // cleanup taxonomy
            IOUtils.Dispose(taxonomyWriter, taxonomyReader, taxonomyDir);
            taxonomyWriter = null;
            taxonomyReader = null;

            // directory (default is ram-dir).
            directory   = CreateDirectory(eraseIndex, "index", "directory");
            taxonomyDir = CreateDirectory(eraseIndex, "taxo", "taxonomy.directory");

            // inputs
            ResetInputs();

            // release unused stuff
            // NOTE(review): explicit GC.Collect() looks deliberate here (reset between runs) — confirm before removing.
            GC.Collect();

            // Re-init clock
            SetStartTimeMillis();
        }
        /// <summary>
        /// Indexes exactly two documents and commits the writer.
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer used when <paramref name="config"/> is supplied.</param>
        /// <param name="indexWriter">Target index writer.</param>
        /// <param name="config">
        /// Optional facets configuration; when present, a facet "A" valued with the loop index is
        /// added and the document is built through it before indexing.
        /// </param>
        /// <param name="withContent">Whether to add the un-stored string field "f" = "a".</param>
        private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
        {
            const int DocCount = 2;
            bool useFacets = config != null;

            for (int n = 0; n < DocCount; n++)
            {
                Document pending = new Document();
                if (withContent)
                {
                    pending.Add(new StringField("f", "a", Field.Store.NO));
                }

                if (useFacets)
                {
                    pending.Add(new FacetField("A", Convert.ToString(n)));
                    Document built = config.Build(taxoWriter, pending);
                    indexWriter.AddDocument(built);
                }
                else
                {
                    indexWriter.AddDocument(pending);
                }
            }

            indexWriter.Commit();
        }
 /// <summary>
 /// Seeds the index with one document for each facet field in <c>CATEGORIES</c>.
 /// Every document also stores a "content" text field set to "alpha".
 /// </summary>
 /// <param name="tw">Taxonomy writer passed to <c>config.Build</c>.</param>
 /// <param name="iw">Index writer receiving the built documents.</param>
 /// <param name="config">Facets configuration used to build each document.</param>
 private static void seedIndex(ITaxonomyWriter tw, RandomIndexWriter iw, FacetsConfig config)
 {
     foreach (FacetField facet in CATEGORIES)
     {
         Document raw = new Document();
         raw.Add(facet);

         TextField content = new TextField("content", "alpha", Field.Store.YES);
         raw.Add(content);

         iw.AddDocument(config.Build(tw, raw));
     }
 }
        /// <summary>
        /// Adds every entry of <c>categories</c> to the taxonomy and fails the test if the
        /// ordinal returned for an entry differs from the last element of the matching
        /// <c>ExpectedPaths</c> row.
        /// Note that this assumes the method is called on an empty taxonomy index; calling it
        /// after something else was already added will make it fail.
        /// </summary>
        /// <param name="tw">Taxonomy writer that receives the categories.</param>
        public static void FillTaxonomy(ITaxonomyWriter tw)
        {
            for (int row = 0; row < categories.Length; row++)
            {
                int actual = tw.AddCategory(new FacetLabel(categories[row]));

                var expectedPath = ExpectedPaths[row];
                int expected = expectedPath[expectedPath.Length - 1];
                if (actual == expected)
                {
                    continue;
                }

                Fail("For category " + Showcat(categories[row]) + " expected ordinal " + expected + ", but got " + actual);
            }
        }
Exemple #18
0
        /// <summary>
        /// For every index field in <paramref name="byField"/>, resolves each facet to a taxonomy
        /// ordinal, collects the ordinals (optionally with parent ordinals), and adds to
        /// <paramref name="doc"/> the drill-down <c>StringField</c> terms plus one
        /// <c>BinaryDocValuesField</c> holding the result of <c>DedupAndEncode</c>.
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer used to add categories and look up parents; validated by <c>CheckTaxoWriter</c>.</param>
        /// <param name="byField">Facet fields grouped by index field name.</param>
        /// <param name="doc">Document that receives the generated fields.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when a facet has more than one path component but its dimension is not configured as hierarchical.
        /// </exception>
        private void ProcessFacetFields(ITaxonomyWriter taxoWriter, IDictionary <string, IList <FacetField> > byField, Document doc)
        {
            foreach (KeyValuePair <string, IList <FacetField> > ent in byField)
            {
                string indexFieldName = ent.Key;
                //System.out.println("  indexFieldName=" + indexFieldName + " fields=" + ent.getValue());

                // Ordinals gathered for this index field; grown on demand.
                Int32sRef ordinals = new Int32sRef(32);
                foreach (FacetField facetField in ent.Value)
                {
                    FacetsConfig.DimConfig ft = GetDimConfig(facetField.Dim);
                    if (facetField.Path.Length > 1 && ft.IsHierarchical == false)
                    {
                        throw new ArgumentException("dimension \"" + facetField.Dim + "\" is not hierarchical yet has " + facetField.Path.Length + " components");
                    }

                    FacetLabel cp = new FacetLabel(facetField.Dim, facetField.Path);

                    CheckTaxoWriter(taxoWriter);
                    int ordinal = taxoWriter.AddCategory(cp);
                    // Buffer full: make room for one more ordinal.
                    if (ordinals.Length == ordinals.Int32s.Length)
                    {
                        ordinals.Grow(ordinals.Length + 1);
                    }
                    ordinals.Int32s[ordinals.Length++] = ordinal;
                    //System.out.println("ords[" + (ordinals.length-1) + "]=" + ordinal);
                    //System.out.println("  add cp=" + cp);

                    if (ft.IsMultiValued && (ft.IsHierarchical || ft.RequireDimCount))
                    {
                        //System.out.println("  add parents");
                        // Add all parents too:
                        // walk up while the parent ordinal is positive
                        int parent = taxoWriter.GetParent(ordinal);
                        while (parent > 0)
                        {
                            if (ordinals.Int32s.Length == ordinals.Length)
                            {
                                ordinals.Grow(ordinals.Length + 1);
                            }
                            ordinals.Int32s[ordinals.Length++] = parent;
                            parent = taxoWriter.GetParent(parent);
                        }

                        if (ft.RequireDimCount == false)
                        {
                            // Remove last (dimension) ord:
                            ordinals.Length--;
                        }
                    }

                    // Drill down:
                    // one term per prefix length 1..cp.Length (presumably the first i components — see PathToString)
                    for (int i = 1; i <= cp.Length; i++)
                    {
                        doc.Add(new StringField(indexFieldName, PathToString(cp.Components, i), Field.Store.NO));
                    }
                }

                // Facet counts:
                // DocValues are considered stored fields:
                doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ordinals)));
            }
        }
Exemple #19
0
        /// <summary>
        /// Translates any added <see cref="FacetField"/>s into normal fields for indexing.
        ///
        /// <para>
        /// <b>NOTE:</b> you should add the returned document to <see cref="Index.IndexWriter"/>, not the
        /// input one!
        /// </para>
        /// </summary>
        /// <param name="taxoWriter">Taxonomy writer forwarded to the facet-field and association-field processing steps.</param>
        /// <param name="doc">Input document whose fields are scanned; it is not modified by this method.</param>
        /// <returns>
        /// A new document containing the fields generated for the facet fields, followed by every
        /// field of <paramref name="doc"/> that is not one of the three facet field types.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when an <c>AssociationFacetField</c> dimension is configured as hierarchical or as
        /// requiring a dimension count, or when association types are mixed within one indexed field.
        /// </exception>
        public virtual Document Build(ITaxonomyWriter taxoWriter, Document doc)
        {
            // Find all FacetFields, collated by the actual field:
            IDictionary <string, IList <FacetField> > byField = new Dictionary <string, IList <FacetField> >();

            // ... and also all SortedSetDocValuesFacetFields:
            IDictionary <string, IList <SortedSetDocValuesFacetField> > dvByField = new Dictionary <string, IList <SortedSetDocValuesFacetField> >();

            // ... and also all AssociationFacetFields
            IDictionary <string, IList <AssociationFacetField> > assocByField = new Dictionary <string, IList <AssociationFacetField> >();

            // Dimensions already encountered; passed to CheckSeen for dimensions that are not multi-valued.
            var seenDims = new JCG.HashSet <string>();

            // Pass 1: bucket each facet-typed field by its configured index field name.
            foreach (IIndexableField field in doc.Fields)
            {
                if (field.IndexableFieldType == FacetField.TYPE)
                {
                    FacetField             facetField = (FacetField)field;
                    FacetsConfig.DimConfig dimConfig  = GetDimConfig(facetField.Dim);
                    if (dimConfig.IsMultiValued == false)
                    {
                        CheckSeen(seenDims, facetField.Dim);
                    }
                    string indexFieldName = dimConfig.IndexFieldName;
                    if (!byField.TryGetValue(indexFieldName, out IList <FacetField> fields))
                    {
                        fields = new List <FacetField>();
                        byField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);
                }

                if (field.IndexableFieldType == SortedSetDocValuesFacetField.TYPE)
                {
                    var facetField = (SortedSetDocValuesFacetField)field;
                    FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.Dim);
                    if (dimConfig.IsMultiValued == false)
                    {
                        CheckSeen(seenDims, facetField.Dim);
                    }
                    string indexFieldName = dimConfig.IndexFieldName;
                    if (!dvByField.TryGetValue(indexFieldName, out IList <SortedSetDocValuesFacetField> fields))
                    {
                        fields = new List <SortedSetDocValuesFacetField>();
                        dvByField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);
                }

                if (field.IndexableFieldType == AssociationFacetField.TYPE)
                {
                    AssociationFacetField  facetField = (AssociationFacetField)field;
                    FacetsConfig.DimConfig dimConfig  = GetDimConfig(facetField.Dim);
                    if (dimConfig.IsMultiValued == false)
                    {
                        CheckSeen(seenDims, facetField.Dim);
                    }
                    if (dimConfig.IsHierarchical)
                    {
                        throw new ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.Dim + "\")");
                    }
                    if (dimConfig.RequireDimCount)
                    {
                        throw new ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.Dim + "\")");
                    }

                    string indexFieldName = dimConfig.IndexFieldName;
                    if (!assocByField.TryGetValue(indexFieldName, out IList <AssociationFacetField> fields))
                    {
                        fields = new List <AssociationFacetField>();
                        assocByField[indexFieldName] = fields;
                    }
                    fields.Add(facetField);

                    // Best effort: detect mis-matched types in same
                    // indexed field:
                    string type;
                    if (facetField is Int32AssociationFacetField)
                    {
                        type = "int";
                    }
                    else if (facetField is SingleAssociationFacetField)
                    {
                        type = "float";
                    }
                    else
                    {
                        type = "bytes";
                    }
                    // NOTE: not thread safe, but this is just best effort:
                    // the first type seen for an indexed field is remembered in assocDimTypes
                    if (!assocDimTypes.TryGetValue(indexFieldName, out string curType))
                    {
                        assocDimTypes[indexFieldName] = type;
                    }
                    else if (!curType.Equals(type, StringComparison.Ordinal))
                    {
                        throw new ArgumentException("mixing incompatible types of AssocationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
                    }
                }
            }

            Document result = new Document();

            // Pass 2: emit the index fields generated from each bucket.
            ProcessFacetFields(taxoWriter, byField, result);
            ProcessSSDVFacetFields(dvByField, result);
            ProcessAssocFacetFields(taxoWriter, assocByField, result);

            //System.out.println("add stored: " + addedStoredFields);

            // Pass 3: carry over every field that is not one of the facet field types.
            foreach (IIndexableField field in doc.Fields)
            {
                IIndexableFieldType ft = field.IndexableFieldType;
                if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE)
                {
                    result.Add(field);
                }
            }

            return(result);
        }
 /// <summary>
 /// Adds every entry of <c>categories</c> to the taxonomy and verifies both the ordinal
 /// returned by <c>AddCategory</c> and the chain of <c>GetParent</c> results against the
 /// matching row of <c>ExpectedPaths</c> (read from its last element back to its first).
 /// Note that this assumes the method is called on an empty taxonomy index; calling it
 /// after something else was already added will make it fail.
 /// </summary>
 /// <param name="tw">Taxonomy writer that receives the categories and answers parent lookups.</param>
 public static void FillTaxonomyCheckPaths(ITaxonomyWriter tw)
 {
     for (int row = 0; row < categories.Length; row++)
     {
         int current = tw.AddCategory(new FacetLabel(categories[row]));

         var expectedPath = ExpectedPaths[row];
         int leaf = expectedPath.Length - 1;
         int expected = expectedPath[leaf];
         if (current != expected)
         {
             Fail("For category " + Showcat(categories[row]) + " expected ordinal " + expected + ", but got " + current);
         }

         // Climb one ancestor per step; 'level' counts how far above the leaf we are.
         for (int level = 1; level <= leaf; level++)
         {
             current = tw.GetParent(current);
             expected = expectedPath[leaf - level];
             if (current != expected)
             {
                 Fail("For category " + Showcat(categories[row]) + " expected ancestor level " + level + " was " + expected + ", but got " + current);
             }
         }
     }
 }
 /// <summary>
 /// Checks that <c>GetParent</c> reproduces each row of <c>ExpectedPaths</c> when walking
 /// from the row's last ordinal up to its first, that the first ordinal's parent is the
 /// root ordinal, and that the root's parent is the invalid ordinal.
 /// Intended to run after the taxonomy has been filled successfully.
 /// </summary>
 /// <param name="tw">Taxonomy writer that answers the parent lookups.</param>
 public static void CheckPaths(ITaxonomyWriter tw)
 {
     for (int row = 0; row < categories.Length; row++)
     {
         var expectedPath = ExpectedPaths[row];
         int leaf = expectedPath.Length - 1;
         int current = expectedPath[leaf];

         // Climb one ancestor per step; 'level' counts how far above the leaf we are.
         for (int level = 1; level <= leaf; level++)
         {
             current = tw.GetParent(current);
             int expected = expectedPath[leaf - level];
             if (current != expected)
             {
                 Fail("For category " + Showcat(categories[row]) + " expected ancestor level " + level + " was " + expected + ", but got " + current);
             }
         }

         Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, tw.GetParent(expectedPath[0]));
     }

     Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(TaxonomyReader.ROOT_ORDINAL));
 }
        /// <summary>
        /// Cross-checks the writer's <c>GetParent</c> against the paths reported by the reader:
        /// ordinal 0 must map to the invalid ordinal, every other ordinal's parent must be the
        /// path with the last component removed, and out-of-range ordinals must throw
        /// <see cref="System.IndexOutOfRangeException"/>.
        /// </summary>
        /// <param name="tr">Reader used to resolve ordinals to paths and to obtain the category count.</param>
        /// <param name="tw">Writer whose <c>GetParent</c> is being verified.</param>
        private void CheckWriterParent(TaxonomyReader tr, ITaxonomyWriter tw)
        {
            // check that the parent of the root ordinal is the invalid ordinal:
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(0));

            // check parent of non-root ordinals:
            for (int child = 1; child < tr.Count; child++)
            {
                FacetLabel childPath = tr.GetPath(child);
                int reportedParent = tw.GetParent(child);
                FacetLabel parentPath = tr.GetPath(reportedParent);
                if (parentPath == null)
                {
                    Fail("Parent of " + child + " is " + reportedParent + ", but this is not a valid category.");
                }

                // verify that the parent is indeed my parent, according to the
                // strings
                bool isDirectParent = childPath.Subpath(childPath.Length - 1).Equals(parentPath);
                if (!isDirectParent)
                {
                    Fail("Got parent " + reportedParent + " for ordinal " + child + " but categories are " + Showcat(parentPath) + " and " + Showcat(childPath) + " respectively.");
                }
            }

            // check parent of invalid ordinals:
            try
            {
                tw.GetParent(-1);
                Fail("getParent for -1 should throw exception");
            }
            catch (System.IndexOutOfRangeException)
            {
                // expected
            }

            try
            {
                tw.GetParent(TaxonomyReader.INVALID_ORDINAL);
                Fail("getParent for INVALID_ORDINAL should throw exception");
            }
            catch (System.IndexOutOfRangeException)
            {
                // expected
            }

            try
            {
                int beyondEnd = tw.GetParent(tr.Count);
                Fail("getParent for getSize() should throw exception, but returned " + beyondEnd);
            }
            catch (System.IndexOutOfRangeException)
            {
                // expected
            }
        }
Exemple #23
0
 /// <summary>
 /// Saves this instance by passing it to <paramref name="writer"/>'s <c>Write</c> method.
 /// </summary>
 /// <param name="writer">The writer that receives this instance.</param>
 public void Save(ITaxonomyWriter writer) => writer.Write(this);
 /// <summary>
 /// Indexes at least two facet-only documents (no content field) and commits.
 /// </summary>
 /// <param name="indexWriter">Writer receiving the built documents.</param>
 /// <param name="taxoWriter">Taxonomy writer passed to <c>FacetsConfig.Build</c>.</param>
 /// <param name="expectedCounts">Not read by this method.</param>
 private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
 {
     Random rng = Random();
     int docCount = AtLeast(rng, 2);
     FacetsConfig facetsConfig = Config;

     int added = 0;
     while (added < docCount)
     {
         Document facetOnly = new Document();
         AddFacets(facetOnly, facetsConfig, false);
         indexWriter.AddDocument(facetsConfig.Build(taxoWriter, facetOnly));
         added++;
     }

     // flush a segment
     indexWriter.Commit();
 }
 /// <summary>
 /// Indexes at least two documents with facets, randomly giving each one a content field
 /// as well (via <c>AddField</c>), and commits.
 /// </summary>
 /// <param name="indexWriter">Writer receiving the built documents.</param>
 /// <param name="taxoWriter">Taxonomy writer passed to <c>FacetsConfig.Build</c>.</param>
 /// <param name="expectedCounts">Not read by this method.</param>
 private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
 {
     Random rng = Random();
     int docCount = AtLeast(rng, 2);
     FacetsConfig facetsConfig = Config;

     int added = 0;
     while (added < docCount)
     {
         Document current = new Document();

         bool withContent = rng.NextBoolean();
         if (withContent)
         {
             AddField(current);
         }
         AddFacets(current, facetsConfig, withContent);

         indexWriter.AddDocument(facetsConfig.Build(taxoWriter, current));
         added++;
     }

     // flush a segment
     indexWriter.Commit();
 }