/// <summary>
/// Guard used before touching the taxonomy: fails when no taxonomy writer was supplied.
/// </summary>
/// <param name="taxoWriter">The taxonomy writer to validate.</param>
/// <exception cref="ThreadStateException">Thrown when <paramref name="taxoWriter"/> is null.</exception>
private void checkTaxoWriter(TaxonomyWriter taxoWriter)
{
    if (taxoWriter != null)
    {
        return;
    }

    // NOTE(review): ThreadStateException is an unusual type for a missing argument;
    // it is kept unchanged because callers may be catching exactly this type.
    throw new ThreadStateException("a non-null TaxonomyWriter must be provided when indexing FacetField or AssociationFacetField");
}
/// <summary>
/// Creates a <see cref="LuceneFacetBuilder"/> that keeps the supplied taxonomy writer
/// and starts with a freshly constructed <see cref="FacetsConfig"/>.
/// </summary>
/// <param name="taxonomyWriter">The taxonomy writer; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="taxonomyWriter"/> is null.
/// </exception>
public LuceneFacetBuilder(TaxonomyWriter taxonomyWriter)
{
    if (taxonomyWriter == null)
    {
        throw new ArgumentNullException(nameof(taxonomyWriter));
    }

    _taxonomyWriter = taxonomyWriter;
    FacetsConfig = new FacetsConfig();
}
/// <summary>
/// Stores the collaborators handed to this indexer thread. The constructor only
/// records the references; it performs no other work.
/// </summary>
/// <param name="w">Index writer, stored in <c>this.w</c>.</param>
/// <param name="config">Facets configuration, stored in <c>this.config</c>.</param>
/// <param name="tw">Taxonomy writer, stored in <c>this.tw</c>.</param>
/// <param name="mgr">Reference manager, stored in <c>this.mgr</c>.</param>
/// <param name="ordLimit">Ordinal limit, stored in <c>this.ordLimit</c>.</param>
/// <param name="stop">Stop flag, stored in <c>this.stop</c>.</param>
public IndexerThread(
    IndexWriter w,
    FacetsConfig config,
    TaxonomyWriter tw,
    ReferenceManager<SearcherAndTaxonomy> mgr,
    int ordLimit,
    AtomicBoolean stop)
{
    // Writers and their configuration.
    this.w = w;
    this.config = config;
    this.tw = tw;

    // Shared state consulted while the thread runs.
    this.mgr = mgr;
    this.ordLimit = ordLimit;
    this.stop = stop;
}
/// <summary>
/// Records every constructor argument in the member of the same name.
/// Nothing else happens here.
/// </summary>
/// <param name="w">The index writer to keep.</param>
/// <param name="config">The facets configuration to keep.</param>
/// <param name="tw">The taxonomy writer to keep.</param>
/// <param name="mgr">The searcher/taxonomy reference manager to keep.</param>
/// <param name="ordLimit">The ordinal limit to keep.</param>
/// <param name="stop">The shared stop flag to keep.</param>
public IndexerThread(
    IndexWriter w,
    FacetsConfig config,
    TaxonomyWriter tw,
    ReferenceManager<SearcherAndTaxonomy> mgr,
    int ordLimit,
    AtomicBoolean stop)
{
    this.stop = stop;
    this.ordLimit = ordLimit;
    this.mgr = mgr;
    this.tw = tw;
    this.config = config;
    this.w = w;
}
/// <summary>
/// Indexes one document per entry of <c>CATEGORIES</c>. Each document carries that
/// facet field plus a stored text field <c>content</c> = <c>alpha</c>, and is passed
/// through <c>config.Build</c> before being added to the index writer.
/// </summary>
/// <param name="tw">Taxonomy writer handed to <c>config.Build</c>.</param>
/// <param name="iw">Index writer that receives the built documents.</param>
/// <param name="config">Facets configuration used to build each document.</param>
private static void seedIndex(TaxonomyWriter tw, RandomIndexWriter iw, FacetsConfig config)
{
    foreach (FacetField category in CATEGORIES)
    {
        var seedDoc = new Document();
        seedDoc.Add(category);
        seedDoc.Add(new TextField("content", "alpha", Field.Store.YES));

        var indexable = config.Build(tw, seedDoc);
        iw.AddDocument(indexable);
    }
}
/// <summary>
/// Repeatedly re-creates the taxonomy in the same directory (open mode CREATE) with a
/// random number of categories, then checks that a reader sees the expected size.
/// The reader is either reopened from scratch or refreshed through
/// <c>TaxonomyReader.OpenIfChanged</c>.
/// </summary>
/// <param name="random">Source of the random category count for each round.</param>
/// <param name="closeReader">
/// When true the reader is disposed and a new one is opened each round; otherwise the
/// existing reader is refreshed with <c>OpenIfChanged</c>.
/// </param>
private void doTestReadRecreatedTaxonomy(Random random, bool closeReader)
{
    const int n = 10;

    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;

    // prepare a few categories
    var labels = new FacetLabel[n];
    for (int idx = 0; idx < n; idx++)
    {
        labels[idx] = new FacetLabel("a", Convert.ToString(idx));
    }

    try
    {
        dir = NewDirectory();

        // Initial taxonomy containing only the "a" dimension.
        tw = new DirectoryTaxonomyWriter(dir);
        tw.AddCategory(new FacetLabel("a"));
        tw.Dispose();

        tr = new DirectoryTaxonomyReader(dir);
        int baseNumCategories = tr.Size;

        for (int round = 0; round < n; round++)
        {
            int k = random.Next(n);

            // Re-create the taxonomy and add labels[0..k].
            tw = new DirectoryTaxonomyWriter(dir, IndexWriterConfig.OpenMode_e.CREATE);
            for (int j = 0; j <= k; j++)
            {
                tw.AddCategory(labels[j]);
            }
            tw.Dispose();

            if (!closeReader)
            {
                var refreshed = TaxonomyReader.OpenIfChanged(tr);
                Assert.NotNull(refreshed);
                tr.Dispose(true);
                tr = refreshed;
            }
            else
            {
                tr.Dispose(true);
                tr = new DirectoryTaxonomyReader(dir);
            }

            Assert.AreEqual(baseNumCategories + 1 + k, tr.Size, "Wrong #categories in taxonomy (i=" + round + ", k=" + k + ")");
        }
    }
    finally
    {
        IOUtils.Close(tr as DirectoryTaxonomyReader, tw, dir);
    }
}
/// <summary>
/// Indexes at least two documents that carry facets only (no content field) and then
/// commits, which flushes a segment.
/// </summary>
/// <param name="indexWriter">Writer that receives the built documents and is committed.</param>
/// <param name="taxoWriter">Taxonomy writer handed to <c>config.Build</c>.</param>
/// <param name="expectedCounts">Not read or written directly by this method.</param>
private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    var rnd = Random();
    int remaining = AtLeast(rnd, 2);
    var facetsConfig = Config;

    while (remaining-- > 0)
    {
        var facetOnlyDoc = new Document();
        AddFacets(facetOnlyDoc, facetsConfig, false);
        indexWriter.AddDocument(facetsConfig.Build(taxoWriter, facetOnlyDoc));
    }

    indexWriter.Commit(); // flush a segment
}
/// <summary>
/// End-to-end smoke test on in-memory directories: indexes three books, runs the
/// query <c>isbn:99*</c>, prints the hits, then prints the top children of the
/// <c>isbn</c> facet dimension read from the <c>facet_isbn</c> index field.
/// </summary>
/// <param name="args">Not used by this method.</param>
/// <remarks>
/// The index writer and the index reader are closed in <c>finally</c> blocks so they
/// are released even when indexing or searching throws. A null facet result is
/// tolerated and simply produces no facet output.
/// </remarks>
public static void TestFlexLuceneRAM(string[] args)
{
    StandardAnalyzer analyzer = new StandardAnalyzer();
    FlexLucene.Store.Directory index = (FlexLucene.Store.Directory) new RAMDirectory();
    config = new IndexWriterConfig((Analyzer)analyzer);

    cnf = new FacetsConfig();
    cnf.SetIndexFieldName("title", "facet_title");
    cnf.SetIndexFieldName("isbn", "facet_isbn");

    LuceneTest.taxoDir = (FlexLucene.Store.Directory) new RAMDirectory();
    LuceneTest.taxoWriter = (TaxonomyWriter) new FlexLucene.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter(LuceneTest.taxoDir, IndexWriterConfigOpenMode.CREATE);

    IndexWriter w = new IndexWriter(index, LuceneTest.config);
    try
    {
        addDoc(w, "Lucene in Action", "9900001");
        addDoc(w, "Lucene for Dummies", "9900002");
        addDoc(w, "Lucene for Dummies 2", "9900003");
    }
    finally
    {
        // Always release the writer, even if adding a document failed.
        w.close();
    }

    String querystr = "isbn:99*";
    Query q = new QueryParser("title", (Analyzer)analyzer).Parse(querystr);
    int hitsPerPage = 10;

    IndexReader reader = (IndexReader)DirectoryReader.Open(index);
    try
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.Create(hitsPerPage);
        searcher.Search(q, (Collector)collector);
        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

        Console.WriteLine("Found " + hits.Length + " hits.");
        for (int i = 0; i < hits.Length; ++i)
        {
            int docId = hits[i].Doc;
            Document d = searcher.Doc(docId);
            Console.WriteLine(i + 1 + ". " + d.Get("isbn") + "\t" + d.Get("title"));
        }

        SortedSetDocValuesReaderState state = (SortedSetDocValuesReaderState) new DefaultSortedSetDocValuesReaderState(reader, "facet_isbn");
        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, q, 10, (Collector)fc);
        Facets facets = (Facets) new SortedSetDocValuesFacetCounts(state, fc);
        FacetResult result = facets.GetTopChildren(10, "isbn", new String[0]);

        // GetTopChildren may return null when the dimension has no counted children;
        // in that case there is nothing to print.
        if (result != null)
        {
            for (int j = 0; j < result.ChildCount; ++j)
            {
                LabelAndValue lv = result.LabelValues[j];
                Console.WriteLine(String.Format("Label={0}, Value={1}", lv.Label, lv.Value));
            }
        }
    }
    finally
    {
        // Always release the reader, even if searching or faceting failed.
        reader.close();
    }
}
/// <summary>
/// Indexes at least two documents with facets, randomly giving some of them a content
/// field as well, and then commits, which flushes a segment.
/// </summary>
/// <param name="indexWriter">Writer that receives the built documents and is committed.</param>
/// <param name="taxoWriter">Taxonomy writer handed to <c>config.Build</c>.</param>
/// <param name="expectedCounts">Not read or written directly by this method.</param>
private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts)
{
    var rnd = Random();
    int remaining = AtLeast(rnd, 2);
    var facetsConfig = Config;

    while (remaining-- > 0)
    {
        var document = new Document();

        // Decide per document whether it also gets a content field.
        bool withContent = rnd.NextBoolean();
        if (withContent)
        {
            AddField(document);
        }

        AddFacets(document, facetsConfig, withContent);
        indexWriter.AddDocument(facetsConfig.Build(taxoWriter, document));
    }

    indexWriter.Commit(); // flush a segment
}
/// <summary>
/// Adds two documents to the index and commits. With a non-null
/// <paramref name="config"/> each document gets a facet field <c>A</c> whose value is
/// the document number and is built through the config; with a null config the plain
/// document is added as-is.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer handed to <c>config.Build</c>.</param>
/// <param name="indexWriter">Writer that receives the documents and is committed.</param>
/// <param name="config">Facets configuration, or null to index without facets.</param>
/// <param name="withContent">Whether to add the string field <c>f</c> = <c>a</c>.</param>
private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
{
    for (int docNumber = 0; docNumber < 2; docNumber++)
    {
        var document = new Document();
        if (withContent)
        {
            document.Add(new StringField("f", "a", Field.Store.NO));
        }

        if (config == null)
        {
            indexWriter.AddDocument(document);
            continue;
        }

        document.Add(new FacetField("A", Convert.ToString(docNumber)));
        indexWriter.AddDocument(config.Build(taxoWriter, document));
    }

    indexWriter.Commit();
}
/// <summary>
/// For every indexed field name, encodes its association facet fields into a single
/// binary doc-values field and adds drill-down string fields to
/// <paramref name="doc"/>.
/// </summary>
/// <remarks>
/// Each association is written as the 4-byte big-endian category ordinal followed by
/// the association's own bytes.
/// </remarks>
/// <param name="taxoWriter">Taxonomy writer used to resolve category ordinals.</param>
/// <param name="byField">Association facet fields grouped by index field name.</param>
/// <param name="doc">Document that receives the generated fields.</param>
private void ProcessAssocFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<AssociationFacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<AssociationFacetField>> entry in byField)
    {
        string indexFieldName = entry.Key;
        byte[] buffer = new byte[16];
        int used = 0;

        foreach (AssociationFacetField assocField in entry.Value)
        {
            // NOTE: we don't add parents for associations
            checkTaxoWriter(taxoWriter);
            FacetLabel label = new FacetLabel(assocField.dim, assocField.path);
            int ordinal = taxoWriter.AddCategory(label);

            // Make room for the 4 ordinal bytes.
            int afterOrdinal = used + 4;
            if (afterOrdinal > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, afterOrdinal);
            }

            // big-endian:
            buffer[used] = (byte)(ordinal >> 24);
            buffer[used + 1] = (byte)(ordinal >> 16);
            buffer[used + 2] = (byte)(ordinal >> 8);
            buffer[used + 3] = (byte)ordinal;
            used = afterOrdinal;

            // Append the association payload right after the ordinal.
            var payload = assocField.assoc;
            if (used + payload.Length > buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, used + payload.Length);
            }
            Array.Copy(payload.Bytes, payload.Offset, buffer, used, payload.Length);
            used += payload.Length;

            // Drill down:
            for (int prefixLength = 1; prefixLength <= label.Length; prefixLength++)
            {
                string drillDownTerm = PathToString(label.Components, prefixLength);
                doc.Add(new StringField(indexFieldName, drillDownTerm, Field.Store.NO));
            }
        }

        doc.Add(new BinaryDocValuesField(indexFieldName, new BytesRef(buffer, 0, used)));
    }
}
/// <summary>
/// Ensures the main index folder exists (logging whether it is being created or
/// updated), then opens the main index writer and the taxonomy writer
/// (CREATE_OR_APPEND), creates the index preferences and thumbnail indexer, and
/// records the start time.
/// </summary>
public DocumentIndexer()
{
    if (System.IO.Directory.Exists(Preferences.Instance.MainIndexFolder))
    {
        logger.Info("Updating index at '{0}'", Preferences.Instance.MainIndexFolder);
    }
    else
    {
        logger.Info("Creating main index folder: '{0}'", Preferences.Instance.MainIndexFolder);
        System.IO.Directory.CreateDirectory(Preferences.Instance.MainIndexFolder);
    }

    var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing());

    // Main document index.
    var mainDirectory = FSDirectory.open(new java.io.File(Preferences.Instance.MainIndexFolder));
    mainWriter = new IndexWriter(mainDirectory, config);

    // Facet taxonomy index, kept in its own folder.
    var facetDirectory = FSDirectory.open(new java.io.File(Preferences.Instance.FacetIndexFolder));
    taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory, IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    indexPreferences = new IndexPreferences(mainWriter);
    thumbnailIndexer = new ThumbnailIndexer(indexPreferences);
    startTime = DateTime.Now;
}
/// <summary>
/// Adds every entry of <c>categories</c> to the taxonomy and fails the test if the
/// ordinal returned for an entry differs from the last element of the matching
/// <c>ExpectedPaths</c> row.
/// The expected ordinals presume the taxonomy was empty when this method was called;
/// calling it after other categories were added will make it fail.
/// </summary>
/// <param name="tw">Taxonomy writer the categories are added to.</param>
public static void FillTaxonomy(TaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        int actualOrdinal = tw.AddCategory(new FacetLabel(categories[i]));

        var expectedPath = ExpectedPaths[i];
        int expectedOrdinal = expectedPath[expectedPath.Length - 1];
        if (actualOrdinal == expectedOrdinal)
        {
            continue;
        }

        Fail("For category " + Showcat(categories[i]) + " expected ordinal " + expectedOrdinal + ", but got " + actualOrdinal);
    }
}
/// <summary>
/// Meant to run after the taxonomy was filled successfully: walks up from each
/// category's expected ordinal with <c>GetParent</c> and checks every ancestor against
/// the <c>ExpectedPaths</c> table. It also checks that the first entry of each path
/// has the root as parent and that the root's parent is the invalid ordinal.
/// </summary>
/// <param name="tw">Taxonomy writer whose parent lookups are verified.</param>
public static void CheckPaths(TaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        var expectedPath = ExpectedPaths[i];
        int current = expectedPath[expectedPath.Length - 1];

        // level 1 is the direct parent, level 2 the grandparent, and so on.
        for (int level = 1; level < expectedPath.Length; level++)
        {
            current = tw.GetParent(current);
            int expectedOrdinal = expectedPath[expectedPath.Length - 1 - level];
            if (current != expectedOrdinal)
            {
                Fail("For category " + Showcat(categories[i]) + " expected ancestor level " + level + " was " + expectedOrdinal + ", but got " + current);
            }
        }

        Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, tw.GetParent(expectedPath[0]));
    }

    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(TaxonomyReader.ROOT_ORDINAL));
}
/// <summary>
/// Verifies <c>TaxonomyWriter.GetParent</c> against the paths reported by a reader:
/// the root's parent is the invalid ordinal, every other ordinal's parent path equals
/// its own path minus the last component, and out-of-range ordinals throw
/// <see cref="System.IndexOutOfRangeException"/>.
/// </summary>
/// <param name="tr">Reader used to look up category paths and the taxonomy size.</param>
/// <param name="tw">Writer whose <c>GetParent</c> is under test.</param>
private void CheckWriterParent(TaxonomyReader tr, TaxonomyWriter tw)
{
    // check that the parent of the root ordinal is the invalid ordinal:
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tw.GetParent(0));

    // check parent of non-root ordinals:
    for (int ordinal = 1; ordinal < tr.Size; ordinal++)
    {
        FacetLabel childPath = tr.GetPath(ordinal);
        int parentOrdinal = tw.GetParent(ordinal);
        FacetLabel parentPath = tr.GetPath(parentOrdinal);

        if (parentPath == null)
        {
            Fail("Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category.");
        }

        // verify that the parent is indeed my parent, according to the strings
        bool isDirectParent = childPath.Subpath(childPath.Length - 1).Equals(parentPath);
        if (!isDirectParent)
        {
            Fail("Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + Showcat(parentPath) + " and " + Showcat(childPath) + " respectively.");
        }
    }

    // check parent of invalid ordinals:
    try
    {
        tw.GetParent(-1);
        Fail("getParent for -1 should throw exception");
    }
    catch (System.IndexOutOfRangeException)
    {
        // ok
    }

    try
    {
        tw.GetParent(TaxonomyReader.INVALID_ORDINAL);
        Fail("getParent for INVALID_ORDINAL should throw exception");
    }
    catch (System.IndexOutOfRangeException)
    {
        // ok
    }

    try
    {
        int unexpectedParent = tw.GetParent(tr.Size);
        Fail("getParent for getSize() should throw exception, but returned " + unexpectedParent);
    }
    catch (System.IndexOutOfRangeException)
    {
        // ok
    }
}
/// <summary>
/// Converts the facet fields found in <paramref name="doc"/> into regular indexable
/// fields and returns a new document that contains them together with all non-facet
/// fields of the input.
///
/// <para>
/// <b>NOTE:</b> index the returned document, not the one passed in.
/// </para>
/// </summary>
/// <param name="taxoWriter">Taxonomy writer passed on to the taxonomy-based processing steps.</param>
/// <param name="doc">Input document whose fields are inspected.</param>
/// <returns>A new document with translated facet fields plus the remaining input fields.</returns>
/// <exception cref="System.ArgumentException">
/// Thrown for an association facet field whose dimension is hierarchical or requires a
/// dimension count, or when different association types share one indexed field.
/// </exception>
public virtual Document Build(TaxonomyWriter taxoWriter, Document doc)
{
    // Taxonomy facet fields, grouped by the field they are indexed under:
    IDictionary<string, IList<FacetField>> taxoGroups = new Dictionary<string, IList<FacetField>>();

    // ... the same for SortedSetDocValuesFacetFields:
    IDictionary<string, IList<SortedSetDocValuesFacetField>> ssdvGroups = new Dictionary<string, IList<SortedSetDocValuesFacetField>>();

    // ... and for AssociationFacetFields:
    IDictionary<string, IList<AssociationFacetField>> assocGroups = new Dictionary<string, IList<AssociationFacetField>>();

    var seenDims = new HashSet<string>();

    foreach (IndexableField field in doc.Fields)
    {
        if (field.FieldType == FacetField.TYPE)
        {
            var taxoField = (FacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(taxoField.dim);
            if (!dimConfig.MultiValued)
            {
                CheckSeen(seenDims, taxoField.dim);
            }

            string indexFieldName = dimConfig.IndexFieldName;
            IList<FacetField> group;
            if (!taxoGroups.TryGetValue(indexFieldName, out group))
            {
                group = new List<FacetField>();
                taxoGroups[indexFieldName] = group;
            }
            group.Add(taxoField);
        }

        if (field.FieldType == SortedSetDocValuesFacetField.TYPE)
        {
            var ssdvField = (SortedSetDocValuesFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(ssdvField.Dim);
            if (!dimConfig.MultiValued)
            {
                CheckSeen(seenDims, ssdvField.Dim);
            }

            string indexFieldName = dimConfig.IndexFieldName;
            IList<SortedSetDocValuesFacetField> group;
            if (!ssdvGroups.TryGetValue(indexFieldName, out group))
            {
                group = new List<SortedSetDocValuesFacetField>();
                ssdvGroups[indexFieldName] = group;
            }
            group.Add(ssdvField);
        }

        if (field.FieldType == AssociationFacetField.TYPE)
        {
            var assocField = (AssociationFacetField)field;
            FacetsConfig.DimConfig dimConfig = GetDimConfig(assocField.dim);
            if (!dimConfig.MultiValued)
            {
                CheckSeen(seenDims, assocField.dim);
            }
            if (dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + assocField.dim + "\")");
            }
            if (dimConfig.RequireDimCount)
            {
                throw new System.ArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + assocField.dim + "\")");
            }

            string indexFieldName = dimConfig.IndexFieldName;
            IList<AssociationFacetField> group;
            if (!assocGroups.TryGetValue(indexFieldName, out group))
            {
                group = new List<AssociationFacetField>();
                assocGroups[indexFieldName] = group;
            }
            group.Add(assocField);

            // Best effort: detect mis-matched types in same indexed field:
            string type = assocField is IntAssociationFacetField
                ? "int"
                : (assocField is FloatAssociationFacetField ? "float" : "bytes");

            // NOTE: not thread safe, but this is just best effort:
            string knownType;
            if (!assocDimTypes.TryGetValue(indexFieldName, out knownType))
            {
                assocDimTypes[indexFieldName] = type;
            }
            else if (!knownType.Equals(type))
            {
                throw new System.ArgumentException("mixing incompatible types of AssocationFacetField (" + knownType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
            }
        }
    }

    Document result = new Document();

    ProcessFacetFields(taxoWriter, taxoGroups, result);
    processSSDVFacetFields(ssdvGroups, result);
    ProcessAssocFacetFields(taxoWriter, assocGroups, result);

    // Carry over every field that is not one of the three facet field kinds.
    foreach (IndexableField field in doc.Fields)
    {
        IndexableFieldType ft = field.FieldType;
        bool isFacetField = ft == FacetField.TYPE
            || ft == SortedSetDocValuesFacetField.TYPE
            || ft == AssociationFacetField.TYPE;
        if (!isFacetField)
        {
            result.Add(field);
        }
    }

    return result;
}
/// <summary>
/// For every indexed field name, resolves its facet fields to taxonomy ordinals, adds
/// drill-down string fields for each path prefix, and stores the de-duplicated,
/// encoded ordinals in a binary doc-values field on <paramref name="doc"/>.
/// </summary>
/// <remarks>
/// For multi-valued dimensions that are hierarchical or require a dimension count, the
/// ordinals of all ancestors (parent ordinal &gt; 0) are recorded as well; the last one
/// recorded is dropped again unless the dimension requires a dimension count.
/// </remarks>
/// <param name="taxoWriter">Taxonomy writer used to add categories and look up parents.</param>
/// <param name="byField">Facet fields grouped by index field name.</param>
/// <param name="doc">Document that receives the generated fields.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when a facet field has more than one path component but its dimension is
/// not configured as hierarchical.
/// </exception>
private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<FacetField>> entry in byField)
    {
        string indexFieldName = entry.Key;
        IntsRef ordinals = new IntsRef(32);

        foreach (FacetField facetField in entry.Value)
        {
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (facetField.path.Length > 1 && !dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
            }

            FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

            checkTaxoWriter(taxoWriter);
            int ordinal = taxoWriter.AddCategory(label);
            if (ordinals.Length == ordinals.Ints.Length)
            {
                ordinals.Grow(ordinals.Length + 1);
            }
            ordinals.Ints[ordinals.Length++] = ordinal;

            bool recordAncestors = dimConfig.MultiValued && (dimConfig.Hierarchical || dimConfig.RequireDimCount);
            if (recordAncestors)
            {
                // Add all parents too:
                for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
                {
                    if (ordinals.Ints.Length == ordinals.Length)
                    {
                        ordinals.Grow(ordinals.Length + 1);
                    }
                    ordinals.Ints[ordinals.Length++] = parent;
                }

                if (!dimConfig.RequireDimCount)
                {
                    // Remove last (dimension) ord:
                    ordinals.Length--;
                }
            }

            // Drill down:
            for (int prefixLength = 1; prefixLength <= label.Length; prefixLength++)
            {
                string drillDownTerm = PathToString(label.Components, prefixLength);
                doc.Add(new StringField(indexFieldName, drillDownTerm, Field.Store.NO));
            }
        }

        // Facet counts:
        // DocValues are considered stored fields:
        doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ordinals)));
    }
}
/// <summary>
/// Adds every entry of <c>categories</c> to the taxonomy and checks both the returned
/// ordinal and, via <c>GetParent</c>, every ancestor ordinal against the matching
/// <c>ExpectedPaths</c> row. It performs the same add-and-check as
/// <c>FillTaxonomy</c> and additionally verifies the parent chain.
/// The expected ordinals presume the taxonomy was empty when this method was called;
/// calling it after other categories were added will make it fail.
/// </summary>
/// <param name="tw">Taxonomy writer the categories are added to and queried on.</param>
public static void FillTaxonomyCheckPaths(TaxonomyWriter tw)
{
    for (int i = 0; i < categories.Length; i++)
    {
        int current = tw.AddCategory(new FacetLabel(categories[i]));

        var expectedPath = ExpectedPaths[i];
        int expectedOrdinal = expectedPath[expectedPath.Length - 1];
        if (current != expectedOrdinal)
        {
            Fail("For category " + Showcat(categories[i]) + " expected ordinal " + expectedOrdinal + ", but got " + current);
        }

        // level 1 is the direct parent, level 2 the grandparent, and so on.
        for (int level = 1; level < expectedPath.Length; level++)
        {
            current = tw.GetParent(current);
            expectedOrdinal = expectedPath[expectedPath.Length - 1 - level];
            if (current != expectedOrdinal)
            {
                Fail("For category " + Showcat(categories[i]) + " expected ancestor level " + level + " was " + expectedOrdinal + ", but got " + current);
            }
        }
    }
}
/// <summary>
/// Disposes the documents writer and then the taxonomy writer.
/// </summary>
/// <remarks>
/// The taxonomy writer is disposed in a <c>finally</c> block so that it is still
/// released when disposing the documents writer throws.
/// </remarks>
public void Dispose()
{
    try
    {
        DocsWriter.Dispose();
    }
    finally
    {
        TaxonomyWriter.Dispose();
    }
}