/// <summary>
/// Static initializer: fills the <c>CATEGORIES_A</c> .. <c>CATEGORIES_D</c> fixture arrays
/// with one <see cref="FacetField"/> per child index of the corresponding dimension.
/// </summary>
static TestTaxonomyFacetCounts2()
{
    CATEGORIES_A = new FacetField[NUM_CHILDREN_CP_A];
    for (int a = 0; a < CATEGORIES_A.Length; a++)
    {
        CATEGORIES_A[a] = new FacetField(CP_A, Convert.ToString(a));
    }

    CATEGORIES_B = new FacetField[NUM_CHILDREN_CP_B];
    for (int b = 0; b < CATEGORIES_B.Length; b++)
    {
        CATEGORIES_B[b] = new FacetField(CP_B, Convert.ToString(b));
    }

    // NO_PARENTS categories
    CATEGORIES_C = new FacetField[NUM_CHILDREN_CP_C];
    for (int c = 0; c < CATEGORIES_C.Length; c++)
    {
        CATEGORIES_C[c] = new FacetField(CP_C, Convert.ToString(c));
    }

    // Multi-level categories
    CATEGORIES_D = new FacetField[NUM_CHILDREN_CP_D];
    for (int d = 0; d < CATEGORIES_D.Length; d++)
    {
        // Two path components below the dimension: the index, then the index doubled,
        // e.g. D/1/11, D/2/22...
        string child = Convert.ToString(d);
        string grandChild = child + child;
        CATEGORIES_D[d] = new FacetField(CP_D, child, grandChild);
    }
}
/// <summary>
/// Adds documents to the index writer for as long as decrementing the shared
/// <c>numDocs</c> counter yields a positive value. Each document receives one to three
/// categories from <c>NewCategory()</c>, and every path prefix of each category is
/// recorded in <c>values</c>.
/// </summary>
public override void Run()
{
    Random rnd = Random;
    while (numDocs.DecrementAndGet() > 0)
    {
        try
        {
            Document doc = new Document();

            // 1-3 categories per document
            for (int remaining = rnd.Next(3) + 1; remaining > 0; remaining--)
            {
                FacetField category = NewCategory();
                doc.Add(category);

                // add all prefixes to values
                FacetLabel label = new FacetLabel(category.Dim, category.Path);
                for (int prefixLength = label.Length; prefixLength > 0; prefixLength--)
                {
                    string prefix = FacetsConfig.PathToString(label.Components, prefixLength);
                    values[prefix] = prefix;
                }
            }

            iw.AddDocument(config.Build(tw, doc));
        }
        catch (IOException e)
        {
            // Rethrow as an unchecked failure, keeping the original exception as the cause.
            throw new Exception(e.ToString(), e);
        }
    }
}
/// <summary>
/// Adds a new <see cref="FacetField"/> with the specified <paramref name="dim"/> and
/// <paramref name="path"/>.
/// </summary>
/// <param name="document">This <see cref="Document"/>.</param>
/// <param name="dim">Dimension for this field.</param>
/// <param name="path">Facet path for this field.</param>
/// <returns>The field that was added to this <see cref="Document"/>.</returns>
/// <exception cref="ArgumentNullException">This <paramref name="document"/> is <c>null</c>.</exception>
public static FacetField AddFacetField(this Document document, string dim, params string[] path)
{
    // Extension methods can be invoked on a null receiver; fail with a descriptive
    // exception instead of a NullReferenceException from document.Add below.
    if (document is null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    var field = new FacetField(dim, path);
    document.Add(field);
    return field;
}
private int maxValue; // = maxDocFacets * maxFacetDepth;

/// <summary>
/// Replaces the contents of <paramref name="facets"/> with a newly generated random
/// set of facet fields (at least one).
/// </summary>
/// <param name="facets">List that is cleared and then filled with the generated fields.</param>
public override void GetNextFacets(IList<FacetField> facets)
{
    facets.Clear();

    int facetCount = 1 + random.Next(maxDocFacets); // at least one facet to each doc
    for (int f = 0; f < facetCount; f++)
    {
        // depth < 2 is not useful
        int depth = maxFacetDepth == 2
            ? 2
            : 2 + random.Next(maxFacetDepth - 2);

        string dim = random.Next(maxDims).ToString(CultureInfo.InvariantCulture);

        // The dimension counts as one level, so the path holds depth - 1 components.
        string[] path = new string[depth - 1];
        for (int p = 0; p < path.Length; p++)
        {
            path[p] = random.Next(maxValue).ToString(CultureInfo.InvariantCulture);
            AddItem();
        }

        FacetField field = new FacetField(dim, path);
        facets.Add(field);
        AddBytes(field.ToString().Length); // very rough approximation
    }
}
/// <summary>
/// Register <see cref="IFacetField"/> for use within query
/// </summary>
/// <param name="field">Name passed to the new <see cref="FacetField"/>.</param>
/// <param name="values">Values passed to the new <see cref="FacetField"/>; defaults to <c>null</c>.</param>
/// <returns>A <see cref="FacetQueryField"/> wrapping this instance and the registered facet.</returns>
protected virtual IFacetQueryField FacetInternal(string field, string[] values = null)
{
    FacetField registered = new FacetField(field, values);
    Fields.Add(registered);

    IFacetQueryField queryField = new FacetQueryField(this, registered);
    return queryField;
}
/// <summary>
/// Sole constructor. Both arguments are passed through
/// <see cref="FacetField.VerifyLabel"/> before being stored.
/// </summary>
/// <param name="dim">Dimension of this field.</param>
/// <param name="label">Label of this field.</param>
public SortedSetDocValuesFacetField(string dim, string label)
    : base("dummy", TYPE)
{
    // The label is verified before the dimension.
    FacetField.VerifyLabel(label);
    FacetField.VerifyLabel(dim);

    Dim = dim;
    Label = label;
}
/// <summary>
/// Verifies that <c>AddFacetField</c> adds a field to the document and that the
/// returned field carries the dimension and path it was given.
/// </summary>
public void TestAddFacetField()
{
    string[] expectedPath = { "thePath0", "thePath1", "thePath2" };
    FacetField added = null;

    // Capture the returned field so its properties can be checked afterwards.
    AssertDocumentExtensionAddsToDocument(doc => added = doc.AddFacetField("theDim", expectedPath));

    Assert.AreEqual("theDim", added.Dim);
    Assert.AreEqual(expectedPath, added.Path);
}
///<inheritdoc/>
public IFacetQueryField Facet(string field, string[] values)
{
    // Build the facet definition, then hand it to the FacetInternal overload.
    FacetField definition = new FacetField(field)
    {
        Values = values
    };
    FacetInternal(definition);

    IFacetQueryField queryField = new FacetQueryField(this, definition);
    return queryField;
}
/// <summary>
/// Creates a <see cref="FacetField"/> from <paramref name="dim"/> and
/// <paramref name="path"/> and adds it to this <see cref="Document"/>.
/// </summary>
/// <param name="document">This <see cref="Document"/>.</param>
/// <param name="dim">Dimension for this field.</param>
/// <param name="path">Facet path for this field.</param>
/// <returns>The field that was added to this <see cref="Document"/>.</returns>
/// <exception cref="ArgumentNullException">This <paramref name="document"/> is <c>null</c>.</exception>
public static FacetField AddFacetField(this Document document, string dim, params string[] path)
{
    // Reject a null receiver before constructing anything.
    _ = document ?? throw new ArgumentNullException(nameof(document));

    FacetField facetField = new FacetField(dim, path);
    document.Add(facetField);
    return facetField;
}
/// <summary>
/// Indexes one category whose label is close to the maximum category path length,
/// adds a few tiny categories, then checks that re-adding the huge category returns
/// its original ordinal and that a drill-down on it matches exactly one document.
/// </summary>
public virtual void TestHugeLabel()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
    var config = new FacetsConfig();

    // Add one huge label:
    int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
    string bigs = TestUtil.RandomSimpleString(Random(), len, len);
    var hugeField = new FacetField("dim", bigs);
    var hugeLabel = new FacetLabel("dim", bigs);
    int ordinal = taxoWriter.AddCategory(hugeLabel);

    var hugeDoc = new Document();
    hugeDoc.Add(hugeField);
    indexWriter.AddDocument(config.Build(taxoWriter, hugeDoc));

    // Add tiny ones to cause a re-hash
    for (int round = 0; round < 3; round++)
    {
        string tiny = TestUtil.RandomSimpleString(Random(), 1, 10);
        taxoWriter.AddCategory(new FacetLabel("dim", tiny));

        var tinyDoc = new Document();
        tinyDoc.Add(new FacetField("dim", tiny));
        indexWriter.AddDocument(config.Build(taxoWriter, tinyDoc));
    }

    // when too large components were allowed to be added, this resulted in a new added category
    Assert.AreEqual(ordinal, taxoWriter.AddCategory(hugeLabel));

    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    var searcher = new IndexSearcher(indexReader);

    var drillDown = new DrillDownQuery(new FacetsConfig());
    drillDown.Add("dim", bigs);
    Assert.AreEqual(1, searcher.Search(drillDown, 10).TotalHits);

    IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
}
/// <summary>
/// Creates this from <paramref name="dim"/> and <paramref name="path"/> and an
/// association
/// </summary>
/// <param name="assoc">Association payload stored in <c>Assoc</c>.</param>
/// <param name="dim">Dimension; checked with <see cref="FacetField.VerifyLabel"/>.</param>
/// <param name="path">
/// Path components; must contain at least one element, and each element is checked
/// with <see cref="FacetField.VerifyLabel"/>.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
/// <exception cref="System.ArgumentException"><paramref name="path"/> is empty.</exception>
public AssociationFacetField(BytesRef assoc, string dim, params string[] path)
    : base("dummy", TYPE)
{
    FacetField.VerifyLabel(dim);

    // Validate the array itself before touching its elements: a null array previously
    // surfaced as a NullReferenceException from the loop below.
    if (path == null)
    {
        throw new System.ArgumentNullException(nameof(path));
    }
    if (path.Length == 0)
    {
        throw new System.ArgumentException("path must have at least one element");
    }

    foreach (string label in path)
    {
        FacetField.VerifyLabel(label);
    }

    this.Dim = dim;
    this.Assoc = assoc;
    this.Path = path;
}
/// <summary>
/// Checks if the specified FacetField is configured; if not, FacetField is configured as hierarchical and multivalued.
/// </summary>
/// <param name="facetsConfig">The facets configuration.</param>
/// <param name="facetField">The facet field.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="facetsConfig"/> or <paramref name="facetField"/> is <c>null</c>.
/// </exception>
public static void EnsureConfig(this FacetsConfig facetsConfig, FacetField facetField)
{
    if (facetsConfig == null)
    {
        throw new ArgumentNullException(nameof(facetsConfig));
    }

    if (facetField == null)
    {
        throw new ArgumentNullException(nameof(facetField));
    }

    var dimension = facetField.Dim;

    // Already configured: nothing to do, and no need to take the lock.
    if (facetsConfig.Contains(dimension))
    {
        return;
    }

    lock (facetsConfig)
    {
        // Check again while holding the lock before configuring.
        if (facetsConfig.Contains(dimension))
        {
            return;
        }

        // By default set it to hierarchical, and multi-valued.
        facetsConfig.SetHierarchical(dimension, true);
        facetsConfig.SetMultiValued(dimension, true);
    }
}
/// <summary>
/// Converts the specified category string to a Lucene FacetField.
/// </summary>
/// <remarks>
/// A category string has the form <c>{name}:{value}</c>, where the value may be a
/// <c>/</c>-separated hierarchy. A literal colon or slash is written as <c>\:</c> or <c>\/</c>.
/// </remarks>
/// <param name="categoryString">The category.</param>
/// <returns>A <see cref="FacetField"/> whose dimension is the facet name and whose path is the facet values.</returns>
/// <exception cref="ArgumentNullException"><paramref name="categoryString"/> is null, empty or white space.</exception>
/// <exception cref="SchemaException">
/// The string has no colon, has an empty facet name, or has an empty value component.
/// </exception>
public static FacetField ToLuceneFacetField(this string categoryString)
{
    const string COLON = ":";
    const string ESCAPED_COLON = @"\:";
    const string ESCAPED_COLON_TEMP_TOKEN = @"<<€€€€€€>>";
    const string SLASH = @"/";
    const string ESCAPED_SLASH = @"\/";
    const string ESCAPED_SLASH_TEMP_TOKEN = @"<<$$$$$$>>";

    // Sample categoryStrings:
    // "Author:Arthur Dent" => facet name is "Author", value is "Arthur Dent"
    // "Publish Date:2013/Mar/12" => facet name is "Publish Date", value (hierarchical) is "2013/Mar/12"

    // A category string must have a name and value => {name}:{value}
    if (String.IsNullOrWhiteSpace(categoryString))
    {
        throw new ArgumentNullException(nameof(categoryString));
    }

    // Convert \/ to <<$$$$$$>> and \: to <<€€€€€€>> so escaped separators survive the splits below.
    var categoryStringCopy = categoryString.Trim()
                                           .Replace(ESCAPED_SLASH, ESCAPED_SLASH_TEMP_TOKEN)
                                           .Replace(ESCAPED_COLON, ESCAPED_COLON_TEMP_TOKEN);

    // The first unescaped ":" separates the facet name from its value.
    var colonIndex = categoryStringCopy.IndexOf(COLON, StringComparison.Ordinal);
    var facetName = colonIndex >= 0 ? categoryStringCopy.Substring(0, colonIndex) : null;
    if (String.IsNullOrWhiteSpace(facetName))
    {
        throw new SchemaException($"Invalid category string: '{categoryString}'");
    }

    // Restore escaped separators in the name. The colon must be restored from its temp
    // token: the raw "\:" sequence no longer exists in the working copy at this point.
    facetName = facetName.Trim()
                         .Replace(ESCAPED_SLASH_TEMP_TOKEN, SLASH)
                         .Replace(ESCAPED_COLON_TEMP_TOKEN, COLON);

    // Split the remainder using / as separator to get the facet values.
    var facetValues = categoryStringCopy.Substring(colonIndex + 1)
                                        .Split(new[] { SLASH }, StringSplitOptions.None)
                                        .Select(s => s.Trim()
                                                      .Replace(ESCAPED_SLASH_TEMP_TOKEN, SLASH)
                                                      .Replace(ESCAPED_COLON_TEMP_TOKEN, COLON))
                                        .ToArray();

    if (facetValues.Any(s => String.IsNullOrWhiteSpace(s)))
    {
        throw new SchemaException($"Invalid category string: '{categoryString}'");
    }

    return new FacetField(facetName, facetValues);
}
/// <summary>
/// Add documents.
/// </summary>
/// <remarks>
/// Entries in <paramref name="files"/> that do not exist on disk are skipped. For each
/// remaining file a document is built with a "path" field, a "modified" field, the supplied
/// <paramref name="facetField"/> and one "facetcontent" field per extracted word. Both
/// writers are committed whenever the accumulated file size exceeds
/// <c>documents.MaxDocumentSizePerCommit</c>.
/// </remarks>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
/// <param name="files">The list of files that are to be added.</param>
/// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
/// <param name="facetField">The facet field information.</param>
/// <param name="config">The facet configuration information.</param>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, DirectoryInfo directoryInfo,
    string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
{
    FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = false,
        Stored = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };
    FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = documents.TokenizeContent,
        Stored = documents.StoreContent,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    // For each file.
    for (int i = 0; i < files.Length; i++)
    {
        // Skip entries that do not exist on disk.
        if (!File.Exists(files[i]))
        {
            continue;
        }

        Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

        try
        {
            FileInfo fileInfo = new FileInfo(files[i]);

            // Path relative to the root of the directory, lower-cased, with forward slashes.
            string file = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();
            DateTime lastWrite = fileInfo.LastWriteTime;

            Lucene.Net.Documents.Field path = new Field("path", file.Replace("\\", "/"), pathFieldType);
            Lucene.Net.Documents.Field modified = new Field("modified",
                lastWrite.ToShortDateString() + " " + lastWrite.ToShortTimeString(), pathFieldType);

            // Add the fields.
            document.Add(facetField);
            document.Add(path);
            document.Add(modified);

            // Open the document and extract its text content.
            OpenDocument(files[i]);
            string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

            // If content exists.
            if (!String.IsNullOrEmpty(content))
            {
                // Split the white spaces from the text.
                string[] words = content.Words();

                // If words exist.
                if (words != null && words.Length > 0)
                {
                    // Add one content field for each non-empty word.
                    for (int j = 0; j < words.Length; j++)
                    {
                        // Format the word.
                        string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();
                        if (String.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType);
                        document.Add(contentField);
                    }
                }
            }

            // Add the document.
            writer.AddDocument(config.Build(facetWriter, document));
            _document.Close();

            // Commit once the accumulated size of the added files exceeds the configured limit.
            documents.TotalDocumentSize += fileInfo.Length;
            if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
            {
                // Commit the index.
                writer.Commit();
                facetWriter.Commit();
                documents.TotalDocumentSize = 0;
            }
        }
        finally
        {
            // Exceptions propagate unchanged; the opened document is always closed.
            CloseDocument();
        }
    }
}
/// <summary>
/// Checks that facet label and facet field constructors reject empty or null
/// dimensions, labels and path components with an illegal-argument exception.
/// </summary>
public virtual void TestEmptyNullComponents()
{
    // Runs the construction and fails the test unless it throws an exception for which
    // IsIllegalArgumentException() is true. When the offending components are supplied,
    // they are appended to the failure message.
    void ExpectRejected(Action construct, string[] offending = null)
    {
        try
        {
            construct();
            if (offending is null)
            {
                fail("empty or null components should not be allowed");
            }
            else
            {
                fail("empty or null components should not be allowed: " + Arrays.ToString(offending));
            }
        }
        catch (Exception e) when (e.IsIllegalArgumentException())
        {
            // expected
        }
    }

    // LUCENE-4724: CategoryPath should not allow empty or null components
    string[][] invalidPaths =
    {
        new string[] { "", "test" }, // empty in the beginning
        new string[] { "test", "" }, // empty in the end
        new string[] { "test", "", "foo" }, // empty in the middle
        new string[] { null, "test" }, // null at the beginning
        new string[] { "test", null }, // null in the end
        new string[] { "test", null, "foo" } // null in the middle
    };

    foreach (string[] components in invalidPaths)
    {
        ExpectRejected(() => Assert.IsNotNull(new FacetLabel(components)), components);
        ExpectRejected(() => _ = new FacetField("dim", components), components);
        ExpectRejected(() => _ = new AssociationFacetField(new BytesRef(), "dim", components), components);
        ExpectRejected(() => _ = new Int32AssociationFacetField(17, "dim", components), components);
        ExpectRejected(() => _ = new SingleAssociationFacetField(17.0f, "dim", components), components);
    }

    // Null and empty dimensions.
    ExpectRejected(() => _ = new FacetField(null, new string[] { "abc" }));
    ExpectRejected(() => _ = new FacetField("", new string[] { "abc" }));
    ExpectRejected(() => _ = new Int32AssociationFacetField(17, null, new string[] { "abc" }));
    ExpectRejected(() => _ = new Int32AssociationFacetField(17, "", new string[] { "abc" }));
    ExpectRejected(() => _ = new SingleAssociationFacetField(17.0f, null, new string[] { "abc" }));
    ExpectRejected(() => _ = new SingleAssociationFacetField(17.0f, "", new string[] { "abc" }));
    ExpectRejected(() => _ = new AssociationFacetField(new BytesRef(), null, new string[] { "abc" }));
    ExpectRejected(() => _ = new AssociationFacetField(new BytesRef(), "", new string[] { "abc" }));

    // Sorted-set fields: null and empty dimension, then null and empty label.
    ExpectRejected(() => _ = new SortedSetDocValuesFacetField(null, "abc"));
    ExpectRejected(() => _ = new SortedSetDocValuesFacetField("", "abc"));
    ExpectRejected(() => _ = new SortedSetDocValuesFacetField("dim", null));
    ExpectRejected(() => _ = new SortedSetDocValuesFacetField("dim", ""));
}