/// <summary>
/// One-time fixture setup: indexes three documents through a random index writer
/// and opens the searcher over the resulting reader.
/// </summary>
public static void BeforeClass()
{
    Directory = NewDirectory();
    Analyzer fixtureAnalyzer = new AnalyzerAnonymousInnerClassHelper();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Directory, fixtureAnalyzer);

    // First document: a plain field, a field name that is added twice, and a palindrome.
    Documents.Document first = new Documents.Document();
    first.Add(NewTextField("field", "one two three four five", Field.Store.YES));
    first.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
    first.Add(NewTextField("repeated", "second part of a repeated field", Field.Store.YES));
    first.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
    indexWriter.AddDocument(first);

    // Two more documents, each carrying the same "nonexist" text.
    for (int copy = 0; copy < 2; copy++)
    {
        Documents.Document extra = new Documents.Document();
        extra.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
        indexWriter.AddDocument(extra);
    }

    Reader = indexWriter.Reader;
    indexWriter.Dispose();

    Searcher = NewSearcher(Reader);
}
/// <summary>
/// Resolves the configured synonym format to a parser type name and loads the
/// synonym map using <paramref name="loader"/>.
/// </summary>
/// <param name="loader">Resource loader handed to the tokenizer-factory and synonym loaders.</param>
/// <exception cref="IOException">Wraps any exception thrown while resolving the parser or loading synonyms.</exception>
public void Inform(IResourceLoader loader)
{
    TokenizerFactory factory = null;
    if (tokenizerFactory != null)
    {
        factory = LoadTokenizerFactory(loader, tokenizerFactory);
    }
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);

    try
    {
        string parserTypeName;
        switch (format)
        {
            case null:
            case "solr":
                parserTypeName = typeof(SolrSynonymParser).AssemblyQualifiedName;
                break;
            case "wordnet":
                parserTypeName = typeof(WordnetSynonymParser).AssemblyQualifiedName;
                break;
            default:
                // Any other value is passed through unchanged as the parser type name.
                parserTypeName = format;
                break;
        }

        // TODO: expose dedup as a parameter?
        map = LoadSynonyms(loader, parserTypeName, true, analyzer);
    }
    catch (Exception e)
    {
        throw new IOException("Error parsing synonyms file:", e);
    }
}
/// <summary>
/// Runs random Unicode strings through the helper analyzer and asserts that every
/// code point covered by a token's offsets in the original text is a letter.
/// </summary>
public virtual void TestCrossPlaneNormalization()
{
    var analyzer = new AnalyzerAnonymousInnerClassHelper();
    var iterations = 1000 * RANDOM_MULTIPLIER;

    for (var round = 0; round < iterations; round++)
    {
        var text = TestUtil.RandomUnicodeString(Random());
        var stream = analyzer.TokenStream("foo", text);
        try
        {
            stream.Reset();
            var offsets = stream.AddAttribute<IOffsetAttribute>();
            while (stream.IncrementToken())
            {
                var start = offsets.StartOffset();
                var covered = text.Substring(start, offsets.EndOffset() - start);

                // Walk the covered text one code point (not one char) at a time.
                var index = 0;
                while (index < covered.Length)
                {
                    var codePoint = char.ConvertToUtf32(covered, index);
                    assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                    index += Character.CharCount(codePoint);
                }
            }
            stream.End();
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(stream);
        }
    }

    // just for fun
    CheckRandomData(Random(), analyzer, iterations);
}
/// <summary>
/// Pushes random strings through both helper analyzers, one after the other.
/// </summary>
public virtual void testRandomStrings()
{
    Analyzer firstAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
    checkRandomData(random(), firstAnalyzer, 1000 * RANDOM_MULTIPLIER);

    Analyzer secondAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    checkRandomData(random(), secondAnalyzer, 1000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// For each entry in SNOWBALL_LANGS, checks that an empty term analyzes to an empty term.
/// </summary>
public virtual void testEmptyTerm()
{
    // NOTE(review): 'lang' is never used inside the loop, so each iteration builds the
    // analyzer the same way — confirm whether the helper was meant to receive it.
    foreach (String lang in SNOWBALL_LANGS)
    {
        checkOneTerm(new AnalyzerAnonymousInnerClassHelper(this), "", "");
    }
}
/// <summary>
/// Checks the helper analyzer's output for "ab" directly, and again when it is the
/// default analyzer of a PerFieldAnalyzerWrapper with no per-field entries.
/// </summary>
public virtual void TestCharFilters()
{
    Analyzer inner = new AnalyzerAnonymousInnerClassHelper(this);
    AssertAnalyzesTo(inner, "ab", new[] { "aab" }, new[] { 0 }, new[] { 2 });

    // now wrap in PFAW
    var noOverrides = new Dictionary<string, Analyzer>();
    PerFieldAnalyzerWrapper wrapped = new PerFieldAnalyzerWrapper(inner, noOverrides);
    AssertAnalyzesTo(wrapped, "ab", new[] { "aab" }, new[] { 0 }, new[] { 2 });
}
/// <summary>
/// Asserts that each sample input comes back from the helper analyzer as a single
/// unchanged token spanning the whole input.
/// </summary>
public virtual void Test()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

    // Expected term is the input itself; expected end offset is the input length (3, 5, 10).
    foreach (string input in new[] { "aab", "aabaa", "aabcdefgaa" })
    {
        AssertAnalyzesTo(analyzer, input, new[] { input }, new[] { 0 }, new[] { input.Length });
    }
}
/// <summary>
/// Checks the helper analyzer's output for "ab" directly, and again when it is the
/// default analyzer of a PerFieldAnalyzerWrapper with no per-field entries.
/// </summary>
public virtual void testCharFilters()
{
    Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
    assertAnalyzesTo(a, "ab", new string[] { "aab" }, new int[] { 0 }, new int[] { 2 });

    // now wrap in PFAW
    // Enumerable.Empty takes a single type argument and does not produce a map, so the
    // converter's "Empty<string, Analyzer>()" cannot compile; an empty dictionary is the
    // equivalent of the original empty map. Fully qualified to avoid needing a new using.
    PerFieldAnalyzerWrapper p = new PerFieldAnalyzerWrapper(a, new System.Collections.Generic.Dictionary<string, Analyzer>());
    assertAnalyzesTo(p, "ab", new string[] { "aab" }, new int[] { 0 }, new int[] { 2 });
}
/// <summary>
/// Runs random data through a helper analyzer constructed with the word set {"a", "b"}.
/// </summary>
public virtual void TestRandomStrings()
{
    ISet<string> wordSet = new HashSet<string> { "a", "b" };
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet);

    CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// Compares the token streams of a MockAnalyzer (built with jvmLetter) and the helper
/// analyzer over 1000 random simple strings.
/// </summary>
public virtual void TestLetterAscii()
{
    Random random = Random();
    Analyzer expected = new MockAnalyzer(random, jvmLetter, false);
    Analyzer actual = new AnalyzerAnonymousInnerClassHelper(this);

    int remaining = 1000;
    while (remaining-- > 0)
    {
        string input = TestUtil.RandomSimpleString(random);
        assertEquals(
            input,
            expected.TokenStream("foo", newStringReader(input)),
            actual.TokenStream("foo", newStringReader(input)));
    }
}
/// <summary>
/// Runs the helper analyzer for <paramref name="snowballLanguage"/> over the vocabulary
/// file in TestSnowballVocabData.zip and compares against the matching output file.
/// </summary>
/// <param name="snowballLanguage">Language name passed to the helper analyzer.</param>
/// <param name="dataDirectory">Directory inside the zip holding voc.txt and output.txt.</param>
private void assertCorrectOutput(string snowballLanguage, string dataDirectory)
{
    if (VERBOSE)
    {
        Console.WriteLine("checking snowball language: " + snowballLanguage);
    }

    string vocabularyPath = dataDirectory + "/voc.txt";
    string expectedPath = dataDirectory + "/output.txt";

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, snowballLanguage);
    assertVocabulary(analyzer, getDataFile("TestSnowballVocabData.zip"), vocabularyPath, expectedPath);
}
/// <summary>
/// Runs random data through a helper analyzer constructed with the word set {"a", "b"}.
/// </summary>
public virtual void testRandomStrings()
{
    ISet<string> wordSet = new HashSet<string> { "a", "b" };
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet);

    checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// Builds a three-entry CharArraySet and a three-rule NormalizeCharMap, then checks
/// analysis consistency of the helper analyzer on one fixed input string.
/// </summary>
public virtual void Test()
{
    CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    foreach (string word in new[] { "jjp", "wlmwoknt", "tcgyreo" })
    {
        wordSet.add(word);
    }

    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.Build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet, charMap);
    CheckAnalysisConsistency(Random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Checks the tokens, offsets and final offset produced by the helper analyzer for
/// both values of consumeAll.
/// </summary>
/// <remarks>
/// The input literals need runs of several spaces: the assertions expect "2" at
/// offsets 3-4 and final offsets of 16, 3 and 6, which single-spaced inputs cannot
/// produce. The literals below have exactly those lengths and token positions.
/// </remarks>
public virtual void TestMaxPosition2()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        Analyzer a = new AnalyzerAnonymousInnerClassHelper(consumeAll);

        // don't use assertAnalyzesTo here, as the end offset is not the end of the string
        // (unless consumeAll is true, in which case it's correct)!
        // 16 characters: "1" at 0, "2" at 3.
        AssertTokenStreamContents(a.TokenStream("dummy", "1  2     3  4  5"),
            new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : (int?)null);

        // 9 characters, single-spaced: "1" at 0, "2" at 2.
        AssertTokenStreamContents(a.TokenStream("dummy", new StringReader("1 2 3 4 5")),
            new string[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : (int?)null);

        // less than the limit, ensure we behave correctly (3 characters)
        AssertTokenStreamContents(a.TokenStream("dummy", "1  "),
            new string[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : (int?)null);

        // equal to limit (6 characters: "1" at 0, "2" at 3)
        AssertTokenStreamContents(a.TokenStream("dummy", "1  2  "),
            new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : (int?)null);
    }
}
/// <summary>
/// Parses synonymsFile with a WordnetSynonymParser and checks the helper analyzer's
/// output for three inputs against the resulting synonym map.
/// </summary>
public virtual void TestSynonyms()
{
    WordnetSynonymParser wordnetParser = new WordnetSynonymParser(true, true, new MockAnalyzer(Random()));
    wordnetParser.Parse(new StringReader(synonymsFile));
    SynonymMap synonyms = wordnetParser.Build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms);

    /* all expansions */
    string[] expandedTerms = { "Lost", "in", "the", "woods", "wood", "forest" };
    int[] startOffsets = { 0, 5, 8, 12, 12, 12 };
    int[] endOffsets = { 4, 7, 11, 17, 17, 17 };
    int[] positionIncrements = { 1, 1, 1, 1, 0, 0 };
    AssertAnalyzesTo(analyzer, "Lost in the woods", expandedTerms, startOffsets, endOffsets, positionIncrements);

    /* single quote */
    AssertAnalyzesTo(analyzer, "king", new[] { "king", "baron" });

    /* multi words */
    AssertAnalyzesTo(analyzer, "king's evil", new[] { "king's", "king's", "evil", "meany" });
}
/// <summary>
/// Checks that offsets are correct when a mapping char filter has already rewritten
/// the input ("a" and "b" are each mapped to two characters).
/// </summary>
public virtual void testChangedOffsets()
{
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("a", "一二");
    mapBuilder.add("b", "二三");
    NormalizeCharMap charMap = mapBuilder.build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, charMap);

    // note: offsets are strange since this is how the charfilter maps them...
    // before bigramming, the 4 tokens look like:
    //   { 0, 0, 1, 1 },
    //   { 0, 1, 1, 2 }
    string[] expectedTerms = { "一二", "二二", "二三" };
    int[] expectedStarts = { 0, 0, 1 };
    int[] expectedEnds = { 1, 1, 2 };
    assertAnalyzesTo(analyzer, "ab", expectedTerms, expectedStarts, expectedEnds);
}
/// <summary>
/// Parses synonymsFile with a WordnetSynonymParser and checks the helper analyzer's
/// output for three inputs against the resulting synonym map.
/// </summary>
public virtual void testSynonyms()
{
    WordnetSynonymParser wordnetParser = new WordnetSynonymParser(true, true, new MockAnalyzer(random()));
    wordnetParser.parse(new StringReader(synonymsFile));
    SynonymMap synonyms = wordnetParser.build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms);

    /* all expansions */
    string[] expandedTerms = { "Lost", "in", "the", "woods", "wood", "forest" };
    int[] startOffsets = { 0, 5, 8, 12, 12, 12 };
    int[] endOffsets = { 4, 7, 11, 17, 17, 17 };
    int[] positionIncrements = { 1, 1, 1, 1, 0, 0 };
    assertAnalyzesTo(analyzer, "Lost in the woods", expandedTerms, startOffsets, endOffsets, positionIncrements);

    /* single quote */
    assertAnalyzesTo(analyzer, "king", new[] { "king", "baron" });

    /* multi words */
    assertAnalyzesTo(analyzer, "king's evil", new[] { "king's", "king's", "evil", "meany" });
}
/// <summary>
/// Builds a three-entry CharArraySet and a three-rule NormalizeCharMap, then checks
/// analysis consistency of the helper analyzer on one fixed input string.
/// </summary>
public virtual void test()
{
    CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    foreach (string word in new[] { "jjp", "wlmwoknt", "tcgyreo" })
    {
        wordSet.add(word);
    }

    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("mtqlpi", "");
    mapBuilder.add("mwoknt", "jjp");
    mapBuilder.add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet, charMap);
    checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Checks terms, offsets and position increments produced by three helper analyzers
/// that are all built from the same flag set and protected-word set.
/// </summary>
public virtual void testPositionIncrements()
{
    int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

    // C# has no diamond operator, and HashSet has no constructor that takes a single
    // element; a collection initializer builds the one-word set instead.
    CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> { "NUTCH" }, false);

    /* analyzer that uses whitespace + wdf */
    Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

    /* in this case, works as expected. */
    assertAnalyzesTo(a, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 1 });

    /* only in this case, posInc of 2 ?! */
    assertAnalyzesTo(a, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 1, 0, 1 });

    assertAnalyzesTo(a, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 1, 1 });

    /* analyzer that will consume tokens with large position increments */
    Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

    /* increment of "largegap" is preserved */
    assertAnalyzesTo(a2, "LUCENE largegap SOLR",
        new string[] { "LUCENE", "largegap", "SOLR" },
        new int[] { 0, 7, 16 },
        new int[] { 6, 15, 20 },
        new int[] { 1, 10, 1 });

    /* the "/" had a position increment of 10, where did it go?!?!! */
    assertAnalyzesTo(a2, "LUCENE / SOLR",
        new string[] { "LUCENE", "SOLR" },
        new int[] { 0, 9 },
        new int[] { 6, 13 },
        new int[] { 1, 11 });

    /* in this case, the increment of 10 from the "/" is carried over */
    assertAnalyzesTo(a2, "LUCENE / solR",
        new string[] { "LUCENE", "sol", "solR", "R" },
        new int[] { 0, 9, 9, 12 },
        new int[] { 6, 12, 13, 13 },
        new int[] { 1, 11, 0, 1 });

    assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR",
        new string[] { "LUCENE", "NUTCH", "SOLR" },
        new int[] { 0, 9, 15 },
        new int[] { 6, 14, 19 },
        new int[] { 1, 11, 1 });

    Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

    assertAnalyzesTo(a3, "lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 0, 0, 7 },
        new int[] { 6, 11, 11 },
        new int[] { 1, 0, 1 });

    /* the stopword should add a gap here */
    assertAnalyzesTo(a3, "the lucene.solr",
        new string[] { "lucene", "lucenesolr", "solr" },
        new int[] { 4, 4, 11 },
        new int[] { 10, 15, 15 },
        new int[] { 2, 0, 1 });
}
/// <summary>
/// Repeatedly builds a random synonym map and runs random data through a helper
/// analyzer created from that map and a random ignore-case flag.
/// </summary>
public virtual void testRandomStrings()
{
    int rounds = atLeast(10);
    for (int round = 0; round < rounds; round++)
    {
        SynonymMap.Builder mapBuilder = new SynonymMap.Builder(random().nextBoolean());

        int entryCount = atLeast(10);
        for (int entry = 0; entry < entryCount; entry++)
        {
            add(mapBuilder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
        }

        SynonymMap synonyms = mapBuilder.build();
        bool ignoreCase = random().nextBoolean();

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, synonyms, ignoreCase);
        checkRandomData(random(), analyzer, 200);
    }
}
/// <summary>
/// Repeatedly builds a random synonym map and runs random data through a helper
/// analyzer created from that map and a random ignore-case flag.
/// </summary>
public virtual void TestRandomStrings()
{
    int numIters = AtLeast(10);
    for (int i = 0; i < numIters; i++)
    {
        // NextBoolean (PascalCase) matches the rest of this method; "nextBoolean" was
        // left-over Java casing.
        SynonymMap.Builder b = new SynonymMap.Builder(Random().NextBoolean());

        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            Add(b, RandomNonEmptyString(), RandomNonEmptyString(), Random().NextBoolean());
        }

        SynonymMap map = b.Build();
        bool ignoreCase = Random().NextBoolean();

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, map, ignoreCase);
        CheckRandomData(Random(), analyzer, 200);
    }
}
/// <summary>
/// Checks the terms and position increments the helper analyzer produces for
/// "the quick brown fox".
/// </summary>
public virtual void TestFirstPosInc()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

    string[] expectedTerms = { "hte", "quick", "brown", "fox" };
    int[] expectedPositionIncrements = { 1, 1, 1, 1 };
    AssertAnalyzesTo(analyzer, "the quick brown fox", expectedTerms, expectedPositionIncrements);
}
/// <summary>
/// Checks that the helper analyzer turns an empty term into an empty term.
/// </summary>
public virtual void testEmptyTerm()
{
    checkOneTerm(new AnalyzerAnonymousInnerClassHelper(this), "", "");
}
/// <summary>
/// Runs RANDOM_MULTIPLIER * 10000 rounds of random data through the helper analyzer.
/// </summary>
public virtual void testRandom()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    checkRandomData(random(), subject, RANDOM_MULTIPLIER * 10000);
}
/// <summary>
/// Runs 1000 * RANDOM_MULTIPLIER rounds of random data through the helper analyzer.
/// </summary>
public virtual void TestRandomStrings()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    int rounds = 1000 * RANDOM_MULTIPLIER;
    CheckRandomData(Random(), subject, rounds);
}
/// <summary>
/// Indexes MAXDOC documents, each with eight fields covering different index options,
/// using the Lucene41 postings format; verifies the index, force-merges it to one
/// segment and verifies again.
/// </summary>
public virtual void Test()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());

    // Term vectors are switched on for every type as a cross-check.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY
    };
    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS
    };
    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorPositions = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPayloads = true
    };
    FieldType offsetsType = new FieldType(positionsType)
    {
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };

    // The same eight Field instances are reused for every document; only their value changes.
    Field[] fields =
    {
        new Field("field1docs", "", docsOnlyType),
        new Field("field2freqs", "", docsAndFreqsType),
        new Field("field3positions", "", positionsType),
        new Field("field4offsets", "", offsetsType),
        new Field("field5payloadsFixed", "", positionsType),
        new Field("field6payloadsVariable", "", positionsType),
        new Field("field7payloadsFixedOffsets", "", offsetsType),
        new Field("field8payloadsVariableOffsets", "", offsetsType)
    };

    Document doc = new Document();
    foreach (Field field in fields)
    {
        doc.Add(field);
    }

    for (int docNumber = 0; docNumber < MAXDOC; docNumber++)
    {
        string stringValue = Convert.ToString(docNumber) + " verycommon " + English.IntToEnglish(docNumber).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
        foreach (Field field in fields)
        {
            field.StringValue = stringValue;
        }
        iw.AddDocument(doc);
    }
    iw.Dispose();

    Verify(dir);
    TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

    iwc.SetOpenMode(OpenMode_e.APPEND);
    IndexWriter mergingWriter = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
    mergingWriter.ForceMerge(1);
    mergingWriter.Dispose();

    Verify(dir);
    dir.Dispose();
}
/// <summary>
/// Indexes one document through the helper analyzer, then checks the positions of the
/// terms "1" and "2" and the hit counts of a series of phrase queries over terms "1".."5".
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, store, analyzer);

    Document document = new Document();
    document.Add(NewTextField("field", "bogus", Field.Store.YES));
    writer.AddDocument(document);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // The first token should be at position 0.
    DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
    positions.NextDoc();
    Assert.AreEqual(0, positions.NextPosition());

    // The second token should be at position 2.
    positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
    positions.NextDoc();
    Assert.AreEqual(2, positions.NextPosition());

    // "1 2" with implicit consecutive positions: no match.
    PhraseQuery oneTwoImplicit = new PhraseQuery();
    oneTwoImplicit.Add(new Term("field", "1"));
    oneTwoImplicit.Add(new Term("field", "2"));
    Assert.AreEqual(0, searcher.Search(oneTwoImplicit, null, 1000).ScoreDocs.Length);

    // Same as above, with the positions spelled out explicitly.
    PhraseQuery oneTwoAdjacent = new PhraseQuery();
    oneTwoAdjacent.Add(new Term("field", "1"), 0);
    oneTwoAdjacent.Add(new Term("field", "2"), 1);
    Assert.AreEqual(0, searcher.Search(oneTwoAdjacent, null, 1000).ScoreDocs.Length);

    // Specifying the correct positions should find the phrase.
    PhraseQuery oneTwoWithGap = new PhraseQuery();
    oneTwoWithGap.Add(new Term("field", "1"), 0);
    oneTwoWithGap.Add(new Term("field", "2"), 2);
    Assert.AreEqual(1, searcher.Search(oneTwoWithGap, null, 1000).ScoreDocs.Length);

    PhraseQuery twoThree = new PhraseQuery();
    twoThree.Add(new Term("field", "2"));
    twoThree.Add(new Term("field", "3"));
    Assert.AreEqual(1, searcher.Search(twoThree, null, 1000).ScoreDocs.Length);

    PhraseQuery threeFourImplicit = new PhraseQuery();
    threeFourImplicit.Add(new Term("field", "3"));
    threeFourImplicit.Add(new Term("field", "4"));
    Assert.AreEqual(0, searcher.Search(threeFourImplicit, null, 1000).ScoreDocs.Length);

    // The phrase query finds it once the correct positions are specified.
    PhraseQuery threeFourSamePosition = new PhraseQuery();
    threeFourSamePosition.Add(new Term("field", "3"), 0);
    threeFourSamePosition.Add(new Term("field", "4"), 0);
    Assert.AreEqual(1, searcher.Search(threeFourSamePosition, null, 1000).ScoreDocs.Length);

    // A phrase query should fail for a non-existent term even if another
    // searched term exists at the same position.
    PhraseQuery threeNineSamePosition = new PhraseQuery();
    threeNineSamePosition.Add(new Term("field", "3"), 0);
    threeNineSamePosition.Add(new Term("field", "9"), 0);
    Assert.AreEqual(0, searcher.Search(threeNineSamePosition, null, 1000).ScoreDocs.Length);

    // A multi-phrase query should succeed for a non-existent term because
    // another searched term exists at the same position.
    MultiPhraseQuery threeOrNine = new MultiPhraseQuery();
    threeOrNine.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    Assert.AreEqual(1, searcher.Search(threeOrNine, null, 1000).ScoreDocs.Length);

    PhraseQuery twoFour = new PhraseQuery();
    twoFour.Add(new Term("field", "2"));
    twoFour.Add(new Term("field", "4"));
    Assert.AreEqual(1, searcher.Search(twoFour, null, 1000).ScoreDocs.Length);

    PhraseQuery threeFive = new PhraseQuery();
    threeFive.Add(new Term("field", "3"));
    threeFive.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(threeFive, null, 1000).ScoreDocs.Length);

    PhraseQuery fourFive = new PhraseQuery();
    fourFive.Add(new Term("field", "4"));
    fourFive.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(fourFive, null, 1000).ScoreDocs.Length);

    PhraseQuery twoFive = new PhraseQuery();
    twoFive.Add(new Term("field", "2"));
    twoFive.Add(new Term("field", "5"));
    Assert.AreEqual(0, searcher.Search(twoFive, null, 1000).ScoreDocs.Length);

    reader.Dispose();
    store.Dispose();
}
/// <summary>
/// Checks that the helper analyzer turns an empty term into an empty term.
/// </summary>
public virtual void TestEmptyTerm()
{
    CheckOneTerm(new AnalyzerAnonymousInnerClassHelper(this), "", "");
}
/// <summary>
/// Runs 200 * RANDOM_MULTIPLIER rounds of random data through the helper analyzer,
/// passing 8192 as the final argument to CheckRandomData.
/// </summary>
public virtual void TestRandomStrings()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    int rounds = 200 * RANDOM_MULTIPLIER;
    CheckRandomData(Random, subject, rounds, 8192);
}
/// <summary>
/// Writes <paramref name="ndocs"/> copies of a single-field document into a freshly
/// created index in <paramref name="dir"/> and force-merges it to one segment.
/// </summary>
/// <param name="random">Passed to the helper analyzer.</param>
/// <param name="dir">Directory that receives the index (opened in CREATE mode).</param>
/// <param name="ndocs">Number of documents to add.</param>
/// <param name="field">Name of the string field added to the document.</param>
/// <param name="val">Field value; also passed to the helper analyzer.</param>
/// <param name="maxTF">Passed to the helper analyzer.</param>
/// <param name="percentDocs">Passed to the helper analyzer.</param>
internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs);

    Document template = new Document();
    template.Add(NewStringField(field, val, Field.Store.NO));

    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .SetOpenMode(OpenMode_e.CREATE)
        .SetMaxBufferedDocs(100)
        .SetMergePolicy(NewLogMergePolicy(100));
    IndexWriter writer = new IndexWriter(dir, config);

    // The same document instance is added every time.
    for (int added = 0; added < ndocs; added++)
    {
        writer.AddDocument(template);
    }

    writer.ForceMerge(1);
    writer.Dispose();
}
/// <summary>
/// Checks the term, offsets, type and position increment the helper analyzer
/// produces for the single-character input "一".
/// </summary>
public virtual void testSingleChar2()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

    string[] expectedTerms = { "一" };
    int[] expectedStarts = { 0 };
    int[] expectedEnds = { 1 };
    string[] expectedTypes = { "<SINGLE>" };
    int[] expectedPositionIncrements = { 1 };

    assertAnalyzesTo(analyzer, "一", expectedTerms, expectedStarts, expectedEnds, expectedTypes, expectedPositionIncrements);
}
/// <summary>
/// Indexes MAXDOC documents, each with eight fields covering different index options,
/// using the Lucene41 postings format; verifies the index, force-merges it to one
/// segment and verifies again.
/// </summary>
public virtual void Test()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());

    // Term vectors are switched on for every type as a cross-check.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY
    };
    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS
    };
    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorPositions = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPayloads = true
    };
    FieldType offsetsType = new FieldType(positionsType)
    {
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };

    // The same eight Field instances are reused for every document; only their value changes.
    Field[] fields =
    {
        new Field("field1docs", "", docsOnlyType),
        new Field("field2freqs", "", docsAndFreqsType),
        new Field("field3positions", "", positionsType),
        new Field("field4offsets", "", offsetsType),
        new Field("field5payloadsFixed", "", positionsType),
        new Field("field6payloadsVariable", "", positionsType),
        new Field("field7payloadsFixedOffsets", "", offsetsType),
        new Field("field8payloadsVariableOffsets", "", offsetsType)
    };

    Document doc = new Document();
    foreach (Field field in fields)
    {
        doc.Add(field);
    }

    for (int docNumber = 0; docNumber < MAXDOC; docNumber++)
    {
        string stringValue = Convert.ToString(docNumber) + " verycommon " + English.IntToEnglish(docNumber).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
        foreach (Field field in fields)
        {
            field.StringValue = stringValue;
        }
        iw.AddDocument(doc);
    }
    iw.Dispose();

    Verify(dir);
    TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

    iwc.SetOpenMode(OpenMode_e.APPEND);
    IndexWriter mergingWriter = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
    mergingWriter.ForceMerge(1);
    mergingWriter.Dispose();

    Verify(dir);
    dir.Dispose();
}
/// <summary>
/// Creates the tokenizer helper: forwards TEST_VERSION_CURRENT and reader to the base
/// constructor and stores the reference to the enclosing analyzer helper.
/// </summary>
// NOTE(review): 'UnknownType' looks like a converter placeholder for the version type — confirm the intended type.
public LetterTokenizerAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper outerInstance, UnknownType TEST_VERSION_CURRENT, Reader reader) : base(TEST_VERSION_CURRENT, reader) { this.outerInstance = outerInstance; }
/// <summary>
/// Ten times over, picks a random min (2..10) and max (min..20), builds a helper
/// analyzer from them and runs two batches of random data through it.
/// </summary>
public virtual void testRandomStrings()
{
    int remainingRounds = 10;
    while (remainingRounds-- > 0)
    {
        int lower = TestUtil.Next(random(), 2, 10);
        int upper = TestUtil.Next(random(), lower, 20);

        Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, lower, upper);
        checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);
        checkRandomData(random(), analyzer, 10 * RANDOM_MULTIPLIER, 1027);
    }
}
/// <summary>
/// Runs 1000 * RANDOM_MULTIPLIER rounds of random data through the helper analyzer.
/// </summary>
public virtual void testRandomString()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    int rounds = 1000 * RANDOM_MULTIPLIER;
    checkRandomData(random(), subject, rounds);
}
/// <summary>
/// Regression test for LUCENE-5269: runs 2000 rounds of random data through the
/// helper analyzer.
/// </summary>
public virtual void testUnicodeShinglesAndNgrams()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    const int rounds = 2000;
    checkRandomData(random(), subject, rounds);
}
/// <summary>
/// Regression test for LUCENE-5269: runs 2000 rounds of random data through the
/// helper analyzer.
/// </summary>
public virtual void testUnicodeShinglesAndNgrams()
{
    Analyzer subject = new AnalyzerAnonymousInnerClassHelper(this);
    const int rounds = 2000;
    checkRandomData(random(), subject, rounds);
}
/// <summary>
/// Wraps the helper analyzer in ShingleAnalyzerWrapper three times — with filler
/// token "--", with a null filler token and with an empty filler token — and checks
/// the shingles produced for the same input.
/// </summary>
public virtual void TestAltFillerToken()
{
    const string input = "please divide into shingles";
    Analyzer @delegate = new AnalyzerAnonymousInnerClassHelper(this);

    // Filler token "--".
    ShingleAnalyzerWrapper withDashes = new ShingleAnalyzerWrapper(
        @delegate,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        true, false, "--");
    AssertAnalyzesTo(withDashes, input,
        new[] { "please", "please divide", "divide", "divide --", "-- shingles", "shingles" },
        new[] { 0, 0, 7, 7, 19, 19 },
        new[] { 6, 13, 13, 19, 27, 27 },
        new[] { 1, 0, 1, 0, 1, 1 });

    // Null filler token.
    ShingleAnalyzerWrapper withNullFiller = new ShingleAnalyzerWrapper(
        @delegate,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        false, false, null);
    AssertAnalyzesTo(withNullFiller, input,
        new[] { "please divide", "divide ", " shingles" },
        new[] { 0, 7, 19 },
        new[] { 13, 19, 27 },
        new[] { 1, 1, 1 });

    // Empty filler token: same expectations as the null case.
    ShingleAnalyzerWrapper withEmptyFiller = new ShingleAnalyzerWrapper(
        @delegate,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        false, false, "");
    AssertAnalyzesTo(withEmptyFiller, input,
        new[] { "please divide", "divide ", " shingles" },
        new[] { 0, 7, 19 },
        new[] { 13, 19, 27 },
        new[] { 1, 1, 1 });
}
/// <summary>
/// Repeatedly either adds a fixed document or issues term deletes until the writer's
/// <c>FlushCount</c> changes, and asserts that more than 2500 operations were needed
/// each time.
/// </summary>
/// <remarks>
/// The test is skipped when the postings format reported for "field" is "Memory",
/// "SimpleText" or "Direct". The writer is configured with a RAM buffer of 1.0 MB and
/// with both the buffered-docs and buffered-delete-terms triggers set to
/// <c>IndexWriterConfig.DISABLE_AUTO_FLUSH</c>.
/// </remarks>
public virtual void TestIndexingThenDeleting()
{
    // TODO: move this test to its own class and just @SuppressCodecs?
    // TODO: is it enough to just use newFSDirectory?
    string postingsFormat = TestUtil.GetPostingsFormat("field");
    AssumeFalse("this test cannot run with Memory codec", postingsFormat.Equals("Memory"));
    AssumeFalse("this test cannot run with SimpleText codec", postingsFormat.Equals("SimpleText"));
    AssumeFalse("this test cannot run with Direct codec", postingsFormat.Equals("Direct"));

    Random rnd = Random();
    Directory directory = NewDirectory();

    // note this test explicitly disables payloads
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    IndexWriterConfig config = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .SetRAMBufferSizeMB(1.0)
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.Add(NewTextField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO));

    int rounds = AtLeast(3);
    for (int round = 0; round < rounds; round++)
    {
        int count = 0;
        bool doIndexing = rnd.NextBoolean();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter doIndexing=" + doIndexing);
        }

        // Both branches run until the flush counter moves away from this snapshot.
        int flushCountAtStart = writer.FlushCount;
        if (doIndexing)
        {
            // Add docs until a flush is triggered
            while (writer.FlushCount == flushCountAtStart)
            {
                writer.AddDocument(doc);
                count++;
            }
        }
        else
        {
            // Delete docs until a flush is triggered
            while (writer.FlushCount == flushCountAtStart)
            {
                writer.DeleteDocuments(new Term("foo", "" + count));
                count++;
            }
        }

        string phase = doIndexing ? "indexing" : "deleting";
        Assert.IsTrue(count > 2500, "flush happened too quickly during " + phase + " count=" + count);
    }

    writer.Dispose();
    directory.Dispose();
}
/// <summary>
/// Asserts that the analyzer built by <c>AnalyzerAnonymousInnerClassHelper</c> turns
/// "mosfellsbær" into eleven two-character tokens whose second and third expected
/// arrays are all 0 and all 11 respectively.
/// </summary>
/// <remarks>
/// NOTE(review): the three int arrays are assumed to be start offsets, end offsets and
/// position increments, in that order. The expected tokens "ba", "ae", "er" suggest the
/// helper folds "æ" to "ae" before producing bigrams - the helper is not visible here,
/// confirm.
/// </remarks>
public virtual void TestInvalidOffsets()
{
    string[] expectedTokens = { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" };

    // Every token is expected to report the same pair of values: 0 and 11.
    int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    int[] expectedEndOffsets = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 };

    // Only the first entry is 1; all the others are 0.
    int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    AssertAnalyzesTo(
        analyzer,
        "mosfellsbær",
        expectedTokens,
        expectedStartOffsets,
        expectedEndOffsets,
        expectedPositionIncrements);
}
/// <summary>
/// Runs <c>CheckRandomData</c> against the analyzer produced by
/// <c>AnalyzerAnonymousInnerClassHelper</c>, passing <c>1000 * RandomMultiplier</c>
/// as the third argument.
/// </summary>
public virtual void TestRandomStrings()
{
    Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper(this);

    // Scaled by the multiplier supplied by the test base class.
    var iterations = 1000 * RandomMultiplier;
    CheckRandomData(Random, analyzerUnderTest, iterations);
}
/// <summary>
/// Runs <c>CheckRandomData</c> against the analyzer produced by
/// <c>AnalyzerAnonymousInnerClassHelper</c>, passing <c>1000 * RANDOM_MULTIPLIER</c>
/// as the third argument.
/// </summary>
public virtual void TestRandomString()
{
    Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper(this);

    // Scaled by the multiplier supplied by the test base class.
    var iterations = 1000 * RANDOM_MULTIPLIER;
    CheckRandomData(Random(), analyzerUnderTest, iterations);
}
/// <summary>
/// Builds a small index, appends many more documents, force-merges to one segment, and
/// asserts that the directory's peak size - both while adding and after closing - stays
/// below 150 times the peak size recorded just before the append phase.
/// </summary>
/// <remarks>
/// The test is skipped when the postings format reported for "id" or "content" is
/// "Memory". The analyzer is picked at random between two helper classes; per the
/// original notes one emits no payloads and the other fixed-length payloads.
/// </remarks>
public virtual void TestCommitOnCloseDiskUsage()
{
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):
    string idFormat = TestUtil.GetPostingsFormat("id");
    string contentFormat = TestUtil.GetPostingsFormat("content");
    bool usesMemoryCodec = idFormat.Equals("Memory", StringComparison.Ordinal)
        || contentFormat.Equals("Memory", StringComparison.Ordinal);
    AssumeFalse("this test cannot run with Memory codec", usesMemoryCodec);

    const int initialDocCount = 30;
    const int appendedDocCount = 1470;
    const int maxGrowthFactor = 150;

    MockDirectoryWrapper dir = NewMockDirectory();

    Analyzer analyzer;
    if (Random.NextBoolean())
    {
        // no payloads
        analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    }
    else
    {
        // fixed length payloads
        int payloadLength = Random.Next(200);
        analyzer = new AnalyzerAnonymousInnerClassHelper2(this, payloadLength);
    }

    // Phase 1: create the starting index.
    IndexWriter initialWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMaxBufferedDocs(10)
            .SetReaderPooling(false)
            .SetMergePolicy(NewLogMergePolicy(10)));
    for (int docIndex = 0; docIndex < initialDocCount; docIndex++)
    {
        AddDocWithIndex(initialWriter, docIndex);
    }
    initialWriter.Dispose();

    dir.ResetMaxUsedSizeInBytes();
    dir.TrackDiskUsage = true;
    long startDiskUsage = dir.MaxUsedSizeInBytes;

    // Phase 2: reopen in append mode and add the bulk of the documents.
    IndexWriter appendWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetOpenMode(OpenMode.APPEND)
            .SetMaxBufferedDocs(10)
            .SetMergeScheduler(new SerialMergeScheduler())
            .SetReaderPooling(false)
            .SetMergePolicy(NewLogMergePolicy(10)));
    for (int docIndex = 0; docIndex < appendedDocCount; docIndex++)
    {
        AddDocWithIndex(appendWriter, docIndex);
    }
    long midDiskUsage = dir.MaxUsedSizeInBytes;

    // Phase 3: merge down to one segment, close, and open a reader once.
    dir.ResetMaxUsedSizeInBytes();
    appendWriter.ForceMerge(1);
    appendWriter.Dispose();
    DirectoryReader.Open(dir).Dispose();
    long endDiskUsage = dir.MaxUsedSizeInBytes;

    // Ending index is 50X as large as starting index; due
    // to 3X disk usage normally we allow 150X max
    // transient usage. If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 150X:
    long maxAllowedUsage = startDiskUsage * maxGrowthFactor;
    Assert.IsTrue(
        midDiskUsage < maxAllowedUsage,
        "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + maxAllowedUsage);
    Assert.IsTrue(
        endDiskUsage < maxAllowedUsage,
        "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + maxAllowedUsage);

    dir.Dispose();
}