private Analyzer GetAnalyzer(int type)
{
    Analyzer analyzer;
    // ConfigurationManager replaces the obsolete ConfigurationSettings API;
    // AppSettings already returns a string, so no ToString() is needed.
    string path = System.Configuration.ConfigurationManager.AppSettings["Data"];
    switch (type)
    {
        case 0:
            analyzer = new StockFooAnalyzer(path);
            break;
        case 1:
            analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
            break;
        case 2:
            analyzer = new SimpleAnalyzer();
            break;
        case 3:
            analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
            break;
        case 4:
            analyzer = new KeywordAnalyzer();
            break;
        case 5:
            analyzer = new WhitespaceAnalyzer();
            break;
        default:
            analyzer = new StockFooAnalyzer(path);
            break;
    }
    return analyzer;
}
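// StockFooAnalyzer above is a custom analyzer whose definition is not shown.
// A minimal sketch of one plausible shape, assuming the configured path points
// at a stop-word file (hypothetical; the real class may differ):
public class StockFooAnalyzer : Analyzer
{
    private readonly string dataPath;

    public StockFooAnalyzer(string dataPath)
    {
        this.dataPath = dataPath;
    }

    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        // Whitespace-tokenize, lowercase, then drop terms listed in the data file.
        TokenStream stream = new LowerCaseFilter(new WhitespaceTokenizer(reader));
        var stopWords = StopFilter.MakeStopSet(System.IO.File.ReadAllLines(dataPath));
        return new StopFilter(false, stream, stopWords);
    }
}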
public void MrsJones()
{
    var dir = new RAMDirectory();
    var analyzer = new WhitespaceAnalyzer();
    var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    var document = new Lucene.Net.Documents.Document();
    document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
    writer.AddDocument(document);
    writer.Close(true);

    var searcher = new IndexSearcher(dir, true);
    var termEnum = searcher.GetIndexReader().Terms();
    while (termEnum.Next())
    {
        var buffer = termEnum.Term().Text();
        Console.WriteLine(buffer);
    }

    var queryParser = new QueryParser(Version.LUCENE_29, "", analyzer);
    queryParser.SetLowercaseExpandedTerms(false);
    var query = queryParser.Parse("Name:MRS.*");
    Console.WriteLine(query);

    var result = searcher.Search(query, 10);
    Assert.NotEqual(0, result.totalHits);
}
public void TestWithCachingFilter()
{
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();

    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.Close();

    Searcher searcher = new IndexSearcher(dir, true);

    Query query = new TermQuery(new Term("none", "none"));
    QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
    CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);

    searcher.Search(query, cachingFilter, 1);

    CachingWrapperFilter cachingFilter2 = new CachingWrapperFilter(queryFilter);
    Filter[] chain = new Filter[2];
    chain[0] = cachingFilter;
    chain[1] = cachingFilter2;
    ChainedFilter cf = new ChainedFilter(chain);

    // throws java.lang.ClassCastException: org.apache.lucene.util.OpenBitSet
    // cannot be cast to java.util.BitSet
    searcher.Search(new MatchAllDocsQuery(), cf, 1);
}
public virtual void TestNull()
{
    Analyzer a = new WhitespaceAnalyzer();
    AssertAnalyzesTo(a, "foo bar FOO BAR", new System.String[] { "foo", "bar", "FOO", "BAR" });
    AssertAnalyzesTo(a, "foo bar . FOO <> BAR", new System.String[] { "foo", "bar", ".", "FOO", "<>", "BAR" });
    AssertAnalyzesTo(a, "foo.bar.FOO.BAR", new System.String[] { "foo.bar.FOO.BAR" });
    AssertAnalyzesTo(a, "U.S.A.", new System.String[] { "U.S.A." });
    AssertAnalyzesTo(a, "C++", new System.String[] { "C++" });
    AssertAnalyzesTo(a, "B2B", new System.String[] { "B2B" });
    AssertAnalyzesTo(a, "2B", new System.String[] { "2B" });
    AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[] { "\"QUOTED\"", "word" });
}
public LuceneIndexer(string indexPath)
{
    Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer nGramAnalyzer = new NGramAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // Whitespace analysis by default; n-grams only for the "Mobile" field.
    analyzerWrapper = new PerFieldAnalyzerWrapper(whitespaceAnalyzer);
    analyzerWrapper.AddAnalyzer("Name", whitespaceAnalyzer);
    analyzerWrapper.AddAnalyzer("Mobile", nGramAnalyzer);

    luceneIndexDirectory = FSDirectory.Open(indexPath);
    indexWriter = new IndexWriter(luceneIndexDirectory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED);
}
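// NGramAnalyzer above is a custom class, not part of Lucene.Net core. A minimal
// sketch of one possible implementation, assuming the contrib NGramTokenizer
// (Lucene.Net.Analysis.NGram) is available; the gram sizes are illustrative:
public class NGramAnalyzer : Analyzer
{
    private readonly Lucene.Net.Util.Version version;

    public NGramAnalyzer(Lucene.Net.Util.Version version)
    {
        this.version = version;
    }

    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        // Emit 2- to 4-character grams so partial mobile numbers can still match.
        return new Lucene.Net.Analysis.NGram.NGramTokenizer(reader, 2, 4);
    }
}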
public void Test_SegmentTermVector_IndexOf()
{
    Lucene.Net.Store.RAMDirectory directory = new Lucene.Net.Store.RAMDirectory();
    Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);

    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
    document.Add(new Lucene.Net.Documents.Field(
        "contents",
        new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("a_ a0"))),
        Lucene.Net.Documents.Field.TermVector.WITH_OFFSETS));
    writer.AddDocument(document);

    Lucene.Net.Index.IndexReader reader = writer.GetReader();
    Lucene.Net.Index.TermPositionVector tpv = reader.GetTermFreqVector(0, "contents") as Lucene.Net.Index.TermPositionVector;
    //Console.WriteLine("tpv: " + tpv);
    int index = tpv.IndexOf("a_");
    Assert.AreEqual(index, 1, "See the issue: LUCENENET-183");
}
public void Code()
{
    Analyzer _keywordanalyzer = new KeywordAnalyzer();
    Analyzer _simpleanalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    Analyzer _stopanalyzer = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Analyzer _whitespaceanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer _standardanalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    var _perfieldanalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(_standardanalyzer);
    _perfieldanalyzer.AddAnalyzer("firstname", _keywordanalyzer);
    _perfieldanalyzer.AddAnalyzer("lastname", _keywordanalyzer);

    IndexWriter _writer = new IndexWriter(_directory, _perfieldanalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader _reader = _writer.GetReader();
    IndexSearcher _searcher = new IndexSearcher(_reader);

    //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);
    string[] fields = new[] { "text", "title", "author" };
    var boosts = new Dictionary<string, float>();
    boosts.Add("text", 2.0f);
    boosts.Add("title", 1.5f);
    QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, _standardanalyzer, boosts);
    Query query = parser.Parse("lucene is great");

    TopDocs hits = _searcher.Search(query, 1000);
    IEnumerable<Document> docs = hits.ScoreDocs.Select(hit => _searcher.Doc(hit.Doc));
    var books = docs.Select(doc => new Book()
    {
        Text = doc.Get("text"),
        Title = doc.Get("title"),
        Author = doc.Get("author"),
        Length = Int32.Parse(doc.Get("length"))
    });

    _writer.Optimize();
    _writer.Commit();
    _writer.DeleteAll();
}
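// The Book type projected above is not defined in this snippet; a minimal
// sketch inferred directly from the property initializer (hypothetical):
public class Book
{
    public string Text { get; set; }
    public string Title { get; set; }
    public string Author { get; set; }
    public int Length { get; set; }
}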
public virtual void TestAddDocument()
{
    Document testDoc = new Document();
    DocHelper.SetupDoc(testDoc);
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true);
    writer.AddDocument(testDoc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();

    //After adding the document, we should be able to read it back in
    SegmentReader reader = SegmentReader.Get(info);
    Assert.IsTrue(reader != null);
    Document doc = reader.Document(0);
    Assert.IsTrue(doc != null);

    //System.out.println("Document: " + doc);
    Fieldable[] fields = doc.GetFields("textField2");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT));
    Assert.IsTrue(fields[0].IsTermVectorStored());

    fields = doc.GetFields("textField1");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT));
    Assert.IsFalse(fields[0].IsTermVectorStored());

    fields = doc.GetFields("keyField");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT));

    fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.NO_NORMS_TEXT));

    fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_3_TEXT));

    // test that the norms are not present in the segment if omitNorms is true
    for (int i = 0; i < reader.FieldInfos().Size(); i++)
    {
        FieldInfo fi = reader.FieldInfos().FieldInfo(i);
        if (fi.IsIndexed())
        {
            Assert.IsTrue(fi.omitNorms == !reader.HasNorms(fi.Name_ForNUnitTest));
        }
    }
}
/// <summary>
/// Returns an Analyzer for the given AnalyzerType
/// </summary>
/// <param name="oAnalyzerType">Enumeration value</param>
/// <returns>Analyzer</returns>
public static Analyzer GetAnalyzer(AnalyzerType oAnalyzerType)
{
    Analyzer oAnalyzer = null;
    switch (oAnalyzerType)
    {
        case AnalyzerType.SimpleAnalyzer:
            oAnalyzer = new SimpleAnalyzer();
            break;
        case AnalyzerType.StopAnalyzer:
            oAnalyzer = new StopAnalyzer();
            break;
        case AnalyzerType.WhitespaceAnalyzer:
            oAnalyzer = new WhitespaceAnalyzer();
            break;
        case AnalyzerType.StandardAnalyzer:
        default:
            oAnalyzer = new StandardAnalyzer();
            break;
    }
    return oAnalyzer;
}
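// The AnalyzerType enumeration consumed above is not shown in the source; a
// minimal declaration matching the cases used (hypothetical):
public enum AnalyzerType
{
    StandardAnalyzer,
    SimpleAnalyzer,
    StopAnalyzer,
    WhitespaceAnalyzer
}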
public virtual void TestPunct()
{
    Analyzer a = new WhitespaceAnalyzer();
    AssertQueryEquals("a&b", a, "a&b");
    AssertQueryEquals("a&&b", a, "a&&b");
    AssertQueryEquals(".NET", a, ".NET");
}
public void TestFuzzyLikeThisQueryEquals()
{
    Analyzer analyzer = new WhitespaceAnalyzer();
    FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
    fltq1.AddTerms("javi", "subject", 0.5f, 2);
    FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
    fltq2.AddTerms("javi", "subject", 0.5f, 2);
    Assert.AreEqual(fltq1, fltq2, "FuzzyLikeThisQuery with same attributes is not equal");
}
public virtual void TestEscaped()
{
    Analyzer a = new WhitespaceAnalyzer();

    /*assertQueryEquals("\\[brackets", a, "\\[brackets");
    assertQueryEquals("\\[brackets", null, "brackets");
    assertQueryEquals("\\\\", a, "\\\\");
    assertQueryEquals("\\+blah", a, "\\+blah");
    assertQueryEquals("\\(blah", a, "\\(blah");
    assertQueryEquals("\\-blah", a, "\\-blah");
    assertQueryEquals("\\!blah", a, "\\!blah");
    assertQueryEquals("\\{blah", a, "\\{blah");
    assertQueryEquals("\\}blah", a, "\\}blah");
    assertQueryEquals("\\:blah", a, "\\:blah");
    assertQueryEquals("\\^blah", a, "\\^blah");
    assertQueryEquals("\\[blah", a, "\\[blah");
    assertQueryEquals("\\]blah", a, "\\]blah");
    assertQueryEquals("\\\"blah", a, "\\\"blah");
    assertQueryEquals("\\(blah", a, "\\(blah");
    assertQueryEquals("\\)blah", a, "\\)blah");
    assertQueryEquals("\\~blah", a, "\\~blah");
    assertQueryEquals("\\*blah", a, "\\*blah");
    assertQueryEquals("\\?blah", a, "\\?blah");
    //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
    //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
    //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

    AssertQueryEquals("\\a", a, "a");

    AssertQueryEquals("a\\-b:c", a, "a-b:c");
    AssertQueryEquals("a\\+b:c", a, "a+b:c");
    AssertQueryEquals("a\\:b:c", a, "a:b:c");
    AssertQueryEquals("a\\\\b:c", a, "a\\b:c");

    AssertQueryEquals("a:b\\-c", a, "a:b-c");
    AssertQueryEquals("a:b\\+c", a, "a:b+c");
    AssertQueryEquals("a:b\\:c", a, "a:b:c");
    AssertQueryEquals("a:b\\\\c", a, "a:b\\c");

    AssertQueryEquals("a:b\\-c*", a, "a:b-c*");
    AssertQueryEquals("a:b\\+c*", a, "a:b+c*");
    AssertQueryEquals("a:b\\:c*", a, "a:b:c*");
    AssertQueryEquals("a:b\\\\c*", a, "a:b\\c*");

    AssertQueryEquals("a:b\\-?c", a, "a:b-?c");
    AssertQueryEquals("a:b\\+?c", a, "a:b+?c");
    AssertQueryEquals("a:b\\:?c", a, "a:b:?c");
    AssertQueryEquals("a:b\\\\?c", a, "a:b\\?c");

    AssertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
    AssertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
    AssertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
    AssertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");

    AssertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
    AssertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
    AssertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");

    AssertQueryEquals("[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]", a, "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]");

    AssertQueryEquals("a\\\\\\+b", a, "a\\+b");

    AssertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d");
    AssertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\"");
    AssertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\"");

    AssertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt");

    AssertParseException("XY\\"); // there must be a character after the escape char

    // test unicode escaping
    AssertQueryEquals("a\\u0062c", a, "abc");
    AssertQueryEquals("XY\\u005a", a, "XYZ");
    AssertQueryEquals("XY\\u005A", a, "XYZ");
    AssertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\"");

    AssertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence
    AssertParseException("XY\\u005"); // test incomplete escaped unicode sequence

    // Tests bug LUCENE-800
    AssertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
    AssertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing parenthesis
    AssertQueryEquals("\\*", a, "*");
    AssertQueryEquals("\\\\", a, "\\"); // escaped backslash

    AssertParseException("\\"); // a backslash must always be escaped
}
private static List<OneRawContactsListCsvRow> RemoveRowsByDomains(List<OneRawContactsListCsvRow> rows, List<string> removeDomains)
{
    var directory = Guid.NewGuid().ToString();
    var simpleFsDirectory = new SimpleFSDirectory(new DirectoryInfo(directory));

    if (!Directory.Exists(directory))
    {
        // Renamed from "standardAnalyzer": the instance is a WhitespaceAnalyzer.
        var whitespaceAnalyzer = new WhitespaceAnalyzer();
        var indexer = new IndexWriter(simpleFsDirectory, whitespaceAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

        var st = new Stopwatch();
        st.Start();

        var counter = 0;
        rows.ForEach(x =>
        {
            var document = new Document();
            // Split the address on "@" so the domain becomes its own whitespace token.
            document.Add(new Field("email", x.Email.Replace("@", " "), Field.Store.YES, Field.Index.ANALYZED));
            document.Add(new Field("collectionIndex", counter.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            indexer.AddDocument(document);
            counter++;
        });
        indexer.Commit();

        st.Stop();
        WriteToConsole("Index took: " + st.ElapsedMilliseconds / 1000);
    }

    var reader = IndexReader.Open(simpleFsDirectory, true);
    var searcher = new IndexSearcher(reader);

    var st2 = new Stopwatch();
    st2.Start();

    removeDomains.ForEach(x =>
    {
        var ids = Search(searcher, "email", x);
        if (!ids.Any())
            Console.WriteLine("for " + x + " there were no ids found.");
        ids.ToList().ForEach(p => { rows[p].Mark = true; });
    });

    var oneRawContactsListCsvRows = rows.AsParallel().Where(x => !x.Mark).ToList();

    st2.Stop();
    WriteToConsole("Removing took: " + st2.ElapsedMilliseconds / 1000);
    return oneRawContactsListCsvRows;
}
public void TestRepeatingTermsInMultBooleans()
{
    String content = "x y z a b c d e f g b c g";
    String ph1 = "\"a b c d\"";
    String ph2 = "\"b c g\"";
    String f1 = "f1";
    String f2 = "f2";
    String f1c = f1 + ":";
    String f2c = f2 + ":";
    String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2 + " OR " + f2c + ph2 + ")";

    Analyzer analyzer = new WhitespaceAnalyzer();
    QueryParser qp = new QueryParser(TEST_VERSION, f1, analyzer);
    Query query = qp.Parse(q);

    QueryScorer scorer = new QueryScorer(query, f1);
    scorer.IsExpandMultiTermQuery = false;

    Highlighter h = new Highlighter(this, scorer);
    h.GetBestFragment(analyzer, f1, content);

    Assert.IsTrue(numHighlights == 7, "Failed to find correct number of highlights " + numHighlights + " found");
}
public virtual void TestQueryStringEscaping()
{
    Analyzer a = new WhitespaceAnalyzer();

    AssertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c");
    AssertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c");
    AssertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
    AssertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c");

    AssertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c");
    AssertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c");
    AssertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
    AssertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c");

    AssertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*");
    AssertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*");
    AssertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*");
    AssertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*");

    AssertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c");
    AssertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c");
    AssertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c");
    AssertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c");

    AssertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~");
    AssertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~");
    AssertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~");
    AssertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~");

    AssertEscapedQueryEquals("[ a - TO a+ ]", null, "\\[ a \\- TO a\\+ \\]");
    AssertEscapedQueryEquals("[ a : TO a~ ]", null, "\\[ a \\: TO a\\~ \\]");
    AssertEscapedQueryEquals("[ a\\ TO a* ]", null, "\\[ a\\\\ TO a\\* \\]");

    // LUCENE-881
    AssertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|");
    AssertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
}