protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     // Wrap the base analyzer so "Version" and "Flag" are indexed as
     // single, untokenized keyword terms.
     var wrapper = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     wrapper.AddAnalyzer("Version", new KeywordAnalyzer());
     wrapper.AddAnalyzer("Flag", new KeywordAnalyzer());
     return wrapper;
 }
 protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     // "Path" matches case-insensitively as one token; "Key" matches verbatim.
     var wrapper = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     wrapper.AddAnalyzer("Path", new CaseInsensitiveKeywordAnalyzer());
     wrapper.AddAnalyzer("Key", new KeywordAnalyzer());
     return wrapper;
 }
Exemplo n.º 3
0
        void SetAnalyzerType(Type defaultType, IEnumerable<FieldDetails> fields)
        {
            // Fall back to StandardAnalyzer when no default type was supplied.
            var analyzerType = defaultType ?? typeof(StandardAnalyzer);

            // Instantiate the default analyzer; reject non-Analyzer types.
            _defaultAnalyzer = Activator.CreateInstance(analyzerType) as Analyzer;
            if (_defaultAnalyzer == null) {
                throw new ArgumentException("defaultType is not an Analyzer type");
            }

            // Layer per-field analyzer overrides on top of the default.
            var wrapper = new PerFieldAnalyzerWrapper(_defaultAnalyzer);
            if (fields != null) {
                foreach (var field in fields) {
                    if (field.Field.Analyzer == null) {
                        continue;
                    }
                    var fieldAnalyzer = CreateAnalyzerFromType(field.Field.Analyzer);
                    if (fieldAnalyzer != null) {
                        wrapper.AddAnalyzer(field.Name, fieldAnalyzer);
                    }
                }
            }
            Analyzer = wrapper;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Configures the indexing service's analyzer: identifier fields get a
        /// KeywordAnalyzer, list-like fields a WhitespaceAnalyzer, and free-text
        /// fields the supplied <paramref name="textAnalyzer"/>. The wrapper is
        /// injected into the service's private static "_analyzer" field.
        /// </summary>
        private void InitSearchServiceAnalyzer(Type indexingServiceSettingsType, Analyzer defaultAnalyzer, Analyzer textAnalyzer)
        {
            var perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

            // Exact-match identifiers: keep each value as a single keyword token.
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_ID", new KeywordAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_CULTURE", new KeywordAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_REFERENCEID", new KeywordAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_AUTHORSTORAGE", new KeywordAnalyzer());

            // Space-separated multi-value fields: split on whitespace only.
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_CATEGORIES", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_ACL", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_VIRTUALPATH", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_TYPE", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_CREATED", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_MODIFIED", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_PUBLICATIONEND", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_PUBLICATIONSTART", new WhitespaceAnalyzer());
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_ITEMSTATUS", new WhitespaceAnalyzer());

            // Free-text fields: use the configurable text analyzer.
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_TITLE", textAnalyzer);
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_DISPLAYTEXT", textAnalyzer);
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_AUTHORS", textAnalyzer);
            perFieldAnalyzerWrapper.AddAnalyzer("EPISERVER_SEARCH_DEFAULT", textAnalyzer);

            // Type.GetField returns null when the member is absent; fail with a
            // clear message instead of a NullReferenceException (e.g. after a
            // library upgrade renames the private field).
            var analyzerField = indexingServiceSettingsType.GetField("_analyzer", BindingFlags.Static | BindingFlags.NonPublic);
            if (analyzerField == null)
            {
                throw new InvalidOperationException(
                    "Type '" + indexingServiceSettingsType.FullName + "' has no non-public static '_analyzer' field.");
            }
            analyzerField.SetValue(null, perFieldAnalyzerWrapper);
        }
Exemplo n.º 5
0
		/// <summary>
		/// Detects untokenized fields in the query and registers them on the
		/// analyzer as not-analyzed, stripping the "[[ ]]" markers from the text.
		/// </summary>
		private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, Analyzer keywordAnlyzer)
		{
			var matches = untokenizedQuery.Matches(query);
			if (matches.Count < 1)
			{
				return query;
			}

			var rewritten = new StringBuilder(query);

			// Walk matches back-to-front so earlier match indexes remain valid
			// while characters are removed from the buffer.
			for (var i = matches.Count - 1; i >= 0; i--)
			{
				Match match = matches[i];

				// Route this field through the keyword analyzer so its value is
				// indexed verbatim (KeywordAnalyzer does not tokenize).
				analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnlyzer);

				Group term = match.Groups[2];

				// Strip the enclosing "[[" / "]]" markers, trailing pair first.
				rewritten.Remove(term.Index + term.Length - 2, 2);
				rewritten.Remove(term.Index, 2);
			}

			return rewritten.ToString();
		}
Exemplo n.º 6
0
 protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     // Build a per-field wrapper, remember it in the analyzer field, and
     // map the document's Id/Key properties to keyword-style analyzers.
     var wrapper = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     wrapper.AddAnalyzer<SampleDocument>(t => t.Id, new KeywordAnalyzer());
     wrapper.AddAnalyzer<SampleDocument>(t => t.Key, new CaseInsensitiveKeywordAnalyzer());
     analyzer = wrapper;
     return analyzer;
 }
Exemplo n.º 7
0
        private void btnSearch_Click(object sender, EventArgs e)
        {
            lstResults.Items.Clear();
            searcher = new IndexSearcher(new RAMDirectory(_indexTarget));

            // Arabic verse text needs diacritic-aware analysis; other fields
            // use the standard analyzer.
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer());
            analyzer.AddAnalyzer("ayat_arabic", new DiacriticAnalyzer(FilterData.stopWords));
            //MyQueryParser parser = new MyQueryParser(new string[] { "ayat_desc", "ayat_urdu", "ayat_arabic" }, analyzer);
            //parser.SetDefaultOperator(QueryParser.Operator.AND);
            //Query q = parser.Parse(txtSearch.Text);
            //Query q = new TermQuery(new Term("ayatno", NumberTools.LongToString(long.Parse(txtSearch.Text))));

            long l1 = 1; long l2 = 500; long l3 = 1; long l4 = 1;
            //RangeQuery rq = new RangeQuery(new Term("ayatno", l1.ToString("00000")), new Term("ayatno", l2.ToString("00000")), true);
            //q.Add(rq, true, false);

            // Require both the surah id and the verse number to match.
            BooleanQuery query = new BooleanQuery();
            query.Add(new TermQuery(new Term("sid", l3.ToString("00000"))), true, false);
            query.Add(new TermQuery(new Term("ayatno", l4.ToString("00000"))), true, false);
            MessageBox.Show(query.ToString());

            Sort sort = new Sort(new string[] { "pid", "sid", "ayatno" });
            hits = searcher.Search(query, sort);
            lblHits.Text = hits.Length() + " hit(s).";
            Application.DoEvents();

            // Render each hit as "Para/Surat/Verse" in the results list.
            for (int idx = 0; idx < hits.Length(); idx++)
            {
                StringBuilder line = new StringBuilder();
                line.Append("Para: ").Append(hits.Doc(idx).Get("pid"));
                line.Append(", Surat: ").Append(hits.Doc(idx).Get("sid"));
                line.Append(", Verse: ").Append(hits.Doc(idx).Get("ayatno"));
                lstResults.Items.Add(line.ToString());
            }
        }
            public InstancePerFieldAnalyzerWrapper()
            {
                // Synonym-aware analyzer by default; "cota" is matched verbatim.
                var wrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new GISAServer.Search.Synonyms.SynonymAnalyzer(new GISAServer.Search.Synonyms.XmlSynonymEngine()));
                wrapper.AddAnalyzer("cota", new Lucene.Net.Analysis.KeywordAnalyzer());
                instancePerFieldAnalyzerWrapper = wrapper;
            }
Exemplo n.º 9
0
		public static Query BuildQuery(string query, IndexQuery indexQuery, PerFieldAnalyzerWrapper analyzer)
		{
			// Keep the incoming text so parse failures can report whether the
			// pre-processing steps altered the query.
			var unmodified = query;
			Analyzer keywordAnalyzer = new KeywordAnalyzer();
			try
			{
				var queryParser = new RangeQueryParser(Version.LUCENE_29, indexQuery.DefaultField ?? string.Empty, analyzer)
				{
					DefaultOperator = indexQuery.DefaultOperator == QueryOperator.Or
										? QueryParser.Operator.OR
										: QueryParser.Operator.AND,
					AllowLeadingWildcard = true
				};
				// Rewrite untokenized, search and date terms before parsing.
				query = PreProcessUntokenizedTerms(query, queryParser);
				query = PreProcessSearchTerms(query);
				query = PreProcessDateTerms(query, queryParser);
				return HandleMethods(queryParser.Parse(query));
			}
			catch (ParseException pe)
			{
				if (unmodified == query)
					throw new ParseException("Could not parse: '" + query + "'", pe);
				throw new ParseException("Could not parse modified query: '" + query + "' original was: '" + unmodified + "'", pe);
			}
			finally
			{
				keywordAnalyzer.Close();
			}
		}
Exemplo n.º 10
0
 public InstancePerFieldAnalyzerWrapper()
 {
     // Synonym-aware default; "cota" and "codigo" are exact-match keywords.
     var wrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new Synonyms.SynonymAnalyzer(new Synonyms.XmlSynonymEngine()));
     wrapper.AddAnalyzer("cota", new Lucene.Net.Analysis.KeywordAnalyzer());
     wrapper.AddAnalyzer("codigo", new Lucene.Net.Analysis.KeywordAnalyzer());
     instancePerFieldAnalyzerWrapper = wrapper;
 }
        /// <summary>
        /// Downloads all messages from a POP3 mailbox and indexes subject, from,
        /// to and a plain-text rendering of the body into the Lucene index at
        /// GlobalData.EmailIndexPath.
        /// </summary>
        public void StartEmailIndexing()
        {
            if (!Directory.Exists(GlobalData.EmailIndexPath))
                Directory.CreateDirectory(GlobalData.EmailIndexPath);

            IndexWriter index;

            // Keyword analyzer by default; only "body" is tokenized (stop words removed).
            PerFieldAnalyzerWrapper pfaw = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
            pfaw.AddAnalyzer("body", new StopAnalyzer());
            try
            {
                // Try appending to an existing index first.
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, false);
            }
            catch
            {
                // Appending failed (presumably no existing index) — create a fresh one.
                // NOTE(review): this catch-all also hides unrelated IndexWriter errors.
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, true);
            }

            const string PopServer = "pop.google.in";
            const int PopPort = 995;
            const string User = "******";
            const string Pass = "******";
            using (Pop3Client client = new Pop3Client(PopServer, PopPort, true, User, Pass))
            {
                client.Trace += new Action<string>(Console.WriteLine);
                // connects to the POP3 server, executes POP3 USER and PASS
                client.Authenticate();
                client.Stat();
                foreach (Pop3ListItem item in client.List())
                {
                    Document doc = new Document();
                    MailMessageEx message = client.RetrMailMessageEx(item);

                    // Lowercase all stored values so lookups are case-insensitive.
                    doc.Add(new Field("subject", message.Subject.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("from", message.From.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("to", message.To.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    //doc.Add(new Field("date", message.DeliveryDate.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    // Strip HTML down to plain text before indexing the body.
                    string code = message.Body;
                    code = Regex.Replace(code, @"<\s*head\s*>(.|\n|\r)*?<\s*/\s*head\s*>", " ", RegexOptions.Compiled); //replace <head> section with single whitespace
                    code = Regex.Replace(code, @"<\s*script (.|\n|\r)*?<\s*/\s*script\s*>", " ", RegexOptions.Compiled);//replace remaining <script> tags from body with single whitespace
                    code = Regex.Replace(code, @"<!--(.|\n|\r)*?-->", " ", RegexOptions.Compiled);                      //replace HTML comments
                    code = Regex.Replace(code, @"<(.|\n|\r)*?>", " ", RegexOptions.Compiled);                           //replace all tags with single whitespace
                    code = Regex.Replace(code, @"&.*?;", " ", RegexOptions.Compiled);                                   //replace entities such as &gt; etc.
                    code = Regex.Replace(code, @"\s+", " ", RegexOptions.Compiled);                                     //replace multiple whitespace characters by single whitespace
                    code = Regex.Replace(code, @"\ufffd", " ", RegexOptions.Compiled);                                  //replace Unicode replacement characters

                    doc.Add(new Field("body", code.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    index.AddDocument(doc);
                }
                client.Noop();
                client.Rset();
                client.Quit();
                index.Optimize();
                index.Close();
            }
        }
Exemplo n.º 12
0
 public void TestPerFieldAnalyzer()
 {
     // "partnum" is analyzed as a keyword, so "Q36" survives parsing intact,
     // while the default SimpleAnalyzer lowercases "SPACE".
     var wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
     wrapper.AddAnalyzer("partnum", new KeywordAnalyzer());

     var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "description", wrapper);
     var query = parser.Parse("partnum:Q36 AND SPACE");

     Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
     Assert.AreEqual(1, searcher.Search(query, searcher.MaxDoc()).ScoreDocs.Length, "docs found!!!");
 }
Exemplo n.º 13
0
        }//contructor which is used to initialize the objects

        //create index
        //create index
        public void CreateIndex(string indexPath)
        {
            // Open the index directory and prepare both analyzers: standard for
            // general text plus a keyword analyzer for verbatim fields.
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
            analyzerstandard = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            analyzerkeyword = new Lucene.Net.Analysis.KeywordAnalyzer();

            analysor = new PerFieldAnalyzerWrapper(analyzerstandard);

            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analysor, true, maxFieldLength);
            writer.SetSimilarity(customSimilarity); // for task 6
        }
		public virtual void TestPerFieldAnalyzer()
		{
			// KeywordAnalyzer keeps "Q36" intact; SimpleAnalyzer lowercases "SPACE".
			var analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
			analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

			var parser = new Lucene.Net.QueryParsers.QueryParser("description", analyzer);
			Query query = parser.Parse("partnum:Q36 AND SPACE");

			Hits hits = searcher.Search(query);
			Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
			Assert.AreEqual(1, hits.Length(), "doc found!");
		}
Exemplo n.º 15
0
		public virtual void TestPerFieldAnalyzer()
		{
			// Exact-match analyzer for "partnum"; everything else is lowercased.
			var analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
			analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

			var parser = new QueryParser("description", analyzer);
			Query query = parser.Parse("partnum:Q36 AND SPACE");

			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
			Assert.AreEqual(1, hits.Length, "doc found!");
		}
Exemplo n.º 16
0
        public virtual void TestPerFieldAnalyzer()
        {
            // "partnum" keeps its exact token; the default SimpleAnalyzer lowercases.
            var analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
            analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

            var parser = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer);
            Query query = parser.Parse("partnum:Q36 AND SPACE");

            ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
            Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
            Assert.AreEqual(1, hits.Length, "doc found!");
        }
		public virtual void TestPerField()
		{
			const string text = "Qwerty";

			// "special" is routed to SimpleAnalyzer (lowercasing); every other
			// field uses the case-preserving WhitespaceAnalyzer.
			var analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
			analyzer.AddAnalyzer("special", new SimpleAnalyzer());

			var stream = analyzer.TokenStream("field", new System.IO.StringReader(text));
			var token = stream.Next();
			Assert.AreEqual("Qwerty", token.TermText(), "WhitespaceAnalyzer does not lowercase");

			stream = analyzer.TokenStream("special", new System.IO.StringReader(text));
			token = stream.Next();
			Assert.AreEqual("qwerty", token.TermText(), "SimpleAnalyzer lowercases");
		}
 public EDSIndexer(string desIndexPath, Analyzer analyser, bool overwriteIndexDir)
 {
     keywordAnalyzer = analyser;

     // Content is searched as free text, so it gets the stop analyzer;
     // every other field uses the caller-supplied analyzer.
     pfaw = new PerFieldAnalyzerWrapper(analyser);
     pfaw.AddAnalyzer("content", stopAnalyzer);

     try
     {
         index = new IndexWriter(desIndexPath, pfaw, overwriteIndexDir);
     }
     catch
     {
         // Opening with the requested mode failed (e.g. no existing index) —
         // retry by creating a fresh index.
         index = new IndexWriter(desIndexPath, pfaw, true);
     }
 }
Exemplo n.º 19
0
        public void Code()
        {
            // One analyzer of each basic flavour, for demonstration purposes.
            Analyzer keywordAnalyzer    = new KeywordAnalyzer();
            Analyzer simpleAnalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            Analyzer stopAnalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Analyzer standardAnalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Name fields are exact-match; all other fields use the standard analyzer.
            var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
            perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
            perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

            IndexWriter writer = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            IndexReader reader = writer.GetReader();
            IndexSearcher localSearcher = new IndexSearcher(reader);

            //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);

            // Multi-field search with per-field boosts favouring body text.
            string[] fields = new[] { "text", "title", "author" };
            var boosts = new Dictionary<string, float>
            {
                { "text", 2.0f },
                { "title", 1.5f }
            };
            QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, standardAnalyzer, boosts);
            Query query = parser.Parse("lucene is great");

            TopDocs hits = localSearcher.Search(query, 1000);

            // Materialize hits lazily into Book projections.
            IEnumerable<Document> docs = hits.ScoreDocs.Select(hit => localSearcher.Doc(hit.Doc));
            var books = docs.Select(doc => new Book()
            {
                Text   = doc.Get("text"),
                Title  = doc.Get("title"),
                Author = doc.Get("author"),
                Length = Int32.Parse(doc.Get("length"))
            });

            writer.Optimize();
            writer.Commit();
            writer.DeleteAll();
        }
Exemplo n.º 20
0
		public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
		{
			var keywordAnalyzer = new KeywordAnalyzer();
			try
			{
				// Mark untokenized fields so their terms pass through verbatim.
				query = PreProcessUntokenizedTerms(analyzer, query, keywordAnalyzer);

				var parser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
				parser.SetAllowLeadingWildcard(true);
				return parser.Parse(query);
			}
			finally
			{
				// Release the keyword analyzer regardless of parse outcome.
				keywordAnalyzer.Close();
			}
		}
Exemplo n.º 21
0
		public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
		{
			// The keyword analyzer is created lazily inside
			// PreProcessUntokenizedTerms, only when untokenized terms exist.
			Analyzer keywordAnalyzer = null;
			try
			{
				query = PreProcessUntokenizedTerms(analyzer, query, ref keywordAnalyzer);

				var parser = new RangeQueryParser(Version.LUCENE_29, string.Empty, analyzer);
				parser.SetAllowLeadingWildcard(true); // not the recommended approach, should rather use ReverseFilter
				return parser.Parse(query);
			}
			finally
			{
				if (keywordAnalyzer != null)
					keywordAnalyzer.Close();
			}
		}
Exemplo n.º 22
0
        public virtual void TestPerField()
        {
            const string text = "Qwerty";

            // Whitespace analyzer preserves case; "special" lowercases via SimpleAnalyzer.
            var analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
            analyzer.AddAnalyzer("special", new SimpleAnalyzer());

            var stream = analyzer.TokenStream("Field", new System.IO.StringReader(text));
            var token = stream.Next();
            Assert.AreEqual("Qwerty", token.TermText(), "WhitespaceAnalyzer does not lowercase");

            stream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            token = stream.Next();
            Assert.AreEqual("qwerty", token.TermText(), "SimpleAnalyzer lowercases");
        }
Exemplo n.º 23
0
 public LuceneApplication()
 {
     // Reset all collaborators; they are created later (e.g. in CreateIndex).
     luceneIndexDirectory = null;
     analyzerstandard = null;
     analyzerkeyword = null;
     writer = null;
     analysor = null;
     searcher = null;
     parser = null;

     customSimilarity = new CustomSimilarity(); // for task 6
     tokenCount = new Dictionary<string, int>();
     numofdoc = 0;
     numofrelevant = 0;
     option = new List<string>();
     infneed = new Dictionary<string, string>();
 } // constructor used to initialize the objects
		public virtual void TestPerField()
		{
			const string text = "Qwerty";

			// Default analyzer preserves case; the "special" field lowercases.
			var analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
			analyzer.AddAnalyzer("special", new SimpleAnalyzer());

			var stream = analyzer.TokenStream("field", new System.IO.StringReader(text));
			var termAttribute = stream.GetAttribute<ITermAttribute>();
			Assert.IsTrue(stream.IncrementToken());
			Assert.AreEqual("Qwerty", termAttribute.Term, "WhitespaceAnalyzer does not lowercase");

			stream = analyzer.TokenStream("special", new System.IO.StringReader(text));
			termAttribute = stream.GetAttribute<ITermAttribute>();
			Assert.IsTrue(stream.IncrementToken());
			Assert.AreEqual("qwerty", termAttribute.Term, "SimpleAnalyzer lowercases");
		}
Exemplo n.º 25
0
		public static Query BuildQuery(string query, string defaultField, PerFieldAnalyzerWrapper analyzer)
		{
			Analyzer keywordAnalyzer = new KeywordAnalyzer();
			try
			{
				var parser = new RangeQueryParser(Version.LUCENE_29, defaultField ?? string.Empty, analyzer);

				// Rewrite untokenized, search and date terms before parsing.
				query = PreProcessUntokenizedTerms(query, parser);
				query = PreProcessSearchTerms(query);
				query = PreProcessDateTerms(query, parser);

				parser.SetAllowLeadingWildcard(true); // not the recommended approach, should rather use ReverseFilter
				return parser.Parse(query);
			}
			finally
			{
				// Release the keyword analyzer regardless of parse outcome.
				keywordAnalyzer.Close();
			}
		}
Exemplo n.º 26
0
		public void CompareHtmlTokenization()
		{
			const string str = @"test1 <a href=""foo"">testlink</a> test2 test3";

			// Index the same HTML snippet twice: "Simple" uses the plain HTML
			// analyzer, "Morph" the morphological one.
			var wrapper = new PerFieldAnalyzerWrapper(new HtmlStandardAnalyzer());
			wrapper.AddAnalyzer("Morph", new HtmlMorphAnalyzer(HspellDict));

			Directory indexDirectory = new RAMDirectory();
			var writer = new IndexWriter(indexDirectory, wrapper, true, IndexWriter.MaxFieldLength.UNLIMITED);

			var doc = new Document();
			doc.Add(new Field("Simple", str, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			doc.Add(new Field("Morph", str, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			writer.AddDocument(doc);
			writer.Close();

			CompareTermData(indexDirectory, str);
		}
Exemplo n.º 27
0
        public SearcherContext(Directory dir, Analyzer defaultAnalyzer,
                        TimeSpan targetMinStale, TimeSpan targetMaxStale,
                        TimeSpan commitInterval, TimeSpan optimizeInterval)
        {
            // Wrap the default analyzer so per-field overrides can be added later.
            Analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
            _writer = new IndexWriter(dir, Analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            // Near-real-time plumbing: the reopener keeps searchers within the
            // staleness bounds; the committer flushes/optimizes on a schedule.
            Manager = new NrtManager(_writer);
            _reopener = new NrtManagerReopener(Manager, targetMaxStale, targetMinStale);
            _committer = new Committer(_writer, commitInterval, optimizeInterval);

            _threads.Add(new Thread(_reopener.Start));
            _threads.Add(new Thread(_committer.Start));

            foreach (var worker in _threads)
            {
                worker.Start();
            }
        }
        public virtual void TestPerField()
        {
            const string text = "Qwerty";

            // WhitespaceAnalyzer keeps case; "special" lowercases via SimpleAnalyzer.
            var analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
            analyzer.AddAnalyzer("special", new SimpleAnalyzer());

            var stream = analyzer.TokenStream("field", new System.IO.StringReader(text));
            var termAttribute = stream.GetAttribute<ITermAttribute>();
            Assert.IsTrue(stream.IncrementToken());
            Assert.AreEqual("Qwerty", termAttribute.Term, "WhitespaceAnalyzer does not lowercase");

            stream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            termAttribute = stream.GetAttribute<ITermAttribute>();
            Assert.IsTrue(stream.IncrementToken());
            Assert.AreEqual("qwerty", termAttribute.Term, "SimpleAnalyzer lowercases");
        }
        protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken)
        {
            // Package ids are matched as single identifier tokens.
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
            analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer());

            int processed = 0;

            using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (JObject item in items)
                {
                    processed++;

                    string id = item["nuget:id"].ToString();
                    string version = item["nuget:version"].ToString();

                    // Remove any existing document for this id/version pair
                    // before re-adding it (upsert semantics).
                    BooleanQuery deleteQuery = new BooleanQuery();
                    deleteQuery.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST));
                    deleteQuery.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST));
                    writer.DeleteDocuments(deleteQuery);

                    Document doc = new Document();
                    doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    writer.AddDocument(doc);
                }

                // Tag the commit with a trace id so it can be correlated in logs.
                string trace = Guid.NewGuid().ToString();

                writer.Commit(new Dictionary<string, string> 
                { 
                    { "commitTimeStamp", commitTimeStamp.ToString("O") },
                    { "trace", trace }
                });

                Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}",
                    processed, writer.NumDocs(), commitTimeStamp.ToString("O"), trace);
            }

            return Task.FromResult(true);
        }
        /// <summary>
        /// Opens the index read-only and runs <paramref name="keyword"/> as a
        /// multi-field query over FileName/Author/Content, collecting the top
        /// NumberHits results.
        /// </summary>
        public void Search(string keyword)
        {
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                reader = IndexReader.Open(FSDirectory.Open(new DirectoryInfo(indexDirectory)), true);
                searcher = new IndexSearcher(reader);

                // Build the query. NOTE(review): every field is mapped to the same
                // analyzer, so the per-field wrapper is currently redundant; it is
                // kept so individual fields can get dedicated analyzers later.
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(analyzer);
                wrapper.AddAnalyzer("FileName", analyzer);
                wrapper.AddAnalyzer("Author", analyzer);
                wrapper.AddAnalyzer("Content", analyzer);
                string[] fields = { "FileName", "Author", "Content" };

                QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, wrapper);
                Query query = parser.Parse(keyword);
                TopScoreDocCollector collector = TopScoreDocCollector.Create(NumberHits, true);

                searcher.Search(query, collector);
                var hits = collector.TopDocs().ScoreDocs;

                int numTotalHits = collector.TotalHits;

                // Iterate the collected hits. Use the array's Length property
                // rather than LINQ's Count() extension (no enumerator allocation).
                for (int i = 0; i < hits.Length; i++)
                {
                    var hit = hits[i];
                    Document doc = searcher.Doc(hit.Doc);
                    Field fileNameField = doc.GetField("FileName");
                    Field authorField = doc.GetField("Author");
                    Field pathField = doc.GetField("Path");
                    // TODO(review): the fields above are fetched but never used —
                    // presumably results should be surfaced to the caller.
                }
            }
            finally
            {
                // Dispose the searcher before the reader it wraps.
                if (searcher != null)
                    searcher.Dispose();

                if (reader != null)
                    reader.Dispose();
            }
        }
Exemplo n.º 31
0
        /// <summary>
        /// Returns the shared per-field analyzer wrapper, building it on first use.
        /// NOTE(review): the lazy initialization is not thread-safe — confirm all
        /// callers run on a single thread.
        /// </summary>
        public static Lucene.Net.Analysis.Analyzer GetAnalyzer()
        {
            //return new StandardAnalyzer(new string[] {"的", "之" });

            if (analyzerWrapper == null)
            {
                analyzerWrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new StandardAnalyzer(MyLucene.GetLuceneVersion()));
                // Card-name/text fields use the custom analyzer with stop words.
                analyzerWrapper.AddAnalyzer("name", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("japName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("oldName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("shortName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("effect", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("adjust", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("tribe", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("cheatcode", new KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("aliasList", new PunctuationAnalyzer());
                analyzerWrapper.AddAnalyzer("cardCamp", new Lucene.Net.Analysis.KeywordAnalyzer());

                // Romanized / latin-script fields get simpler tokenizers.
                analyzerWrapper.AddAnalyzer("enName", new LetterDigitAnalyzer());
                analyzerWrapper.AddAnalyzer("pyname", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyshortName", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyoldName", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("effectType", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("package", new PunctuationAnalyzer());

                // Because of the advanced-search feature, the Chinese field names
                // also need their own analyzers.

                analyzerWrapper.AddAnalyzer("中文名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("日文名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("旧卡名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("曾用名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("简称", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("俗称", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("缩写", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("效果", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("效果说明", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("调整", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("种族", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("卡包", new PunctuationAnalyzer());
            }

            return(analyzerWrapper);
        }
Exemplo n.º 32
0
		/// <summary>
		/// Post-processes a parsed query: terms/wildcards on "@"-prefixed fields
		/// are dispatched to method handlers, and boolean clauses that yield
		/// IRavenLuceneMethodQuery instances for the same field are merged.
		/// </summary>
		private static Query HandleMethods(Query query, PerFieldAnalyzerWrapper analyzer)
		{
			// "@"-prefixed fields denote method invocations rather than plain terms.
			var termQuery = query as TermQuery;
			if (termQuery != null && termQuery.Term.Field.StartsWith("@"))
			{
				return HandleMethodsForQueryAndTerm(query, termQuery.Term, analyzer);
			}
			var wildcardQuery = query as WildcardQuery;
			if (wildcardQuery != null)
			{
				return HandleMethodsForQueryAndTerm(query, wildcardQuery.Term, analyzer);
			}
			var booleanQuery = query as BooleanQuery;
			if (booleanQuery != null)
			{
				// Recurse into every clause first.
				foreach (var c in booleanQuery.Clauses)
				{
					c.Query = HandleMethods(c.Query, analyzer);
				}
				if (booleanQuery.Clauses.Count == 0)
					return booleanQuery;
			
				// Group method queries by field; each group collapses to one clause.
				var mergeGroups = booleanQuery.Clauses.Select(x=>x.Query).OfType<IRavenLuceneMethodQuery>().GroupBy(x => x.Field).ToArray();
				if (mergeGroups.Length == 0)
					return booleanQuery;

				foreach (var mergeGroup in mergeGroups)
				{
					var clauses = mergeGroup.ToArray();
					var first = clauses[0];
					// Drop all but the first clause of the group...
					foreach (var mergedClause in clauses.Skip(1))
					{
						booleanQuery.Clauses.RemoveAll(x => ReferenceEquals(x.Query, mergedClause));
					}
					// ...then fold the dropped clauses into the surviving one.
					var ravenLuceneMethodQuery = clauses.Skip(1).Aggregate(first, (methodQuery, clause) => methodQuery.Merge(clause));
					booleanQuery.Clauses.First(x => ReferenceEquals(x.Query, first)).Query = (Query)ravenLuceneMethodQuery;
				}
				// A single remaining clause can be unwrapped from the boolean query.
				if (booleanQuery.Clauses.Count == 1)
					return booleanQuery.Clauses[0].Query;
				return booleanQuery;
			}
			return query;
		}
Exemplo n.º 33
0
		/// <summary>
		/// Detects untokenized fields and sets as NotAnalyzed in analyzer
		/// </summary>
		/// <remarks>
		/// For each match of the untokenizedQuery regex: the field name (group 1)
		/// is registered with a KeywordAnalyzer so its value is not tokenized, and
		/// the term text in the query string is rewritten in place — the enclosing
		/// "[[" / "]]" markers are removed and the value is wrapped in quotes
		/// unless quotes are already present.
		/// NOTE(review): assumes group 2 spans the value INCLUDING its "[[" "]]"
		/// wrapper (see the Remove calls below) — confirm against the
		/// untokenizedQuery regex definition elsewhere in this file.
		/// </remarks>
		/// <param name="analyzer">Per-field analyzer that receives the keyword overrides.</param>
		/// <param name="query">Raw query text.</param>
		/// <param name="keywordAnalyzer">Receives the created KeywordAnalyzer so the caller can close it later.</param>
		/// <returns>The rewritten query string, or the original when nothing matched.</returns>
		private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, ref Analyzer keywordAnalyzer)
		{
			var untokenizedMatches = untokenizedQuery.Matches(query);
			if (untokenizedMatches.Count < 1)
				return query;

			var sb = new StringBuilder(query);

			// Initialize a KeywordAnalyzer
			// KeywordAnalyzer will not tokenize the values
			keywordAnalyzer = new KeywordAnalyzer();

			// process in reverse order to leverage match string indexes
			for (var i = untokenizedMatches.Count; i > 0; i--)
			{
				var match = untokenizedMatches[i - 1];

				// specify that term for this field should not be tokenized
				analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnalyzer);

				var term = match.Groups[2];

				// introduce " " around the term
				var startIndex = term.Index;
				var length = term.Length - 2;
				// closing quote goes just before the trailing "]]"
				if (sb[startIndex + length - 1] != '"')
				{
					sb.Insert(startIndex + length, '"');
					length += 1;
				}
				// opening quote goes just after the leading "[["
				if (sb[startIndex + 2] != '"')
				{
					sb.Insert(startIndex + 2, '"');
					length += 1;
				}
				// remove enclosing "[[" "]]" from term value (again in reverse order)
				sb.Remove(startIndex + length, 2);
				sb.Remove(startIndex, 2);
			}

			return sb.ToString();
		}
Exemplo n.º 34
0
        /// <summary>
        /// Returns the shared per-field analyzer for the card index, building and
        /// caching it on first use.
        /// </summary>
        public static Lucene.Net.Analysis.Analyzer GetAnalyzer()
        {
            if (analyzerWrapper == null)
            {
                // Anything not registered below falls back to the standard analyzer.
                var wrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new StandardAnalyzer());

                // Name fields use the custom analyzer with the primary stop-word set.
                wrapper.AddAnalyzer("name", new MyAnalyzer(stopWords));
                wrapper.AddAnalyzer("japName", new MyAnalyzer(stopWords));
                wrapper.AddAnalyzer("oldName", new MyAnalyzer(stopWords));
                wrapper.AddAnalyzer("shortName", new MyAnalyzer(stopWords));

                // Effect/adjust text uses the secondary stop-word set.
                wrapper.AddAnalyzer("effect", new MyAnalyzer(stopWords2));
                wrapper.AddAnalyzer("adjust", new MyAnalyzer(stopWords2));

                // Exact-match fields are indexed as single keywords.
                wrapper.AddAnalyzer("tribe", new Lucene.Net.Analysis.KeywordAnalyzer());
                wrapper.AddAnalyzer("cheatcode", new Lucene.Net.Analysis.KeywordAnalyzer());
                wrapper.AddAnalyzer("cardCamp", new Lucene.Net.Analysis.KeywordAnalyzer());

                // English / pinyin fields.
                wrapper.AddAnalyzer("enName", new LetterDigitAnalyzer());
                wrapper.AddAnalyzer("pyname", new SimpleAnalyzer());
                wrapper.AddAnalyzer("pyshortName", new SimpleAnalyzer());
                wrapper.AddAnalyzer("pyoldName", new SimpleAnalyzer());
                wrapper.AddAnalyzer("effectType", new SimpleAnalyzer());

                // Chinese field names are already translated to their English
                // equivalents before searching, so they need no analyzers here.

                analyzerWrapper = wrapper;
            }

            return analyzerWrapper;
        }
 /// <summary>
 /// Builds the Sando per-field analyzer: English snowball stemming with word
 /// splitting by default, keyword (untokenized) analysis for identifier-like
 /// and exact-value fields.
 /// </summary>
 public static Analyzer GetAnalyzer()
 {
     var wrapper = new PerFieldAnalyzerWrapper(new SnowballAndWordSplittingAnalyzer("English"));

     // These fields hold identifiers or exact values, so index them untokenized.
     var keywordFields = new[]
     {
         SandoField.ClassId,
         SandoField.Source,
         SandoField.AccessLevel,
         SandoField.ProgramElementType,
         SandoField.DefinitionLineNumber,
         SandoField.FileExtension,
         SandoField.FullFilePath,
         SandoField.Id,
         SandoField.IsConstructor,
         SandoField.Modifiers,
         SandoField.DefinitionColumnNumber
     };
     foreach (var keywordField in keywordFields)
     {
         wrapper.AddAnalyzer(keywordField.ToString(), new KeywordAnalyzer());
     }
     return wrapper;
 }
Exemplo n.º 36
0
        /// <summary>
        /// Builds the master analyzer from the search engine's configured
        /// per-field analyzer types, with keyword analysis as the default.
        /// </summary>
        internal static Analyzer GetAnalyzer()
        {
            //  Field          Analyzer
            //  -----------------------------------------------------------------
            //  Name           Lucene.Net.Analysis.KeywordAnalyzer
            //  Path           Lucene.Net.Analysis.KeywordAnalyzer
            //  Keywords       Lucene.Net.Analysis.StopAnalyzer
            //  _Text          Lucene.Net.Analysis.Standard.StandardAnalyzer
            //  -----------------------------------------------------------------
            //  Default        Lucene.Net.Analysis.WhitespaceAnalyzer

            // Unregistered fields fall back to keyword (untokenized) analysis.
            var wrapper = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());

            // Register the analyzers configured by the search engine; each value
            // is an analyzer Type instantiated via its parameterless constructor.
            var configured = SenseNet.ContentRepository.Storage.StorageContext.Search.SearchEngine.GetAnalyzers();
            foreach (var entry in configured)
            {
                wrapper.AddAnalyzer(entry.Key, (Analyzer)Activator.CreateInstance(entry.Value));
            }

            // The full-text field gets standard tokenization.
            wrapper.AddAnalyzer(LucObject.FieldName.AllText, new StandardAnalyzer());
            return wrapper;
        }
Exemplo n.º 37
0
		/// <summary>
		/// Builds a Lucene query from the raw query string using a default
		/// (empty) IndexQuery for the options.
		/// </summary>
		public static Query BuildQuery(string query, PerFieldAnalyzerWrapper analyzer)
		{
			var defaultIndexQuery = new IndexQuery();
			return BuildQuery(query, defaultIndexQuery, analyzer);
		}
Exemplo n.º 38
0
        /// <summary>
        /// This method will construct a three folder structure inside <paramref name="targetDirectory"/> containing: Html, Index, and Source
        /// </summary>
        /// <param name="sourceDirectory">Directory containing ldoc files</param>
        /// <param name="targetDirectory">Output directory</param>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="targetDirectory"/> exists and is not empty.</exception>
        public void Build(string sourceDirectory, string targetDirectory)
        {
            // Refuse to write into a non-empty target directory.
            if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
                throw new InvalidOperationException("Target path is not empty.");

            this.OnStateChanged(State.Preparing);

            string htmlRoot = Path.Combine(targetDirectory, "Html");
            string indexRoot = Path.Combine(targetDirectory, "Index");
            string sourceRoot = Path.Combine(targetDirectory, "Source");

            DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
            DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
            DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);

            var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

            // copy all source files to output directory and add to bundle
            Bundle bundle = new Bundle(this.IgnoreVersionComponent);
            foreach (var sourceFile in sourceFiles)
            {
                string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
                File.Copy(sourceFile, targetFile);
                bundle.Add(XDocument.Load(targetFile));
            }

            // merge ldoc files
            this.OnStateChanged(State.Merging);
            AssetRedirectCollection assetRedirects;
            var mergedDoc = bundle.Merge(out assetRedirects);

            // generate output
            var templateData = new TemplateData
                                   {
                                       AssetRedirects = assetRedirects,
                                       Document = mergedDoc,
                                       IgnoredVersionComponent = this.IgnoreVersionComponent,
                                       TargetDirectory = htmlDir.FullName
                                   };

            this.OnStateChanged(State.Templating);
            TemplateOutput templateOutput = this.Template.Generate(templateData);


            this.OnStateChanged(State.Indexing);
            // one stop-word per line
            StringReader stopWordsReader = new StringReader(@"missing");

            // index output
            // The reader created above is disposed by the second using below.
            using (var directory = FSDirectory.Open(indexDir))
            using (stopWordsReader)
            {
                Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29, stopWordsReader);
                Analyzer titleAnalyzer = new TitleAnalyzer();
                // NOTE(review): non-generic IDictionary — presumably chosen to match
                // the PerFieldAnalyzerWrapper(Analyzer, IDictionary) overload of this
                // Lucene.Net version; confirm before changing the declared type.
                IDictionary fieldAnalyzers = new Dictionary<string, Analyzer>
                                                 {
                                                     { "title", titleAnalyzer } 
                                                 };
                
                PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);
                
                using (var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // NOTE(review): the body of this loop is entirely commented out,
                    // so no documents are actually added to the index — only an empty
                    // index is created, optimized, and committed. Confirm whether the
                    // indexing logic below should be restored.
                    foreach (WorkUnitResult result in templateOutput.Results)
                    {
                        //string absPath = Path.Combine(htmlDir.FullName, result.SavedAs);

                        //HtmlDocument htmlDoc = new HtmlDocument();
                        //htmlDoc.Load(absPath);

                        //string htmlTitle = string.Empty;
                        //var titleNode = htmlDoc.DocumentNode.SelectSingleNode("/html/head/title");

                        //if (titleNode != null)
                        //    htmlTitle = HtmlEntity.DeEntitize(titleNode.InnerText);
                        //        //.Replace('.', ' ')
                        //        //.Replace('<', ' ')
                        //        //.Replace('>', ' ')
                        //        //.Replace('[', ' ')
                        //        //.Replace(']', ' ')
                        //        //.Replace('(', ' ')
                        //        //.Replace(')', ' ');

                        //HtmlNode contentNode = htmlDoc.GetElementbyId("content");

                        //HtmlNode summaryNode = contentNode.SelectSingleNode(".//p[@class='summary']");

                        //string summary = string.Empty;

                        //if (summaryNode != null && summaryNode.SelectSingleNode("span[@class='error']") == null)
                        //    summary = HtmlEntity.DeEntitize(summaryNode.InnerText);

                        //string body = HtmlEntity.DeEntitize(contentNode.InnerText);

                        //var doc = new Document();

                        //doc.Add(new Field("uri", new Uri(result.SavedAs, UriKind.Relative).ToString(), Field.Store.YES, Field.Index.NO));
                        //doc.Add(new Field("aid", result.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));
                        //foreach (AssetIdentifier aid in result.Aliases)
                        //    doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));

                        //foreach (var section in result.Sections)
                        //{
                        //    doc.Add(new Field("section", section.AssetIdentifier,
                        //                      Field.Store.NO,
                        //                      Field.Index.NOT_ANALYZED));
                        //}

                        //doc.Add(new Field("title", htmlTitle, Field.Store.YES, Field.Index.ANALYZED));
                        //doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                        //doc.Add(new Field("content", body, Field.Store.YES, Field.Index.ANALYZED));
                        //TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());
                        //writer.AddDocument(doc);
                    }

                    writer.Optimize();
                    writer.Commit();
                    // NOTE(review): explicit Close() inside a using block means the
                    // writer is closed twice (Dispose follows); verify this Lucene.Net
                    // version tolerates the double close.
                    writer.Close();
                }
                // Same pattern: directory.Close() below is followed by its Dispose.
                analyzerWrapper.Close();
                analyzer.Close();
                directory.Close();
            }
            this.OnStateChanged(State.Finalizing);

            // Write a manifest describing the generated content and its creation time.
            var infoDoc = new XDocument(
                new XElement("content",
                             new XAttribute("created",
                                            XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                             templateOutput.Results.Select(ConvertToXml)));

            infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

            this.OnStateChanged(State.Idle);
        }
Exemplo n.º 39
0
			/// <summary>
			/// Closes the analyzer (when one is present), runs every pending
			/// dispose callback, and then empties the callback list.
			/// </summary>
			private static void DisposeAnalyzerAndFriends(List<Action> toDispose, PerFieldAnalyzerWrapper analyzer)
			{
				if (analyzer != null)
				{
					analyzer.Close();
				}
				toDispose.ForEach(dispose => dispose());
				toDispose.Clear();
			}