protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     var a = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     a.AddAnalyzer("Version", new KeywordAnalyzer());
     a.AddAnalyzer("Flag", new KeywordAnalyzer());
     return a;
 }
Example #2
 public InstancePerFieldAnalyzerWrapper()
 {
     var analyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new Synonyms.SynonymAnalyzer(new Synonyms.XmlSynonymEngine()));
     analyzer.AddAnalyzer("cota", new Lucene.Net.Analysis.KeywordAnalyzer());
     analyzer.AddAnalyzer("codigo", new Lucene.Net.Analysis.KeywordAnalyzer());
     instancePerFieldAnalyzerWrapper = analyzer;
 }
 protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     var analyzer = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     analyzer.AddAnalyzer("Path", new CaseInsensitiveKeywordAnalyzer());
     analyzer.AddAnalyzer("Key", new KeywordAnalyzer());
     return analyzer;
 }
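CaseInsensitiveKeywordAnalyzer, used above for the "Path" field, is not a stock Lucene.Net analyzer. A minimal sketch of such an analyzer (an assumption, not the original implementation), written against the Lucene.Net 3.x TokenStream API, would pair KeywordTokenizer with LowerCaseFilter so the whole field value becomes a single lower-cased token:

using System.IO;
using Lucene.Net.Analysis;

// Hypothetical sketch: emits the entire field value as one lower-cased token,
// giving exact-match behaviour without case sensitivity.
public class CaseInsensitiveKeywordAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        // KeywordTokenizer keeps the whole input as a single token;
        // LowerCaseFilter normalizes its case.
        return new LowerCaseFilter(new KeywordTokenizer(reader));
    }
}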
Example #4
 protected override Analyzer GetAnalyzer(Net.Util.Version version)
 {
     analyzer = new PerFieldAnalyzerWrapper(base.GetAnalyzer(version));
     analyzer.AddAnalyzer<SampleDocument>(t => t.Id, new KeywordAnalyzer());
     analyzer.AddAnalyzer<SampleDocument>(t => t.Key, new CaseInsensitiveKeywordAnalyzer());
     return analyzer;
 }
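The strongly-typed AddAnalyzer<SampleDocument>(t => t.Id, ...) overload used in Example #4 is not part of Lucene.Net's PerFieldAnalyzerWrapper; it presumably comes from an extension method in the surrounding project. A rough sketch of such an extension (the name and signature are assumptions) that maps a property expression onto the string-based overload:

using System;
using System.Linq.Expressions;
using Lucene.Net.Analysis;

// Hypothetical extension: resolves the property name from the lambda and
// delegates to the built-in string-based AddAnalyzer.
public static class PerFieldAnalyzerWrapperExtensions
{
    public static void AddAnalyzer<T>(this PerFieldAnalyzerWrapper wrapper,
                                      Expression<Func<T, object>> property,
                                      Analyzer analyzer)
    {
        // Unwrap the boxing conversion that value-type properties introduce.
        var member = property.Body as MemberExpression
                     ?? (MemberExpression)((UnaryExpression)property.Body).Operand;
        wrapper.AddAnalyzer(member.Member.Name, analyzer);
    }
}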
Example #6
        public void Code()
        {
            Analyzer _keywordanalyzer    = new KeywordAnalyzer();
            Analyzer _simpleanalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            Analyzer _stopanalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Analyzer _whitespaceanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Analyzer _standardanalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);


            var _perfieldanalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(_standardanalyzer);

            _perfieldanalyzer.AddAnalyzer("firstname", _keywordanalyzer);
            _perfieldanalyzer.AddAnalyzer("lastname", _keywordanalyzer);


            IndexWriter _writer = new IndexWriter(_directory, _perfieldanalyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            IndexReader _reader = _writer.GetReader();

            IndexSearcher _searcher = new IndexSearcher(_reader);


            //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);

            string[] fields = new[] { "text", "title", "author" };
            var      boosts = new Dictionary <string, float>();

            boosts.Add("text", 2.0f);
            boosts.Add("title", 1.5f);
            QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, _standardanalyzer, boosts);
            Query       query  = parser.Parse("lucene is great");


            TopDocs hits = _searcher.Search(query, 1000);

            IEnumerable <Document> docs = hits.ScoreDocs.Select(hit => _searcher.Doc(hit.Doc));

            var books = docs.Select(doc => new Book()
            {
                Text   = doc.Get("text"),
                Title  = doc.Get("title"),
                Author = doc.Get("author"),
                Length = Int32.Parse(doc.Get("length"))
            });


            _writer.Optimize();
            _writer.Commit();
            _writer.DeleteAll();
        }
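Example #6 reads documents back into a Book type and writes through a _directory field, neither of which is shown. A minimal sketch of the assumed Book class (property names taken from the snippet; _directory would be any Lucene.Net.Store.Directory, e.g. a RAMDirectory):

// Hypothetical POCO matching the stored fields read back in Example #6.
public class Book
{
    public string Text { get; set; }
    public string Title { get; set; }
    public string Author { get; set; }
    public int Length { get; set; }
}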
Example #7
        void SetAnalyzerType(Type defaultType, IEnumerable<FieldDetails> fields)
        {
            if (defaultType == null) {
                defaultType = typeof(StandardAnalyzer);
            }

            // create default analyzer
            _defaultAnalyzer = Activator.CreateInstance(defaultType) as Analyzer;
            if (_defaultAnalyzer == null) {
                throw new ArgumentException("defaultType is not an Analyzer type");
            }

            var wrapper = new PerFieldAnalyzerWrapper(_defaultAnalyzer);
            if (fields != null) {
                foreach (var fd in fields) {
                    if (fd.Field.Analyzer!=null) {
                        var fieldAnalyzer = CreateAnalyzerFromType(fd.Field.Analyzer);
                        if (fieldAnalyzer != null) {
                            wrapper.AddAnalyzer(fd.Name, fieldAnalyzer);
                        }
                    }

                }
            }
            Analyzer = wrapper;
        }
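Example #7 relies on a CreateAnalyzerFromType helper that is not shown. A plausible sketch, mirroring the Activator-based construction of the default analyzer above (an assumption, not the original code):

using System;
using Lucene.Net.Analysis;

// Hypothetical helper: instantiate an Analyzer from its Type, returning null
// (rather than throwing) when the type cannot be constructed as an Analyzer.
static Analyzer CreateAnalyzerFromType(Type analyzerType)
{
    if (analyzerType == null)
        return null;
    return Activator.CreateInstance(analyzerType) as Analyzer;
}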
Example #8
		/// <summary>
		/// Detects untokenized fields and marks them as NotAnalyzed in the analyzer
		/// </summary>
		private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, Analyzer keywordAnlyzer)
		{
			var untokenizedMatches = untokenizedQuery.Matches(query);
			if (untokenizedMatches.Count < 1)
			{
				return query;
			}

			var sb = new StringBuilder(query);

			// KeywordAnalyzer will not tokenize the values

			// process in reverse order to leverage match string indexes
			for (int i=untokenizedMatches.Count; i>0; i--)
			{
				Match match = untokenizedMatches[i-1];

				// specify that term for this field should not be tokenized
				analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnlyzer);

				Group term = match.Groups[2];

				// remove enclosing "[[" "]]" from term value (again in reverse order)
				sb.Remove(term.Index+term.Length-2, 2);
				sb.Remove(term.Index, 2);
			}

			return sb.ToString();
		}
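The untokenizedQuery regex used by Example #8 is not shown. Judging from how the match groups are consumed (group 1 is the field name, group 2 is the term including its [[ ]] markers), it is presumably something along these lines, though the exact pattern is an assumption:

using System.Text.RegularExpressions;

// Hypothetical pattern: matches field:[[term]], capturing the field name in
// group 1 and the bracketed term (with the [[ ]] markers) in group 2.
private static readonly Regex untokenizedQuery =
    new Regex(@"([\w\d_]+?):\s*(\[\[.+?\]\])", RegexOptions.Compiled);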
Example #9
File: Form1.cs Project: usmanghani/Misc
        private void btnSearch_Click(object sender, EventArgs e)
        {
            lstResults.Items.Clear();
            searcher = new IndexSearcher(new RAMDirectory(_indexTarget));
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer());
            analyzer.AddAnalyzer("ayat_arabic", new DiacriticAnalyzer(FilterData.stopWords));
            //MyQueryParser parser = new MyQueryParser(new string[] { "ayat_desc", "ayat_urdu", "ayat_arabic" }, analyzer);
            //parser.SetDefaultOperator(QueryParser.Operator.AND);
            //Query q = parser.Parse(txtSearch.Text);
            //Query q = new TermQuery(new Term("ayatno", NumberTools.LongToString(long.Parse(txtSearch.Text))));
            BooleanQuery q = new BooleanQuery();
            long l1 = 1; long l2 = 500; long l3 = 1; long l4 = 1;
            //RangeQuery rq = new RangeQuery(new Term("ayatno", l1.ToString("00000")), new Term("ayatno", l2.ToString("00000")), true);
            //q.Add(rq, true, false);
            q.Add(new TermQuery(new Term("sid", l3.ToString("00000"))), true, false);
            q.Add(new TermQuery(new Term("ayatno", l4.ToString("00000"))), true, false);
            MessageBox.Show(q.ToString());
            Sort sort = new Sort(new string[] { "pid", "sid", "ayatno" });
            hits = searcher.Search(q, sort);
            lblHits.Text = hits.Length() + " hit(s).";
            Application.DoEvents();

            for (int i = 0; i < hits.Length(); i++)
            {
                StringBuilder sb = new StringBuilder();
                sb.Append("Para: ").Append(hits.Doc(i).Get("pid"));
                sb.Append(", Surat: ").Append(hits.Doc(i).Get("sid"));
                sb.Append(", Verse: ").Append(hits.Doc(i).Get("ayatno"));
                lstResults.Items.Add(sb.ToString());

            }
        }
        public void StartEmailIndexing()
        {
            if (!Directory.Exists(GlobalData.EmailIndexPath))
                Directory.CreateDirectory(GlobalData.EmailIndexPath);

            IndexWriter index;

            PerFieldAnalyzerWrapper pfaw = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
            pfaw.AddAnalyzer("body", new StopAnalyzer());
            try
            {
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, false);
            }
            catch
            {
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, true);
            }

            const string PopServer = "pop.google.in";
            const int PopPort = 995;
            const string User = "******";
            const string Pass = "******";
            using (Pop3Client client = new Pop3Client(PopServer, PopPort, true, User, Pass))
            {
                client.Trace += new Action<string>(Console.WriteLine);
                //connects to Pop3 Server, Executes POP3 USER and PASS
                client.Authenticate();
                client.Stat();
                foreach (Pop3ListItem item in client.List())
                {
                    Document doc = new Document();
                    MailMessageEx message = client.RetrMailMessageEx(item);

                    doc.Add(new Field("subject", message.Subject.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("from", message.From.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("to", message.To.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    //doc.Add(new Field("date", message.DeliveryDate.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    string code = message.Body;
                    code = Regex.Replace(code, @"<\s*head\s*>(.|\n|\r)*?<\s*/\s*head\s*>", " ", RegexOptions.Compiled); //repalce <head> section with single whitespace
                    code = Regex.Replace(code, @"<\s*script (.|\n|\r)*?<\s*/\s*script\s*>", " ", RegexOptions.Compiled);//repalce remaining <script> tags from body with single whitespace
                    code = Regex.Replace(code, @"<!--(.|\n|\r)*?-->", " ", RegexOptions.Compiled);                      //repalce comments
                    code = Regex.Replace(code, @"<(.|\n|\r)*?>", " ", RegexOptions.Compiled);                           //repalce all tags with single whitespace
                    code = Regex.Replace(code, @"&.*?;", " ", RegexOptions.Compiled);                                   //replace &gt; e.t.c
                    code = Regex.Replace(code, @"\s+", " ", RegexOptions.Compiled);                                     //replace multiple whitespaces characters by single whitespace
                    code = Regex.Replace(code, @"\ufffd", " ", RegexOptions.Compiled);

                    doc.Add(new Field("body", code.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    index.AddDocument(doc);
                }
                client.Noop();
                client.Rset();
                client.Quit();
                index.Optimize();
                index.Close();
            }
        }
        public void Search(string keyword)
        {
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                reader = IndexReader.Open(FSDirectory.Open(new DirectoryInfo(indexDirectory)), true);
                searcher = new IndexSearcher(reader);
                // build the query
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(analyzer);
                wrapper.AddAnalyzer("FileName", analyzer);
                wrapper.AddAnalyzer("Author", analyzer);
                wrapper.AddAnalyzer("Content", analyzer);
                string[] fields = { "FileName", "Author", "Content" };

                QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, wrapper);
                Query query = parser.Parse(keyword);
                TopScoreDocCollector collector = TopScoreDocCollector.Create(NumberHits, true);

                searcher.Search(query, collector);
                var hits = collector.TopDocs().ScoreDocs;

                int numTotalHits = collector.TotalHits;

                // the hits gathered by the collector can now be processed as needed
                for (int i = 0; i < hits.Count(); i++)
                {
                    var hit = hits[i];
                    Document doc = searcher.Doc(hit.Doc);
                    Field fileNameField = doc.GetField("FileName");
                    Field authorField = doc.GetField("Author");
                    Field pathField = doc.GetField("Path");

                }
            }
            finally
            {
                if (searcher != null)
                    searcher.Dispose();

                if (reader != null)
                    reader.Dispose();
            }
        }
Example #12
 public void TestPerFieldAnalyzer()
 {
     var analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
     analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());
     var query =
         new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "description", analyzer)
             .Parse("partnum:Q36 AND SPACE");
     Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
     Assert.AreEqual(1, searcher.Search(query, searcher.MaxDoc()).ScoreDocs.Length, "docs found!!!");
 }
		public virtual void  TestPerFieldAnalyzer()
		{
			PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
			analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

			Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("description", analyzer);
			Query query = queryParser.Parse("partnum:Q36 AND SPACE");
			
			Hits hits = searcher.Search(query);
			Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
			Assert.AreEqual(1, hits.Length(), "doc found!");
		}
Example #14
		public virtual void  TestPerFieldAnalyzer()
		{
			PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
			analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());
			
			QueryParser queryParser = new QueryParser("description", analyzer);
			Query query = queryParser.Parse("partnum:Q36 AND SPACE");
			
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
			Assert.AreEqual(1, hits.Length, "doc found!");
		}
Example #15
        public virtual void  TestPerFieldAnalyzer()
        {
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());

            analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());

            QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer);
            Query       query       = queryParser.Parse("partnum:Q36 AND SPACE");

            ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
            Assert.AreEqual("+partnum:Q36 +space", query.ToString("description"), "Q36 kept as-is");
            Assert.AreEqual(1, hits.Length, "doc found!");
        }
		public virtual void  TestPerField()
		{
			System.String text = "Qwerty";
			PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
			analyzer.AddAnalyzer("special", new SimpleAnalyzer());
			
			TokenStream tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text));
			Token token = tokenStream.Next();
			Assert.AreEqual("Qwerty", token.TermText(), "WhitespaceAnalyzer does not lowercase");
			
			tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text));
			token = tokenStream.Next();
			Assert.AreEqual("qwerty", token.TermText(), "SimpleAnalyzer lowercases");
		}
 public EDSIndexer(string desIndexPath, Analyzer analyser, bool overwriteIndexDir)
 {
     keywordAnalyzer = analyser;
     pfaw = new PerFieldAnalyzerWrapper(analyser);
     pfaw.AddAnalyzer("content", stopAnalyzer);          //generally for content v use stop analyser
     try
     {
         index = new IndexWriter(desIndexPath, pfaw, overwriteIndexDir);
     }
     catch
     {
         index = new IndexWriter(desIndexPath, pfaw, true);
     }
 }
Example #18
        internal static Analyzer GetAnalyzer()
        {
            //var masterAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
            ////TODO: Lucene_FullText2 fails with new WhitespaceAnalyzer
            ////masterAnalyzer.AddAnalyzer(LucObject.FieldName.AllText, new WhitespaceAnalyzer());
            //masterAnalyzer.AddAnalyzer(LucObject.FieldName.AllText, new StandardAnalyzer());
            //return masterAnalyzer;

            //  Field          Analyzer
            //  -----------------------------------------------------------------
            //  Name           Lucene.Net.Analysis.KeywordAnalyzer
            //  Path           Lucene.Net.Analysis.KeywordAnalyzer
            //  Keywords       Lucene.Net.Analysis.StopAnalyzer
            //  _Text          Lucene.Net.Analysis.Standard.StandardAnalyzer
            //  -----------------------------------------------------------------
            //  Default        Lucene.Net.Analysis.WhitespaceAnalyzer

            var masterAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
            foreach (var item in SenseNet.ContentRepository.Storage.StorageContext.Search.SearchEngine.GetAnalyzers())
                masterAnalyzer.AddAnalyzer(item.Key, (Analyzer)Activator.CreateInstance(item.Value));
            masterAnalyzer.AddAnalyzer(LucObject.FieldName.AllText, new StandardAnalyzer());
            //masterAnalyzer.AddAnalyzer(LucObject.FieldName.AllText, new StandardAnalyzer(SenseNet.Search.Indexing.LuceneManager.LuceneVersion));
            return masterAnalyzer;
        }
Example #19
        public virtual void  TestPerField()
        {
            System.String           text     = "Qwerty";
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());

            analyzer.AddAnalyzer("special", new SimpleAnalyzer());

            TokenStream tokenStream = analyzer.TokenStream("Field", new System.IO.StringReader(text));
            Token       token       = tokenStream.Next();

            Assert.AreEqual("Qwerty", token.TermText(), "WhitespaceAnalyzer does not lowercase");

            tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            token       = tokenStream.Next();
            Assert.AreEqual("qwerty", token.TermText(), "SimpleAnalyzer lowercases");
        }
Example #20
		public void CompareHtmlTokenization()
		{
			const string str = @"test1 <a href=""foo"">testlink</a> test2 test3";

			PerFieldAnalyzerWrapper pfaw = new PerFieldAnalyzerWrapper(new HtmlStandardAnalyzer());
			pfaw.AddAnalyzer("Morph", new HtmlMorphAnalyzer(HspellDict));
			Directory indexDirectory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(indexDirectory, pfaw, true, IndexWriter.MaxFieldLength.UNLIMITED);

			Document doc = new Document();
			doc.Add(new Field("Simple", str, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			doc.Add(new Field("Morph", str, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			writer.AddDocument(doc);
			writer.Close();

			CompareTermData(indexDirectory, str);
		}
		public virtual void  TestPerField()
		{
			System.String text = "Qwerty";
			PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
			analyzer.AddAnalyzer("special", new SimpleAnalyzer());
			
			TokenStream tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text));
            ITermAttribute termAtt = tokenStream.GetAttribute<ITermAttribute>();
			
			Assert.IsTrue(tokenStream.IncrementToken());
			Assert.AreEqual("Qwerty", termAtt.Term, "WhitespaceAnalyzer does not lowercase");
			
			tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            termAtt = tokenStream.GetAttribute<ITermAttribute>();
			Assert.IsTrue(tokenStream.IncrementToken());
			Assert.AreEqual("qwerty", termAtt.Term, "SimpleAnalyzer lowercases");
		}
        public virtual void  TestPerField()
        {
            System.String           text     = "Qwerty";
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());

            analyzer.AddAnalyzer("special", new SimpleAnalyzer());

            TokenStream    tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text));
            ITermAttribute termAtt     = tokenStream.GetAttribute <ITermAttribute>();

            Assert.IsTrue(tokenStream.IncrementToken());
            Assert.AreEqual("Qwerty", termAtt.Term, "WhitespaceAnalyzer does not lowercase");

            tokenStream = analyzer.TokenStream("special", new System.IO.StringReader(text));
            termAtt     = tokenStream.GetAttribute <ITermAttribute>();
            Assert.IsTrue(tokenStream.IncrementToken());
            Assert.AreEqual("qwerty", termAtt.Term, "SimpleAnalyzer lowercases");
        }
        protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken)
        {
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
            analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer());

            int i = 0;

            using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (JObject item in items)
                {
                    i++;

                    string id = item["nuget:id"].ToString();
                    string version = item["nuget:version"].ToString();

                    BooleanQuery query = new BooleanQuery();
                    query.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST));
                    query.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST));

                    writer.DeleteDocuments(query);

                    Document doc = new Document();

                    doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    writer.AddDocument(doc);
                }

                string trace = Guid.NewGuid().ToString();

                writer.Commit(new Dictionary<string, string> 
                { 
                    { "commitTimeStamp", commitTimeStamp.ToString("O") },
                    { "trace", trace }
                });

                Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}",
                    i, writer.NumDocs(), commitTimeStamp.ToString("O"), trace);
            }

            return Task.FromResult(true);
        }
Example #24
		/// <summary>
		/// Detects untokenized fields and marks them as NotAnalyzed in the analyzer
		/// </summary>
		private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, ref Analyzer keywordAnalyzer)
		{
			var untokenizedMatches = untokenizedQuery.Matches(query);
			if (untokenizedMatches.Count < 1)
				return query;

			var sb = new StringBuilder(query);

			// Initialize a KeywordAnalyzer
			// KeywordAnalyzer will not tokenize the values
			keywordAnalyzer = new KeywordAnalyzer();

			// process in reverse order to leverage match string indexes
			for (var i = untokenizedMatches.Count; i > 0; i--)
			{
				var match = untokenizedMatches[i - 1];

				// specify that term for this field should not be tokenized
				analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnalyzer);

				var term = match.Groups[2];

				// introduce " " around the term
				var startIndex = term.Index;
				var length = term.Length - 2;
				if (sb[startIndex + length - 1] != '"')
				{
					sb.Insert(startIndex + length, '"');
					length += 1;
				}
				if (sb[startIndex + 2] != '"')
				{
					sb.Insert(startIndex + 2, '"');
					length += 1;
				}
				// remove enclosing "[[" "]]" from term value (again in reverse order)
				sb.Remove(startIndex + length, 2);
				sb.Remove(startIndex, 2);
			}

			return sb.ToString();
		}
 public static Analyzer GetAnalyzer()
 {
     var snowball = new SnowballAndWordSplittingAnalyzer("English");
         PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(snowball);
         SandoField[] fields = new SandoField[]
         {
             SandoField.ClassId,
             SandoField.Source,
             SandoField.AccessLevel,
             SandoField.ProgramElementType,
             SandoField.DefinitionLineNumber,
             SandoField.FileExtension,
             SandoField.FullFilePath,
             SandoField.Id,
             SandoField.IsConstructor,
             SandoField.Modifiers,
             SandoField.DefinitionColumnNumber
         };
         foreach (var field in fields)
             analyzer.AddAnalyzer(field.ToString(), new KeywordAnalyzer());
         return analyzer;
 }
Example #26
        //create the index; the caller indicates where the source file is located
        public void IndexText(string filepath, bool titleboost, bool authorboost, string titlevalue, string authorvalue)
        {
            StreamReader file    = new StreamReader(filepath);
            string       content = file.ReadToEnd();

            string[] delimiter   = { ".I", ".T", ".A", ".B", ".W" };
            string[] words       = content.Split(delimiter, StringSplitOptions.RemoveEmptyEntries);
            string   length      = "";
            int      countfordoc = 0;

            countfordoc++;
            //because there are five parts in the source file, they need to be separated
            if (words.Length > 5)
            {
                length = words.Length.ToString();
            }
            string[] wordprocessed = new string[words.Length];
            int      i             = 0;

            //get rid of some symbols because the string contains unwanted characters
            //delete the title from the abstract because leaving it there can be treated as an error
            if (words[4].Contains(words[1]))
            {
                words[4] = words[4].Replace(words[1], string.Empty);
            }
            // strip the newline characters
            foreach (string w in words)
            {
                wordprocessed[i] = w.Replace("\n", string.Empty);
                i++;
            }

            //define 5 fields for index
            Lucene.Net.Documents.Field docid            = new Field(DocID, wordprocessed[0], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field title            = new Field(TITLE, wordprocessed[1], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field author           = new Field(AUTHOR, wordprocessed[2], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field bibliinformation = new Field(BIBLiINFO, wordprocessed[3], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field abstracts        = new Field(ABSTRACT, wordprocessed[4], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            if (titleboost == true)
            {
                title.Boost = int.Parse(titlevalue);
            }
            else
            {
                title.Boost = 1;
            }                        //for task 7
            if (authorboost == true)
            {
                author.Boost = int.Parse(authorvalue);
            }
            else
            {
                author.Boost = 1;
            }
            //for task 7
            analysor.AddAnalyzer(DocID, analyzerkeyword);     //set the ID field to use the keyword analyzer
            analysor.AddAnalyzer(AUTHOR, analyzerkeyword);    //set the author field to use the keyword analyzer; in my opinion, it can't be split into tokens
            analysor.AddAnalyzer(BIBLiINFO, analyzerkeyword); //set the bibliography field to use the keyword analyzer
            Lucene.Net.Documents.Document doc = new Document();
            doc.Add(docid);
            doc.Add(title);
            doc.Add(abstracts);
            doc.Add(author);
            doc.Add(bibliinformation);
            writer.AddDocument(doc); //writer is bound to analysor; here analysor mixes two kinds of analyzer
            file.Close();
        }
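Example #26 depends on several members declared elsewhere in its class: the field-name constants, the analyzerkeyword and analysor analyzers, and the writer. A rough sketch of what those declarations might look like (the member names come from the snippet; the class name, constant values and initializations are assumptions):

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;

// Hypothetical declarations backing Example #26.
public partial class CranfieldIndexer
{
    const string DocID = "docid";
    const string TITLE = "title";
    const string AUTHOR = "author";
    const string BIBLiINFO = "bibliinfo";
    const string ABSTRACT = "abstract";

    // Keyword analyzer for the exact-match fields, wrapped around a standard
    // analyzer that handles everything else (title, abstract).
    readonly KeywordAnalyzer analyzerkeyword = new KeywordAnalyzer();
    readonly PerFieldAnalyzerWrapper analysor =
        new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));

    // The writer is created once with the per-field wrapper and reused for every document,
    // e.g. new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), analysor, true,
    //                      IndexWriter.MaxFieldLength.UNLIMITED);
    IndexWriter writer;
}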
Example #27
File: Index.cs Project: Inferis/ravendb
		private PerFieldAnalyzerWrapper CreateAnalyzer(ICollection<Action> toDispose)
    	{
    		var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
			toDispose.Add(standardAnalyzer.Close);
    		var perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(standardAnalyzer);
    		foreach (var analyzer in indexDefinition.Analyzers)
    		{
    			var analyzerInstance = indexDefinition.CreateAnalyzerInstance(analyzer.Key, analyzer.Value);
				if(analyzerInstance == null)
					continue;
				toDispose.Add(analyzerInstance.Close);
    			perFieldAnalyzerWrapper.AddAnalyzer(analyzer.Key, analyzerInstance);
    		}
			KeywordAnalyzer keywordAnalyzer = null;
			foreach (var fieldIndexing in indexDefinition.Indexes)
			{
				switch (fieldIndexing.Value)
				{
					case FieldIndexing.NotAnalyzedNoNorms:
					case FieldIndexing.NotAnalyzed:
						if(keywordAnalyzer  == null)
						{
							keywordAnalyzer = new KeywordAnalyzer();
							toDispose.Add(keywordAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
						break;
				}
			}
    		return perFieldAnalyzerWrapper;
    	}
Example #28
        public static Lucene.Net.Analysis.Analyzer GetAnalyzer()
        {
            //return new StandardAnalyzer(new string[] {"的", "之" });

            if (analyzerWrapper == null)
            {
                analyzerWrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new StandardAnalyzer(MyLucene.GetLuceneVersion()));
                analyzerWrapper.AddAnalyzer("name", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("japName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("oldName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("shortName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("effect", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("adjust", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("tribe", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("cheatcode", new KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("aliasList", new PunctuationAnalyzer());
                analyzerWrapper.AddAnalyzer("cardCamp", new Lucene.Net.Analysis.KeywordAnalyzer());

                analyzerWrapper.AddAnalyzer("enName", new LetterDigitAnalyzer());
                analyzerWrapper.AddAnalyzer("pyname", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyshortName", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyoldName", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("effectType", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("package", new PunctuationAnalyzer());

                // because of the advanced search feature, the Chinese field names also need to be analyzed

                analyzerWrapper.AddAnalyzer("中文名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("日文名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("旧卡名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("曾用名", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("简称", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("俗称", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("缩写", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("效果", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("效果说明", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("调整", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("种族", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("卡包", new PunctuationAnalyzer());
            }

            return(analyzerWrapper);
        }
Example #29
        public static Lucene.Net.Analysis.Analyzer GetAnalyzer()
        {
            //return new StandardAnalyzer(new string[] {"的", "之" });

            if (analyzerWrapper == null)
            {
                analyzerWrapper = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(new StandardAnalyzer());
                analyzerWrapper.AddAnalyzer("name", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("japName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("oldName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("shortName", new MyAnalyzer(stopWords));
                analyzerWrapper.AddAnalyzer("effect", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("adjust", new MyAnalyzer(stopWords2));
                analyzerWrapper.AddAnalyzer("tribe", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("cheatcode", new Lucene.Net.Analysis.KeywordAnalyzer());
                analyzerWrapper.AddAnalyzer("cardCamp", new Lucene.Net.Analysis.KeywordAnalyzer());

                analyzerWrapper.AddAnalyzer("enName", new LetterDigitAnalyzer());
                analyzerWrapper.AddAnalyzer("pyname", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyshortName", new SimpleAnalyzer());
                analyzerWrapper.AddAnalyzer("pyoldName", new SimpleAnalyzer());

                // Chinese field names are all mapped to English field names before searching, so they do not need analyzers of their own

                /*
                 * analyzerWrapper.AddAnalyzer("中文名", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("日文名", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("旧卡名", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("曾用名", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("简称", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("俗称", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("缩写", new MyAnalyzer(stopWords));
                 * analyzerWrapper.AddAnalyzer("效果", new MyAnalyzer(stopWords2));
                 * analyzerWrapper.AddAnalyzer("效果说明", new MyAnalyzer(stopWords2));
                 * analyzerWrapper.AddAnalyzer("调整", new MyAnalyzer(stopWords2));
                 * analyzerWrapper.AddAnalyzer("种族", new Lucene.Net.Analysis.KeywordAnalyzer());
                 */
            }

            return(analyzerWrapper);
        }
Example #30
		public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose)
		{
			toDispose.Add(defaultAnalyzer.Close);
			var perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);
			foreach (var analyzer in indexDefinition.Analyzers)
			{
				Analyzer analyzerInstance = IndexingExtensions.CreateAnalyzerInstance(analyzer.Key, analyzer.Value);
				if (analyzerInstance == null)
					continue;
				toDispose.Add(analyzerInstance.Close);
				perFieldAnalyzerWrapper.AddAnalyzer(analyzer.Key, analyzerInstance);
			}
			StandardAnalyzer standardAnalyzer = null;
			KeywordAnalyzer keywordAnalyzer = null;
			foreach (var fieldIndexing in indexDefinition.Indexes)
			{
				switch (fieldIndexing.Value)
				{
					case FieldIndexing.NotAnalyzed:
						if (keywordAnalyzer == null)
						{
							keywordAnalyzer = new KeywordAnalyzer();
							toDispose.Add(keywordAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
						break;
					case FieldIndexing.Analyzed:
						if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
							continue;
						if (standardAnalyzer == null)
						{
							standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
							toDispose.Add(standardAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
						break;
				}
			}
			return perFieldAnalyzerWrapper;
		}
 protected void Initialize(ILuceneIndex index, bool close)
 {
     Assert.ArgumentNotNull(index, "index");
     PerFieldAnalyzerWrapper aw = new PerFieldAnalyzerWrapper(index.Analyzer);
     aw.AddAnalyzer("_language", new KeywordAnalyzer());
     this._analyzer = aw;
     Assert.IsNotNull(this._analyzer, "Failed to request analyzer from the index");
 }
Example #32
        public ScoreDoc[] Search(string keyword)
        {
            int num = 10;
            IndexReader reader = null;
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(analyzer);
            wrapper.AddAnalyzer("Name", analyzer);
            string[] fields = { "Name" };
            // try
            // {
                // reader = IndexReader.Open(FSDirectory.Open(new DirectoryInfo(_indexDirectory)), true);
                // searcher = new IndexSearcher(reader);
                QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, wrapper);
                Query query = parser.Parse(keyword);
                TopScoreDocCollector collector = TopScoreDocCollector.Create(num, true);

                _indexSearcher.Search(query, collector);
                var hits = collector.TopDocs().ScoreDocs;

                return hits;
            // }
        }
 internal static void OptimizeAllIndexes()
 {
     PerFieldAnalyzerWrapper pfaw = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
     pfaw.AddAnalyzer("content", new StopAnalyzer());
     foreach (string dir in Directory.GetDirectories(GlobalData.IndexRootPath))
     {
         IndexWriter writer = new IndexWriter(dir, pfaw, false);
         writer.Optimize();
         writer.Close();
     }
 }
Example #34
File: BexisIndexer.cs Project: BEXIS2/Core
        private void configureBexisIndexing(bool recreateIndex)
        {
            configXML = new XmlDocument();
            configXML.Load(FileHelper.ConfigFilePath);

            LoadBeforeIndexing();
            Lucene.Net.Store.Directory pathIndex = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
            Lucene.Net.Store.Directory autoCompleteIndex = FSDirectory.Open(new DirectoryInfo(autoCompleteIndexPath));

            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new BexisAnalyzer());

            indexWriter = new IndexWriter(pathIndex, analyzer, recreateIndex, IndexWriter.MaxFieldLength.UNLIMITED);
            autoCompleteIndexWriter = new IndexWriter(autoCompleteIndex, new NGramAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);

            foreach (XmlNode a in categoryXmlNodeList)
            {
                analyzer.AddAnalyzer("ng_" + a.Attributes.GetNamedItem("lucene_name").Value, new NGramAnalyzer());
            }
            analyzer.AddAnalyzer("ng_all", new NGramAnalyzer());

            isIndexConfigured = true;
        }
Example #35
File: Index.cs Project: csainty/ravendb
		public PerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
		{
			toDispose.Add(defaultAnalyzer.Close);
			var perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);
			foreach (var analyzer in indexDefinition.Analyzers)
			{
				Analyzer analyzerInstance = IndexingExtensions.CreateAnalyzerInstance(analyzer.Key, analyzer.Value);
				if (analyzerInstance == null)
					continue;
				toDispose.Add(analyzerInstance.Close);

				if (forQuerying)
				{
					var customAttributes = analyzerInstance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false);
					if (customAttributes.Length > 0)
						continue;
				}

				perFieldAnalyzerWrapper.AddAnalyzer(analyzer.Key, analyzerInstance);
			}
			StandardAnalyzer standardAnalyzer = null;
			KeywordAnalyzer keywordAnalyzer = null;
			foreach (var fieldIndexing in indexDefinition.Indexes)
			{
				switch (fieldIndexing.Value)
				{
					case FieldIndexing.NotAnalyzed:
						if (keywordAnalyzer == null)
						{
							keywordAnalyzer = new KeywordAnalyzer();
							toDispose.Add(keywordAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
						break;
					case FieldIndexing.Analyzed:
						if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
							continue;
						if (standardAnalyzer == null)
						{
							standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
							toDispose.Add(standardAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
						break;
				}
			}
			return perFieldAnalyzerWrapper;
		}
Example #36
        /// <summary>
        /// Loads the data into the Lucene index
        /// </summary>
        /// <param name="directory">
        /// Directory where the index is located.
        /// </param>
        private void LoadLuceneIndex(SimpleFSDirectory directory)
        {
            // Create an analyzer that uses UpperCaseLetterOrDigitAnalyzer for all fields, but UpperCaseKeywordAnalyzer for ProductCode
            // (because we want to regard product codes as 1 word).

            var analyzer = new PerFieldAnalyzerWrapper(new UpperCaseLetterOrDigitAnalyzer());
            analyzer.AddAnalyzer("ProductCode", new UpperCaseKeywordAnalyzer());

            // -----------
            // Store products into Lucene.
            // This will create a new index. Other requests will still be able to read the existing index.

            // Create writer that will overwrite the existing index
            using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                IEnumerable<ProductSearchResult> results = _productRepository.GetAllProductSearchResults();

                foreach (var result in results)
                {
                    var doc = new Document();
                    doc.Add(new Field("ProductId", result.ProductId.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));

                    // Store the field in the index so it can be searched; the per-field UpperCaseKeywordAnalyzer keeps the whole product code as a single token.
                    var productCodeField = new Field("ProductCode", result.ProductCode, Field.Store.YES, Field.Index.ANALYZED);
                    doc.Add(productCodeField);

                    doc.Add(new Field("ProductDescription", result.ProductDescription, Field.Store.YES, Field.Index.ANALYZED));

                    writer.AddDocument(doc);
                }
            }
        }
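UpperCaseLetterOrDigitAnalyzer and UpperCaseKeywordAnalyzer in Example #36 are custom analyzers that are not shown. A minimal sketch of the keyword variant (an assumption about its behaviour, based on the comments above), written against the Lucene.Net 3.x TokenStream API, would emit the whole product code as one upper-cased token:

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical filter: upper-cases every token it receives.
public class UpperCaseFilter : TokenFilter
{
    private readonly ITermAttribute termAtt;

    public UpperCaseFilter(TokenStream input) : base(input)
    {
        termAtt = AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
            return false;
        termAtt.SetTermBuffer(termAtt.Term.ToUpperInvariant());
        return true;
    }
}

// Hypothetical analyzer: the entire field value becomes a single upper-cased token,
// so product codes are matched as one word regardless of case.
public class UpperCaseKeywordAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new UpperCaseFilter(new KeywordTokenizer(reader));
    }
}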
Example #37
        public static IndexWriter Create(AbstractConnection connection, Process process, Entity entity) {
            using (var dir = LuceneDirectoryFactory.Create(connection, entity)) {
                Analyzer defaultAnalyzer = new KeywordAnalyzer();
                if (process.SearchTypes.ContainsKey("default")) {
                    defaultAnalyzer = LuceneAnalyzerFactory.Create(process.SearchTypes["default"].Analyzer, connection.Version);
                }

                var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
                foreach (var field in GetFields(entity, connection.Version, connection.Logger)) {
                    analyzer.AddAnalyzer(field.Key, field.Value);
                }
                return new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            }
        }
Example #38
        public static IndexWriter Create(AbstractConnection connection, Entity entity) {
            var dir = LuceneDirectoryFactory.Create(connection, entity);
            Analyzer defaultAnalyzer = new KeywordAnalyzer();

            var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
            foreach (var field in GetFields(entity, connection.Version, connection.Logger)) {
                analyzer.AddAnalyzer(field.Key, field.Value);
            }
            return new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        }