An Analyzer represents a policy for extracting the terms to be indexed from text. The Analyzer builds TokenStreams, which break text down into tokens.

A typical Analyzer implementation will first build a Tokenizer. The Tokenizer will break down the stream of characters from the System.IO.TextReader into raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer.

Inheritance: IDisposable
 /// <summary>
 /// Opens a file-system directory at <paramref name="indexPath"/>, switches the
 /// instance to a <c>SimpleAnalyzer</c>, and builds the index writer over both.
 /// </summary>
 /// <param name="indexPath">Path of the directory that holds the index.</param>
 public void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
     analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();

     // Cap each field at the library's default maximum length.
     var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

     // The third argument is the writer's "create" flag.
     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
 }
Example #2
0
        /// <summary>
        /// Creates the search helper and selects the analyzer identified by its display name.
        /// </summary>
        /// <param name="analyzer_str">
        /// One of "Simple Analyzer", "Standard Analyzer" or "Snowball Analyzer". Any other
        /// value (including <c>null</c>) silently falls back to a <c>SimpleAnalyzer</c>.
        /// </param>
        public LuceneIndexSearch(string analyzer_str)
        {
            luceneIndexDirectory = null;
            writer = null;

            switch (analyzer_str)
            {
                case "Simple Analyzer":
                    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                    Console.WriteLine("Simple Analyzer");
                    break;

                case "Standard Analyzer":
                    // Fixed: the original wrote "analyzer = analyzer = new ...", a redundant chained self-assignment.
                    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                    Console.WriteLine("Standard Analyzer");
                    break;

                case "Snowball Analyzer":
                    // The second SnowballAnalyzer argument names the stemmer language.
                    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English");
                    Console.WriteLine("Snowball Analyzer");
                    break;

                default:
                    // Unknown selection: fall back without printing anything.
                    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                    break;
            }

            // The parser must share the analyzer chosen above.
            parser       = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);
            mySimilarity = new NewSimilarity();
        }
Example #3
0
        /// <summary>
        /// Builds the per-field analyzer wrapper: a default analyzer instantiated from
        /// <paramref name="defaultType"/>, plus one override for every field that declares
        /// its own analyzer. The result is stored in <c>Analyzer</c>.
        /// </summary>
        /// <param name="defaultType">Type of the default analyzer; <c>StandardAnalyzer</c> is used when null.</param>
        /// <param name="fields">Field descriptions to scan for analyzer overrides; may be null.</param>
        /// <exception cref="ArgumentException">The instantiated default type is not an <c>Analyzer</c>.</exception>
        void SetAnalyzerType(Type defaultType, IEnumerable<FieldDetails> fields)
        {
            Type analyzerType = defaultType ?? typeof(StandardAnalyzer);

            // Instantiate through the parameterless constructor; a non-Analyzer type yields null here.
            _defaultAnalyzer = Activator.CreateInstance(analyzerType) as Analyzer;
            if (_defaultAnalyzer == null) {
                throw new ArgumentException("defaultType is not an Analyzer type");
            }

            var perField = new PerFieldAnalyzerWrapper(_defaultAnalyzer);
            if (fields != null) {
                foreach (var details in fields) {
                    // Fields without an explicit analyzer keep the default.
                    if (details.Field.Analyzer == null) {
                        continue;
                    }

                    var overrideAnalyzer = CreateAnalyzerFromType(details.Field.Analyzer);
                    if (overrideAnalyzer == null) {
                        continue;
                    }

                    perField.AddAnalyzer(details.Name, overrideAnalyzer);
                }
            }

            Analyzer = perField;
        }
 /// <summary>
 /// Builds a per-field analyzer whose default is a synonym analyzer backed by an
 /// XML synonym engine, with the "cota" and "codigo" fields handled by keyword analyzers.
 /// </summary>
 public InstancePerFieldAnalyzerWrapper()
 {
     var synonymAnalyzer = new Synonyms.SynonymAnalyzer(new Synonyms.XmlSynonymEngine());
     var perField = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(synonymAnalyzer);

     // Each keyword field gets its own KeywordAnalyzer instance.
     foreach (var keywordField in new[] { "cota", "codigo" })
     {
         perField.AddAnalyzer(keywordField, new Lucene.Net.Analysis.KeywordAnalyzer());
     }

     instancePerFieldAnalyzerWrapper = perField;
 }
Example #5
0
 /// <summary>
 /// Writes the index for <paramref name="source"/> using either a create indexer or an
 /// incremental indexer, wiring the supplied completion and progress handlers first.
 /// </summary>
 /// <param name="analyzer">Analyzer handed to the indexer.</param>
 /// <param name="indexer">Supplies MaxFieldLength, RamBufferSize, MergeFactor and MaxBufferedDocs.</param>
 /// <param name="source">Supplies the connection string, database type and query.</param>
 /// <param name="create">True to use <c>DBRamCreateIndexer</c>; false to use <c>DBRamIncremIndexer</c>.</param>
 /// <param name="OnIndexCompleted">Handler subscribed to the indexer's OnIndexCompleted event.</param>
 /// <param name="OnProgressChanged">Handler subscribed to the indexer's OnProgressChanged event.</param>
 /// <returns>Wall-clock time spent inside the <c>WriteResultsWithEvent</c> call.</returns>
 /// <remarks>
 /// Exceptions propagate unchanged. The original wrapped the body in
 /// <c>catch (Exception e) { throw e; }</c>, which only destroyed the original stack trace.
 /// </remarks>
 public static TimeSpan WriteIndexWithEvent(Analyzer analyzer, IndexerSet indexer, Source source, bool create,
                                         IndexCompletedEventHandler OnIndexCompleted,
                                         IndexProgressChangedEventHandler OnProgressChanged)
 {
     string connect = source.GetConnString();
     DateTime start;

     if (create)
     {
         DBRamCreateIndexer dbcIndexer = new DBRamCreateIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
         dbcIndexer.OnIndexCompleted += OnIndexCompleted;
         dbcIndexer.OnProgressChanged += OnProgressChanged;

         // Time only the write itself, not the indexer construction.
         start = DateTime.Now;
         dbcIndexer.WriteResultsWithEvent(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
         return DateTime.Now - start;
     }

     DBRamIncremIndexer dbiIndexer = new DBRamIncremIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
     dbiIndexer.OnIndexCompleted += OnIndexCompleted;
     dbiIndexer.OnProgressChanged += OnProgressChanged;

     start = DateTime.Now;
     dbiIndexer.WriteResultsWithEvent(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
     return DateTime.Now - start;
 }
		/// <summary>
		/// Creates an <c>UmbracoContentIndexer</c> over the given Lucene directory, filling in
		/// a <c>TestDataService</c> and a LUCENE_29 <c>StandardAnalyzer</c> when the caller omits them.
		/// </summary>
		/// <param name="luceneDir">Custom Lucene directory passed to the indexer.</param>
		/// <param name="analyzer">Analyzer to use, or null for the default.</param>
		/// <param name="dataService">Data service to use, or null for the default.</param>
		/// <returns>The indexer, with <c>IndexingError</c> already subscribed.</returns>
		public static UmbracoContentIndexer GetUmbracoIndexer(
			Lucene.Net.Store.Directory luceneDir,
			Analyzer analyzer = null,
			IDataService dataService = null)
		{
			// Substitute defaults for any collaborator the caller left out.
			dataService = dataService ?? new TestDataService();
			analyzer = analyzer ?? new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

			var criteria = new IndexSet().ToIndexCriteria(dataService, UmbracoContentIndexer.IndexFieldPolicies);

			var indexer = new UmbracoContentIndexer(criteria, luceneDir, dataService, analyzer, false);
			indexer.IndexingError += IndexingError;
			return indexer;
		}
Example #7
0
 /// <summary>
 /// Points the instance at the index stored under <paramref name="path"/> and prepares
 /// a query parser over the TEXT field with a <c>SimpleAnalyzer</c>. No writer is kept.
 /// </summary>
 /// <param name="path">File-system path of the index directory.</param>
 public void Searcher(string path)
 {
     writer = null;
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(path);

     var simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
     analyzer = simpleAnalyzer;
     parser = new QueryParser(VERSION, TEXT, simpleAnalyzer);
 }
 /// <summary>
 /// Drops the references held by this instance. Nothing is disposed here;
 /// the fields are only cleared.
 /// </summary>
 public void Dispose()
 {
     selectionProperties = null;
     analyzer = null;
     directory = null;
     facetHandlers = null;
 }
        /// <summary>
        /// Sets <c>analyzer</c> according to the display name the user picked.
        /// Unrecognised names get the same analyzer as "Standard Analyzer".
        /// </summary>
        /// <param name="analyzerSelection">Display name of the desired analyzer.</param>
        public void AnalyzerSelection(string analyzerSelection)
        {
            switch (analyzerSelection)
            {
                case "Simple Analyzer":
                    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                    break;

                case "Snowball Analyzer":
                    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
                    break;

                case "Keyword Analyzer":
                    analyzer = new Lucene.Net.Analysis.KeywordAnalyzer();
                    break;

                case "Stop Analyzer":
                    analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
                    break;

                case "Whitespace Analyzer":
                    analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
                    break;

                // The explicit choice and the fallback build the same analyzer.
                case "Standard Analyzer":
                default:
                    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
                    break;
            }
        }
Example #10
0
        /// <summary>
        /// Rebuilds the index under <c>_indexerFolder</c> from every file in
        /// <c>_textFilesFolder</c> (searched recursively) that matches <c>Config.FileSearchPattern</c>.
        /// </summary>
        /// <param name="analayer">Analyzer used for the analyzed fields.</param>
        /// <remarks>
        /// Each file becomes one document with a stored, non-analyzed path, a stored, analyzed
        /// file name, and analyzed but unstored content. The writer and directory are wrapped
        /// in <c>using</c> so they are released even when reading a file or adding a document
        /// throws; the original disposed the writer only on the success path and never
        /// disposed the directory.
        /// </remarks>
        public void CreateIndex(Analyzer analayer) 
        {
            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);

            using (FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder)))
            using (IndexWriter indexWriter = new IndexWriter(fsDir, analayer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (string file in files)
                {
                    string name = new FileInfo(file).Name;
                    string content = File.ReadAllText(file);

                    Document doc = new Document();
                    doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                    indexWriter.AddDocument(doc);

                    Console.WriteLine("{0} - {1}", file, name);
                }

                indexWriter.Optimize();
            }

            Console.WriteLine("File count: {0}", files.Length);
        }
 /// <summary>
 /// Creates a searcher that analyzes and parses queries with the supplied analyzer.
 /// </summary>
 /// <param name="an">Analyzer stored on the instance and handed to the query parser.</param>
 public Searcher(Analyzer an)
 {
     analyzer = an;

     // Single-field parser over TEXT_FN, built with the same analyzer.
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, an);
 }
        /// <summary>
        /// Loads the analyzer, position-increment flag and default operator from the query
        /// configuration, then delegates to the base processor. When no analyzer is
        /// configured the tree is returned untouched and no state is changed.
        /// </summary>
        /// <param name="queryTree">Root of the query node tree to process.</param>
        /// <returns>The processed tree, or <paramref name="queryTree"/> itself when no analyzer is configured.</returns>
        public override IQueryNode Process(IQueryNode queryTree)
        {
            Analyzer configuredAnalyzer = GetQueryConfigHandler().Get(ConfigurationKeys.ANALYZER);
            if (configuredAnalyzer == null)
            {
                return queryTree;
            }

            this.analyzer = configuredAnalyzer;

            // Position increments are off unless the configuration sets them explicitly.
            this.positionIncrementsEnabled = false;
            bool? incrementsSetting = GetQueryConfigHandler().Get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);

            // OR is the operator used when none is configured.
            var operatorSetting = GetQueryConfigHandler().Get(ConfigurationKeys.DEFAULT_OPERATOR);
            this.defaultOperator = operatorSetting != null ? operatorSetting.Value : Operator.OR;

            if (incrementsSetting != null)
            {
                this.positionIncrementsEnabled = incrementsSetting.Value;
            }

            return base.Process(queryTree);
        }
Example #13
0
 /// <summary>
 /// Creates a searcher whose index lives in the "Index" sub-folder of
 /// <paramref name="workingFolder"/>.
 /// </summary>
 /// <param name="workingFolder">Parent folder of the index folder.</param>
 /// <param name="analyzer">Analyzer passed to the base class.</param>
 public LuceneSearcher(DirectoryInfo workingFolder, Analyzer analyzer)
     : base(analyzer)
 {
     _disposer = new DisposableSearcher(this);

     string indexPath = Path.Combine(workingFolder.FullName, "Index");
     LuceneIndexFolder = new DirectoryInfo(indexPath);

     InitializeDirectory();
 }
        /// <summary>
        /// Opens the index directory at <paramref name="indexPath"/>, selects the analyzer
        /// named by <paramref name="name"/>, creates the index writer and applies the
        /// custom similarity to it.
        /// </summary>
        /// <param name="indexPath">File-system path of the index directory.</param>
        /// <param name="name">
        /// "WhitespaceAnalyzer", "SimpleAnalyzer", "StandardAnalyzer" or "StopAnalyzer".
        /// Any other value leaves the current <c>analyzer</c> field unchanged, as before.
        /// </param>
        /// <remarks>
        /// Fixed: the writer used to be created in an <c>else</c> attached only to the
        /// "StopAnalyzer" test, so choosing StopAnalyzer skipped writer creation and the
        /// following <c>writer.SetSimilarity</c> ran against a null or stale writer.
        /// The writer is now created for every analyzer choice.
        /// </remarks>
        public void CreateIndex(string indexPath, string name)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

            switch (name)
            {
                case "WhitespaceAnalyzer":
                    analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
                    break;

                case "SimpleAnalyzer":
                    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                    break;

                case "StandardAnalyzer":
                    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
                    break;

                case "StopAnalyzer":
                    analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
                    break;
            }

            // Always build the writer, whichever analyzer was selected.
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
            writer.SetSimilarity(customSimilarity);
        }
Example #15
0
 /// <summary>
 /// Creates a searcher that works against an existing index writer.
 /// </summary>
 /// <param name="writer">Writer stored in <c>_nrtWriter</c>; must not be null.</param>
 /// <param name="analyzer">Analyzer passed to the base class.</param>
 /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
 public LuceneSearcher(IndexWriter writer, Analyzer analyzer)
     : base(analyzer)
 {
     if (writer == null)
     {
         throw new ArgumentNullException("writer");
     }

     _disposer = new DisposableSearcher(this);
     _nrtWriter = writer;
 }
Example #16
0
 /// <summary>
 /// Creates an engine with no directory, analyzer or writer yet, and an empty CSV document.
 /// </summary>
 public SearchEngine()
 {
     // According to the original comments, these three are filled in later by
     // Create Index, CreateAnalyser and CreateWriter respectively.
     luceneIndexDirectory = null;
     analyzer = null;
     writer = null;

     CSVdoc = new CSVDocument();
 }
        /// <summary>
        /// Builds the fixture: an in-memory index holding six attribute documents, an
        /// attributes facet handler loaded from it, and a browser over the resulting reader.
        /// </summary>
        /// <remarks>
        /// The index writer is wrapped in <c>using</c> so it is released once the documents
        /// are committed; the original never disposed it.
        /// </remarks>
        public void Init()
        {
            facetHandlers = new List<IFacetHandler>();

            directory = new RAMDirectory();
            analyzer = new WhitespaceAnalyzer();
            selectionProperties = new Dictionary<string, string>();

            using (IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.AddDocument(Doc("prop1=val1", "prop2=val1", "prop5=val1"));
                writer.AddDocument(Doc("prop1=val2", "prop3=val1", "prop7=val7"));
                writer.AddDocument(Doc("prop1=val2", "prop3=val2", "prop3=val3"));
                writer.AddDocument(Doc("prop1=val1", "prop2=val1"));
                writer.AddDocument(Doc("prop1=val1", "prop2=val1"));
                writer.AddDocument(Doc("prop1=val1", "prop2=val1", "prop4=val2", "prop4=val3"));
                writer.Commit();
            }

            attributesFacetHandler = new AttributesFacetHandler(AttributeHandlerName, AttributeHandlerName, null, null,
                new Dictionary<string, string>());
            facetHandlers.Add(attributesFacetHandler);

            // Open the committed index read-only and load the facet data from it.
            IndexReader reader = IndexReader.Open(directory, true);
            boboReader = BoboIndexReader.GetInstance(reader, facetHandlers);
            attributesFacetHandler.LoadFacetData(boboReader);
            browser = new BoboBrowser(boboReader);
        }
Example #18
0
        /// <summary>
        /// Sets <c>analyzer</c> to an English Snowball analyzer that uses the custom
        /// stop-word list below.
        /// </summary>
        private void CreateAnalyser()
        {
            // NOTE(review): the list contains duplicates ("above", "the") and what look like
            // typos ("amoungst", "fify", "thickv"). It is preserved verbatim because changing
            // an entry changes which terms are filtered.
            string[] stopWords =
            {
                "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost",
                "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
                "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become",
                "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill",
                "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail",
                "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even",
                "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for",
                "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he",
                "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie",
                "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd",
                "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself",
                "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
                "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
                "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should",
                "show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such",
                "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon",
                "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward",
                "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence",
                "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom",
                "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the"
            };

            analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Version, "English", stopWords);
        }
 /// <summary>
 /// Stores the directory, analyzer and Lucene version used by the tester, with debug output off.
 /// </summary>
 /// <param name="directory">Assigned to <c>IndexDirectory</c>.</param>
 /// <param name="analyzer">Assigned to <c>Analyzer</c>.</param>
 /// <param name="version">Assigned to <c>CurrentLuceneVersion</c>.</param>
 public LuceneTesterBase(LuceneDirectory directory, LuceneAnalyzer analyzer, LuceneVersion version)
 {
     Analyzer = analyzer;
     CurrentLuceneVersion = version;
     IndexDirectory = directory;

     // Debug output is disabled initially.
     Debug = false;
 }
 /// <summary>
 /// Creates the instance with no directory or writer, a <c>SimpleAnalyzer</c>, and a
 /// LUCENE_30 query parser over TEXT_FN that shares that analyzer.
 /// </summary>
 public LuceneInteractive()
 {
     luceneIndexDirectory = null;
     writer = null;

     var simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
     analyzer = simpleAnalyzer;
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, simpleAnalyzer);
 }
Example #21
0
        /// <summary>
        /// Indexes one document whose "content" field holds a Farsi term, then checks that a
        /// term range filter over the first range excludes it and one over the second range
        /// includes it.
        /// </summary>
        /// <param name="analyzer">Analyzer used to index the document.</param>
        /// <param name="firstBeg">Lower bound of the range expected to exclude the term.</param>
        /// <param name="firstEnd">Upper bound of the range expected to exclude the term.</param>
        /// <param name="secondBeg">Lower bound of the range expected to include the term.</param>
        /// <param name="secondEnd">Upper bound of the range expected to include the term.</param>
        public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
        {
            Directory directory = NewDirectory();

            // Index a single document, then close the writer before searching.
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document document = new Document();
            document.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
            document.Add(new StringField("body", "body", Field.Store.YES));
            indexWriter.AddDocument(document);
            indexWriter.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            Query bodyQuery = new TermQuery(new Term("body", "body"));

            // In Unicode order U+0633 falls inside [ U+062F - U+0698 ], but Farsi sorts
            // U+0698 before U+0633. With a Farsi collator (or an Arabic one when Farsi is
            // not supported) the single indexed term must therefore NOT match the first
            // range filter.
            var excludingFilter = new TermRangeFilter("content", firstBeg, firstEnd, true, true);
            ScoreDoc[] hits = indexSearcher.Search(bodyQuery, excludingFilter, 1).ScoreDocs;
            Assert.AreEqual(0, hits.Length, "The index Term should not be included.");

            var includingFilter = new TermRangeFilter("content", secondBeg, secondEnd, true, true);
            hits = indexSearcher.Search(bodyQuery, includingFilter, 1).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "The index Term should be included.");

            indexReader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// Splits <paramref name="word"/> into terms with the given analyzer.
        /// </summary>
        /// <param name="word">Text to tokenize.</param>
        /// <param name="analysis">Analyzer that produces the token stream.</param>
        /// <returns>The term text of every token, in stream order.</returns>
        /// <remarks>
        /// Follows the token-stream consumer workflow: register the attribute, Reset,
        /// IncrementToken until exhausted, End, then dispose. The original called Reset
        /// after the stream had already been consumed, never disposed the stream, looked
        /// the attribute up again for every token, and kept an unused HasAttributes local.
        /// </remarks>
        public List <String> cutWord(string word, Lucene.Net.Analysis.Analyzer analysis)
        {
            List <string> result = new List <string>();

            using (TokenStream tokenStream = analysis.TokenStream("field1", new StringReader(word)))
            {
                // The attribute instance is reused for every token, so fetch it once.
                ITermAttribute termAttribute = tokenStream.AddAttribute <ITermAttribute>();

                tokenStream.Reset();
                while (tokenStream.IncrementToken())
                {
                    result.Add(termAttribute.Term);
                }

                tokenStream.End();
            }

            return(result);
        }
Example #23
0
        /// <summary>
        /// Rebuilds the in-memory search index and the id lookup table from
        /// <c>Service.PoIs</c>.
        /// </summary>
        /// <remarks>
        /// Every item becomes one document: a stored, non-indexed "id" field, one analyzed
        /// field for each text meta-info entry that has a label value, and an "All" field
        /// holding the name followed by those values. The index writer is wrapped in
        /// <c>using</c> so it is released after the commit, also when adding a document
        /// fails; the original never disposed it.
        /// </remarks>
        public void CreateSearchIndex()
        {
            directory = new RAMDirectory();
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            LookupTable = new Dictionary<string, BaseContent>();

            using (var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (BaseContent p in Service.PoIs.ToList())
                {
                    var document = new Document();
                    document.Add(new Field("id", p.Id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

                    string all = p.Name + " ";
                    foreach (MetaInfo mi in p.EffectiveMetaInfo)
                    {
                        string value;

                        // Only text meta-info with an actual label value is indexed.
                        if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value)) continue;
                        document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED));
                        all += value + " ";
                    }
                    document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED));

                    LookupTable[p.Id.ToString()] = p;
                    ixw.AddDocument(document);
                }

                ixw.Commit();
            }
        }
Example #24
0
        /// <summary>
        /// Builds a per-field analyzer for the EPiServer search fields and writes it, via
        /// reflection, into the private static <c>_analyzer</c> field of
        /// <paramref name="indexingServiceSettingsType"/>.
        /// </summary>
        /// <param name="indexingServiceSettingsType">Type that declares the static <c>_analyzer</c> field.</param>
        /// <param name="defaultAnalyzer">Analyzer for fields without an explicit mapping.</param>
        /// <param name="textAnalyzer">Analyzer for the title, display text, authors and default fields.</param>
        private void InitSearchServiceAnalyzer(Type indexingServiceSettingsType, Analyzer defaultAnalyzer, Analyzer textAnalyzer)
        {
            var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

            // Each of these fields gets its own KeywordAnalyzer instance.
            string[] keywordFields =
            {
                "EPISERVER_SEARCH_ID",
                "EPISERVER_SEARCH_CULTURE",
                "EPISERVER_SEARCH_REFERENCEID",
                "EPISERVER_SEARCH_AUTHORSTORAGE"
            };
            foreach (string fieldName in keywordFields)
            {
                wrapper.AddAnalyzer(fieldName, new KeywordAnalyzer());
            }

            // Each of these fields gets its own WhitespaceAnalyzer instance.
            string[] whitespaceFields =
            {
                "EPISERVER_SEARCH_CATEGORIES",
                "EPISERVER_SEARCH_ACL",
                "EPISERVER_SEARCH_VIRTUALPATH",
                "EPISERVER_SEARCH_TYPE",
                "EPISERVER_SEARCH_CREATED",
                "EPISERVER_SEARCH_MODIFIED",
                "EPISERVER_SEARCH_PUBLICATIONEND",
                "EPISERVER_SEARCH_PUBLICATIONSTART",
                "EPISERVER_SEARCH_ITEMSTATUS"
            };
            foreach (string fieldName in whitespaceFields)
            {
                wrapper.AddAnalyzer(fieldName, new WhitespaceAnalyzer());
            }

            // These fields share the caller's text analyzer.
            string[] textFields =
            {
                "EPISERVER_SEARCH_TITLE",
                "EPISERVER_SEARCH_DISPLAYTEXT",
                "EPISERVER_SEARCH_AUTHORS",
                "EPISERVER_SEARCH_DEFAULT"
            };
            foreach (string fieldName in textFields)
            {
                wrapper.AddAnalyzer(fieldName, textAnalyzer);
            }

            // Replace the settings type's private static analyzer with the wrapper.
            FieldInfo analyzerField = indexingServiceSettingsType.GetField("_analyzer", BindingFlags.Static | BindingFlags.NonPublic);
            analyzerField.SetValue(null, wrapper);
        }
Example #25
0
 /// <summary>
 /// Releases the directory if it is still open and clears the held references.
 /// </summary>
 /// <remarks>
 /// Safe to call more than once. The original dereferenced <c>directory</c>
 /// unconditionally, so a second call (after the field had been nulled) threw a
 /// <see cref="NullReferenceException"/>.
 /// </remarks>
 public void Dispose()
 {
     facetHandlers = null;

     if (directory != null)
     {
         if (directory.isOpen_ForNUnit) directory.Dispose();
         directory = null;
     }

     analyzer = null;
 }
        /// <summary>
        /// Creates a translator with the given default field names and analyzer, starting
        /// from an empty boolean root query.
        /// </summary>
        /// <param name="defaultFieldNames">Stored in <c>_defaultFieldNames</c>.</param>
        /// <param name="defaultAnalyzer">Stored in <c>_defaultAnalyzer</c>.</param>
        public ObjectQueryTranslator(string[] defaultFieldNames, Analyzer defaultAnalyzer)
        {
            _defaultFieldNames = defaultFieldNames;
            _defaultAnalyzer = defaultAnalyzer;
            _root = new BooleanQuery();
        }
Example #27
0
 /// <summary>
 /// Creates an indexer with no directory, writer, analyzer or parser assigned yet.
 /// </summary>
 public LuceneIndexer()
 {
     parser = null;
     analyzer = null;
     writer = null;
     luceneIndexDirectory = null;
 }
 /// <summary>
 /// Creates the application with no directory or writer, a <c>SimpleAnalyzer</c> and
 /// the custom similarity.
 /// </summary>
 public LuceneAdvancedSearchApplication()
 {
     // No directory or writer is assigned at construction time.
     luceneIndexDirectory = null;
     writer = null;

     analyzer = new SimpleAnalyzer();
     similarity = new Newsimilarity();
 }
Example #29
0
        /// <summary>
        /// Creates the application: clears the Lucene fields, gives each of the four parser
        /// fields a weight of 1, and loads the WordNet engine from the "wordnetdic" folder.
        /// </summary>
        public LuceneApp()
        {
            luceneIndexDirectory = null;
            analyzer = null;
            writer = null;
            newSimilarity = new NewSimilarity();

            // Every parser field starts with the same weight.
            parserFields = new string[] { DOC_TITLE, DOC_AUTHOR, DOC_BIB, DOC_BODY };
            fieldWeights = new Dictionary <string, float>();
            for (int i = 0; i < parserFields.Length; i++)
            {
                fieldWeights.Add(parserFields[i], 1);
            }

            // WordNet initialisation.
            // Src: https://developer.syn.co.in/tutorial/wordnet/tutorial.html
            const string wordNetFolder = "../../../wordnetdic";

            wordNetEngine = new WordNetEngine();

            // Data files, one per part of speech.
            wordNetEngine.AddDataSource(OpenWordNetFile(wordNetFolder, "data.adj"), PartOfSpeech.Adjective);
            wordNetEngine.AddDataSource(OpenWordNetFile(wordNetFolder, "data.adv"), PartOfSpeech.Adverb);
            wordNetEngine.AddDataSource(OpenWordNetFile(wordNetFolder, "data.noun"), PartOfSpeech.Noun);
            wordNetEngine.AddDataSource(OpenWordNetFile(wordNetFolder, "data.verb"), PartOfSpeech.Verb);

            // Index files, one per part of speech.
            wordNetEngine.AddIndexSource(OpenWordNetFile(wordNetFolder, "index.adj"), PartOfSpeech.Adjective);
            wordNetEngine.AddIndexSource(OpenWordNetFile(wordNetFolder, "index.adv"), PartOfSpeech.Adverb);
            wordNetEngine.AddIndexSource(OpenWordNetFile(wordNetFolder, "index.noun"), PartOfSpeech.Noun);
            wordNetEngine.AddIndexSource(OpenWordNetFile(wordNetFolder, "index.verb"), PartOfSpeech.Verb);

            Console.WriteLine("Loading database...");
            wordNetEngine.Load();
            Console.WriteLine("Load completed.");
        }

        // Opens a reader over one file inside the WordNet dictionary folder.
        private static StreamReader OpenWordNetFile(string folder, string fileName)
        {
            return new StreamReader(Path.Combine(folder, fileName));
        }
Example #30
0
        /// <summary> Simple similarity query generator.
        /// Takes every unique word and forms a boolean query where all words are optional.
        /// After you get this you'll use it to query your <see cref="IndexSearcher"/> for similar docs.
        /// The only caveat is that the first hit returned <b>should be</b> your source document - you'll
        /// need to then ignore that.
        /// 
        /// <p/>
        /// 
        /// So, if you have a code fragment like this:
        /// <br/>
        /// <code>
        /// Query q = FormSimilarQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        /// 
        /// <p/>
        /// 
        ///  The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good)'</c>.
        /// 
        /// <p/>
        /// The philosophy behind this method is "two documents are similar if they share lots of words".
        /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
        /// similarity score if they share more uncommon words.
        /// 
        /// <p/>
        /// This method is fail-safe in that if a long 'body' is passed in and
        /// <see cref="BooleanQuery.Add"/> (used internally)
        /// throws
        /// <see cref="BooleanQuery.TooManyClauses"/>, the
        /// query as it is will be returned.
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <exception cref="System.IO.IOException">this can't happen...</exception>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
        {
            BooleanQuery tmp = new BooleanQuery();
            ISet<string> already = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>(); // ignore dups

            // Dispose the token stream when done; the original never released it.
            using (TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body)))
            {
                ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();

                while (ts.IncrementToken())
                {
                    String word = termAtt.Term;

                    // ignore opt stop words
                    if (stop != null && stop.Contains(word))
                    {
                        continue;
                    }

                    // ignore dups: Add returns false when the word was already present
                    if (!already.Add(word))
                    {
                        continue;
                    }

                    // add to query
                    TermQuery tq = new TermQuery(new Term(field, word));
                    try
                    {
                        tmp.Add(tq, Occur.SHOULD);
                    }
                    catch (BooleanQuery.TooManyClauses)
                    {
                        // fail-safe, just return what we have, not the end of the world
                        break;
                    }
                }
            }

            return tmp;
        }
Example #31
0
 /// <summary>
 /// Creates a searcher over a caller-supplied Lucene directory; no index folder is set.
 /// </summary>
 /// <param name="luceneDirectory">Directory stored in <c>_luceneDirectory</c>.</param>
 /// <param name="analyzer">Analyzer passed to the base class.</param>
 public LuceneSearcher(Lucene.Net.Store.Directory luceneDirectory, Analyzer analyzer)
     : base(analyzer)
 {
     _disposer = new DisposableSearcher(this);

     // The directory is supplied directly, so there is no index folder.
     LuceneIndexFolder = null;
     _luceneDirectory = luceneDirectory;
 }
 /// <summary>
 /// Builds a query parser over <c>DefaultField</c> whose default operator is OR.
 /// </summary>
 /// <param name="a">Analyzer to use; when null, a simple <c>MockAnalyzer</c> is created.</param>
 /// <returns>The configured parser.</returns>
 public virtual QueryParser GetParser(Analyzer a)
 {
     // The mock analyzer is created only when the caller supplies none.
     Analyzer effectiveAnalyzer = a ?? new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);

     QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, DefaultField, effectiveAnalyzer);
     queryParser.DefaultOperator = QueryParserBase.OR_OPERATOR;
     return queryParser;
 }
Example #33
0
 /// <summary>
 /// Creates the application with no directory or writer, an English Snowball analyzer,
 /// and a LUCENE_30 query parser over TEXT_FN that shares that analyzer.
 /// </summary>
 public LuceneAdvancedSearchApplication()
 {
     luceneIndexDirectory = null;
     writer = null;

     // NOTE(review): the analyzer uses VERSION while the parser uses LUCENE_30 -
     // confirm that the two are meant to differ.
     var snowball = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
     analyzer = snowball;
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, snowball);
 }
Example #34
0
 /// <summary>
 /// Runs the base tear-down, then disposes the test directory and clears the
 /// directory and analyzer fields.
 /// </summary>
 public override void TearDown()
 {
     base.TearDown();

     // Dispose the directory before dropping the reference to it.
     dir.Dispose();
     dir = null;

     anlzr = null;
 }
Example #35
0
 /// <summary>
 /// Writes the index for <paramref name="source"/> using either a create indexer or an
 /// incremental indexer.
 /// </summary>
 /// <param name="analyzer">Analyzer handed to the indexer.</param>
 /// <param name="indexer">Supplies MaxFieldLength, RamBufferSize, MergeFactor and MaxBufferedDocs.</param>
 /// <param name="source">Supplies the connection string, database type, primary key and query.</param>
 /// <param name="create">True to use <c>DBCreateIndexer</c>; false to use <c>DBIncremIndexer</c>.</param>
 /// <returns>Wall-clock time spent inside the <c>WriteResults</c> call.</returns>
 /// <remarks>
 /// Exceptions propagate unchanged. The original wrapped the body in
 /// <c>catch (Exception e) { throw e; }</c>, which only destroyed the original stack trace.
 /// </remarks>
 public static TimeSpan WriteIndex(Analyzer analyzer,IndexerSet indexer, Source source,bool create)
 {
     string connect = source.GetConnString();
     DateTime start;

     if (create)
     {
         DBCreateIndexer dbcIndexer = new DBCreateIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
         dbcIndexer.PrimaryKey = source.PrimaryKey;

         // Time only the write itself, not the indexer construction.
         start = DateTime.Now;
         dbcIndexer.WriteResults(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
         return DateTime.Now - start;
     }

     DBIncremIndexer dbiIndexer = new DBIncremIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
     dbiIndexer.PrimaryKey = source.PrimaryKey;

     start = DateTime.Now;
     dbiIndexer.WriteResults(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
     return DateTime.Now - start;
 }
 /// <summary>
 /// Disposes the searcher, the writer and the directory (in that order) and clears the analyzer reference.
 /// </summary>
 /// <remarks>
 /// Each resource is skipped when it is null so that disposing a partially
 /// initialised instance does not throw a NullReferenceException.
 /// </remarks>
 public void Dispose()
 {
     if (searcher != null)
     {
         searcher.Dispose();
     }

     if (writer != null)
     {
         writer.Dispose();
     }

     if (directory != null)
     {
         directory.Dispose();
     }

     analyzer = null;
 }
Example #37
0
		/// <summary>
		/// Finds the untokenized terms in the query, registers the keyword analyzer for
		/// each matched field, and strips the two-character markers around each term.
		/// </summary>
		/// <param name="analyzer">Per-field wrapper that receives the keyword analyzer for every matched field.</param>
		/// <param name="query">Raw query text.</param>
		/// <param name="keywordAnlyzer">Analyzer registered for the matched fields.</param>
		/// <returns>The query with the markers removed, or the original query when nothing matches.</returns>
		private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, Analyzer keywordAnlyzer)
		{
			var matches = untokenizedQuery.Matches(query);
			if (matches.Count == 0)
			{
				return query;
			}

			var rewritten = new StringBuilder(query);

			// Walk the matches back to front so that removing characters never
			// invalidates the indexes of matches still to be processed.
			for (int idx = matches.Count - 1; idx >= 0; idx--)
			{
				Match current = matches[idx];

				// Group 1 is the field name: its terms must not be tokenized.
				string fieldName = current.Groups[1].Value;
				analyzer.AddAnalyzer(fieldName, keywordAnlyzer);

				// Group 2 is the term including its enclosing "[[" and "]]".
				Group wrappedTerm = current.Groups[2];
				int closingMarkerStart = wrappedTerm.Index + wrappedTerm.Length - 2;

				// Closing marker first, then the opening one (again back to front).
				rewritten.Remove(closingMarkerStart, 2);
				rewritten.Remove(wrappedTerm.Index, 2);
			}

			return rewritten.ToString();
		}
Example #38
0
        /// <summary>
        /// Stores the analyzer and parser and registers the filter, query and span
        /// builders with their factories under their string keys.
        /// </summary>
        /// <param name="defaultField">Field used for "UserQuery" when no parser is supplied.</param>
        /// <param name="analyzer">Analyzer handed to the builders that take one.</param>
        /// <param name="parser">Optional parser; when non-null it backs the "UserQuery" builder.</param>
        protected CoreParser(string defaultField, Analyzer analyzer, QueryParser parser)
        {
            this.analyzer = analyzer;
            this.parser = parser;

            // Filter builders that need no other factory.
            filterFactory = new FilterBuilderFactory();
            filterFactory.AddBuilder("RangeFilter", new RangeFilterBuilder());
            filterFactory.AddBuilder("NumericRangeFilter", new NumericRangeFilterBuilder());

            // Query builders.
            queryFactory = new QueryBuilderFactory();
            queryFactory.AddBuilder("TermQuery", new TermQueryBuilder());
            queryFactory.AddBuilder("TermsQuery", new TermsQueryBuilder(analyzer));
            queryFactory.AddBuilder("MatchAllDocsQuery", new MatchAllDocsQueryBuilder());
            queryFactory.AddBuilder("BooleanQuery", new BooleanQueryBuilder(queryFactory));
            queryFactory.AddBuilder("NumericRangeQuery", new NumericRangeQueryBuilder());
            queryFactory.AddBuilder("DisjunctionMaxQuery", new DisjunctionMaxQueryBuilder(queryFactory));

            // "UserQuery" uses the supplied parser if there is one, otherwise field + analyzer.
            UserInputQueryBuilder userQueryBuilder = parser != null
                ? new UserInputQueryBuilder(parser)
                : new UserInputQueryBuilder(defaultField, analyzer);
            queryFactory.AddBuilder("UserQuery", userQueryBuilder);

            queryFactory.AddBuilder("FilteredQuery", new FilteredQueryBuilder(filterFactory, queryFactory));
            queryFactory.AddBuilder("ConstantScoreQuery", new ConstantScoreQueryBuilder(filterFactory));

            // Registered last among the filters because it takes both factories.
            filterFactory.AddBuilder(
                "CachedFilter",
                new CachedFilterBuilder(queryFactory, filterFactory, maxNumCachedFilters));

            // Each span builder is registered with both the span factory and the query factory.
            var spanFactory = new SpanQueryBuilderFactory();

            var spanNear = new SpanNearBuilder(spanFactory);
            spanFactory.AddBuilder("SpanNear", spanNear);
            queryFactory.AddBuilder("SpanNear", spanNear);

            var boostingTerm = new BoostingTermBuilder();
            spanFactory.AddBuilder("BoostingTermQuery", boostingTerm);
            queryFactory.AddBuilder("BoostingTermQuery", boostingTerm);

            var spanTerm = new SpanTermBuilder();
            spanFactory.AddBuilder("SpanTerm", spanTerm);
            queryFactory.AddBuilder("SpanTerm", spanTerm);

            var spanOr = new SpanOrBuilder(spanFactory);
            spanFactory.AddBuilder("SpanOr", spanOr);
            queryFactory.AddBuilder("SpanOr", spanOr);

            var spanOrTerms = new SpanOrTermsBuilder(analyzer);
            spanFactory.AddBuilder("SpanOrTerms", spanOrTerms);
            queryFactory.AddBuilder("SpanOrTerms", spanOrTerms);

            var spanFirst = new SpanFirstBuilder(spanFactory);
            spanFactory.AddBuilder("SpanFirst", spanFirst);
            queryFactory.AddBuilder("SpanFirst", spanFirst);

            var spanNot = new SpanNotBuilder(spanFactory);
            spanFactory.AddBuilder("SpanNot", spanNot);
            queryFactory.AddBuilder("SpanNot", spanNot);
        }
Example #39
0
 /// <summary>
 /// Leaves directory, writer, analyzer and parser unset and creates the similarity.
 /// </summary>
 public LuceneIndexer()
 {
     // Nothing Lucene-related is opened here.
     parser = null;
     analyzer = null;
     writer = null;
     luceneIndexDirectory = null;

     newSimilarity = new NewSimilarity();
 }
Example #40
0
		/// <summary>
		/// Creates an index writer with an unlimited max field length and the configured
		/// write-lock timeout; the create flag is set only when no index exists yet.
		/// </summary>
		/// <param name="d">Directory the writer operates on.</param>
		/// <param name="a">Analyzer passed to the writer.</param>
		/// <returns>The configured writer.</returns>
		private IndexWriter CreateWriterNoTry(Directory d, Analyzer a)
		{
			var alreadyExists = IndexExists();
			logger.Debug("Creating index writer, index exists: " + alreadyExists);

			return new IndexWriter(d, a, create: !alreadyExists, mfl: IndexWriter.MaxFieldLength.UNLIMITED)
			{
				WriteLockTimeout = LockTimeout
			};
		}
Example #41
0
 /// <summary>
 /// Stores the search settings, opens an index searcher over <paramref name="index"/>
 /// and initializes the query parser.
 /// </summary>
 /// <param name="index">Directory to search.</param>
 /// <param name="orderBy">Stored in OrderBy.</param>
 /// <param name="defaultField">Default field; "all" is used when null or empty.</param>
 /// <param name="analyzer">Stored in Analyzer.</param>
 public LuceneSearcher(Directory index, string orderBy, string defaultField, Analyzer analyzer)
 {
     OrderBy = orderBy;
     Analyzer = analyzer;

     if (string.IsNullOrEmpty(defaultField))
     {
         this.defaultField = "all";
     }
     else
     {
         this.defaultField = defaultField;
     }

     // Second constructor argument is passed as true, exactly as before.
     indexSearcher = new IndexSearcher(index, true);
     InitQueryParser();
 }
Example #42
0
        /// <summary>
        /// Builds a <see cref="StandardQueryParser"/> with OR as the default operator.
        /// </summary>
        /// <param name="a">Analyzer to parse with; when null, a MockAnalyzer built with MockTokenizer.SIMPLE is used.</param>
        /// <returns>The configured parser.</returns>
        public StandardQueryParser GetParser(Analyzer a)
        {
            Analyzer effectiveAnalyzer = a ?? new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);

            return new StandardQueryParser(effectiveAnalyzer)
            {
                DefaultOperator = Operator.OR
            };
        }
        /// <summary>
        /// Creates an <see cref="UmbracoExamineSearcher"/> over the given directory.
        /// </summary>
        /// <param name="luceneDir">Directory passed to the searcher.</param>
        /// <param name="analyzer">Analyzer to use; defaults to a StandardAnalyzer for LUCENE_29 when null.</param>
        /// <returns>The new searcher.</returns>
        public static UmbracoExamineSearcher GetUmbracoSearcher(Lucene.Net.Store.Directory luceneDir, Analyzer analyzer = null)
        {
            Analyzer effectiveAnalyzer = analyzer ?? new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            return new UmbracoExamineSearcher(luceneDir, effectiveAnalyzer);
        }
Example #44
0
 /// <summary>
 /// Captures the collaborators this context exposes; no validation is performed.
 /// </summary>
 /// <param name="directory">Stored in the directory field.</param>
 /// <param name="analyzer">Stored in the analyzer field.</param>
 /// <param name="version">Stored in the version field.</param>
 /// <param name="indexWriter">Stored in the indexWriter field.</param>
 /// <param name="transactionLock">Stored in the transactionLock field.</param>
 public Context(Directory directory, Analyzer analyzer, Version version, IIndexWriter indexWriter, object transactionLock)
 {
     // Index access.
     this.directory = directory;
     this.indexWriter = indexWriter;

     // Text analysis settings.
     this.analyzer = analyzer;
     this.version = version;

     this.transactionLock = transactionLock;
 }
        //static WordNetEngine wordNet;

        /// <summary>
        /// Static initializer: leaves the Lucene objects unset and creates the empty item list.
        /// </summary>
        static Program()
        {
            // Lucene objects are not created here.
            writer = null;
            analyzer = null;
            luceneIndexDirectory = null;

            items = new List<string[]>();
        }
Example #46
0
		/// <summary>
		/// Creates a PDF indexer with empty index criteria, handling ".pdf" files
		/// read from the "umbracoFile" property.
		/// </summary>
		/// <param name="luceneDirectory">Forwarded to the base constructor.</param>
		/// <param name="dataService">Forwarded to the base constructor.</param>
		/// <param name="analyzer">Forwarded to the base constructor.</param>
		/// <param name="async">Forwarded to the base constructor.</param>
		public PDFIndexer(Lucene.Net.Store.Directory luceneDirectory, IDataService dataService, Analyzer analyzer, bool async)
			: base(
				new IndexCriteria(
					Enumerable.Empty<IIndexField>(),
					Enumerable.Empty<IIndexField>(),
					Enumerable.Empty<string>(),
					Enumerable.Empty<string>(),
					null),
				luceneDirectory,
				dataService,
				analyzer,
				async)
		{
			UmbracoFileProperty = "umbracoFile";
			SupportedExtensions = new[] { ".pdf" };
		}
Example #47
0
 /// <summary>
 /// Initializes the UI components, a one-slot result list, a StandardAnalyzer
 /// and the similarity.
 /// </summary>
 public SearchEngineApplication()
 {
     InitializeComponent();

     searchResultList = new string[1];

     // StandardAnalyzer is the active choice; a SnowballAnalyzer was tried previously.
     analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
     newSimilarity = new NewSimilarity();
 }
 /// <summary>
 /// Opens the directory at <paramref name="indexPath"/> and creates an index writer
 /// over it using a StandardAnalyzer and the default max field length.
 /// </summary>
 /// <param name="indexPath">Directory path to create the index</param>
 public void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
     analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);

     // The writer is constructed with its create flag set to true.
     var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
 }
Example #49
0
 /// <summary>
 /// Creates an index writer with an unlimited max field length.
 /// </summary>
 /// <param name="indexDirectory">Directory the writer operates on.</param>
 /// <param name="analyzer">Analyzer passed to the writer.</param>
 /// <param name="create">Forwarded as the writer's create flag.</param>
 /// <returns>The new writer.</returns>
 protected override LuceneIndex.IndexWriter GetIndexWriter(LuceneStore.Directory indexDirectory,
                                                           LuceneAnalysis.Analyzer analyzer, bool create)
 {
     var unlimited = LuceneIndex.IndexWriter.MaxFieldLength.UNLIMITED;
     var indexWriter = new LuceneIndex.IndexWriter(indexDirectory, analyzer, create, unlimited);
     return indexWriter;
 }
Example #50
0
        /// <summary>
        /// Leaves directory, writer, searcher and parser unset and creates a StandardAnalyzer.
        /// </summary>
        public LuceneApplication()
        {
            // Index resources are not opened here.
            queryParser = null;
            indexSearcher = null;
            indexWriter = null;
            directory = null;

            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
        }
Example #51
0
        /// <summary>
        /// Leaves directory and writer unset, builds a Snowball analyzer (stemmer name
        /// "English") with the StopAnalyzer English stop-word set, and creates the similarity.
        /// </summary>
        public LuceneIREngine()
        {
            writer = null;
            luceneIndexDirectory = null;

            ISet<string> englishStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            analyzer = new SnowballAnalyzer(VERSION, "English", englishStopWords);

            mySimilarity = new CustomSimilarity();
        }
Example #52
0
        /// <summary>
        /// Opens the directory at <paramref name="indexPath"/> and creates an index writer
        /// over it using a StandardAnalyzer and the default max field length.
        /// </summary>
        /// <param name="indexPath">Directory path to create the index in.</param>
        public void CreateIndex(string indexPath)
        {
            luceneIndexDirectory = FSDirectory.Open(indexPath);
            analyzer             = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

            // The unused local `IndexDeletionPolicy p;` was removed; it was never assigned or read.
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
        }
Example #53
0
 /// <summary>
 /// Leaves directory and writer unset, and sets up a StandardAnalyzer plus a
 /// query parser over TEXT_FN.
 /// </summary>
 public LuceneAdvancedSearchApplication()
 {
     writer = null;
     luceneIndexDirectory = null;

     // StandardAnalyzer is the active choice; a SnowballAnalyzer (stemmer name
     // "English") is the alternative noted by the original author.
     analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
 }
Example #54
0
 /// <summary>
 /// Leaves directory and writer unset, and sets up a Snowball analyzer (stemmer name
 /// "English") with the stopWords set, a query parser over TEXT_FN, and the similarity.
 /// </summary>
 public LuceneSearcheEngine()
 {
     writer = null;
     luceneIndexDirectory = null;

     // Snowball analyzer with the stopWords set; a SimpleAnalyzer was the baseline before.
     analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English", stopWords);
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);

     newSimilarity = new NewSimilarity();
 }
Example #55
0
 /// <summary>
 /// Clears the index directory and writer and installs a Snowball analyzer
 /// (stemmer name "English").
 /// </summary>
 public void LuceneApplication()
 {
     luceneIndexDirectory = null; // Is set in Create Index

     // Only the last analyzer assignment ever took effect. The Whitespace, Simple,
     // Stop and Standard analyzers that were constructed and immediately overwritten
     // have been dropped, so no throwaway analyzers are created.
     analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");

     writer = null; // Is set in CreateWriter
 }
        /// <summary>
        /// Click handler: passes the text-box content and the analyzer chosen in the
        /// combo box to cutWord and binds the returned list to the list box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // With nothing selected SelectedItem is null and ToString() would throw,
            // so the click is ignored instead.
            object selectedItem = comboBox1.SelectedItem;
            if (selectedItem == null)
            {
                return;
            }

            string strWord         = textBox1.Text;
            string strAnalyzerName = selectedItem.ToString();

            Lucene.Net.Analysis.Analyzer analyzer = AnalyzerHelper.GetAnalyzerByName(strAnalyzerName);
            List <String> listString = cutWord(strWord, analyzer);

            listBox1.DataSource = listString;
        }
 /// <summary>
 /// Leaves directory and writer unset, and sets up a SimpleAnalyzer, a query parser
 /// over TEXT_FN_PASS_TEXT, and the similarity.
 /// </summary>
 public LuceneIndexSearch()
 {
     writer = null;
     luceneIndexDirectory = null;

     // SimpleAnalyzer is the active choice; a SnowballAnalyzer (whose second
     // argument names the stemmer language) is the alternative noted by the original author.
     analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);

     mySimilarity = new NewSimilarity();
 }
Example #58
0
        }//contructor which is used to initialize the objects

        /// <summary>
        /// Opens the directory at <paramref name="indexPath"/>, creates the standard and keyword
        /// analyzers, and builds an index writer around a per-field wrapper whose default
        /// analyzer is the standard one. The custom similarity is then set on the writer.
        /// </summary>
        /// <param name="indexPath">Directory path to create the index in.</param>
        public void CreateIndex(string indexPath)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

            // The keyword analyzer is only created here; it is not attached to the wrapper in this method.
            analyzerstandard = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            analyzerkeyword = new Lucene.Net.Analysis.KeywordAnalyzer();

            var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            analysor = new PerFieldAnalyzerWrapper(analyzerstandard);

            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analysor, true, maxFieldLength);
            writer.SetSimilarity(customSimilarity); // for task 6
        }
 /// <summary>
 /// Leaves directory and writer unset, and sets up a StandardAnalyzer, a single-field
 /// parser over TEXT_FN, a multi-field parser over title and author, and the similarity.
 /// </summary>
 public MainSearchEngine()
 {
     writer = null;
     luceneIndexDirectory = null;

     // StandardAnalyzer is the active choice; a SimpleAnalyzer was the baseline before.
     analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);

     // Both parsers share the same analyzer.
     parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
     multiParser = new MultiFieldQueryParser(VERSION, new[] { TEXT_FN_TITLE, TEXT_FN_AUTHOR }, analyzer);

     newSimilarity = new NewSimilarity();
 }
        /// <summary>
        /// Creates the Lucene index and then indexes the given documents.
        /// </summary>
        /// <param name="URLResult">Documents to index, passed on to indexFilesXively.</param>
        /// <param name="analyzer">Analyzer used for both the initial writer and the indexing step.</param>
        private static void CrearIndice(Dictionary <string, UrlDocument> URLResult, Lucene.Net.Analysis.Analyzer analyzer)
        {
            Trace.WriteLine("Creando el índice de Lucene");

            // Open a writer with its create flag set, turn compound files off, and close
            // it again. `using` disposes the writer even if configuring it throws.
            using (IndexWriter writer = new IndexWriter(_directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.UseCompoundFile = false;
            }

            // Index the documents
            Trace.WriteLine("Indexando los documentos...");
            indexFilesXively(URLResult, analyzer);
            Trace.WriteLine("'" + totalDocs + "' documentos indexados.");
        }