/// <summary>
/// Opens a file-system directory at <paramref name="indexPath"/> and attaches a new
/// index writer to it, analysing text with a <c>SimpleAnalyzer</c>.
/// </summary>
/// <param name="indexPath">Directory path to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();

    // The writer is constructed with its create flag set to true and the default field-length cap.
    var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(
        luceneIndexDirectory,
        analyzer,
        true,
        maxFieldLength);
}
/// <summary>
/// Initializes the searcher with the analyzer selected by its display name and
/// builds a query parser over <c>TEXT_FN_PASS_TEXT</c> with that analyzer.
/// </summary>
/// <param name="analyzer_str">
/// "Simple Analyzer", "Standard Analyzer" or "Snowball Analyzer". Any other value
/// falls back to a SimpleAnalyzer without writing to the console.
/// </param>
public LuceneIndexSearch(string analyzer_str)
{
    luceneIndexDirectory = null;
    writer = null;

    switch (analyzer_str)
    {
        case "Simple Analyzer":
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            Console.WriteLine("Simple Analyzer");
            break;
        case "Standard Analyzer":
            // Fixed: the original chained a redundant "analyzer = analyzer = ..." assignment.
            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Console.WriteLine("Standard Analyzer");
            break;
        case "Snowball Analyzer":
            // SnowballAnalyzer's second argument "name" is the language of the stemmer.
            analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English");
            Console.WriteLine("Snowball Analyzer");
            break;
        default:
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            break;
    }

    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);
    mySimilarity = new NewSimilarity();
}
/// <summary>
/// Instantiates the default analyzer from <paramref name="defaultType"/> and wraps it in a
/// PerFieldAnalyzerWrapper, registering a dedicated analyzer for every field that declares one.
/// </summary>
/// <param name="defaultType">Analyzer type to instantiate; StandardAnalyzer is used when null.</param>
/// <param name="fields">Field definitions to inspect; may be null.</param>
/// <exception cref="ArgumentException">The created instance is not an Analyzer.</exception>
void SetAnalyzerType(Type defaultType, IEnumerable<FieldDetails> fields)
{
    Type analyzerType = defaultType ?? typeof(StandardAnalyzer);

    // create default analyzer
    _defaultAnalyzer = Activator.CreateInstance(analyzerType) as Analyzer;
    if (_defaultAnalyzer == null)
    {
        throw new ArgumentException("defaultType is not an Analyzer type");
    }

    var perField = new PerFieldAnalyzerWrapper(_defaultAnalyzer);
    if (fields != null)
    {
        foreach (var detail in fields)
        {
            if (detail.Field.Analyzer == null)
            {
                continue;
            }

            var fieldAnalyzer = CreateAnalyzerFromType(detail.Field.Analyzer);
            if (fieldAnalyzer == null)
            {
                continue;
            }

            perField.AddAnalyzer(detail.Name, fieldAnalyzer);
        }
    }

    Analyzer = perField;
}
/// <summary>
/// Builds a per-field analyzer whose default is a synonym analyzer backed by an XML synonym
/// engine, with the "cota" and "codigo" fields handled by a KeywordAnalyzer each.
/// </summary>
public InstancePerFieldAnalyzerWrapper()
{
    var synonymAnalyzer = new Synonyms.SynonymAnalyzer(new Synonyms.XmlSynonymEngine());
    var perField = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(synonymAnalyzer);

    foreach (string fieldName in new[] { "cota", "codigo" })
    {
        perField.AddAnalyzer(fieldName, new Lucene.Net.Analysis.KeywordAnalyzer());
    }

    instancePerFieldAnalyzerWrapper = perField;
}
/// <summary>
/// Writes the index from a database source with completion/progress events attached and
/// returns how long the write took.
/// </summary>
/// <param name="analyzer">Analyzer handed to the indexer.</param>
/// <param name="indexer">Supplies MaxFieldLength, RamBufferSize, MergeFactor and MaxBufferedDocs.</param>
/// <param name="source">Supplies the connection string, database type and query.</param>
/// <param name="create">true uses DBRamCreateIndexer; false uses DBRamIncremIndexer.</param>
/// <param name="OnIndexCompleted">Handler subscribed to the indexer's OnIndexCompleted event.</param>
/// <param name="OnProgressChanged">Handler subscribed to the indexer's OnProgressChanged event.</param>
/// <returns>Elapsed time of the WriteResultsWithEvent call.</returns>
public static TimeSpan WriteIndexWithEvent(Analyzer analyzer, IndexerSet indexer, Source source, bool create, IndexCompletedEventHandler OnIndexCompleted, IndexProgressChangedEventHandler OnProgressChanged)
{
    // The original wrapped this body in "catch (Exception e) { throw e; }", which only reset
    // the stack trace. Exceptions now propagate unchanged, with their original trace.
    // NOTE(review): "index" is not a parameter here; presumably a static member of the enclosing type.
    string connect = source.GetConnString();
    DateTime start;
    if (create)
    {
        DBRamCreateIndexer dbcIndexer = new DBRamCreateIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
        dbcIndexer.OnIndexCompleted += OnIndexCompleted;
        dbcIndexer.OnProgressChanged += OnProgressChanged;
        // UtcNow keeps the measured duration immune to DST / local-time adjustments.
        start = DateTime.UtcNow;
        dbcIndexer.WriteResultsWithEvent(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
        return DateTime.UtcNow - start;
    }
    else
    {
        DBRamIncremIndexer dbiIndexer = new DBRamIncremIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
        dbiIndexer.OnIndexCompleted += OnIndexCompleted;
        dbiIndexer.OnProgressChanged += OnProgressChanged;
        start = DateTime.UtcNow;
        dbiIndexer.WriteResultsWithEvent(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
        return DateTime.UtcNow - start;
    }
}
/// <summary>
/// Creates an UmbracoContentIndexer over a caller-supplied Lucene directory, substituting a
/// TestDataService and a LUCENE_29 StandardAnalyzer when those arguments are omitted.
/// </summary>
/// <param name="luceneDir">Custom Lucene directory the indexer works against.</param>
/// <param name="analyzer">Analyzer to use; defaults to StandardAnalyzer(LUCENE_29).</param>
/// <param name="dataService">Data service to use; defaults to a new TestDataService.</param>
/// <returns>The indexer, with <c>IndexingError</c> subscribed to its IndexingError event.</returns>
public static UmbracoContentIndexer GetUmbracoIndexer(
    Lucene.Net.Store.Directory luceneDir,
    Analyzer analyzer = null,
    IDataService dataService = null)
{
    dataService = dataService ?? new TestDataService();
    analyzer = analyzer ?? new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    var criteria = new IndexSet().ToIndexCriteria(dataService, UmbracoContentIndexer.IndexFieldPolicies);

    // NOTE(review): the trailing "false" is presumably the async flag - confirm against the constructor.
    var indexer = new UmbracoContentIndexer(criteria, luceneDir, dataService, analyzer, false);
    indexer.IndexingError += IndexingError;
    return indexer;
}
/// <summary>
/// Points the instance at the index stored under <paramref name="path"/>, clears the writer,
/// and prepares a SimpleAnalyzer-based query parser over the <c>TEXT</c> field.
/// </summary>
/// <param name="path">File-system location of the index directory.</param>
public void Searcher(string path)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(path);
    writer = null;

    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(
        VERSION,
        TEXT,
        analyzer);
}
/// <summary>
/// Drops the references held by this instance. Nothing is disposed here; the fields are
/// simply set to null.
/// </summary>
public void Dispose()
{
    selectionProperties = null;
    analyzer = null;
    directory = null;
    facetHandlers = null;
}
/// <summary>
/// Sets <c>analyzer</c> according to the analyzer the user chose by display name.
/// Unrecognised names get the same StandardAnalyzer as "Standard Analyzer".
/// </summary>
/// <param name="analyzerSelection">Display name of the chosen analyzer.</param>
public void AnalyzerSelection(string analyzerSelection)
{
    switch (analyzerSelection)
    {
        case "Simple Analyzer":
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            break;

        case "Snowball Analyzer":
            analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
            break;

        case "Keyword Analyzer":
            analyzer = new Lucene.Net.Analysis.KeywordAnalyzer();
            break;

        case "Stop Analyzer":
            analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
            break;

        case "Whitespace Analyzer":
            analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            break;

        case "Standard Analyzer":
        default:
            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            break;
    }
}
/// <summary>
/// Rebuilds the index in <c>_indexerFolder</c> from every file under <c>_textFilesFolder</c>
/// matching <c>Config.FileSearchPattern</c> (searched recursively). Each file becomes one
/// document with its path (stored, not analyzed), file name (stored, analyzed) and
/// content (analyzed, not stored).
/// </summary>
/// <param name="analayer">Analyzer used by the index writer.</param>
public void CreateIndex(Analyzer analayer)
{
    FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
    IndexWriter indexWriter = new IndexWriter(fsDir, analayer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    string[] files;

    // try/finally guarantees the writer is disposed even when reading a file or adding a
    // document throws; the original leaked the writer in that case.
    try
    {
        files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
        foreach (string file in files)
        {
            string name = new FileInfo(file).Name;
            string content = File.ReadAllText(file);

            Document doc = new Document();
            doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));
            indexWriter.AddDocument(doc);

            Console.WriteLine("{0} - {1}", file, name);
        }

        indexWriter.Optimize();
    }
    finally
    {
        indexWriter.Dispose();
    }

    Console.WriteLine("File count: {0}", files.Length);
}
/// <summary>
/// Stores the supplied analyzer and creates a LUCENE_30 query parser over the
/// <c>TEXT_FN</c> field that uses it.
/// </summary>
/// <param name="an">Analyzer used for query parsing.</param>
public Searcher(Analyzer an)
{
    analyzer = an;
    parser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        TEXT_FN,
        analyzer);
}
/// <summary>
/// Reads the analyzer, position-increment flag and default operator from the query
/// configuration, then delegates to the base processor. When no analyzer is configured
/// the tree is returned untouched and no state is changed.
/// </summary>
/// <param name="queryTree">Root of the query node tree.</param>
/// <returns>The result of <c>base.Process</c>, or <paramref name="queryTree"/> when no analyzer is configured.</returns>
public override IQueryNode Process(IQueryNode queryTree)
{
    Analyzer configuredAnalyzer = GetQueryConfigHandler().Get(ConfigurationKeys.ANALYZER);
    if (configuredAnalyzer == null)
    {
        return queryTree;
    }

    this.analyzer = configuredAnalyzer;

    // Position increments default to disabled unless the configuration supplies a value.
    this.positionIncrementsEnabled = false;
    bool? configuredPositionIncrements = GetQueryConfigHandler().Get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
    var configuredOperator = GetQueryConfigHandler().Get(ConfigurationKeys.DEFAULT_OPERATOR);

    if (configuredOperator != null)
    {
        this.defaultOperator = configuredOperator.Value;
    }
    else
    {
        this.defaultOperator = Operator.OR;
    }

    if (configuredPositionIncrements != null)
    {
        this.positionIncrementsEnabled = configuredPositionIncrements.Value;
    }

    return this.analyzer != null ? base.Process(queryTree) : queryTree;
}
/// <summary>
/// Creates a searcher whose index lives in the "Index" sub-folder of
/// <paramref name="workingFolder"/>, then initializes the directory.
/// </summary>
/// <param name="workingFolder">Parent folder that contains (or will contain) the "Index" folder.</param>
/// <param name="analyzer">Analyzer forwarded to the base class.</param>
public LuceneSearcher(DirectoryInfo workingFolder, Analyzer analyzer)
    : base(analyzer)
{
    _disposer = new DisposableSearcher(this);

    string indexPath = Path.Combine(workingFolder.FullName, "Index");
    LuceneIndexFolder = new DirectoryInfo(indexPath);

    InitializeDirectory();
}
/// <summary>
/// Creates the index at <paramref name="indexPath"/> using the analyzer selected by
/// <paramref name="name"/>, and applies <c>customSimilarity</c> to the new writer.
/// </summary>
/// <param name="indexPath">Directory path to create the index in.</param>
/// <param name="name">
/// "WhitespaceAnalyzer", "SimpleAnalyzer", "StandardAnalyzer" or "StopAnalyzer".
/// Any other value leaves the current <c>analyzer</c> field unchanged.
/// </param>
public void CreateIndex(string indexPath, string name)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

    switch (name)
    {
        case "WhitespaceAnalyzer":
            analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            break;
        case "SimpleAnalyzer":
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            break;
        case "StandardAnalyzer":
            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            break;
        case "StopAnalyzer":
            analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
            break;
    }

    // Fixed: the writer used to be created in an "else" attached only to the StopAnalyzer
    // test, so choosing "StopAnalyzer" skipped writer creation and SetSimilarity ran on a
    // null or stale writer. The writer is now created for every choice.
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(customSimilarity);
}
/// <summary>
/// Creates a searcher bound to an existing index writer.
/// </summary>
/// <param name="writer">Writer stored in <c>_nrtWriter</c>; must not be null.</param>
/// <param name="analyzer">Analyzer forwarded to the base class.</param>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
public LuceneSearcher(IndexWriter writer, Analyzer analyzer)
    : base(analyzer)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    _disposer = new DisposableSearcher(this);
    _nrtWriter = writer;
}
/// <summary>
/// Starts with no directory, analyzer or writer, and a fresh CSVDocument.
/// </summary>
public SearchEngine()
{
    // Per the original notes these are populated later by Create Index, CreateAnalyser and CreateWriter.
    writer = null;
    analyzer = null;
    luceneIndexDirectory = null;

    CSVdoc = new CSVDocument();
}
/// <summary>
/// Builds the fixture: an in-memory index of six attribute documents, an
/// AttributesFacetHandler loaded from it, and a BoboBrowser over the resulting reader.
/// </summary>
public void Init()
{
    facetHandlers = new List<IFacetHandler>();
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer();
    selectionProperties = new Dictionary<string, string>();

    // Populate the in-memory index.
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop5=val1"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val1", "prop7=val7"));
    indexWriter.AddDocument(Doc("prop1=val2", "prop3=val2", "prop3=val3"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1"));
    indexWriter.AddDocument(Doc("prop1=val1", "prop2=val1", "prop4=val2", "prop4=val3"));
    indexWriter.Commit();

    // Register the facet handler under test.
    attributesFacetHandler = new AttributesFacetHandler(
        AttributeHandlerName,
        AttributeHandlerName,
        null,
        null,
        new Dictionary<string, string>());
    facetHandlers.Add(attributesFacetHandler);

    // Open the index through a reader created with "true" as its second argument and load facet data.
    IndexReader indexReader = IndexReader.Open(directory, true);
    boboReader = BoboIndexReader.GetInstance(indexReader, facetHandlers);
    attributesFacetHandler.LoadFacetData(boboReader);
    browser = new BoboBrowser(boboReader);
}
/// <summary>
/// Sets <c>analyzer</c> to an English SnowballAnalyzer configured with the custom
/// stop-word list below.
/// </summary>
private void CreateAnalyser()
{
    // The list is passed to the analyzer exactly as written (order and repeated entries included).
    string[] stopWordList = new string[]
    {
        "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all",
        "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst",
        "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway",
        "anywhere", "are", "around", "as", "at",
        "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand",
        "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom",
        "but", "by",
        "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry",
        "de", "describe", "detail", "do", "done", "down", "due", "during",
        "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc",
        "even", "ever", "every", "everyone", "everything", "everywhere", "except",
        "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former",
        "formerly", "forty", "found", "four", "from", "front", "full", "further",
        "get", "give", "go",
        "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby",
        "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred",
        "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself",
        "keep",
        "last", "latter", "latterly", "least", "less", "ltd",
        "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover",
        "most", "mostly", "move", "much", "must", "my", "myself",
        "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none",
        "noone", "nor", "not", "nothing", "now", "nowhere",
        "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other",
        "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
        "part", "per", "perhaps", "please", "put",
        "rather", "re",
        "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should",
        "show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone",
        "something", "sometime", "sometimes", "somewhere", "still", "such", "system",
        "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence",
        "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin",
        "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to",
        "together", "too", "top", "toward", "towards", "twelve", "twenty", "two",
        "un", "under", "until", "up", "upon", "us",
        "very", "via",
        "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where",
        "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither",
        "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without",
        "would",
        "yet", "you", "your", "yours", "yourself", "yourselves",
        "the"
    };

    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Version, "English", stopWordList);
}
/// <summary>
/// Captures the directory, analyzer and Lucene version to be used, with
/// <c>Debug</c> switched off.
/// </summary>
/// <param name="directory">Assigned to <c>IndexDirectory</c>.</param>
/// <param name="analyzer">Assigned to <c>Analyzer</c>.</param>
/// <param name="version">Assigned to <c>CurrentLuceneVersion</c>.</param>
public LuceneTesterBase(
    LuceneDirectory directory,
    LuceneAnalyzer analyzer,
    LuceneVersion version)
{
    Analyzer = analyzer;
    CurrentLuceneVersion = version;
    IndexDirectory = directory;

    Debug = false;
}
/// <summary>
/// Starts without a directory or writer, and prepares a SimpleAnalyzer together with a
/// LUCENE_30 query parser over the <c>TEXT_FN</c> field.
/// </summary>
public LuceneInteractive()
{
    writer = null;
    luceneIndexDirectory = null;

    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        TEXT_FN,
        analyzer);
}
/// <summary>
/// Indexes a single document and checks that a TermRangeFilter on "content" excludes it for
/// the first range and includes it for the second.
/// </summary>
/// <param name="analyzer">Analyzer used to index the document.</param>
/// <param name="firstBeg">Lower bound of the range expected to exclude the term.</param>
/// <param name="firstEnd">Upper bound of the range expected to exclude the term.</param>
/// <param name="secondBeg">Lower bound of the range expected to include the term.</param>
/// <param name="secondEnd">Upper bound of the range expected to include the term.</param>
public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
{
    Directory directory = NewDirectory();

    // Index one document: a collated "content" term plus a fixed "body" term to query on.
    IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document document = new Document();
    document.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
    document.Add(new StringField("body", "body", Field.Store.YES));
    indexWriter.AddDocument(document);
    indexWriter.Dispose();

    IndexReader indexReader = DirectoryReader.Open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Query bodyQuery = new TermQuery(new Term("body", "body"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi searcher not
    // supported).
    ScoreDoc[] hits = indexSearcher
        .Search(bodyQuery, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
        .ScoreDocs;
    Assert.AreEqual(0, hits.Length, "The index Term should not be included.");

    hits = indexSearcher
        .Search(bodyQuery, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
        .ScoreDocs;
    Assert.AreEqual(1, hits.Length, "The index Term should be included.");

    indexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Tokenizes <paramref name="word"/> with the given analyzer (under the field name "field1")
/// and returns the term text of every token, in stream order.
/// </summary>
/// <param name="word">Text to tokenize.</param>
/// <param name="analysis">Analyzer that produces the token stream.</param>
/// <returns>The token terms; empty when the analyzer emits no tokens.</returns>
public List <String> cutWord(string word, Lucene.Net.Analysis.Analyzer analysis)
{
    List <string> result = new List <string>();

    // Consumer workflow: obtain the attribute, Reset, IncrementToken until false, End, dispose.
    // The original called Reset() after the loop, declared an unused local, and never
    // released the stream.
    using (TokenStream tokenStream = analysis.TokenStream("field1", new StringReader(word)))
    {
        ITermAttribute termAttribute = tokenStream.AddAttribute <ITermAttribute>();

        tokenStream.Reset();
        while (tokenStream.IncrementToken())
        {
            result.Add(termAttribute.Term);
        }

        tokenStream.End();
    }

    return result;
}
/// <summary>
/// Rebuilds the in-memory search index and <c>LookupTable</c> from <c>Service.PoIs</c>.
/// Each item contributes its id (stored, not indexed), one analyzed field per text-typed
/// meta-info label that has a value, and an "All" field that concatenates the name and
/// those values.
/// </summary>
public void CreateSearchIndex()
{
    directory = new RAMDirectory();
    analyzer = new StandardAnalyzer(Version.LUCENE_30);

    // The writer is a local, so it is disposed here; the original committed but never
    // released it.
    using (var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        LookupTable = new Dictionary<string, BaseContent>();

        foreach (BaseContent p in Service.PoIs.ToList())
        {
            string id = p.Id.ToString();
            var document = new Document();
            document.Add(new Field("id", id, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            string all = p.Name + " ";
            foreach (MetaInfo mi in p.EffectiveMetaInfo)
            {
                string value;
                // Only text meta-info that actually has a label value is indexed.
                if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value)) continue;
                document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED));
                all += value + " ";
            }

            document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED));
            LookupTable[id] = p;
            ixw.AddDocument(document);
        }

        ixw.Commit();
    }
}
/// <summary>
/// Builds a per-field analyzer for the EPiServer search fields and writes it, via reflection,
/// into the private static <c>_analyzer</c> field of <paramref name="indexingServiceSettingsType"/>.
/// </summary>
/// <param name="indexingServiceSettingsType">Type that owns the static <c>_analyzer</c> field.</param>
/// <param name="defaultAnalyzer">Analyzer for fields without a specific registration.</param>
/// <param name="textAnalyzer">Analyzer for the title, display text, authors and default fields.</param>
private void InitSearchServiceAnalyzer(Type indexingServiceSettingsType, Analyzer defaultAnalyzer, Analyzer textAnalyzer)
{
    var wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);

    string[] keywordFields =
    {
        "EPISERVER_SEARCH_ID",
        "EPISERVER_SEARCH_CULTURE",
        "EPISERVER_SEARCH_REFERENCEID",
        "EPISERVER_SEARCH_AUTHORSTORAGE"
    };
    foreach (string fieldName in keywordFields)
    {
        wrapper.AddAnalyzer(fieldName, new KeywordAnalyzer());
    }

    string[] whitespaceFields =
    {
        "EPISERVER_SEARCH_CATEGORIES",
        "EPISERVER_SEARCH_ACL",
        "EPISERVER_SEARCH_VIRTUALPATH",
        "EPISERVER_SEARCH_TYPE",
        "EPISERVER_SEARCH_CREATED",
        "EPISERVER_SEARCH_MODIFIED",
        "EPISERVER_SEARCH_PUBLICATIONEND",
        "EPISERVER_SEARCH_PUBLICATIONSTART",
        "EPISERVER_SEARCH_ITEMSTATUS"
    };
    foreach (string fieldName in whitespaceFields)
    {
        wrapper.AddAnalyzer(fieldName, new WhitespaceAnalyzer());
    }

    string[] textFields =
    {
        "EPISERVER_SEARCH_TITLE",
        "EPISERVER_SEARCH_DISPLAYTEXT",
        "EPISERVER_SEARCH_AUTHORS",
        "EPISERVER_SEARCH_DEFAULT"
    };
    foreach (string fieldName in textFields)
    {
        wrapper.AddAnalyzer(fieldName, textAnalyzer);
    }

    // The target field is private and static, hence the reflection and the null instance.
    var analyzerField = indexingServiceSettingsType.GetField("_analyzer", BindingFlags.Static | BindingFlags.NonPublic);
    analyzerField.SetValue(null, wrapper);
}
/// <summary>
/// Releases the directory if it is still open and drops the references held by this
/// instance. Safe to call more than once, or before <c>directory</c> was ever assigned.
/// </summary>
public void Dispose()
{
    facetHandlers = null;

    // Guard against a null directory: the original dereferenced it unconditionally, so a
    // second Dispose() threw NullReferenceException.
    if (directory != null && directory.isOpen_ForNUnit)
    {
        directory.Dispose();
    }

    directory = null;
    analyzer = null;
}
/// <summary>
/// Creates a translator with the given default fields and analyzer, starting from an
/// empty BooleanQuery root.
/// </summary>
/// <param name="defaultFieldNames">Stored in <c>_defaultFieldNames</c>.</param>
/// <param name="defaultAnalyzer">Stored in <c>_defaultAnalyzer</c>.</param>
public ObjectQueryTranslator(string[] defaultFieldNames, Analyzer defaultAnalyzer)
{
    _defaultAnalyzer = defaultAnalyzer;
    _defaultFieldNames = defaultFieldNames;

    _root = new BooleanQuery();
}
/// <summary>
/// Starts with every Lucene component (directory, writer, analyzer, parser) unset.
/// </summary>
public LuceneIndexer()
{
    parser = null;
    analyzer = null;
    writer = null;
    luceneIndexDirectory = null;
}
/// <summary>
/// Starts without a directory or writer, using a SimpleAnalyzer and a Newsimilarity instance.
/// </summary>
public LuceneAdvancedSearchApplication()
{
    writer = null;
    luceneIndexDirectory = null;

    analyzer = new SimpleAnalyzer();
    similarity = new Newsimilarity();
}
/// <summary>
/// Initializes the application state: unset Lucene components, a weight of 1 for each of
/// the four parser fields, and a WordNet engine loaded from the "../../../wordnetdic" folder.
/// </summary>
public LuceneApp()
{
    luceneIndexDirectory = null;
    analyzer = null;
    writer = null;
    newSimilarity = new NewSimilarity();

    parserFields = new string[] { DOC_TITLE, DOC_AUTHOR, DOC_BIB, DOC_BODY };
    fieldWeights = new Dictionary <string, float>();
    for (int i = 0; i < parserFields.Length; i++)
    {
        fieldWeights.Add(parserFields[i], 1);
    }

    // Init WordNet
    // Src: https://developer.syn.co.in/tutorial/wordnet/tutorial.html
    var wordNetFolder = "../../../wordnetdic";
    wordNetEngine = new WordNetEngine();

    // The three arrays are parallel: entry i of each file list belongs to part of speech i.
    PartOfSpeech[] partsOfSpeech = { PartOfSpeech.Adjective, PartOfSpeech.Adverb, PartOfSpeech.Noun, PartOfSpeech.Verb };
    string[] dataFiles = { "data.adj", "data.adv", "data.noun", "data.verb" };
    string[] indexFiles = { "index.adj", "index.adv", "index.noun", "index.verb" };

    // data sources
    for (int i = 0; i < dataFiles.Length; i++)
    {
        wordNetEngine.AddDataSource(new StreamReader(Path.Combine(wordNetFolder, dataFiles[i])), partsOfSpeech[i]);
    }

    // indexes
    for (int i = 0; i < indexFiles.Length; i++)
    {
        wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(wordNetFolder, indexFiles[i])), partsOfSpeech[i]);
    }

    Console.WriteLine("Loading database...");
    wordNetEngine.Load();
    Console.WriteLine("Load completed.");
}
/// <summary> Simple similarity query generator.
/// Takes every unique word and forms a boolean query where all words are optional.
/// After you get this you'll use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is the first hit returned <b>should be</b> your source document - you'll
/// need to then ignore that.
///
/// <p/>
///
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
///
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good)'</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a
/// higher similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add"/> (used internally)
/// throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the
/// query as it is will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to
/// </param>
/// <param name="a">the analyzer to use to parse the body
/// </param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"
/// </param>
/// <param name="stop">optional set of stop words to ignore
/// </param>
/// <returns> a query with all unique words in 'body'
/// </returns>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
{
    BooleanQuery tmp = new BooleanQuery();
    ISet<string> already = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>(); // ignore dups

    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    try
    {
        ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
        {
            String word = termAtt.Term;

            // ignore opt stop words
            if (stop != null && stop.Contains(word))
                continue;

            // ignore dups
            if (already.Contains(word))
                continue;
            already.Add(word);

            // add to query
            TermQuery tq = new TermQuery(new Term(field, word));
            try
            {
                tmp.Add(tq, Occur.SHOULD);
            }
            catch (BooleanQuery.TooManyClauses)
            {
                // fail-safe, just return what we have, not the end of the world
                break;
            }
        }
    }
    finally
    {
        // Release the token stream; the original never closed it.
        ts.Dispose();
    }

    return tmp;
}
/// <summary>
/// Creates a searcher over an explicitly supplied Lucene directory; no index folder is
/// associated with it.
/// </summary>
/// <param name="luceneDirectory">Directory stored in <c>_luceneDirectory</c>.</param>
/// <param name="analyzer">Analyzer forwarded to the base class.</param>
public LuceneSearcher(Lucene.Net.Store.Directory luceneDirectory, Analyzer analyzer)
    : base(analyzer)
{
    _disposer = new DisposableSearcher(this);

    LuceneIndexFolder = null;
    _luceneDirectory = luceneDirectory;
}
/// <summary>
/// Creates a QueryParser over <c>DefaultField</c> whose default operator is OR.
/// </summary>
/// <param name="a">Analyzer to parse with; a MockAnalyzer (SIMPLE tokenizer, third argument true) is created when null.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParser GetParser(Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);

    QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, DefaultField, effectiveAnalyzer);
    queryParser.DefaultOperator = QueryParserBase.OR_OPERATOR;
    return queryParser;
}
/// <summary>
/// Starts without a directory or writer, and prepares an English SnowballAnalyzer together
/// with a LUCENE_30 query parser over the <c>TEXT_FN</c> field.
/// </summary>
public LuceneAdvancedSearchApplication()
{
    writer = null;
    luceneIndexDirectory = null;

    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
    parser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        TEXT_FN,
        analyzer);
}
/// <summary>
/// Runs the base tear-down, then disposes the directory and clears the fixture fields.
/// </summary>
public override void TearDown()
{
    base.TearDown();

    dir.Dispose();
    dir = null;

    anlzr = null;
}
/// <summary>
/// Writes the index from a database source and returns how long the write took.
/// </summary>
/// <param name="analyzer">Analyzer handed to the indexer.</param>
/// <param name="indexer">Supplies MaxFieldLength, RamBufferSize, MergeFactor and MaxBufferedDocs.</param>
/// <param name="source">Supplies the connection string, database type, primary key and query.</param>
/// <param name="create">true uses DBCreateIndexer; false uses DBIncremIndexer.</param>
/// <returns>Elapsed time of the WriteResults call.</returns>
public static TimeSpan WriteIndex(Analyzer analyzer,IndexerSet indexer, Source source,bool create)
{
    // The original wrapped this body in "catch (Exception e) { throw e; }", which only reset
    // the stack trace. Exceptions now propagate unchanged, with their original trace.
    // NOTE(review): "index" is not a parameter here; presumably a static member of the enclosing type.
    string connect = source.GetConnString();
    DateTime start;
    if (create)
    {
        DBCreateIndexer dbcIndexer = new DBCreateIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
        dbcIndexer.PrimaryKey = source.PrimaryKey;
        // UtcNow keeps the measured duration immune to DST / local-time adjustments.
        start = DateTime.UtcNow;
        dbcIndexer.WriteResults(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
        return DateTime.UtcNow - start;
    }
    else
    {
        DBIncremIndexer dbiIndexer = new DBIncremIndexer(analyzer, source.DBType, connect, index.Path, index.Caption);
        dbiIndexer.PrimaryKey = source.PrimaryKey;
        start = DateTime.UtcNow;
        dbiIndexer.WriteResults(source.Query, indexer.MaxFieldLength, indexer.RamBufferSize, indexer.MergeFactor, indexer.MaxBufferedDocs);
        return DateTime.UtcNow - start;
    }
}
/// <summary>
/// Disposes the searcher, writer and directory (in that order) and drops the analyzer
/// reference. Members that were never assigned are skipped, so disposing a partially
/// initialised instance does not throw NullReferenceException.
/// </summary>
public void Dispose()
{
    if (searcher != null)
    {
        searcher.Dispose();
    }

    if (writer != null)
    {
        writer.Dispose();
    }

    if (directory != null)
    {
        directory.Dispose();
    }

    analyzer = null;
}
/// <summary>
/// Finds untokenized terms in the query, registers <paramref name="keywordAnlyzer"/> for
/// each matched field, and strips the two-character wrappers around each matched term.
/// </summary>
/// <param name="analyzer">Per-field wrapper that receives the field registrations.</param>
/// <param name="query">Raw query text.</param>
/// <param name="keywordAnlyzer">Analyzer registered for every matched field.</param>
/// <returns>The query with the wrappers removed, or the original query when nothing matches.</returns>
private static string PreProcessUntokenizedTerms(PerFieldAnalyzerWrapper analyzer, string query, Analyzer keywordAnlyzer)
{
    var matches = untokenizedQuery.Matches(query);
    if (matches.Count < 1)
    {
        return query;
    }

    var rewritten = new StringBuilder(query);

    // Walk the matches back-to-front so earlier match indexes stay valid while we delete.
    for (int index = matches.Count - 1; index >= 0; index--)
    {
        Match match = matches[index];

        // Group 1 is the field name: its terms must not be tokenized.
        analyzer.AddAnalyzer(match.Groups[1].Value, keywordAnlyzer);

        // Group 2 is the term including its enclosing "[[" and "]]"; drop the closing
        // pair first, then the opening pair (again in reverse order).
        Group term = match.Groups[2];
        int closingStart = term.Index + term.Length - 2;
        rewritten.Remove(closingStart, 2);
        rewritten.Remove(term.Index, 2);
    }

    return rewritten.ToString();
}
/// <summary>
/// Wires up the filter and query builder factories with the core set of builders.
/// "UserQuery" is backed by <paramref name="parser"/> when one is supplied, otherwise by
/// <paramref name="defaultField"/> and <paramref name="analyzer"/>.
/// </summary>
/// <param name="defaultField">Default field for user queries when no parser is given.</param>
/// <param name="analyzer">Analyzer stored on the instance and passed to the term-based builders.</param>
/// <param name="parser">Optional query parser for user input; may be null.</param>
protected CoreParser(string defaultField, Analyzer analyzer, QueryParser parser)
{
    this.analyzer = analyzer;
    this.parser = parser;

    // Filters
    filterFactory = new FilterBuilderFactory();
    filterFactory.AddBuilder("RangeFilter", new RangeFilterBuilder());
    filterFactory.AddBuilder("NumericRangeFilter", new NumericRangeFilterBuilder());

    // Queries
    queryFactory = new QueryBuilderFactory();
    queryFactory.AddBuilder("TermQuery", new TermQueryBuilder());
    queryFactory.AddBuilder("TermsQuery", new TermsQueryBuilder(analyzer));
    queryFactory.AddBuilder("MatchAllDocsQuery", new MatchAllDocsQueryBuilder());
    queryFactory.AddBuilder("BooleanQuery", new BooleanQueryBuilder(queryFactory));
    queryFactory.AddBuilder("NumericRangeQuery", new NumericRangeQueryBuilder());
    queryFactory.AddBuilder("DisjunctionMaxQuery", new DisjunctionMaxQueryBuilder(queryFactory));

    UserInputQueryBuilder userQueryBuilder;
    if (parser == null)
    {
        userQueryBuilder = new UserInputQueryBuilder(defaultField, analyzer);
    }
    else
    {
        userQueryBuilder = new UserInputQueryBuilder(parser);
    }
    queryFactory.AddBuilder("UserQuery", userQueryBuilder);

    queryFactory.AddBuilder("FilteredQuery", new FilteredQueryBuilder(filterFactory, queryFactory));
    queryFactory.AddBuilder("ConstantScoreQuery", new ConstantScoreQueryBuilder(filterFactory));

    filterFactory.AddBuilder("CachedFilter", new CachedFilterBuilder(queryFactory, filterFactory, maxNumCachedFilters));

    // Span queries: each builder is registered with both the span factory and the query factory.
    SpanQueryBuilderFactory spanFactory = new SpanQueryBuilderFactory();

    SpanNearBuilder spanNear = new SpanNearBuilder(spanFactory);
    spanFactory.AddBuilder("SpanNear", spanNear);
    queryFactory.AddBuilder("SpanNear", spanNear);

    BoostingTermBuilder boostingTerm = new BoostingTermBuilder();
    spanFactory.AddBuilder("BoostingTermQuery", boostingTerm);
    queryFactory.AddBuilder("BoostingTermQuery", boostingTerm);

    SpanTermBuilder spanTerm = new SpanTermBuilder();
    spanFactory.AddBuilder("SpanTerm", spanTerm);
    queryFactory.AddBuilder("SpanTerm", spanTerm);

    SpanOrBuilder spanOr = new SpanOrBuilder(spanFactory);
    spanFactory.AddBuilder("SpanOr", spanOr);
    queryFactory.AddBuilder("SpanOr", spanOr);

    SpanOrTermsBuilder spanOrTerms = new SpanOrTermsBuilder(analyzer);
    spanFactory.AddBuilder("SpanOrTerms", spanOrTerms);
    queryFactory.AddBuilder("SpanOrTerms", spanOrTerms);

    SpanFirstBuilder spanFirst = new SpanFirstBuilder(spanFactory);
    spanFactory.AddBuilder("SpanFirst", spanFirst);
    queryFactory.AddBuilder("SpanFirst", spanFirst);

    SpanNotBuilder spanNot = new SpanNotBuilder(spanFactory);
    spanFactory.AddBuilder("SpanNot", spanNot);
    queryFactory.AddBuilder("SpanNot", spanNot);
}
/// <summary>
/// Starts with the directory, writer, analyzer and parser unset, and a fresh NewSimilarity.
/// </summary>
public LuceneIndexer()
{
    parser = null;
    analyzer = null;
    writer = null;
    luceneIndexDirectory = null;

    newSimilarity = new NewSimilarity();
}
/// <summary>
/// Creates an index writer with unlimited field length. The writer's create flag is set
/// only when <c>IndexExists()</c> reports no existing index, and its write-lock timeout
/// is taken from <c>LockTimeout</c>.
/// </summary>
/// <param name="d">Directory holding the index.</param>
/// <param name="a">Analyzer used by the writer.</param>
/// <returns>The configured writer.</returns>
private IndexWriter CreateWriterNoTry(Directory d, Analyzer a)
{
    var alreadyExists = IndexExists();
    logger.Debug("Creating index writer, index exists: " + alreadyExists);

    var indexWriter = new IndexWriter(
        d,
        a,
        create: !alreadyExists,
        mfl: IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.WriteLockTimeout = LockTimeout;

    return indexWriter;
}
/// <summary>
/// Creates a searcher over <paramref name="index"/> (the IndexSearcher is constructed with
/// "true" as its second argument) and initializes the query parser.
/// </summary>
/// <param name="index">Directory containing the index.</param>
/// <param name="orderBy">Stored in <c>OrderBy</c>.</param>
/// <param name="defaultField">Default search field; "all" is used when null or empty.</param>
/// <param name="analyzer">Stored in <c>Analyzer</c>.</param>
public LuceneSearcher(Directory index, string orderBy, string defaultField, Analyzer analyzer)
{
    this.OrderBy = orderBy;
    this.Analyzer = analyzer;

    if (string.IsNullOrEmpty(defaultField))
    {
        this.defaultField = "all";
    }
    else
    {
        this.defaultField = defaultField;
    }

    this.indexSearcher = new IndexSearcher(index, true);
    InitQueryParser();
}
/// <summary>
/// Creates a StandardQueryParser whose default operator is OR.
/// </summary>
/// <param name="a">Analyzer to parse with; a MockAnalyzer (SIMPLE tokenizer, third argument true) is created when null.</param>
/// <returns>The configured parser.</returns>
public StandardQueryParser GetParser(Analyzer a)
{
    Analyzer effectiveAnalyzer = a ?? new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);

    StandardQueryParser standardParser = new StandardQueryParser(effectiveAnalyzer);
    standardParser.DefaultOperator = Operator.OR;
    return standardParser;
}
/// <summary>
/// Creates an UmbracoExamineSearcher over the given directory.
/// </summary>
/// <param name="luceneDir">Lucene directory to search.</param>
/// <param name="analyzer">Analyzer to use; defaults to StandardAnalyzer(LUCENE_29) when null.</param>
/// <returns>The new searcher.</returns>
public static UmbracoExamineSearcher GetUmbracoSearcher(Lucene.Net.Store.Directory luceneDir, Analyzer analyzer = null)
{
    Analyzer effectiveAnalyzer = analyzer ?? new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    return new UmbracoExamineSearcher(luceneDir, effectiveAnalyzer);
}
/// <summary>
/// Captures the collaborators this context works with.
/// </summary>
/// <param name="directory">Stored in the <c>directory</c> field.</param>
/// <param name="analyzer">Stored in the <c>analyzer</c> field.</param>
/// <param name="version">Stored in the <c>version</c> field.</param>
/// <param name="indexWriter">Stored in the <c>indexWriter</c> field.</param>
/// <param name="transactionLock">Stored in the <c>transactionLock</c> field.</param>
public Context(
    Directory directory,
    Analyzer analyzer,
    Version version,
    IIndexWriter indexWriter,
    object transactionLock)
{
    this.transactionLock = transactionLock;
    this.indexWriter = indexWriter;
    this.version = version;
    this.analyzer = analyzer;
    this.directory = directory;
}
//static WordNetEngine wordNet;
/// <summary>
/// Static initializer: clears the Lucene components and creates the empty item list.
/// </summary>
static Program()
{
    writer = null;
    analyzer = null;
    luceneIndexDirectory = null;

    //customSimilarity = new CustomSimilarity();
    items = new List <string[]>();
}
/// <summary>
/// Creates a PDF indexer with empty index criteria (no fields, no include/exclude lists,
/// null as the last criteria argument), handling ".pdf" files read from the
/// "umbracoFile" property.
/// </summary>
/// <param name="luceneDirectory">Directory forwarded to the base indexer.</param>
/// <param name="dataService">Data service forwarded to the base indexer.</param>
/// <param name="analyzer">Analyzer forwarded to the base indexer.</param>
/// <param name="async">Async flag forwarded to the base indexer.</param>
public PDFIndexer(Lucene.Net.Store.Directory luceneDirectory, IDataService dataService, Analyzer analyzer, bool async)
    : base(
        new IndexCriteria(
            Enumerable.Empty<IIndexField>(),
            Enumerable.Empty<IIndexField>(),
            Enumerable.Empty<string>(),
            Enumerable.Empty<string>(),
            null),
        luceneDirectory,
        dataService,
        analyzer,
        async)
{
    SupportedExtensions = new[] { ".pdf" };
    UmbracoFileProperty = "umbracoFile";
}
/// <summary>
/// Initializes the form's components, a one-slot result list, a StandardAnalyzer and a
/// NewSimilarity instance.
/// </summary>
public SearchEngineApplication()
{
    InitializeComponent();

    searchResultList = new string[1];

    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    newSimilarity = new NewSimilarity();
}
/// <summary>
/// Opens a file-system directory at <paramref name="indexPath"/> and attaches a new
/// index writer to it, analysing text with a <c>StandardAnalyzer</c>.
/// </summary>
/// <param name="indexPath">Directory path to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);

    // The writer is constructed with its create flag set to true and the default field-length cap.
    var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(
        luceneIndexDirectory,
        analyzer,
        true,
        maxFieldLength);
}
/// <summary>
/// Creates an index writer with unlimited field length over the given directory.
/// </summary>
/// <param name="indexDirectory">Directory the writer operates on.</param>
/// <param name="analyzer">Analyzer used by the writer.</param>
/// <param name="create">Passed through as the writer's create flag.</param>
/// <returns>The new writer.</returns>
protected override LuceneIndex.IndexWriter GetIndexWriter(LuceneStore.Directory indexDirectory, LuceneAnalysis.Analyzer analyzer, bool create)
{
    LuceneIndex.IndexWriter indexWriter = new LuceneIndex.IndexWriter(
        indexDirectory,
        analyzer,
        create,
        LuceneIndex.IndexWriter.MaxFieldLength.UNLIMITED);

    return indexWriter;
}
/// <summary>
/// Starts with the directory, writer, searcher and parser unset, and a StandardAnalyzer ready.
/// </summary>
public LuceneApplication()
{
    queryParser = null;
    indexSearcher = null;
    indexWriter = null;
    directory = null;

    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
}
/// <summary>
/// Class constructor: no directory or writer yet, an English SnowballAnalyzer using
/// StopAnalyzer's English stop-word set, and a CustomSimilarity.
/// </summary>
public LuceneIREngine()
{
    writer = null;
    luceneIndexDirectory = null;

    ISet <string> englishStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    analyzer = new SnowballAnalyzer(VERSION, "English", englishStopWords);

    mySimilarity = new CustomSimilarity();
}
/// <summary>
/// Opens a file-system directory at <paramref name="indexPath"/> and attaches a new
/// index writer to it, analysing text with a <c>StandardAnalyzer</c>.
/// </summary>
/// <param name="indexPath">Directory path to create the index in.</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = FSDirectory.Open(indexPath);
    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);

    // The unused "IndexDeletionPolicy p;" local from the original has been removed.
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
}
/// <summary>
/// Clears the index directory and writer, then sets up a StandardAnalyzer
/// and a QueryParser over the TEXT_FN field that shares that analyzer.
/// </summary>
public LuceneAdvancedSearchApplication()
{
    luceneIndexDirectory = null;
    writer = null;

    // StandardAnalyzer does not stem.
    // Earlier notes: a WhitespaceAnalyzer breaks only at whitespace, which was
    // the cause of the first error; a SnowballAnalyzer(VERSION, "English") was
    // also considered, its string argument naming the stemmer.
    var standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    analyzer = standardAnalyzer;

    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
}
/// <summary>
/// Clears the index directory and writer, then sets up the analyzer,
/// the query parser over TEXT_FN and the similarity.
/// </summary>
public LuceneSearcheEngine()
{
    luceneIndexDirectory = null;
    writer = null;

    // The baseline system used a SimpleAnalyzer. This version uses an English
    // Snowball analyzer with the stopWords set, to apply stemming and remove
    // stop words.
    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(
        Lucene.Net.Util.Version.LUCENE_30,
        "English",
        stopWords);

    // The parser shares the analyzer assigned above.
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);

    newSimilarity = new NewSimilarity();
}
/// <summary>
/// Resets the Lucene state: clears the index directory and the writer and
/// installs an English Snowball analyzer.
/// </summary>
/// <remarks>
/// NOTE(review): this is declared with a void return type, so it is an
/// ordinary method rather than a constructor - confirm that is intended.
/// </remarks>
public void LuceneApplication()
{
    luceneIndexDirectory = null; // Is set in Create Index

    // Only one analyzer can be active in the field. Whitespace, Simple, Stop
    // and Standard analyzers are possible alternatives, but constructing them
    // here just to overwrite them would only create discarded instances.
    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");

    writer = null; // Is set in CreateWriter
}
/// <summary>
/// Click handler: tokenizes the text in textBox1 with the analyzer chosen in
/// comboBox1 and shows the resulting words in listBox1.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // SelectedItem is null while no entry is selected; calling ToString() on
    // it would throw, so there is nothing to do until an analyzer is chosen.
    // (presumably comboBox1 is a WinForms ComboBox - confirm)
    object selectedAnalyzer = comboBox1.SelectedItem;
    if (selectedAnalyzer == null)
    {
        return;
    }

    string strWord = textBox1.Text;
    string strAnalyzerName = selectedAnalyzer.ToString();

    Lucene.Net.Analysis.Analyzer analyzer = AnalyzerHelper.GetAnalyzerByName(strAnalyzerName);
    List<String> listString = cutWord(strWord, analyzer);

    listBox1.DataSource = listString;
}
/// <summary>
/// Clears the index directory and writer, then sets up a SimpleAnalyzer,
/// a QueryParser over TEXT_FN_PASS_TEXT and the similarity.
/// </summary>
public LuceneIndexSearch()
{
    luceneIndexDirectory = null;
    writer = null;

    // A SnowballAnalyzer (whose second argument, "name", is the language of
    // the stemmer) was considered as an alternative; SimpleAnalyzer is used.
    var simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    analyzer = simpleAnalyzer;

    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);

    mySimilarity = new NewSimilarity();
}
} // constructor which is used to initialize the objects

/// <summary>
/// Creates the index at <paramref name="indexPath"/>: opens the directory,
/// builds the analyzers and the writer, and applies the custom similarity.
/// </summary>
/// <param name="indexPath">Directory path to create the index in.</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

    analyzerstandard = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    analyzerkeyword = new Lucene.Net.Analysis.KeywordAnalyzer();

    // The wrapper is constructed around the standard analyzer; no per-field
    // analyzers are registered in this method.
    analysor = new PerFieldAnalyzerWrapper(analyzerstandard);

    var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(
        luceneIndexDirectory,
        analysor,
        true,
        maxFieldLength);

    writer.SetSimilarity(customSimilarity); // for task 6
}
/// <summary>
/// Clears the index directory and writer, then sets up the analyzer, the
/// single-field parser (TEXT_FN), the multi-field parser (title and author
/// fields) and the similarity.
/// </summary>
public MainSearchEngine()
{
    luceneIndexDirectory = null;
    writer = null;

    // The baseline system used a SimpleAnalyzer; StandardAnalyzer is used here.
    // NOTE(review): the earlier comment credited StandardAnalyzer with stemming;
    // it removes stop words but does not stem - confirm this is the intent.
    var standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    analyzer = standardAnalyzer;

    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);

    var searchedFields = new[] { TEXT_FN_TITLE, TEXT_FN_AUTHOR };
    multiParser = new MultiFieldQueryParser(VERSION, searchedFields, analyzer);

    newSimilarity = new NewSimilarity();
}
/// <summary>
/// Creates the Lucene index and then indexes the given documents.
/// </summary>
/// <param name="URLResult">Documents to index, passed on to indexFilesXively.</param>
/// <param name="analyzer">Analyzer used both for creating the index and for indexing.</param>
private static void CrearIndice(Dictionary<string, UrlDocument> URLResult, Lucene.Net.Analysis.Analyzer analyzer)
{
    Trace.WriteLine("Creando el índice de Lucene");

    // Open a writer with the create flag set, switch off compound files and
    // close it again right away. The using block releases the writer even if
    // the property assignment throws.
    using (IndexWriter writer = new IndexWriter(_directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        writer.UseCompoundFile = false;
    }

    // Index the documents
    Trace.WriteLine("Indexando los documentos...");
    indexFilesXively(URLResult, analyzer);
    Trace.WriteLine("'" + totalDocs + "' documentos indexados.");
}