/// <summary>
/// Removes every entry from the Lucene index, then releases any leftover write lock.
/// No-op when the index directory contains no files.
/// </summary>
public void ClearIndex()
{
    // Nothing to clear when the index directory holds no files.
    if (!System.IO.Directory.GetFiles(this.index.Directory.FullName).Any())
    {
        return;
    }

    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    try
    {
        // create=true recreates the index; DeleteAll drops any remaining entries.
        // The using block disposes the writer exactly once (the original also
        // called writer.Dispose() inside its own using, a redundant double dispose,
        // and wrapped everything in a pointless catch { throw; }).
        using (var writer = new IndexWriter(this.index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            writer.DeleteAll();
        }
    }
    finally
    {
        analyzer.Close();
    }

    // Clear any stale write.lock left behind by a previously crashed writer.
    ForceUnlockIndex();
}
public virtual void TestDemo_Renamed_Method()
{
    // Build an in-memory index containing a single document.
    // (To store an index on disk instead, use
    // FSDirectory.getDirectory("/tmp/testindex", true) — the `true` overwrites
    // any index already in that directory.)
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new RAMDirectory();

    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true);
    indexWriter.SetMaxFieldLength(25000);

    System.String text = "This is the text to be indexed.";
    Document document = new Document();
    document.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
    indexWriter.AddDocument(document);
    indexWriter.Close();

    // Query the index for the term "text" and verify the stored field round-trips.
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
    Query parsedQuery = queryParser.Parse("text");
    Hits hits = indexSearcher.Search(parsedQuery);
    Assert.AreEqual(1, hits.Length());
    for (int hitIndex = 0; hitIndex < hits.Length(); hitIndex++)
    {
        Document retrieved = hits.Doc(hitIndex);
        Assert.AreEqual("This is the text to be indexed.", retrieved.Get("fieldname"));
    }
    indexSearcher.Close();
    directory.Close();
}
// Opens a writer over the shared index directory using the standard analyzer.
private static IndexWriter GetWriter()
{
    return new Lucene.Net.Index.IndexWriter(
        IndexDir,
        new Lucene.Net.Analysis.Standard.StandardAnalyzer());
}
/// <summary>
/// Runs a full-text query against the "Content" field and returns up to 100 scored hits.
/// </summary>
/// <param name="searchString">Query in Lucene query syntax.</param>
/// <returns>Matching books with their relevance scores.</returns>
public SearchResult[] Search(string searchString)
{
    Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_29);
    QueryParser parser = new QueryParser(Version.LUCENE_29, "Content", analyzer);
    var query = parser.Parse(searchString);

    // Open the reader read-only and ensure reader/searcher are released even when
    // collection or document retrieval throws (the original leaked both).
    using (var reader = Lucene.Net.Index.IndexReader.Open(directory, true))
    using (Searcher searcher = new IndexSearcher(reader))
    {
        TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);

        var results = new List<SearchResult>();
        foreach (var hit in collector.TopDocs().ScoreDocs)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(hit.Doc);
            results.Add(new SearchResult
            {
                BookId = Guid.Parse(doc.Get("BookId")),
                Score = hit.Score
            });
        }
        return results.ToArray();
    }
}
/// <summary>
/// Deletes the document whose exact "id" term matches <paramref name="id"/>.
/// </summary>
/// <param name="id">Identifier stored in the not-analyzed "id" field.</param>
public void Delete(string id)
{
    Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // Deleting by Term avoids query parsing entirely (an earlier, removed variant
    // parsed the id through QueryParser, which breaks on ids containing
    // reserved query characters).
    // The using block commits the deletion and releases the write lock — the
    // original never disposed the writer, so the delete was never flushed.
    using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        writer.DeleteDocuments(new Term("id", id));
    }
}
public void CanQueryLuceneIndexCreatedOnDisk()
{
    // Build the index first so there is something to query.
    CanCreateLuceneIndexOnDisk();

    // BUG FIX: CanCreateLuceneIndexOnDisk writes to <temp>/lucene_index, but the
    // original opened the temp root itself; open the actual index folder.
    System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(
        System.IO.Path.Combine(System.IO.Path.GetTempPath(), "lucene_index"));

    // All four resources are disposed deterministically — the original leaked
    // the reader and the searcher.
    using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(di))
    using (Lucene.Net.Index.IndexReader ir = Lucene.Net.Index.IndexReader.Open(directory, true)) // read-only reader
    using (Lucene.Net.Search.Searcher searcher = new Lucene.Net.Search.IndexSearcher(ir))
    using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30, "content", analyzer);
        Lucene.Net.Search.Query query = parser.Parse("lorem");

        Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);
        Lucene.Net.Search.ScoreDoc[] docs = collector.TopDocs().ScoreDocs;
        foreach (Lucene.Net.Search.ScoreDoc scoreDoc in docs)
        {
            // Get the document that represents the search result.
            Document document = searcher.Doc(scoreDoc.Doc);
            var id = document.Get("Id");
            var content = document.Get("content");
        }
    }
}
// Parses user-supplied text into a Lucene query scoped to a single field.
private static Query GetQuery(string fieldName, string searchText, Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer)
{
    return new QueryParser(Lucene.Net.Util.Version.LUCENE_30, fieldName, analyzer)
        .Parse(searchText);
}
/// <summary>
/// Indexes every advert found in the given data file, creating the index when
/// none exists yet and appending otherwise. Reports progress through
/// _updateCallback when one is registered.
/// </summary>
/// <param name="filePath">Path of the advert data file to index.</param>
public void IndexFile(string filePath)
{
    PropertyDescriptors descriptors = new PropertyDescriptors();
    descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");

    Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    // Create a fresh index only when no usable index exists yet.
    bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
    IndexWriter iw = new IndexWriter(_idxDir, a, create);
    try
    {
        iw.SetUseCompoundFile(true);

        AdDataStream adStream = new AdDataStream(filePath);
        adStream.LoadData();
        foreach (Advert ad in adStream.FetchAd())
        {
            Document doc = new Document();
            foreach (string s in ad.GetDictionary().Keys)
            {
                // Normalize each property value into its indexable form.
                string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                doc.Add(Field.Text(s, temp));
            }
            iw.AddDocument(doc);
            if (_updateCallback != null)
            {
                _updateCallback("Added Document: " + ad["Title"]);
            }
        }
        iw.Optimize();
    }
    finally
    {
        // Always release the writer (and its write lock), even when indexing
        // throws — the original leaked both on any exception.
        iw.Close();
    }
}
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Index a single document in memory.
    // (To store an index on disk, use FSDirectory.open("/tmp/testindex") instead.)
    Directory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    System.String text = "This is the text to be indexed.";
    Document document = new Document();
    document.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(document);
    indexWriter.Close();

    // Search the index for "text" with a read-only searcher and verify the
    // stored field round-trips.
    IndexSearcher indexSearcher = new IndexSearcher(directory, true); // read-only=true
    QueryParser queryParser = new QueryParser("fieldname", analyzer);
    Query parsedQuery = queryParser.Parse("text");
    ScoreDoc[] matches = indexSearcher.Search(parsedQuery, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length);
    foreach (ScoreDoc match in matches)
    {
        Document retrieved = indexSearcher.Doc(match.doc);
        Assert.AreEqual(retrieved.Get("fieldname"), "This is the text to be indexed.");
    }
    indexSearcher.Close();
    directory.Close();
}
public virtual void TestMmapIndex()
{
    FSDirectory storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null);

    // Plan to add a set of useful stopwords; consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());

    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexSearcher searcher = new IndexSearcher(storePathname);

    for (int docIndex = 0; docIndex < 1000; docIndex++)
    {
        Document document = new Document();
        document.Add(new Field("data", RandomField(), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
public virtual void TestMmapIndex()
{
    Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");

    FSDirectory storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);

    // Plan to add a set of useful stopwords; consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>());

    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexSearcher searcher = new IndexSearcher(storeDirectory, true);

    for (int docIndex = 0; docIndex < 1000; docIndex++)
    {
        Document document = new Document();
        document.Add(new Field("data", RandomField(), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
public virtual void TestMaxTermLength2()
{
    StandardAnalyzer sa = new StandardAnalyzer();

    // With the default limit every token survives, including the 7-char one.
    AssertAnalyzesTo(sa, "ab cd toolong xy z", new System.String[] { "ab", "cd", "toolong", "xy", "z" });

    // After lowering the limit to 5, "toolong" is dropped and the position
    // increment of the token that follows it grows to 2.
    sa.SetMaxTokenLength(5);
    AssertAnalyzesTo(sa, "ab cd toolong xy z", new System.String[] { "ab", "cd", "xy", "z" }, new int[] { 1, 1, 2, 1 });
}
/// <summary>
/// Benchmark/demo: rebuilds a Lucene 4.8 index from scratch, adds two batches of
/// 1000 documents, deletes the second batch by numeric range, and prints the
/// elapsed milliseconds.
/// </summary>
static void Main(string[] args)
{
    var sw = Stopwatch.StartNew();

    // Delete index from previous run so each run starts clean.
    var directoryName = "index";
    if (System.IO.Directory.Exists(directoryName))
    {
        System.IO.Directory.Delete(directoryName, true);
    }

    // Reuse directoryName here — the original repeated the "index" literal.
    using (Directory directory = new MMapDirectory(directoryName))
    using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(LuceneVersion.LUCENE_48))
    {
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
        using (var ixw = new IndexWriter(directory, config))
        {
            IList<Document> documents = new List<Document>(2000);
            for (int i = 0; i < 1000; i++)
            {
                documents.Add(CreateTestDocument("Hello world", 32, 64));
            }
            for (int i = 0; i < 1000; i++)
            {
                documents.Add(CreateTestDocument("Hello world 2", 33, 65));
            }
            ixw.AddDocuments(documents);
            ixw.Commit();

            // Remove the second batch again via its distinct intValue.
            ixw.DeleteDocuments(NumericRangeQuery.NewInt32Range("intValue", 33, 33, true, true));
            ixw.Commit();
        }
    }

    Console.WriteLine(sw.ElapsedMilliseconds);
    Console.ReadKey();
}

// Builds one test document; content, intValue and docValue vary per batch
// (the two 1000-doc loops in Main differ only in these three values).
private static Document CreateTestDocument(string content, int intValue, long docValue)
{
    return new Document
    {
        new StringField("id", Guid.NewGuid().ToString(), Field.Store.YES),
        new StringField("notTokenized", "Will not be tokenized", Field.Store.YES),
        new TextField("content", content, Field.Store.YES),
        new Int32Field("intValue", intValue, Field.Store.YES),
        new Int32Field("intNotStoredValue", 32, Field.Store.NO),
        new NumericDocValuesField("docValue", docValue)
    };
}
/// <summary>
/// Lets the user pick a folder, indexes every file in it (title + stemmed,
/// tokenized text with term vectors), then dumps all indexed terms to the console.
/// </summary>
private void btnFolder_Click(object sender, EventArgs e)
{
    FolderBrowserDialog dia = new FolderBrowserDialog();
    DialogResult res = dia.ShowDialog();
    if (res != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");
    Lucene.Net.Analysis.Standard.StandardAnalyzer an = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    IndexWriter wr = new IndexWriter(dir, an, true);
    IStemmer stemmer = new EnglishStemmer();

    DirectoryInfo diMain = new DirectoryInfo(dia.SelectedPath);
    foreach (FileInfo fi in diMain.GetFiles())
    {
        Document doc = new Document();
        doc.Add(new Field("title", fi.Name, Field.Store.YES, Field.Index.NO));
        // Tokenize, stem, then index the file contents with term vectors.
        doc.Add(new Field("text",
            PerformStemming(stemmer, NLPToolkit.Tokenizer.TokenizeNow(File.ReadAllText(fi.FullName)).ToArray()),
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
        wr.AddDocument(doc);
    }
    wr.Optimize();
    wr.Flush();
    wr.Close();

    // BUG FIX: the original called dir.Close() here and then opened a reader on
    // the already-closed directory. Keep the directory open until the reader is
    // finished, and close the reader too (the original leaked it).
    IndexReader reader = IndexReader.Open(dir);
    for (int i = 0; i < reader.MaxDoc(); i++)
    {
        if (reader.IsDeleted(i))
            continue;
        Document doc = reader.Document(i);
        String docId = doc.Get("docId");
        foreach (TermFreqVector vector in reader.GetTermFreqVectors(i))
        {
            foreach (string term in vector.GetTerms())
            {
                Console.WriteLine(term);
            }
        }
        // do something with docId here...
    }
    reader.Close();
    dir.Close();
}
/// <summary>
/// Searches the full-text index and returns up to <paramref name="max"/>
/// results the given user is authorized to see.
/// </summary>
public static List<Models.SearchResult> Query(string text, int max = 8, string userId = null)
{
    // Bail out on empty input or a dangling field prefix such as "title:".
    if (string.IsNullOrEmpty(text) || text.EndsWith(":"))
    {
        return new List<SearchResult>();
    }

    userId = string.IsNullOrEmpty(userId) ? Account.AuditId : userId;

    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29); //todo: what version?
    // "text" is just the default field to search.
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer);
    var query = parser.Parse(text);

    using (var dir = FSDirectory.Open(new DirectoryInfo(IndexDir)))
    using (var searcher = new IndexSearcher(dir, true))
    {
        // todo: mini-hack to accommodate the post-filtering of results below —
        // collect twice as many as requested and hope enough survive the filter.
        var collector = TopScoreDocCollector.create(max * 2, true);
        searcher.Search(query, collector);

        var authorized = new List<Models.SearchResult>();
        foreach (var hit in collector.TopDocs().ScoreDocs)
        {
            var doc = new Models.SearchDocument(searcher.Doc(hit.doc));
            var provider = GetDocumentProvider(doc.Type);
            if (provider == null)
            {
                throw new Exception(string.Format("Formatter for type {0} not found", doc.Type));
            }
            if (provider.IsAuthorized(doc, userId))
            {
                authorized.Add(provider.FormatResult(doc));
            }
            if (authorized.Count >= max) // stop once we have enough post-filtered results
            {
                break;
            }
        }
        return authorized;
    }
}
public void TestMemLeakage()
{
    CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = true;

    int loopCount = 100;
    Analyzer[] analyzers = new Analyzer[loopCount];
    RAMDirectory[] dirs = new RAMDirectory[loopCount];
    IndexWriter[] indexWriters = new IndexWriter[loopCount];

    // Phase 1: build one analyzer/directory/writer triple per slot, in parallel.
    System.Threading.Tasks.Parallel.For(0, loopCount, slot =>
    {
        analyzers[slot] = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT);
        dirs[slot] = new RAMDirectory();
        indexWriters[slot] = new IndexWriter(dirs[slot], analyzers[slot], true, IndexWriter.MaxFieldLength.UNLIMITED);
    });

    // Phase 2: index a single analyzed document per writer.
    System.Threading.Tasks.Parallel.For(0, loopCount, slot =>
    {
        Document document = new Document();
        document.Add(new Field("field", "some test", Field.Store.NO, Field.Index.ANALYZED));
        indexWriters[slot].AddDocument(document);
    });

    // Phase 3: dispose the analyzers and writers.
    System.Threading.Tasks.Parallel.For(0, loopCount, slot =>
    {
        analyzers[slot].Dispose();
        indexWriters[slot].Dispose();
    });

    // Phase 4: search each directory once, then dispose the directories.
    System.Threading.Tasks.Parallel.For(0, loopCount, slot =>
    {
        IndexSearcher searcher = new IndexSearcher(dirs[slot]);
        TopDocs d = searcher.Search(new TermQuery(new Term("field", "test")), 10);
        searcher.Close();
    });
    System.Threading.Tasks.Parallel.For(0, loopCount, slot => dirs[slot].Dispose());

    // Force a full collection so only genuinely-leaked CloseableThreadLocal
    // instances remain reachable through their weak references.
    GC.Collect(GC.MaxGeneration);
    GC.WaitForPendingFinalizers();

    int aliveObjects = 0;
    foreach (WeakReference w in CloseableThreadLocalProfiler.Instances)
    {
        if (w.Target != null)
            aliveObjects++;
    }
    CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = false;

    Assert.AreEqual(0, aliveObjects);
}
/// <summary>
/// Saves <paramref name="item"/> to the Lucene index at <paramref name="indexPath"/>,
/// first deleting any stale copy. Items flagged IsDeleted are only removed.
/// </summary>
/// <param name="item">Search item to persist.</param>
/// <param name="indexPath">Filesystem path of the index.</param>
/// <returns>true when the index was updated successfully; false after a logged failure.</returns>
public static bool SaveToIndex(SiteSearchItem item, string indexPath)
{
    bool isSuceess = false;
    try
    {
        // Delete first so a re-save replaces rather than duplicates.
        DeleteIndex(item, indexPath);
        if (item.IsDeleted)
        {
            return true;
        }

        // Add to index. The using blocks guarantee the writer (and its write
        // lock) and the analyzer are released even when AddDocument throws —
        // the original leaked both on any exception.
        var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath));
        using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        using (var writer = new Lucene.Net.Index.IndexWriter(indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var doc = new Document();
            doc.Add(new Field("ID", item.ID, Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Title matches are weighted 1.3x toward relevance.
            var titleField = new Field("Title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
            titleField.Boost = 1.3f;
            doc.Add(titleField);

            // Description matches are weighted 1.1x.
            var descriptionField = new Field("Description", item.Description, Field.Store.YES, Field.Index.ANALYZED);
            descriptionField.Boost = 1.1f;
            doc.Add(descriptionField);

            // Default boost (1.0) for the document body; URL is stored verbatim.
            doc.Add(new Field("DocumentDescription", item.DocumentDescription, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("URL", item.URL, Field.Store.YES, Field.Index.NOT_ANALYZED));

            writer.AddDocument(doc);
        }
        isSuceess = true;
    }
    catch (Exception ex)
    {
        ErrorLog.WriteLog("SiteSearchService", "SaveToIndex", ex, string.Empty);
    }
    return isSuceess;
}
public void Code()
{
    // Analyzer zoo: keyword (no tokenizing) is wired to the name fields below,
    // standard handles everything else; the rest are illustrative.
    Analyzer keywordAnalyzer = new KeywordAnalyzer();
    Analyzer simpleAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    Analyzer stopAnalyzer = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
    perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
    perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

    IndexWriter writer = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader reader = writer.GetReader(); // near-real-time reader off the writer
    IndexSearcher searcher = new IndexSearcher(reader);

    // Multi-field query with per-field boosts: body text counts 2x, title 1.5x.
    string[] fields = new[] { "text", "title", "author" };
    var boosts = new Dictionary<string, float>();
    boosts.Add("text", 2.0f);
    boosts.Add("title", 1.5f);
    QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, standardAnalyzer, boosts);
    Query query = parser.Parse("lucene is great");

    TopDocs hits = searcher.Search(query, 1000);
    IEnumerable<Document> docs = hits.ScoreDocs.Select(hit => searcher.Doc(hit.Doc));
    // Deferred projection into domain objects (never enumerated here).
    var books = docs.Select(doc => new Book()
    {
        Text = doc.Get("text"),
        Title = doc.Get("title"),
        Author = doc.Get("author"),
        Length = Int32.Parse(doc.Get("length"))
    });

    writer.Optimize();
    writer.Commit();
    writer.DeleteAll();
}
/// <summary>
/// Creates (or recreates) the Lucene index at the configured storage path and
/// populates it with a demo list of actor names.
/// </summary>
private void StartLuceneIndexCreateProcess()
{
    string luceneIndexStoragePath = ConfigurationManager.AppSettings["LuceneIndexStoragePath"];
    if (!System.IO.Directory.Exists(luceneIndexStoragePath))
    {
        System.IO.Directory.CreateDirectory(luceneIndexStoragePath);
    }

    analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(luceneIndexStoragePath));
    // create=true: any previous index content is replaced.
    writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        // We will populate below list to create Lucene index.
        List<string> actorsList = new List<string>();
        actorsList.Add("Johnny Depp");
        actorsList.Add("Robert Downey Jr.");
        actorsList.Add("Johnny Depp");
        actorsList.Add("Tom Cruise");
        actorsList.Add("Brad Pitt");
        actorsList.Add("Tom Hanks");
        actorsList.Add("Denzel Washington");
        actorsList.Add("Russell Crowe");
        actorsList.Add("Kate Winslet");
        actorsList.Add("Christian Bale");
        actorsList.Add("Hugh Jackman");
        actorsList.Add("Will Smith");
        actorsList.Add("Sean Connery");

        foreach (var item in actorsList)
        {
            Console.WriteLine(item);
            writer.AddDocument(CreateDocument(item));
        }

        // Optimize belongs in the success path: running it from finally (as the
        // original did) could throw and mask the original exception.
        writer.Optimize();
    }
    catch
    {
        // Release the write lock before rethrowing so later runs are not blocked.
        Lucene.Net.Index.IndexWriter.Unlock(directory);
        throw;
    }
    finally
    {
        // Dispose once each — the original called both analyzer.Close() and
        // analyzer.Dispose(), a redundant double dispose.
        writer.Dispose();
        analyzer.Dispose();
    }
}
public void CanCreateLuceneIndexOnDisk()
{
    System.IO.DirectoryInfo indexFolder = new System.IO.DirectoryInfo(
        System.IO.Path.Combine(System.IO.Path.GetTempPath(), "lucene_index"));

    using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(indexFolder))
    using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        string test2 = "Lorem Ipsum è un testo segnaposto .....";

        // create=true: start a brand new index on every run.
        using (Lucene.Net.Index.IndexWriter ixw = new Lucene.Net.Index.IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(4096)))
        {
            // First document: an HTML snippet as the not-analyzed id, plus analyzed content.
            Document document = new Document();
            document.Add(new Field("Id", "<a title = \"test\" href = \"http://www.codewrecks.com/blog/index.php/2007/09/03/test/\"> test </a >.", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            document.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            ixw.AddDocument(document);

            // Second document: hash of the text as id, full text as content.
            document = new Document();
            document.Add(new Field("Id", test2.GetHashCode().ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            document.Add(new Field("content", test2, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            ixw.AddDocument(document);

            ixw.Commit();
        }
    }
}
public virtual void TestMmapIndex()
{
    FSDirectory storeDirectory = FSDirectory.GetDirectory(storePathname);

    // Plan to add a set of useful stopwords; consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());

    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true);
    IndexSearcher searcher = new IndexSearcher(storePathname);

    for (int docIndex = 0; docIndex < 1000; docIndex++)
    {
        Document document = new Document();
        document.Add(new Field("data", RandomField(), Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(document);
    }

    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
public virtual void TestUnRewrittenQuery()
{
    // Shows that a multi-term (wildcard/prefix) query which is NOT rewritten
    // yields no highlights at all.
    searcher = new IndexSearcher(ramDir);
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("JF? or Kenned*");
    System.Console.Out.WriteLine("Searching with primitive query");

    // Deliberately skip query.Rewrite(reader) here — that omission is the test.
    Hits hits = searcher.Search(query);

    // Highlighter configured with this test as formatter and the raw query scorer.
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));

    int maxNumFragmentsRequired = 3;
    for (int hitIndex = 0; hitIndex < hits.Length(); hitIndex++)
    {
        System.String text = hits.Doc(hitIndex).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine(highlightedText);
    }

    // We expect zero highlights because the multi-term query was never rewritten.
    Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Collects the advanced-search criteria from the UI panel, validates them with a
/// Lucene query parser, runs the search on the search server, and wires the
/// results into PesquisaList1. Returns the number of results, 0 when validation
/// fails, or -1 when no server-side search was performed.
/// NOTE(review): the Conn parameter is never used in this method — confirm whether
/// callers rely on it.
/// </summary>
private long ExecuteSearch(IDbConnection Conn)
{
    List<string> resultadosDaPesquisa = new List<string>();
    long countResults = 0;
    try
    {
        // TODO: Consider removing the dependency on Lucene.Net for validating the fields: not all of them are validated....
        NivelDocumentalSearch ndSearch = new NivelDocumentalSearch();
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        // The parser is used only to validate each field's text; leading
        // wildcards are explicitly allowed.
        QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, string.Empty, analyzer);
        qp.AllowLeadingWildcard = true;
        // Accumulates, one line per field, every value that failed validation.
        StringBuilder errorMessage = new StringBuilder();

        // Identifier field: validated separately from the generic text fields.
        if (Helper.IsValidTxtID(qp, MasterPanelPesquisa.txtID.Text))
            ndSearch.Id = MasterPanelPesquisa.txtID.Text;
        else
            errorMessage.AppendLine("Identificador: " + MasterPanelPesquisa.txtID.Text);

        // Free-text and simple text criteria (each call validates and records
        // any error into errorMessage by ref).
        ndSearch.TextoLivre = Helper.AddFieldToSearch(qp, "Texto Livre", MasterPanelPesquisa.txtPesquisaSimples.Text, ref errorMessage);
        if (MasterPanelPesquisa.cbModulo.Items.Count == 1)
            ndSearch.Modulo = 1;
        else
            ndSearch.Modulo = MasterPanelPesquisa.cbModulo.SelectedIndex;
        ndSearch.CodigoParcial = Helper.AddFieldToSearch(qp, "Código Parcial", MasterPanelPesquisa.txtCodigoParcial.Text, ref errorMessage);
        ndSearch.Designacao = Helper.AddFieldToSearch(qp, "Designação", MasterPanelPesquisa.txtDesignacao.Text, ref errorMessage);
        ndSearch.Autor = Helper.AddFieldToSearch(qp, "Autor", MasterPanelPesquisa.txtAutor.Text, ref errorMessage);
        ndSearch.EntidadeProdutora = Helper.AddFieldToSearch(qp, "Entidade Produtora", MasterPanelPesquisa.txtEntidadeProdutora.Text, ref errorMessage);

        // Documentary levels filter — applied only when not all levels are selected.
        if (MasterPanelPesquisa.lstNiveisDocumentais.SelectedItems.Count != MasterPanelPesquisa.lstNiveisDocumentais.Items.Count)
        {
            List<string> str = new List<string>();
            foreach (DataRowView item in MasterPanelPesquisa.lstNiveisDocumentais.SelectedItems)
                str.Add(item.Row["Designacao"].ToString().ToLower());
            ndSearch.NiveisDocumentais = str.ToArray();
            ndSearch.NiveisDocumentaisOP = 0;
        }

        // Production date ranges, formatted as yyyyMMdd when the checkbox is set.
        if (MasterPanelPesquisa.cdbDataInicio.Checked)
            ndSearch.DataProducaoInicio = MasterPanelPesquisa.cdbDataInicio.GetStandardMaskDate.ToString("yyyyMMdd");
        if (MasterPanelPesquisa.cdbDataFim.Checked)
            ndSearch.DataProducaoFim = MasterPanelPesquisa.cdbDataFim.GetStandardMaskDate.ToString("yyyyMMdd");
        if (MasterPanelPesquisa.cdbInicioDoFim.Checked)
            ndSearch.DataProducaoInicioDoFim = MasterPanelPesquisa.cdbInicioDoFim.GetStandardMaskDate.ToString("yyyyMMdd");
        if (MasterPanelPesquisa.cdbFimDoFim.Checked)
            ndSearch.DataProducaoFimDoFim = MasterPanelPesquisa.cdbFimDoFim.GetStandardMaskDate.ToString("yyyyMMdd");

        // Remaining validated text criteria.
        ndSearch.TipologiaInformacional = Helper.AddFieldToSearch(qp, "Tipologia Informacional", MasterPanelPesquisa.txtTipologiaInformacional.Text, ref errorMessage);
        ndSearch.TermosIndexacao = Helper.AddFieldToSearch(qp, "Indexação", MasterPanelPesquisa.txtIndexacao.Text, ref errorMessage);
        ndSearch.ConteudoInformacional = Helper.AddFieldToSearch(qp, "Conteúdo Informacional", MasterPanelPesquisa.txtConteudoInformacional.Text, ref errorMessage);
        ndSearch.Notas = Helper.AddFieldToSearch(qp, "Notas", MasterPanelPesquisa.txtNotas.Text, ref errorMessage);
        // Cota values get their reserved characters escaped before validation.
        ndSearch.Cota = Helper.AddFieldToSearch(qp, "Cota", Helper.EscapeSpecialCharactersCotaDocumento(MasterPanelPesquisa.txtCota.Text.ToLower()), ref errorMessage);
        ndSearch.Agrupador = Helper.AddFieldToSearch(qp, "Agrupador", MasterPanelPesquisa.txtAgrupador.Text, ref errorMessage);

        // Digital-objects filter: exactly one of the three flags is set,
        // depending on the combo selection (index 0 means "no filter").
        ndSearch.SoComODs = string.Empty;
        ndSearch.SoComODsPub = string.Empty;
        ndSearch.SoComODsNaoPub = string.Empty;
        switch (MasterPanelPesquisa.cbODs.SelectedIndex)
        {
            case 1:
                ndSearch.SoComODs = Helper.AddFieldToSearch(qp, "objetos", "sim", ref errorMessage);
                break;
            case 2:
                ndSearch.SoComODsPub = Helper.AddFieldToSearch(qp, "objetosPublicados", "sim", ref errorMessage);
                break;
            case 3:
                ndSearch.SoComODsNaoPub = Helper.AddFieldToSearch(qp, "objetosNaoPublicados", "sim", ref errorMessage);
                break;
        }

        // Physical-description filters: each builds a space-separated list of
        // the selected designations, lowercased and tokenized via BreakStrings.
        if (MasterPanelPesquisa.chkFormaSuporte.Checked)
        {
            StringBuilder str = new StringBuilder();
            foreach (DataRowView item in MasterPanelPesquisa.lstFormaSuporte.SelectedItems)
            {
                str.Append(item.Row["Designacao"].ToString());
                str.Append(" ");
            }
            ndSearch.SuporteEAcondicionamento = BreakStrings(str.ToString().ToLower());
            ndSearch.SuporteEAcondicionamentoOP = MasterPanelPesquisa.cbFormaSuporte.SelectedIndex;
        }
        if (MasterPanelPesquisa.chkMaterialSuporte.Checked)
        {
            StringBuilder str = new StringBuilder();
            foreach (DataRowView item in MasterPanelPesquisa.lstMaterialSuporte.SelectedItems)
            {
                str.Append(item.Row["Designacao"].ToString());
                str.Append(" ");
            }
            ndSearch.MaterialDeSuporte = BreakStrings(str.ToString().ToLower());
            ndSearch.MaterialDeSuporteOP = MasterPanelPesquisa.cbMaterialSuporte.SelectedIndex;
        }
        if (MasterPanelPesquisa.chkTecnicaRegisto.Checked)
        {
            StringBuilder str = new StringBuilder();
            foreach (DataRowView item in MasterPanelPesquisa.lstTecnicaRegisto.SelectedItems)
            {
                str.Append(item.Row["Designacao"].ToString());
                str.Append(" ");
            }
            ndSearch.TecnicaRegisto = BreakStrings(str.ToString().ToLower());
            ndSearch.TecnicaRegistoOP = MasterPanelPesquisa.cbTecnicaRegisto.SelectedIndex;
        }
        if (MasterPanelPesquisa.chkEstadoConservacao.Checked)
        {
            StringBuilder str = new StringBuilder();
            foreach (DataRowView item in MasterPanelPesquisa.lstEstadoConservacao.SelectedItems)
            {
                str.Append(item.Row["Designacao"].ToString());
                str.Append(" ");
            }
            ndSearch.EstadoConservacao = BreakStrings(str.ToString().ToLower());
            ndSearch.EstadoConservacaoOP = 0;
        }

        #region Licencas de obra
        // Building-permit criteria: copied over only when the UI supplied a value.
        if (MasterPanelPesquisa.get_Nome_LicencaObraRequerentes().Length > 0)
            ndSearch.Nome_LicencaObraRequerentes = MasterPanelPesquisa.get_Nome_LicencaObraRequerentes();
        if (MasterPanelPesquisa.get_LocalizacaoObra_Actual().Length > 0)
            ndSearch.LocalizacaoObra_Actual = MasterPanelPesquisa.get_LocalizacaoObra_Actual();
        if (MasterPanelPesquisa.get_NumPolicia_Actual().Length > 0)
            ndSearch.NumPolicia_Actual = MasterPanelPesquisa.get_NumPolicia_Actual();
        if (MasterPanelPesquisa.get_LocalizacaoObra_Antiga().Length > 0)
            ndSearch.LocalizacaoObra_Antiga = MasterPanelPesquisa.get_LocalizacaoObra_Antiga();
        if (MasterPanelPesquisa.get_NumPolicia_Antigo().Length > 0)
            ndSearch.NumPolicia_Antigo = MasterPanelPesquisa.get_NumPolicia_Antigo();
        if (MasterPanelPesquisa.get_TipoObra().Length > 0)
            ndSearch.LicencaObra_TipoObra = MasterPanelPesquisa.get_TipoObra();
        if (MasterPanelPesquisa.get_TecnicoObra().Length > 0)
            ndSearch.Termo_LicencaObraTecnicoObra = MasterPanelPesquisa.get_TecnicoObra();
        if (MasterPanelPesquisa.get_CodigosAtestadoHabitabilidade().Length > 0)
            ndSearch.CodigosAtestadoHabitabilidade = MasterPanelPesquisa.get_CodigosAtestadoHabitabilidade();
        if (MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Inicio().Length > 0)
            ndSearch.Datas_LicencaObraDataLicencaConstrucao_Inicio = MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Inicio();
        if (MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Fim().Length > 0)
            ndSearch.Datas_LicencaObraDataLicencaConstrucao_Fim = MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Fim();
        if (MasterPanelPesquisa.get_PH_checked())
            ndSearch.LicencaObra_PHSimNao = MasterPanelPesquisa.get_PH_checked();
        #endregion

        // Any validation failure aborts the search and reports all bad fields at once.
        if (errorMessage.Length > 0)
        {
            MessageBox.Show("O(s) campo(s) seguinte(s) tem(êm) valor(es) incorrecto(s): " + System.Environment.NewLine + errorMessage.ToString());
            return 0;
        }

        // Prevent querying the search server when, in the advanced search, no
        // criterion is defined other than a level picked from the structure tree.
        if (!ndSearch.IsCriteriaEmpty() || !MasterPanelPesquisa.chkEstruturaArquivistica.Checked)
            resultadosDaPesquisa.AddRange(SearchImpl.search(ndSearch.ToString(), "nivelDocumental", SessionHelper.GetGisaPrincipal().TrusteeUserOperator.ID.ToString()));
        else
        {
            // Signal "no server search performed" to the result-wiring code below.
            resultadosDaPesquisa = null;
            countResults = -1;
        }
    }
    catch (Exception)
    {
        // Best-effort: report connectivity problems to the user and fall through
        // with whatever partial state exists.
        MessageBox.Show("Erro na conexão com o servidor de pesquisa", "Gisa", MessageBoxButtons.OK, MessageBoxIcon.Warning);
    }

    // Push the results (possibly null) into the result list control.
    PesquisaList1.SearchServerIDs = resultadosDaPesquisa;
    PesquisaList1.UserID = SessionHelper.GetGisaPrincipal().TrusteeUserOperator.ID;
    PesquisaList1.SoDocExpirados = MasterPanelPesquisa.chkApenasDataElimExp.Checked;
    PesquisaList1.NewSearch = true;
    if (MasterPanelPesquisa.chkEstruturaArquivistica.Checked && MasterPanelPesquisa.cnList.SelectedNivelRow != null)
        PesquisaList1.IDNivelEstrutura = MasterPanelPesquisa.cnList.SelectedNivelRow.ID;
    else
    {
        countResults = resultadosDaPesquisa.Count;
        PesquisaList1.IDNivelEstrutura = null;
    }
    PesquisaList1.Focus();
    return countResults;
}
public virtual void TestBasic()
{
    // Verifies scoring when a field omits term frequencies and positions ("noTf")
    // versus a normal field ("tf"), using SimpleSimilarity on both index and search.
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    // Small merge factor and buffer so several segments are created and merged.
    writer.MergeFactor = 2;
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());
    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        // Each successive doc repeats "term" one more time than the previous one.
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();
        // "noTf" omits freq/positions; odd-numbered docs additionally contain "notf".
        Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.OmitTermFreqAndPositions = true;
        d.Add(noTf);
        // "tf" keeps freq/positions; even-numbered docs additionally contain "tf".
        Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);
        writer.AddDocument(d);
        //System.out.println(d);
    }
    writer.Optimize();
    // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
    * Verify the index
    */
    Searcher searcher = new IndexSearcher(dir, true);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    // Each anonymous collector asserts expectations for its particular query
    // (their bodies are defined elsewhere in this file).
    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    // Conjunction across the noTf and tf fields is expected to hit exactly 15 docs.
    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
private void Search()
{
    // Runs the query from QueryInputBox against the "Canon" index, highlights the
    // best fragment of each of the top hits, and shows them in a result dialog.
    // Any failure is reported to the user via a message box (best-effort UI flow).
    try
    {
        SearchProgressBar.Maximum = 11;
        ProgressLabel.Text = "Progress: Initialize Search ...";
        Searcher searcher = new IndexSearcher(@"Canon\index");
        Analyzer analyzer = new StandardAnalyzer();
        ArrayList resultList = new ArrayList();

        String line = QueryInputBox.Text;
        // BUGFIX: the old guard compared Length to -1, which can never be true,
        // so an empty query slipped through to the parser. Bail out instead.
        // (Also removed two StreamReaders opened on stdin that were never used
        // nor disposed.)
        if (line.Length == 0)
            return;

        ProgressLabel.Text = "Progress: Parsing Query ...";
        Query query = QueryParser.Parse(line, "contents", analyzer);
        Hits hits = searcher.Search(query);
        SearchProgressBar.Increment(1);
        ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80));
        int maxNumFragmentsRequired = 1;

        // BUGFIX: iterate over at most the hits that exist; the old loop always
        // read 10 hits and threw when the result set was smaller.
        int hitsToShow = System.Math.Min(10, hits.Length());
        for (int i = 0; i < hitsToShow; i++)
        {
            SearchProgressBar.Increment(1);
            ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();

            // get the document from index
            Document doc = hits.Doc(i);

            ResultSet a = new ResultSet();
            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\", "");
            a.Score = hits.Score(i);
            a.numberOfHits = hits.Length();

            // We can't get the text from the index because we didn't store it
            // there, so extract it from the zip archive instead.
            string path = doc.Get("path");
            string name = GetInternalName(path);
            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
            string plainText = "";
            // load text from zip archive temporarily
            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
            {
                plainText = parseHtml(sr.ReadToEnd());
            }

            // Highlighter code (Lucene 1.4 API).
            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");

            // Remove the temporarily extracted file again.
            if (File.Exists(path))
                File.Delete(path);

            resultList.Add(a);
        }

        SearchProgressBar.Value = 0;
        searcher.Close();

        ssr = new ShowSearchResults(resultList);
        ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
        ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
        this.Hide();
        ssr.ShowDialog();
    }
    catch (System.Exception e)
    {
        MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
private void InitBlock() { analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); }
public virtual void TestNoPrxFile() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.MergeFactor = 2; writer.UseCompoundFile = false; Document d = new Document(); Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); f1.OmitTermFreqAndPositions = true; d.Add(f1); for (int i = 0; i < 30; i++) writer.AddDocument(d); writer.Commit(); AssertNoPrx(ram); // force merge writer.Optimize(); // flush writer.Close(); AssertNoPrx(ram); _TestUtil.CheckIndex(ram); ram.Close(); }
public virtual void TestStopWordSearching() { Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); Directory ramDir = new RAMDirectory(); var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); var doc = new Document(); doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED)); iw.AddDocument(doc); iw.Close(); var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer); mfqp.DefaultOperator = QueryParser.Operator.AND; var q = mfqp.Parse("the footest"); var is_Renamed = new IndexSearcher(ramDir, true); var hits = is_Renamed.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); is_Renamed.Close(); }
public virtual void TestDomainNames() { // Don't reuse a because we alter its state // (setReplaceInvalidAcronym) // Current lucene should not show the bug StandardAnalyzer a2 = new StandardAnalyzer(Version.LUCENE_CURRENT); // domain names AssertAnalyzesTo(a2, "www.nutch.org", new System.String[]{"www.nutch.org"}); //Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068. // the following should be recognized as HOST: AssertAnalyzesTo(a2, "www.nutch.org.", new System.String[]{"www.nutch.org"}, new System.String[]{"<HOST>"}); // 2.3 should show the bug a2 = new StandardAnalyzer(Version.LUCENE_23); AssertAnalyzesTo(a2, "www.nutch.org.", new System.String[]{"wwwnutchorg"}, new System.String[]{"<ACRONYM>"}); // 2.4 should not show the bug a2 = new StandardAnalyzer(Version.LUCENE_24); AssertAnalyzesTo(a2, "www.nutch.org.", new System.String[]{"www.nutch.org"}, new System.String[]{"<HOST>"}); }
public static void Main(System.String[] args)
{
    // Command-line search driver: parses flags, opens the index, then runs an
    // interactive (or file-driven) query loop, paging results 10 at a time.
    System.String usage = "Usage: " + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(0);
    }

    System.String index = "index";
    System.String field = "contents";
    System.String queries = null;
    int repeat = 0;
    bool raw = false;
    System.String normsField = null;

    // Flags that take a value consume the following argument.
    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i]))
        {
            index = args[i + 1];
            i++;
        }
        else if ("-field".Equals(args[i]))
        {
            field = args[i + 1];
            i++;
        }
        else if ("-queries".Equals(args[i]))
        {
            queries = args[i + 1];
            i++;
        }
        else if ("-repeat".Equals(args[i]))
        {
            repeat = System.Int32.Parse(args[i + 1]);
            i++;
        }
        else if ("-raw".Equals(args[i]))
        {
            raw = true;
        }
        else if ("-norms".Equals(args[i]))
        {
            normsField = args[i + 1];
            i++;
        }
    }

    IndexReader reader = IndexReader.Open(index);

    if (normsField != null)
        reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    // Read queries either from the given file or interactively from stdin.
    System.IO.StreamReader in_Renamed = null;
    if (queries != null)
    {
        in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(queries, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(queries, System.Text.Encoding.Default).CurrentEncoding);
    }
    else
    {
        in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).CurrentEncoding);
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true)
    {
        if (queries == null)
            // prompt the user
            System.Console.Out.Write("Query: ");

        System.String line = in_Renamed.ReadLine();
        if (line == null || line.Length == 0)
            break;

        Query query = parser.Parse(line);
        System.Console.Out.WriteLine("Searching for: " + query.ToString(field));

        Hits hits = searcher.Search(query);

        if (repeat > 0)
        {
            // repeat & time as benchmark
            System.DateTime start = System.DateTime.Now;
            for (int i = 0; i < repeat; i++)
            {
                hits = searcher.Search(query);
            }
            System.DateTime end = System.DateTime.Now;
            // BUGFIX: subtract full timestamps. The old code compared only the
            // Millisecond components (each 0-999), which is wrong for any run
            // longer than one second and can even go negative.
            System.Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
        }

        System.Console.Out.WriteLine(hits.Length() + " total matching documents");

        int HITS_PER_PAGE = 10;
        for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
        {
            int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++)
            {
                if (raw)
                {
                    // output raw format
                    System.Console.Out.WriteLine("doc=" + hits.Id(i) + " score=" + hits.Score(i));
                    continue;
                }

                Document doc = hits.Doc(i);
                System.String path = doc.Get("path");
                if (path != null)
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + path);
                    System.String title = doc.Get("title");
                    if (title != null)
                    {
                        System.Console.Out.WriteLine(" Title: " + doc.Get("title"));
                    }
                }
                else
                {
                    System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
                }
            }

            if (queries != null)
                // non-interactive
                break;

            if (hits.Length() > end)
            {
                System.Console.Out.Write("more (y/n) ? ");
                line = in_Renamed.ReadLine();
                // BUGFIX: guard against end-of-input (ReadLine returns null),
                // which previously threw a NullReferenceException.
                if (line == null || line.Length == 0 || line[0] == 'n')
                    break;
            }
        }
    }
    reader.Close();
}
public override TokenStream TokenStream(string field, TextReader reader) { TokenStream stdStream = new Lucene.Net.Analysis.Standard.StandardAnalyzer().TokenStream(reader); return new StopFilter(new DiacriticFilter(new HamzaFilter(stdStream)), _stopWords); }
public void Test_LUCENE_3042_LUCENENET_433() { String testString = "t"; Analyzer analyzer = new StandardAnalyzer(_TestUtil.CurrentVersion); TokenStream stream = analyzer.ReusableTokenStream("dummy", new System.IO.StringReader(testString)); stream.Reset(); while (stream.IncrementToken()) { // consume } stream.End(); stream.Close(); AssertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); }
// Resolves a CityToken to a City using the Lucene city index, or null if no
// candidate interpretation of the token produces any hits. Candidates are tried
// in the order returned by GetPossibleCityDetails(); within one candidate's
// results, matches are preferred in this order: country-name match, then
// city/ascii-name (+country) match, then simply the first result. Results are
// sorted by population (descending) before relevance, so ties favour the
// biggest city.
public static City FindCity(CityToken cityToken)
{
    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, CityFieldNames.Name, analyzer);
    // Read-only searcher over the on-disk city index.
    var searcher = new IndexSearcher(FSDirectory.Open(ApplicationSettings.CityIndexDirectory), true);
    // Population descending first, relevance score second.
    var sort = new Sort(new[] { new SortField(CityFieldNames.Population, SortField.LONG, true), SortField.FIELD_SCORE });
    var possibleCityDetails = cityToken.GetPossibleCityDetails();
    foreach (var possibleCityDetail in possibleCityDetails)
    {
        // Fresh collector per candidate; keep only the top 5 documents.
        var topScoreDocCollector = TopFieldCollector.Create(sort, 5, true, false, false, false);
        var countryCode = string.Empty;
        if (!string.IsNullOrEmpty(possibleCityDetail.CountryName))
        {
            // A country name that cannot be mapped to a code disqualifies
            // this candidate outright.
            countryCode = CountryCodes.LookupCountryCode(possibleCityDetail.CountryName);
            if (string.IsNullOrEmpty(countryCode))
                continue;
        }
        var queryText = GetQueryText(possibleCityDetail.CityName, countryCode, possibleCityDetail.AdministrativeDivisionName);
        var query = queryParser.Parse(queryText);
        searcher.Search(query, topScoreDocCollector);
        var results = topScoreDocCollector.TopDocs().ScoreDocs;
        if (topScoreDocCollector.TotalHits > 0)
        {
            var cities = results.Select(x => new City(searcher.Doc(x.Doc))).ToList();
            // if the name being searched for matches a country return it
            // (accent- and case-insensitive comparison).
            foreach (var city in cities)
            {
                var countryNameMatches = String.Compare(possibleCityDetail.CityName, city.CountryName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                if (countryNameMatches)
                    return city;
            }
            // if the name matches then return it first
            foreach (var city in cities)
            {
                var cityNameMatches = String.Compare(possibleCityDetail.CityName, city.Name, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                var asciiNameMatches = String.Compare(possibleCityDetail.CityName, city.AsciiName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                // No candidate country name counts as a match for this purpose.
                var countryNameMatches = String.IsNullOrEmpty(possibleCityDetail.CountryName) || String.Compare(possibleCityDetail.CountryName, city.CountryName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                if ((cityNameMatches || asciiNameMatches) && countryNameMatches)
                    return city;
            }
            // if there were no direct city name or country name matches then just
            // return the first result (which by default is sorted by population)
            var firstDocId = results[0].Doc;
            var firstDocument = searcher.Doc(firstDocId);
            return new City(firstDocument);
        }
    }
    return null;
}
public virtual void TestLucene1140() { try { StandardAnalyzer analyzer = new StandardAnalyzer(true); AssertAnalyzesTo(analyzer, "www.nutch.org.", new System.String[]{"www.nutch.org"}, new System.String[]{"<HOST>"}); } catch (System.NullReferenceException e) { Assert.IsTrue(false, "Should not throw an NPE and it did"); } }
public virtual void TestParsingQueryWithoutBoosts() { var analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); var fields = new[] {"f1", "f2"}; var boosts = new Dictionary<String, Single> { {"f1", 2} // missing f2 intentional }; var parser = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, fields, analyzer, boosts); var query = parser.Parse("bazinga"); Assert.AreEqual("f1:bazinga^2.0 f2:bazinga", query.ToString()); }
public virtual void TestGiga() { StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); Directory index = new MockRAMDirectory(); IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); AddDoc("Lucene in Action", w); AddDoc("Lucene for Dummies", w); // addDoc("Giga", w); AddDoc("Giga byte", w); AddDoc("ManagingGigabytesManagingGigabyte", w); AddDoc("ManagingGigabytesManagingGigabytes", w); AddDoc("The Art of Computer Science", w); AddDoc("J. K. Rowling", w); AddDoc("JK Rowling", w); AddDoc("Joanne K Roling", w); AddDoc("Bruce Willis", w); AddDoc("Willis bruce", w); AddDoc("Brute willis", w); AddDoc("B. willis", w); IndexReader r = w.GetReader(); w.Close(); Query q = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer).Parse("giga~0.9"); // 3. search IndexSearcher searcher = new IndexSearcher(r); ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), "Giga byte"); r.Close(); }
public virtual void TestWickedLongTerm()
{
    // Exercises IndexWriter behaviour around the maximum term length (16383
    // chars): an over-long term is skipped (while still advancing positions),
    // and a term at exactly the limit can be indexed once the analyzer's max
    // token length is raised.
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

    // FIX: build the 16383-char term with the string(char, int) constructor
    // instead of filling a char[] via Array.SetValue, which boxes every char.
    System.String bigTerm = new System.String('x', 16383);

    Document doc = new Document();

    // Max length term is 16383, so this contents produces a too-long term:
    System.String contents = "abc xyz x" + bigTerm + " another term";
    doc.Add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.Add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);

    // Make sure all terms < max size were indexed
    Assert.AreEqual(2, reader.DocFreq(new Term("content", "abc")));
    Assert.AreEqual(1, reader.DocFreq(new Term("content", "bbb")));
    Assert.AreEqual(1, reader.DocFreq(new Term("content", "term")));
    Assert.AreEqual(1, reader.DocFreq(new Term("content", "another")));

    // Make sure position is still incremented when massive term is skipped:
    TermPositions tps = reader.TermPositions(new Term("content", "another"));
    Assert.IsTrue(tps.Next());
    Assert.AreEqual(1, tps.Freq());
    Assert.AreEqual(3, tps.NextPosition());

    // Make sure the doc that has the massive term is in the index:
    Assert.AreEqual(2, reader.NumDocs(), "document with wicked long term should is not in the index!");
    reader.Close();

    // Make sure we can add a document with exactly the maximum length term,
    // and search on that term:
    doc = new Document();
    doc.Add(new Field("content", bigTerm, Field.Store.NO, Field.Index.TOKENIZED));
    StandardAnalyzer sa = new StandardAnalyzer();
    sa.SetMaxTokenLength(100000);
    writer = new IndexWriter(dir, sa);
    writer.AddDocument(doc);
    writer.Close();

    reader = IndexReader.Open(dir);
    Assert.AreEqual(1, reader.DocFreq(new Term("content", bigTerm)));
    reader.Close();

    dir.Close();
}
public static void Main(System.String[] args) { try { Searcher searcher = new IndexSearcher(@"index"); Analyzer analyzer = new StandardAnalyzer(); System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding); while (true) { System.Console.Out.Write("Query: "); System.String line = in_Renamed.ReadLine(); if (line.Length == - 1) break; Query query = QueryParser.Parse(line, "contents", analyzer); System.Console.Out.WriteLine("Searching for: " + query.ToString("contents")); Hits hits = searcher.Search(query); System.Console.Out.WriteLine(hits.Length() + " total matching documents"); int HITS_PER_PAGE = 10; for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE) { int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { Document doc = hits.Doc(i); System.String path = doc.Get("path"); if (path != null) { System.Console.Out.WriteLine(i + ". " + path); } else { System.String url = doc.Get("url"); if (url != null) { System.Console.Out.WriteLine(i + ". " + url); System.Console.Out.WriteLine(" - " + doc.Get("title")); } else { System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document"); } } } if (hits.Length() > end) { System.Console.Out.Write("more (y/n) ? "); line = in_Renamed.ReadLine(); if (line.Length == 0 || line[0] == 'n') break; } } } searcher.Close(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
public StandardAnalyzer() { Analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(SearchSettings.Instance.LuceneVersion); }