public void SetUp() { var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); var doc = new Document(); doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); }
public void CreateIndex(Analyzer analyzer) { FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder)); IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED); string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories); foreach (string file in files) { string name = new FileInfo(file).Name; string content = File.ReadAllText(file); Document doc = new Document(); doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED)); indexWriter.AddDocument(doc); Console.WriteLine("{0} - {1}", file, name); } indexWriter.Optimize(); indexWriter.Dispose(); Console.WriteLine("File count: {0}", files.Length); }
protected LuceneWork(object id, string idInString, System.Type entityClass, Document document) { this.id = id; this.idInString = idInString; this.entityClass = entityClass; this.document = document; }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField(FieldName, "", Field.Store.NO); doc.Add(field); List<string> terms = new List<string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; terms.Add(s); writer.AddDocument(doc); } if (VERBOSE) { // utf16 order terms.Sort(); Console.WriteLine("UTF16 order:"); foreach (string s in terms) { Console.WriteLine(" " + UnicodeUtil.ToHexString(s)); } } Reader = writer.Reader; Searcher1 = NewSearcher(Reader); Searcher2 = NewSearcher(Reader); writer.Dispose(); }
public void TestReadersWriters() { Directory dir; using(dir = new RAMDirectory()) { Document doc; IndexWriter writer; IndexReader reader; using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED)) { Field field = new Field("name", "value", Field.Store.YES,Field.Index.ANALYZED); doc = new Document(); doc.Add(field); writer.AddDocument(doc); writer.Commit(); using (reader = writer.GetReader()) { IndexReader r1 = reader.Reopen(); } Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here"); } Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here"); Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory"); } Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory"); }
private static Directory MakeIndex() { Directory dir = new RAMDirectory(); try { System.Random r = new System.Random((System.Int32) (BASE_SEED + 42)); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true); writer.SetUseCompoundFile(false); for (int d = 1; d <= NUM_DOCS; d++) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); for (int f = 1; f <= NUM_FIELDS; f++) { doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED)); } writer.AddDocument(doc); } writer.Close(); } catch (System.Exception e) { throw new System.SystemException("", e); } return dir; }
public void TestMax() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo"))); doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar"))); doc.Add(NewStringField("id", "1", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz"))); doc.Add(NewStringField("id", "2", Field.Store.YES)); writer.AddDocument(doc); IndexReader ir = writer.Reader; writer.Dispose(); // slow wrapper does not support random access ordinals (there is no need for that!) IndexSearcher searcher = NewSearcher(ir, false); Sort sort = new Sort(new SortedSetSortField("value", false, Selector.MAX)); TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort); assertEquals(2, td.TotalHits); // 'baz' comes before 'foo' assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id")); assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id")); assertNoFieldCaches(); ir.Dispose(); dir.Dispose(); }
public void testMissingTerms() { String fieldName = "field1"; Directory rd = new RAMDirectory(); var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); for (int i = 0; i < 100; i++) { var doc = new Document(); int term = i*10; //terms are units of 10; doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED)); w.AddDocument(doc); } IndexReader reader = w.GetReader(); w.Close(); TermsFilter tf = new TermsFilter(); tf.AddTerm(new Term(fieldName, "19")); FixedBitSet bits = (FixedBitSet) tf.GetDocIdSet(reader); Assert.AreEqual(0, bits.Cardinality(), "Must match nothing"); tf.AddTerm(new Term(fieldName, "20")); bits = (FixedBitSet) tf.GetDocIdSet(reader); Assert.AreEqual(1, bits.Cardinality(), "Must match 1"); tf.AddTerm(new Term(fieldName, "10")); bits = (FixedBitSet) tf.GetDocIdSet(reader); Assert.AreEqual(2, bits.Cardinality(), "Must match 2"); tf.AddTerm(new Term(fieldName, "00")); bits = (FixedBitSet) tf.GetDocIdSet(reader); Assert.AreEqual(2, bits.Cardinality(), "Must match 2"); reader.Close(); rd.Close(); }
public static Document readTXT(string path) { Document doc = new Document(); doc.Add(new Field("Path", path, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Content", readText(path), Field.Store.YES, Field.Index.ANALYZED)); return doc; }
/// <summary> /// Adds a single product to the index as a document with analyzed Name, Origin and Price fields (term vectors enabled). /// </summary> /// <param name="p">The product to index.</param> /// <param name="writer">The index writer that receives the document.</param> private static void AddDocumentToIndex(Product p, IndexWriter writer) { Document doc = new Document(); doc.Add(new Field("Name", p.Name, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES)); doc.Add(new Field("Origin", p.Origin.ToString(), Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES)); doc.Add(new Field("Price", p.Price.ToString(), Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES)); writer.AddDocument(doc); }
// ... has multiple qualifications private Document MakeQualification(string qualification, int year) { Document job = new Document(); job.Add(NewStringField("qualification", qualification, Field.Store.YES)); job.Add(new IntField("year", year, Field.Store.NO)); return job; }
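// A short follow-up sketch (not from the original source): because "year" above is indexed as an
// IntField, it can be filtered numerically. Assumes Lucene.NET 4.8 beta naming, where the factory
// method is NumericRangeQuery.NewInt32Range.
private Query QualificationsBetween(int startYear, int endYear)
{
    // Both bounds inclusive; matches documents built by MakeQualification above.
    return NumericRangeQuery.NewInt32Range("year", startYear, endYear, true, true);
}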
public virtual void TestPrefixQuery_Mem() { Directory directory = NewDirectory(); string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" }; RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("category", categories[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below"); query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "One in /Computers/Mac"); query = new PrefixQuery(new Term("category", "")); Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category"); Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length, "everything"); writer.Dispose(); reader.Dispose(); directory.Dispose(); }
public Document BuildRecord() { var doc = new Document(); var numericField = new NumericField("DatabaseID", Field.Store.YES, false); numericField.SetIntValue(Email.ID); doc.Add(numericField); var field = new Field("UniqueID", UniqueID, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Title", Title, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Description", Description, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Type", Type, Field.Store.YES, Field.Index.ANALYZED); doc.Add(field); /* field = new Field("Name", EventDescription.Name, Field.Store.YES, Field.Index.ANALYZED); doc.Add(field);*/ return doc; }
//this method creates a document from a FileToIndex public void BuildIndex(FileToIndex file) { using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30)) { using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { //delete any existing document with this Id before re-indexing var searchQuery = new TermQuery(new Term("Id", file.Id.ToString())); idxw.DeleteDocuments(searchQuery); //create the new document Document doc = new Document(); doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//the analyzer splits strings into words, so the Id is kept NOT_ANALYZED as a single token doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED)); //write the document to the index idxw.AddDocument(doc); //commit and optimize; the using block disposes the writer idxw.Commit(); idxw.Optimize(); } } }
private Document GetLuceneDocument(RuntimeContentModel content) { var d = new Document(); d.Add(new Field("Url", content.Url, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("Name", content.Name, Field.Store.YES, Field.Index.ANALYZED)); d.Add(new Field("CreateDate", content.CreateDate.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("UpdateDate", content.UpdateDate.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("Type", content.Type, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("CreatorName", content.CreatorName, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("WriterName", content.WriterName, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("RelativeUrl", content.RelativeUrl, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("Template", content.Template, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("SortOrder", content.SortOrder.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("Level", content.Level.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); foreach (var property in content.Content) { var value = property.Value.ToString(); value = StripHtml(value); d.Add(new Field(property.Key, value, Field.Store.YES, Field.Index.ANALYZED)); } return d; }
public void SetUp() { IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true); Document doc; doc = new Document(); doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); }
protected override void AddSpecialFields(Document document, Item item) { Assert.ArgumentNotNull(document, "document"); Assert.ArgumentNotNull(item, "item"); document.Add(this.CreateTextField(BuiltinFields.Name, item.Name)); document.Add(this.CreateDataField(BuiltinFields.Name, item.Name)); this.DetectRemovalFilterAndProcess(document, item, "DisplayName", BuiltinFields.Name, (itm) => item.Appearance.DisplayName); this.DetectRemovalFilterValueField(document, item, "Icon", BuiltinFields.Icon, itm => itm.Appearance.Icon); this.DetectRemovalFilterAndProcess(document, item, "Creator", BuiltinFields.Creator, itm => itm.Statistics.CreatedBy); this.DetectRemovalFilterAndProcess(document, item, "Editor", BuiltinFields.Editor, itm => itm.Statistics.UpdatedBy); this.DetectRemovalFilterAndProcess(document, item, "AllTemplates", BuiltinFields.AllTemplates, this.GetAllTemplates); this.DetectRemovalFilterAndProcess(document, item, "TemplateName", BuiltinFields.TemplateName, itm => itm.TemplateName); if (this.DetectRemoval("Hidden")) { if (this.IsHidden(item)) { this.DetectRemovalFilterValueField(document, item, "Hidden", BuiltinFields.Hidden, itm => "1"); } } this.DetectRemovalFilterValueField(document, item, "Created", BuiltinFields.Created, itm => item[FieldIDs.Created]); this.DetectRemovalFilterValueField(document, item, "Updated", BuiltinFields.Updated, itm => item[FieldIDs.Updated]); this.DetectRemovalFilterAndProcess(document, item, "Path", BuiltinFields.Path, this.GetItemPath); this.DetectRemovalFilterAndProcess(document, item, "Links", BuiltinFields.Links, this.GetItemLinks); var tags = this.Tags; if (tags.Length > 0) { document.Add(this.CreateTextField(BuiltinFields.Tags, tags)); document.Add(this.CreateDataField(BuiltinFields.Tags, tags)); } }
/// <summary> /// Set up a new index in RAM with three test phrases and the supplied Analyzer. /// </summary> /// <exception cref="Exception"> if an error occurs with index writer or searcher </exception> public override void SetUp() { base.SetUp(); analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2); directory = NewDirectory(); IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc; doc = new Document(); doc.Add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(new TextField("content", "just another test sentence", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(new TextField("content", "a sentence which contains no test", Field.Store.YES)); writer.AddDocument(doc); writer.Dispose(); reader = DirectoryReader.Open(directory); searcher = NewSearcher(reader); }
public void CreateSearchIndex() { directory = new RAMDirectory(); analyzer = new StandardAnalyzer(Version.LUCENE_30); var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); LookupTable = new Dictionary<string, BaseContent>(); foreach (BaseContent p in Service.PoIs.ToList()) { var document = new Document(); document.Add(new Field("id", p.Id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); string all = p.Name + " "; foreach (MetaInfo mi in p.EffectiveMetaInfo) { string value; if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value)) continue; document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED)); all += value + " "; } document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED)); LookupTable[p.Id.ToString()] = p; ixw.AddDocument(document); } ixw.Commit(); ixw.Dispose(); /* release the writer and its write lock; the RAMDirectory field stays usable for searching */ }
private Directory MakeIndex() { Directory dir = new RAMDirectory(); try { System.Random r = NewRandom(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++) { doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED)); } writer.AddDocument(doc); } writer.Close(); } catch (System.Exception e) { throw new System.SystemException("", e); } return dir; }
public void SetUp() { doc = new Document(); Elapsed = TimeSpan.MinValue; OptionalElapsed = null; }
private static void AddNumericPublishedDate(Document document, DateTime publishedDate) { var publishedDateString = publishedDate.ToString("yyyyMMddHHmmss"); /* HH: 24-hour clock, so afternoon times remain distinct and sortable */ var publishedDateNumeric = new NumericField(PublishedDateNumericField, Field.Store.YES, true); publishedDateNumeric.SetLongValue(long.Parse(publishedDateString)); document.Add(publishedDateNumeric); }
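// Read-side sketch for the numeric published date above (illustrative, not from the original
// source): in Lucene.NET 3.x a NumericField long value is queried with NumericRangeQuery;
// PublishedDateNumericField is the same constant AddNumericPublishedDate writes to.
private static Query PublishedBetween(DateTime from, DateTime to)
{
    // Reuse the zero-padded sortable format written at index time (HH = 24-hour clock).
    long min = long.Parse(from.ToString("yyyyMMddHHmmss"));
    long max = long.Parse(to.ToString("yyyyMMddHHmmss"));
    return NumericRangeQuery.NewLongRange(PublishedDateNumericField, min, max, true, true);
}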
/// <summary> /// Creates an index document from the given field dictionary. /// </summary> /// <param name="dic"></param> public void AddLuceneIndex(Dictionary<string, string> dic) { //var analyzer = new StandardAnalyzer(Version.LUCENE_30); var analyzer = GetAnalyzer(); using (var directory = GetLuceneDirectory()) using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { var doc = new Document(); foreach (KeyValuePair<string, string> pair in dic) { // add new index entry //Field.Store.YES: controls whether the original value is stored. //Only stored fields can later be read back with doc.Get("number"). //Field.Index.NOT_ANALYZED: index the value as a single token, without analysis. //todo:boost if (NotAnalyzeFields.Exists(one => one == pair.Key)) { doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED)); } else { doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED)); } } //doc.Boost writer.AddDocument(doc); writer.Commit(); writer.Optimize(); analyzer.Close(); } }
public static IndexWriter AddDocument(this IndexWriter indexWriter, params Action<Document>[] documentActions) { Document document = new Document(); document.Setup(documentActions); indexWriter.AddDocument(document); return indexWriter; }
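// Usage sketch for the fluent extension above (illustrative; assumes Document.Setup simply applies
// each supplied action to the document, as its use here implies).
public static void AddTwoTitles(IndexWriter writer)
{
    writer
        .AddDocument(d => d.Add(new Field("Title", "First", Field.Store.YES, Field.Index.ANALYZED)))
        .AddDocument(d => d.Add(new Field("Title", "Second", Field.Store.YES, Field.Index.ANALYZED)));
}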
public virtual void TestSimpleSkip() { Directory dir = new CountingRAMDirectory(this, new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy())); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO)); writer.AddDocument(d1); } writer.Commit(); writer.ForceMerge(1); writer.Dispose(); AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir)); for (int i = 0; i < 2; i++) { Counter = 0; DocsAndPositionsEnum tp = reader.TermPositionsEnum(term); CheckSkipTo(tp, 14, 185); // no skips CheckSkipTo(tp, 17, 190); // one skip on level 0 CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, // because then more bytes would be read from the freqStream CheckSkipTo(tp, 4800, 250); // one skip on level 2 } }
private static void IndexIndicator(IndicatorMetadata indicatorMetadata, IEnumerable<IndicatorMetadataTextProperty> properties, IndexWriter writer) { Document doc = new Document(); doc.Add(new Field("id", indicatorMetadata.IndicatorId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); var text = indicatorMetadata.Descriptive; StringBuilder sb = new StringBuilder(); foreach (var indicatorMetadataTextProperty in properties) { var key = indicatorMetadataTextProperty.ColumnName; if (text.ContainsKey(key)) { sb.Append(text[key]); sb.Append(" "); } } doc.Add(new Field("IndicatorText", sb.ToString().ToLower(), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.OmitNorms = true; Field field = NewField("field", "", customType); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public ExDocument(Document dc, float s) { if (dc == null) throw new ArgumentNullException("dc", "A non-null Document is required for ExDocument."); doc = dc; score = s; }
public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
private static void AddTextToIndex(int txts, string text, IndexWriter writer) { Document doc = new Document(); doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); }
private void CheckHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) { Assert.AreEqual(expectedCount, hits.Length, "total results"); for (int i = 0; i < hits.Length; i++) { if (i < 10 || (i > 94 && i < 105)) { Documents.Document d = searcher.Doc(hits[i].Doc); Assert.AreEqual(Convert.ToString(i), d.Get(ID_FIELD), "check " + i); } } }
private void PrintHits(TextWriter @out, ScoreDoc[] hits, IndexSearcher searcher) { @out.WriteLine(hits.Length + " total results\n"); for (int i = 0; i < hits.Length; i++) { if (i < 10 || (i > 94 && i < 105)) { Documents.Document d = searcher.Doc(hits[i].Doc); @out.WriteLine(i + " " + d.Get(ID_FIELD)); } } }
public virtual void TestNegativeQueryBoost() { Query q = new TermQuery(new Term("foo", "bar")); q.Boost = -42f; Assert.AreEqual(-42f, q.Boost, 0.0f); Store.Directory directory = NewDirectory(); try { Analyzer analyzer = new MockAnalyzer(Random); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); IndexWriter writer = new IndexWriter(directory, conf); try { Documents.Document d = new Documents.Document(); d.Add(NewTextField("foo", "bar", Field.Store.YES)); writer.AddDocument(d); } finally { writer.Dispose(); } IndexReader reader = DirectoryReader.Open(directory); try { IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); Assert.IsTrue(hits[0].Score < 0, "score is not negative: " + hits[0].Score); Explanation explain = searcher.Explain(q, hits[0].Doc); Assert.AreEqual(hits[0].Score, explain.Value, 0.001f, "score doesn't match explanation"); Assert.IsTrue(explain.IsMatch, "explain doesn't think doc is a match"); } finally { reader.Dispose(); } } finally { directory.Dispose(); } }
private void DoTestSearch(Random random, StringWriter @out, bool useCompoundFile) { Store.Directory directory = NewDirectory(); Analyzer analyzer = new MockAnalyzer(random); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(directory, conf); string[] docs = new string[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" }; for (int j = 0; j < docs.Length; j++) { Documents.Document d = new Documents.Document(); d.Add(NewTextField("contents", docs[j], Field.Store.YES)); d.Add(NewStringField("id", "" + j, Field.Store.NO)); writer.AddDocument(d); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(directory); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = null; Sort sort = new Sort(SortField.FIELD_SCORE, new SortField("id", SortFieldType.INT32)); foreach (Query query in BuildQueries()) { @out.WriteLine("Query: " + query.ToString("contents")); if (Verbose) { Console.WriteLine("TEST: query=" + query); } hits = searcher.Search(query, null, 1000, sort).ScoreDocs; @out.WriteLine(hits.Length + " total results"); for (int i = 0; i < hits.Length && i < 10; i++) { Documents.Document d = searcher.Doc(hits[i].Doc); @out.WriteLine(i + " " + hits[i].Score + " " + d.Get("contents")); } } reader.Dispose(); directory.Dispose(); }
public virtual void TestDemo() { Analyzer analyzer = new MockAnalyzer(Random()); // Store the index in memory: using (Directory directory = NewDirectory()) { string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; string text = "this is the text to be indexed. " + longTerm; // To store an index on disk, use this instead: // Directory directory = FSDirectory.open(new File("/tmp/testindex")); using (RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, analyzer))) { Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("fieldname", text, Field.Store.YES)); iwriter.AddDocument(doc); } // Now search the index: using (IndexReader ireader = DirectoryReader.Open(directory)) // read-only=true { IndexSearcher isearcher = NewSearcher(ireader); Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.Search(query, null, 1); Assert.AreEqual(1, hits.TotalHits); // Iterate through the results: for (int i = 0; i < hits.ScoreDocs.Length; i++) { Documents.Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc); Assert.AreEqual(text, hitDoc.Get("fieldname")); } // Test simple phrase query PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term("fieldname", "to")); phraseQuery.Add(new Term("fieldname", "be")); Assert.AreEqual(1, isearcher.Search(phraseQuery, null, 1).TotalHits); } } }
private static int GetInt(this LuceneDocument doc, string name) { var value = doc.GetField(name)?.GetInt32ValueOrDefault(); return(value ?? 0); }
private static bool GetBool(this LuceneDocument doc, string name) { var value = doc.GetField(name)?.GetInt32ValueOrDefault(); return(value == 1); }
protected Document GetProjection(Lucene.Net.Documents.Document input, float score, string id) { Document doc = null; if (_fieldsToFetch.AnyExtractableFromIndex == false) { doc = DirectGet(input, id); if (doc == null) { return(null); } return(GetProjectionFromDocument(doc, score, _fieldsToFetch, _context)); } var documentLoaded = false; var result = new DynamicJsonValue(); if (_fieldsToFetch.IsDistinct == false && string.IsNullOrEmpty(id) == false) { result[Constants.Indexing.Fields.DocumentIdFieldName] = id; } Dictionary <string, FieldsToFetch.FieldToFetch> fields; if (_fieldsToFetch.ExtractAllFromIndexAndDocument) { fields = input.GetFields() .Where(x => x.Name != Constants.Indexing.Fields.DocumentIdFieldName && x.Name != Constants.Indexing.Fields.ReduceKeyFieldName && x.Name != Constants.Indexing.Fields.ReduceValueFieldName) .Distinct(UniqueFieldNames.Instance) .ToDictionary(x => x.Name, x => new FieldsToFetch.FieldToFetch(x.Name, x.IsStored)); doc = DirectGet(input, id); documentLoaded = true; if (doc != null) { foreach (var name in doc.Data.GetPropertyNames()) { if (fields.ContainsKey(name)) { continue; } fields[name] = new FieldsToFetch.FieldToFetch(name, canExtractFromIndex: false); } } } else { fields = _fieldsToFetch.Fields; } foreach (var fieldToFetch in fields.Values) { if (TryExtractValueFromIndex(fieldToFetch, input, result)) { continue; } if (documentLoaded == false) { doc = DirectGet(input, id); documentLoaded = true; } if (doc == null) { continue; } MaybeExtractValueFromDocument(fieldToFetch, doc, result); } if (doc == null) { doc = new Document { Key = _context.GetLazyString(id) }; } return(ReturnProjection(result, doc, score, _context)); }
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingStoredFieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader compressingStoredFieldsReader) { matchingFieldsReader = compressingStoredFieldsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingFieldsReader == null || matchingFieldsReader.Version != VERSION_CURRENT || matchingFieldsReader.CompressionMode != compressionMode || matchingFieldsReader.ChunkSize != chunkSize) // the way data is decompressed depends on the chunk size - means reader version is not the same as the writer version { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Document doc = reader.Document(i); AddDocument(doc, mergeState.FieldInfos); ++docCount; mergeState.CheckAbort.Work(300); } } else { int docID = NextLiveDoc(0, liveDocs, maxDoc); if (docID < maxDoc) { // not all docs were deleted CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID); int[] startOffsets = Arrays.Empty <int>(); do { // go to the next chunk that contains docID it.Next(docID); // transform lengths into offsets if (startOffsets.Length < it.chunkDocs) { startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)]; } for (int i = 1; i < it.chunkDocs; ++i) { startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; } if (numBufferedDocs == 0 && startOffsets[it.chunkDocs - 1] < chunkSize && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk - chunk is large enough - chunk is small enough - starting a new chunk { if (Debugging.AssertsEnabled) { Debugging.Assert(docID == it.docBase); } // no need to decompress, just copy data indexWriter.WriteIndex(it.chunkDocs, fieldsStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); it.CopyCompressedData(fieldsStream); this.docBase += it.chunkDocs; docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); docCount += it.chunkDocs; mergeState.CheckAbort.Work(300 * it.chunkDocs); } else { // decompress it.Decompress(); if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length) { throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.Length); } // copy non-deleted docs for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) { int diff = docID - it.docBase; StartDocument(it.numStoredFields[diff]); bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]); FinishDocument(); ++docCount; mergeState.CheckAbort.Work(300); } } } while (docID < maxDoc); it.CheckIntegrity(); } } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
public override void OnIndexEntryCreated(string entryKey, Document document) { using (var cmd = Connection.CreateCommand()) { cmd.Transaction = tx; var pkParam = cmd.CreateParameter(); pkParam.ParameterName = GetParameterName("entryKey"); pkParam.Value = entryKey; cmd.Parameters.Add(pkParam); var sb = new StringBuilder("INSERT INTO ") .Append(destination.TableName) .Append(" (") .Append(destination.PrimaryKeyColumnName) .Append(", "); foreach (var mapping in destination.ColumnsMapping) { sb.Append(mapping.Value).Append(", "); } sb.Length = sb.Length - 2; sb.Append(") \r\nVALUES (") .Append(pkParam.ParameterName) .Append(", "); foreach (var mapping in destination.ColumnsMapping) { var parameter = cmd.CreateParameter(); parameter.ParameterName = GetParameterName(mapping.Key); var field = document.GetFieldable(mapping.Key); var numericfield = document.GetFieldable(String.Concat(mapping.Key, "_Range")); if (numericfield != null) { field = numericfield; } if (field == null) { parameter.Value = DBNull.Value; } else if (field is NumericField) { var numField = (NumericField)field; parameter.Value = numField.GetNumericValue(); } else { var stringValue = field.StringValue(); if (datePattern.IsMatch(stringValue)) { try { parameter.Value = DateTools.StringToDate(stringValue); } catch { parameter.Value = stringValue; } } else { parameter.Value = stringValue; } } cmd.Parameters.Add(parameter); sb.Append(parameter.ParameterName).Append(", "); } sb.Length = sb.Length - 2; sb.Append(")"); cmd.CommandText = sb.ToString(); cmd.ExecuteNonQuery(); } }
public List <List <string> > MySearchText2(string querytext, int page) { List <string> result = new List <string>(); List <string> textAbstract = new List <string>(); List <string> numberofresults = new List <string>(); List <string> maximumPage = new List <string>(); List <List <string> > myResult = new List <List <string> >(); Query query = parser.Parse(querytext); querytext = query.ToString(); //string[] queryArray = querytext.Split(new[] { "Text:" }, StringSplitOptions.RemoveEmptyEntries); //string finalQuery = string.Join(",", queryArray); TopDocs results = searcher.Search(query, 100); int maxPage = 0; if (results.ScoreDocs.Length % 10 != 0) { maxPage = results.ScoreDocs.Length / 10 + 1; } else { maxPage = results.ScoreDocs.Length / 10; } //System.Console.WriteLine("Number of results is " + results.TotalHits); if (results.ScoreDocs.Length == 0) { MessageBox.Show("There is no such information"); } maximumPage.Add(maxPage.ToString()); int modul = results.ScoreDocs.Length % 10; numberofresults.Add("Number of results is: " + results.TotalHits.ToString()); if (page < maxPage) { for (int rank = (page - 1) * 10; rank < page * 10; rank++) { Lucene.Net.Documents.Document doc = searcher.Doc(results.ScoreDocs[rank].Doc); string myFieldValue = doc.Get(TEXT_FN).ToString(); string title = GetTitle(myFieldValue); string Author = GetAuthors(myFieldValue); string b = Get_b(myFieldValue); string firstSentence = GetFirstSentence(myFieldValue); string text_abstract = GetAbstract(myFieldValue); //result.Add("abstract+"+ text_abstract); result.Add("Rank:" + (rank + 1) + "\nTitle: " + title + "\nAuthor: " + Author + "\nBibliographic information: " + b + "\nfirst Sentence: " + firstSentence + "\n" + "\n-------------------"); textAbstract.Add("Abstract:" + text_abstract); } } else { for (int rank = (maxPage - 1) * 10; rank < (maxPage - 1) * 10 + modul; rank++) { Lucene.Net.Documents.Document doc = searcher.Doc(results.ScoreDocs[rank].Doc); string myFieldValue = doc.Get(TEXT_FN).ToString(); string title = GetTitle(myFieldValue); string Author = GetAuthors(myFieldValue); string b = Get_b(myFieldValue); string firstSentence = GetFirstSentence(myFieldValue); string text_abstract = GetAbstract(myFieldValue); textAbstract.Add("Abstract:" + text_abstract); result.Add("Rank: " + (rank + 1) + "\nTitle: " + title + "\nAuthor: " + Author + "\nBibliographic information: " + b + "\nfirst Sentence: " + firstSentence + "\n" + "\n-------------------"); } } myResult.Add(result); myResult.Add(textAbstract); myResult.Add(maximumPage); myResult.Add(numberofresults); return(myResult); }
public override void OnIndexEntryCreated(string entryKey, Document document) { var resultDocId = document.GetField(setupDoc.DocumentKey); if (resultDocId == null) { log.Warn("Could not find document id property '{0}' in '{1}' for index '{2}'", setupDoc.DocumentKey, entryKey, index); return; } var documentId = resultDocId.StringValue; itemsToRemove.TryRemove(documentId); var resultDoc = database.Get(documentId, null); if (resultDoc == null) { log.Warn("Could not find a document with the id '{0}' for index '{1}'", documentId, index); return; } var entityName = resultDoc.Metadata.Value <string>(Constants.RavenEntityName); if (entityName != null && viewGenerator.ForEntityNames.Contains(entityName)) { log.Warn( "Rejected update for a potentially recursive update on document '{0}' because the index '{1}' includes documents with entity name of '{2}'", documentId, index, entityName); return; } if (viewGenerator.ForEntityNames.Count == 0) { log.Warn( "Rejected update for a potentially recursive update on document '{0}' because the index '{1}' includes all documents", documentId, index); return; } var changesMade = false; foreach (var mapping in setupDoc.FieldNameMappings) { var field = document.GetFieldable(mapping.Key + "_Range") ?? document.GetFieldable(mapping.Key); if (field == null) { continue; } var numericField = field as NumericField; if (numericField != null) { resultDoc.DataAsJson[mapping.Value] = new RavenJValue(numericField.NumericValue); } else { resultDoc.DataAsJson[mapping.Value] = field.StringValue; } changesMade = true; } if (changesMade) { database.Put(documentId, resultDoc.Etag, resultDoc.DataAsJson, resultDoc.Metadata, null); } }
private static Field AddBoolField(this LuceneDocument doc, string name, bool data) { int convertedValue = (data ? 0x01 : 0x00); return(doc.AddInt32Field(name, convertedValue, Field.Store.YES)); }
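// Round-trip sketch pairing AddBoolField with the GetBool reader above (illustrative; "IsPublished"
// is a hypothetical field name, and AddInt32Field / GetInt32ValueOrDefault are this codebase's own
// LuceneDocument helpers, so booleans survive as stored 0/1 integers).
private static void BoolRoundTrip(LuceneDocument doc)
{
    doc.AddBoolField("IsPublished", true);         // stored as the integer 1
    bool isPublished = doc.GetBool("IsPublished"); // reads back true because the stored value is 1
}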
public Document GetProjectionFromDocument(Document doc, Lucene.Net.Documents.Document luceneDoc, float score, FieldsToFetch fieldsToFetch, JsonOperationContext context, IState state) { var result = new DynamicJsonValue(); foreach (var fieldToFetch in fieldsToFetch.Fields.Values) { if (TryGetValue(fieldToFetch, doc, luceneDoc, state, out var fieldVal)) { if (fieldsToFetch.SingleBodyOrMethodWithNoAlias) { Document newDoc = null; if (fieldVal is BlittableJsonReaderObject nested) { newDoc = new Document { Id = doc.Id, ChangeVector = doc.ChangeVector, Data = nested, Etag = doc.Etag, Flags = doc.Flags, IndexScore = score, LastModified = doc.LastModified, LowerId = doc.LowerId, NonPersistentFlags = doc.NonPersistentFlags, StorageId = doc.StorageId, TransactionMarker = doc.TransactionMarker }; } else if (fieldVal is Document d) { newDoc = d; newDoc.IndexScore = score; } else { ThrowInvalidQueryBodyResponse(fieldVal); } return(newDoc); } if (fieldVal is List <object> list) { var array = new DynamicJsonArray(); for (int i = 0; i < list.Count; i++) { if (list[i] is Document d3) { array.Add(d3.Data); } else { array.Add(list[i]); } } fieldVal = array; } if (fieldVal is Document d2) { fieldVal = d2.Data; } var key = fieldToFetch.ProjectedName ?? fieldToFetch.Name.Value; result[key] = fieldVal; } } return(ReturnProjection(result, doc, score, context)); }
public static void Main(String[] a) { String indexName = "localhost_index"; String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en"; Uri url = null; for (int i = 0; i < a.Length; i++) { if (a[i].Equals("-i")) { indexName = a[++i]; } else if (a[i].Equals("-f")) { fn = a[++i]; } else if (a[i].Equals("-url")) { url = new Uri(a[++i]); } } StreamWriter temp_writer; temp_writer = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding); temp_writer.AutoFlush = true; StreamWriter o = temp_writer; IndexReader r = IndexReader.Open(indexName); o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs"); LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r); o.WriteLine("Query generation parameters:"); o.WriteLine(mlt.DescribeParams()); o.WriteLine(); Query query = null; if (url != null) { o.WriteLine("Parsing URL: " + url); query = mlt.Like(url); } else if (fn != null) { o.WriteLine("Parsing file: " + fn); query = mlt.Like(new FileInfo(fn)); } o.WriteLine("q: " + query); o.WriteLine(); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(indexName); Lucene.Net.Search.Hits hits = searcher.Search(query); int len = hits.Length(); o.WriteLine("found: " + len + " documents matching"); o.WriteLine(); for (int i = 0; i < Math.Min(25, len); i++) { Lucene.Net.Documents.Document d = hits.Doc(i); String summary = d.Get("summary"); o.WriteLine("score : " + hits.Score(i)); o.WriteLine("url : " + d.Get("url")); o.WriteLine("\ttitle : " + d.Get("title")); if (summary != null) { o.WriteLine("\tsummary: " + d.Get("summary")); } o.WriteLine(); } }
private bool TryExtractValueFromIndex(FieldsToFetch.FieldToFetch fieldToFetch, Lucene.Net.Documents.Document indexDocument, DynamicJsonValue toFill, IState state) { if (fieldToFetch.CanExtractFromIndex == false) { return(false); } var name = fieldToFetch.ProjectedName ?? fieldToFetch.Name.Value; DynamicJsonArray array = null; FieldType fieldType = null; var anyExtracted = false; foreach (var field in indexDocument.GetFields(fieldToFetch.Name)) { if (fieldType == null) { fieldType = GetFieldType(field.Name, indexDocument); } var fieldValue = ConvertType(_context, field, fieldType, state); if (fieldType.IsArray) { if (array == null) { array = new DynamicJsonArray(); toFill[name] = array; } array.Add(fieldValue); anyExtracted = true; continue; } toFill[name] = fieldValue; anyExtracted = true; } return(anyExtracted); }
private bool TryGetValue(FieldsToFetch.FieldToFetch fieldToFetch, Document document, Lucene.Net.Documents.Document luceneDoc, IState state, out object value) { if (fieldToFetch.QueryField == null) { return(TryGetFieldValueFromDocument(document, fieldToFetch, out value)); } if (fieldToFetch.QueryField.Function != null) { var args = new object[fieldToFetch.QueryField.FunctionArgs.Length + 1]; for (int i = 0; i < fieldToFetch.FunctionArgs.Length; i++) { TryGetValue(fieldToFetch.FunctionArgs[i], document, luceneDoc, state, out args[i]); if (ReferenceEquals(args[i], document)) { args[i] = Tuple.Create(document, luceneDoc, state); } } args[args.Length - 1] = _query.QueryParameters; value = InvokeFunction( fieldToFetch.QueryField.Name, _query.Metadata.Query, args); return(true); } if (fieldToFetch.QueryField.ValueTokenType != null) { var val = fieldToFetch.QueryField.Value; if (fieldToFetch.QueryField.ValueTokenType.Value == ValueTokenType.Parameter) { if (_query == null) { value = null; return(false); // only happens for debug endpoints and more like this } _query.QueryParameters.TryGet((string)val, out val); } value = val; return(true); } if (fieldToFetch.QueryField.HasSourceAlias == false) { return(TryGetFieldValueFromDocument(document, fieldToFetch, out value)); } if (_loadedDocumentIds == null) { _loadedDocumentIds = new HashSet <string>(); _loadedDocuments = new Dictionary <string, Document>(); _loadedDocumentsByAliasName = new Dictionary <string, Document>(); } _loadedDocumentIds.Clear(); //_loadedDocuments.Clear(); - explicitly not clearing this, we want to cache this for the duration of the query _loadedDocuments[document.Id ?? string.Empty] = document; if (fieldToFetch.QueryField.SourceAlias != null) { if (fieldToFetch.QueryField.IsQuoted) { _loadedDocumentIds.Add(fieldToFetch.QueryField.SourceAlias); } else if (fieldToFetch.QueryField.IsParameter) { if (_query.QueryParameters == null) { throw new InvalidQueryException("The query is parametrized but the actual values of parameters were not provided", _query.Query, (BlittableJsonReaderObject)null); } if (_query.QueryParameters.TryGetMember(fieldToFetch.QueryField.SourceAlias, out var id) == false) { throw new InvalidQueryException($"Value of parameter '{fieldToFetch.QueryField.SourceAlias}' was not provided", _query.Query, _query.QueryParameters); } _loadedDocumentIds.Add(id.ToString()); } else if (fieldToFetch.QueryField.LoadFromAlias != null) { if (_loadedDocumentsByAliasName.TryGetValue(fieldToFetch.QueryField.LoadFromAlias, out var loadedDoc)) { IncludeUtil.GetDocIdFromInclude(loadedDoc.Data, fieldToFetch.QueryField.SourceAlias, _loadedDocumentIds); } } else { IncludeUtil.GetDocIdFromInclude(document.Data, fieldToFetch.QueryField.SourceAlias, _loadedDocumentIds); } } else { _loadedDocumentIds.Add(document.Id ?? string.Empty); // null source alias is the root doc _loadedDocumentsByAliasName.Clear(); } if (_loadedDocumentIds.Count == 0) { if (fieldToFetch.QueryField.SourceIsArray) { value = new List <object>(); return(true); } value = null; return(false); } var buffer = new List <object>(); foreach (var docId in _loadedDocumentIds) { if (docId == null) { continue; } if (_loadedDocuments.TryGetValue(docId, out var doc) == false) { _loadedDocuments[docId] = doc = LoadDocument(docId); } if (doc == null) { continue; } if (fieldToFetch.QueryField.Alias != null) { _loadedDocumentsByAliasName[fieldToFetch.QueryField.Alias] = doc; } if (string.IsNullOrEmpty(fieldToFetch.Name)) // we need the whole document here { buffer.Add(doc); continue; } if (TryGetFieldValueFromDocument(doc, fieldToFetch, out var val)) { if (val is string == false && val is System.Collections.IEnumerable items) { // we flatten arrays in projections foreach (var item in items) { buffer.Add(item); } fieldToFetch.QueryField.SourceIsArray = true; } else { buffer.Add(val); } } } if (fieldToFetch.QueryField.SourceIsArray) { value = buffer; return(true); } if (buffer.Count > 0) { if (buffer.Count > 1) { ThrowOnlyArrayFieldCanHaveMultipleValues(fieldToFetch); } value = buffer[0]; return(true); } value = null; return(false); }
public virtual void TestPerFieldCodec() { int NUM_DOCS = AtLeast(173); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); } using (BaseDirectoryWrapper dir = NewDirectory()) { dir.CheckIndexOnDispose = false; // we use a custom codec provider using (IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3)))) { Documents.Document doc = new Documents.Document(); // uses default codec: doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO)); // uses pulsing codec: Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO); doc.Add(field2); Field idField = NewStringField("id", "", Field.Store.NO); doc.Add(idField); for (int i = 0; i < NUM_DOCS; i++) { idField.SetStringValue("" + i); w.AddDocument(doc); if ((i + 1) % 10 == 0) { w.Commit(); } } if (VERBOSE) { Console.WriteLine("TEST: now delete id=77"); } w.DeleteDocuments(new Term("id", "77")); using (IndexReader r = DirectoryReader.Open(w, true)) { Assert.AreEqual(NUM_DOCS - 1, r.NumDocs); IndexSearcher s = NewSearcher(r); Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits); Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits); } if (VERBOSE) { Console.WriteLine("\nTEST: now delete 2nd doc"); } w.DeleteDocuments(new Term("id", "44")); if (VERBOSE) { Console.WriteLine("\nTEST: now force merge"); } w.ForceMerge(1); if (VERBOSE) { Console.WriteLine("\nTEST: now open reader"); } using (IndexReader r = DirectoryReader.Open(w, true)) { Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc); Assert.AreEqual(NUM_DOCS - 2, r.NumDocs); IndexSearcher s = NewSearcher(r); Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits); Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits); Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits); Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits); if (VERBOSE) { Console.WriteLine("\nTEST: now close NRT reader"); } } } } }
private static List <string> GetStringList(this LuceneDocument doc, string name) { return(doc.GetValues(name)?.FirstOrDefault()?.Split(' ').ToList()); }
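// Write-side counterpart sketch for GetStringList above (an assumption, not from the original
// source: the Split(' ') on read implies lists are persisted as one space-joined stored string).
private static void AddStringList(LuceneDocument doc, string name, IEnumerable<string> values)
{
    // A stock Lucene.NET 4.8 StringField keeps the joined value as a single stored, untokenized term.
    doc.Add(new StringField(name, string.Join(" ", values), Field.Store.YES));
}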
public abstract Document Get(Lucene.Net.Documents.Document input, float score);
private static DateTime GetDate(this LuceneDocument doc, string name) { var ticks = doc.GetField(name)?.GetInt64ValueOrDefault(); return(ticks.HasValue ? new DateTime(ticks.Value) : DateTime.MinValue); }
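// Write-side counterpart sketch for GetDate above (illustrative; assumes stock Lucene.NET 4.8,
// where a stored Int64Field can carry DateTime.Ticks; the codebase may expose its own helper instead).
private static void AddDate(LuceneDocument doc, string name, DateTime date)
{
    // Ticks round-trip losslessly, and GetDate above reconstructs the DateTime from them.
    doc.Add(new Int64Field(name, date.Ticks, Field.Store.YES));
}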
public override DocumentsWriter.DocWriter ProcessDocument() { consumer.StartDocument(); fieldsWriter.StartDocument(); Document doc = docState.doc; System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start")); fieldCount = 0; int thisFieldGen = fieldGen++; System.Collections.Generic.IList <IFieldable> docFields = doc.GetFields(); int numDocFields = docFields.Count; // Absorb any new fields first seen in this document. // Also absorb any changes to fields we had already // seen before (eg suddenly turning on norms or // vectors, etc.): for (int i = 0; i < numDocFields; i++) { IFieldable field = docFields[i]; string fieldName = field.Name; // Make sure we have a PerField allocated int hashPos = fieldName.GetHashCode() & hashMask; DocFieldProcessorPerField fp = fieldHash[hashPos]; while (fp != null && !fp.fieldInfo.name.Equals(fieldName)) { fp = fp.next; } if (fp == null) { // TODO FI: we need to genericize the "flags" that a // field holds, and, how these flags are merged; it // needs to be more "pluggable" such that if I want // to have a new "thing" my Fields can do, I can // easily add it FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored, field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms, false, field.OmitTermFreqAndPositions); fp = new DocFieldProcessorPerField(this, fi); fp.next = fieldHash[hashPos]; fieldHash[hashPos] = fp; totalFieldCount++; if (totalFieldCount >= fieldHash.Length / 2) { Rehash(); } } else { fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored, field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms, false, field.OmitTermFreqAndPositions); } if (thisFieldGen != fp.lastGen) { // First time we're seeing this field for this doc fp.fieldCount = 0; if (fieldCount == fields.Length) { int newSize = fields.Length * 2; DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize]; Array.Copy(fields, 0, newArray, 0, fieldCount); fields = newArray; } fields[fieldCount++] = fp; fp.lastGen = thisFieldGen; } if (fp.fieldCount == fp.fields.Length) { IFieldable[] newArray = new IFieldable[fp.fields.Length * 2]; Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount); fp.fields = newArray; } fp.fields[fp.fieldCount++] = field; if (field.IsStored) { fieldsWriter.AddField(field, fp.fieldInfo); } } // If we are writing vectors then we must visit // fields in sorted order so they are written in // sorted order. TODO: we actually only need to // sort the subset of fields that have vectors // enabled; we could save [small amount of] CPU // here. QuickSort(fields, 0, fieldCount - 1); for (int i = 0; i < fieldCount; i++) { fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount); } if (docState.maxTermPrefix != null && docState.infoStream != null) { docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); docState.maxTermPrefix = null; } DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument(); DocumentsWriter.DocWriter two = consumer.FinishDocument(); if (one == null) { return(two); } else if (two == null) { return(one); } else { PerDoc both = GetPerDoc(); both.docID = docState.docID; System.Diagnostics.Debug.Assert(one.docID == docState.docID); System.Diagnostics.Debug.Assert(two.docID == docState.docID); both.one = one; both.two = two; return(both); } }
public static T ToEntity <T>(this LuceneDocument source) where T : class, IMarcellEntity { return(ToEntity(source, typeof(T)) as T); }
public DefaultDocumentLuceneWrapper(LuceneDocument doc) { _doc = doc; }
public List<Suggestion> ChangeComm(string changeNumber)
{
    WebOperationContext.Current.OutgoingResponse.Headers.Add("Access-Control-Allow-Origin", "*");

    // Grab the information about this change ticket from ServiceNow.
    GetServiceNowData SDData = new GetServiceNowData();
    Task<string> changeRequest = SDData.GetChangeInfoREST(changeNumber);
    string changeJsonResponse = changeRequest.Result;
    dynamic json = JsonConvert.DeserializeObject(changeJsonResponse);

    string changeBody = "";
    string changeGroup = "";
    string CISysID = "";
    string changecommCI = "";
    DateTime changeStart = new DateTime();
    DateTime changeEnd = new DateTime();

    if (json != null)
    {
        changeBody += json.result[0].description.ToString().Trim();
        changeGroup += json.result[0].assignment_group.value.ToString().Trim();
        CISysID += json.result[0].cmdb_ci.value.ToString().Trim();
        changeStart = DateTime.ParseExact(json.result[0].start_date.ToString(), "yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
        changeEnd = DateTime.ParseExact(json.result[0].end_date.ToString(), "yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
    }
    else
    {
        throw new WebFaultException<string>(
            string.Format("There is no ticket in ServiceNow for '{0}'.", changeNumber),
            HttpStatusCode.NotFound);
    }

    // Grab the CI information for this change.
    if (CISysID != "")
    {
        Task<string> changeCI = SDData.GetCIInfoREST(CISysID);
        string CIJson = changeCI.Result;
        dynamic CIjson = JsonConvert.DeserializeObject(CIJson);
        if (CIjson != null)
        {
            changecommCI += CIjson.result.sys_class_name.ToString().Trim();
        }
    }

    // See whether we can detect any applications in the change description.
    GetApplications getApps = new GetApplications();
    List<string> apps = getApps.GetApps(changeBody);

    // Now build the query for this change.
    QueryBuilder queryBuilder = new QueryBuilder();
    BooleanQuery query;
    if (apps.Count > 0)
    {
        string changeApps = String.Join(" ", apps);
        query = queryBuilder.GetCommQuery(changeBody, changeGroup, changeApps, changecommCI);
    }
    else
    {
        query = queryBuilder.GetCommQuery_NoApps(changeBody, changeGroup, changecommCI);
    }

    // Finally, execute the query against our Lucene database of change communications.
    string indexFileLocation = @"C:\Models\ChangeData\";
    Lucene.Net.Store.Directory dir = FSDirectory.Open(indexFileLocation);
    IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(Lucene.Net.Index.IndexReader.Open(dir, true));
    TopDocs td = searcher.Search(query, 5);
    ScoreDoc[] hits = td.ScoreDocs;

    List<Suggestion> suggestions = new List<Suggestion>();
    TextProcessor processor = new TextProcessor();
    foreach (ScoreDoc searchResult in hits)
    {
        int docId = searchResult.Doc;
        double score = Convert.ToDouble(searchResult.Score);
        Lucene.Net.Documents.Document doc = searcher.Doc(docId);

        Suggestion suggestion = new Suggestion();
        suggestion.changeNumber = doc.Get("changenumber");
        suggestion.changeGroup = doc.Get("change_group_name");
        suggestion.release = changeStart;
        suggestion.archive = changeEnd;
        suggestion.title = doc.Get("title");
        suggestion.application = doc.Get("application");
        suggestion.summaryHTML = processor.Convert2Text(suggestion, doc.Get("communication_descriptionHTML"));
        suggestion.summaryText = processor.GenerateDescriptionText(suggestion.summaryHTML);
        suggestion.bodyHTML = processor.Convert2Text(suggestion, doc.Get("communication_bodyHTML"));
        // Close the reference-info div so the generated HTML stays well formed.
        suggestion.bodyHTML += "<div id='referenceinfo'><p>Reference No.: <strong>" + changeNumber + "</strong></p><p> </p><p> </p></div>";
        suggestions.Add(suggestion);
    }
    return suggestions;
}
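// A minimal sketch (not part of the original service) of the same search step with the
// directory, reader, and searcher wrapped in using blocks so the index files are released
// when the method returns; assumes Lucene.Net 3.x, where these types implement IDisposable.
// The method name is illustrative; the "title" field is reused from the method above.
public List<string> SearchTitles(Query query, string indexFileLocation)
{
    var titles = new List<string>();
    using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexFileLocation))
    using (IndexReader reader = IndexReader.Open(dir, true))      // read-only reader
    using (IndexSearcher searcher = new IndexSearcher(reader))
    {
        TopDocs td = searcher.Search(query, 5);
        foreach (ScoreDoc hit in td.ScoreDocs)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(hit.Doc);
            titles.Add(doc.Get("title"));                         // stored field
        }
    }
    return titles;
}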
private int CheckDbAndIndex(DbDataReader dbreader, IndexReader ixreader, List<Difference> result)
{
    var versionId = dbreader.GetInt32(0);
    var dbNodeTimestamp = dbreader.GetInt64(1);
    var dbVersionTimestamp = dbreader.GetInt64(2);

    // Look up the index document that belongs to this version id.
    var termDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.VersionId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(versionId)));
    int docid = -1;
    if (termDocs.Next())
    {
        docid = termDocs.Doc();
        var doc = ixreader.Document(docid);
        var indexNodeTimestamp = ParseLong(doc.Get(LucObject.FieldName.NodeTimestamp));
        var indexVersionTimestamp = ParseLong(doc.Get(LucObject.FieldName.VersionTimestamp));
        var nodeId = ParseInt(doc.Get(LucObject.FieldName.NodeId));
        var version = doc.Get(LucObject.FieldName.Version);
        var p = doc.Get(LucObject.FieldName.Path);

        // More than one index document for the same version id is itself a difference.
        if (termDocs.Next())
        {
            result.Add(new Difference(IndexDifferenceKind.MoreDocument)
            {
                DocId = docid, NodeId = nodeId, VersionId = versionId, Version = version, Path = p,
                DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp,
                IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp
            });
        }
        if (dbVersionTimestamp != indexVersionTimestamp)
        {
            result.Add(new Difference(IndexDifferenceKind.DifferentVersionTimestamp)
            {
                DocId = docid, VersionId = versionId,
                DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp,
                IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp,
                NodeId = nodeId, Version = version, Path = p
            });
        }
        if (dbNodeTimestamp != indexNodeTimestamp)
        {
            // A node-timestamp mismatch is acceptable when this document is not the last
            // draft and the last-draft document lives on the same path.
            var ok = false;
            var isLastDraft = doc.Get(LucObject.FieldName.IsLastDraft);
            if (isLastDraft != BooleanIndexHandler.YES)
            {
                var latestDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.NodeId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(nodeId)));
                Lucene.Net.Documents.Document latestDoc = null;
                while (latestDocs.Next())
                {
                    var latestdocid = latestDocs.Doc();
                    var d = ixreader.Document(latestdocid);
                    if (d.Get(LucObject.FieldName.IsLastDraft) != BooleanIndexHandler.YES)
                        continue;
                    latestDoc = d;
                    break;
                }
                // latestDoc may be null when no last-draft document exists for this node.
                var latestPath = latestDoc == null ? null : latestDoc.Get(LucObject.FieldName.Path);
                if (latestPath == p)
                    ok = true;
            }
            if (!ok)
            {
                result.Add(new Difference(IndexDifferenceKind.DifferentNodeTimestamp)
                {
                    DocId = docid, VersionId = versionId,
                    DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp,
                    IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp,
                    NodeId = nodeId, Version = version, Path = p
                });
            }
        }
    }
    else
    {
        // No index document at all for this version id.
        result.Add(new Difference(IndexDifferenceKind.NotInIndex)
        {
            DocId = docid, VersionId = versionId,
            DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp
        });
    }
    return docid;
}
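// A minimal sketch of the lookup pattern used above: resolving the index document for an
// integer key via TermDocs and NumericUtils.IntToPrefixCoded. The method and field-name
// parameter are illustrative; it matches the 2.9-style TermDocs API (Next()/Doc()/Close())
// used in the method above.
private static Lucene.Net.Documents.Document FindByIntKey(IndexReader reader, string fieldName, int key)
{
    var termDocs = reader.TermDocs(new Lucene.Net.Index.Term(fieldName, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(key)));
    try
    {
        if (termDocs.Next())
            return reader.Document(termDocs.Doc());   // first (and normally only) match
        return null;                                  // no document indexed for this key
    }
    finally
    {
        termDocs.Close();                             // release the underlying term enumerator
    }
}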
public List<List<string>> MySearchText(string rawQuery, int page, bool first)
{
    List<string> result = new List<string>();
    List<string> textAbstract = new List<string>();
    List<string> numberofresults = new List<string>();
    List<string> maximunPage = new List<string>();
    List<string> finalquery = new List<string>();
    List<List<string>> myResult = new List<List<string>>();
    List<string> Store_information = new List<string>();
    string finalQuery;

    Query query = parser.Parse(rawQuery);
    string querytext = query.ToString();
    List<string> synonym = new List<string>();
    TopDocs results;

    if (GUIForm.expandState)
    {
        // Expand the query with WordNet synonyms of the raw query terms.
        var synSetList = GUIForm.wordNet.GetSynSets(rawQuery);
        foreach (var synSet in synSetList)
        {
            foreach (var word in synSet.Words)
            {
                if (!synonym.Contains(word))
                {
                    synonym.Add(word);
                }
            }
        }
        // Boost the first synonym when weighting is enabled (guard against an empty list).
        if (GUIForm.Weight && synonym.Count > 0)
        {
            synonym[0] += "^5";
            first = false;
        }
        string expandQuery = string.Join(" ", synonym);
        Query expandedquery = parser.Parse(expandQuery);
        finalQuery = expandedquery.ToString();
        finalquery.Add("Final Query: " + finalQuery);
        results = searcher.Search(expandedquery, 100);
    }
    else
    {
        string[] queryArray = querytext.Split(new[] { "Text:" }, StringSplitOptions.RemoveEmptyEntries);
        results = searcher.Search(query, 100);
        finalQuery = queryArray.Length > 0 ? queryArray[0] : querytext;
        finalquery.Add("Final Query: " + finalQuery);
    }

    if (results.ScoreDocs.Length == 0)
    {
        MessageBox.Show("There is no such information");
    }

    // Ten results per page; the last page may be shorter.
    int maxPage = (results.ScoreDocs.Length + 9) / 10;
    maximunPage.Add(maxPage.ToString());
    numberofresults.Add("Number of results is: " + results.TotalHits.ToString());

    if (maxPage > 0)
    {
        int pageToShow = Math.Min(page, maxPage);
        int start = (pageToShow - 1) * 10;
        int end = Math.Min(pageToShow * 10, results.ScoreDocs.Length);
        for (int rank = start; rank < end; rank++)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(results.ScoreDocs[rank].Doc);
            string myFieldValue = doc.Get(TEXT_FN);
            string ID = GetID(myFieldValue);
            string title = GetTitle(myFieldValue);
            string Author = GetAuthors(myFieldValue);
            string b = Get_b(myFieldValue);
            string firstSentence = GetFirstSentence(myFieldValue);
            string text_abstract = GetAbstract(myFieldValue);
            Store_information.Add(finalQuery + "\t\t" + "Q0" + "\t\t" + ID + "\t\t" + rank + "\t\t" + results.ScoreDocs[rank].Score + "\t\t" + "n9599291_n9814434_n9754911" + "\n");
            result.Add("Rank: " + (rank + 1) + "\nTitle: " + title + "\nAuthor: " + Author + "\nBibliographic information: " + b + "\nFirst sentence: " + firstSentence + "\n" + "\n-------------------");
            textAbstract.Add("Abstract:" + text_abstract);
        }
    }

    myResult.Add(result);
    myResult.Add(textAbstract);
    myResult.Add(maximunPage);
    myResult.Add(numberofresults);
    myResult.Add(finalquery);
    myResult.Add(Store_information);
    return myResult;
}
private static double GetDouble(this LuceneDocument doc, string name)
{
    var value = doc.GetField(name)?.GetDoubleValueOrDefault();
    return value ?? 0.0;
}
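// A hypothetical caller for the GetDouble extension above; the method name and the "price"
// field are assumptions for illustration. Documents without the field contribute 0.0
// instead of throwing.
private static double SumPrices(IEnumerable<LuceneDocument> docs)
{
    double total = 0.0;
    foreach (var doc in docs)
        total += doc.GetDouble("price");   // falls back to 0.0 when the field is missing
    return total;
}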
public abstract bool TryGetKey(Lucene.Net.Documents.Document document, out string key);
protected abstract Document DirectGet(Lucene.Net.Documents.Document input, string id);