AddDocument() public method

Adds a document to this index. If the document contains more terms for a given field than the limit set via SetMaxFieldLength(int), the remainder are discarded.

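For illustration, here is a minimal sketch of what the limit means in practice, using the Lucene.Net 3.x API that most of the examples below use (veryLongText is a hypothetical variable):

        // MaxFieldLength.LIMITED caps each field at 10,000 terms by default;
        // MaxFieldLength.UNLIMITED keeps every term.
        var dir = new RAMDirectory();
        var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        var doc = new Document();
        doc.Add(new Field("body", veryLongText, Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc); // terms beyond the limit in "body" are silently dropped
        writer.Close();
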
Note that if an Exception is hit (for example disk full) then the index will be consistent, but this document may not have been added. Furthermore, it's possible the index will have one segment in non-compound format even when using compound files (when a merge has partially succeeded).

This method periodically flushes pending documents to the Directory (see above), and also periodically triggers segment merges in the index according to the MergePolicy in use.

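As a rough sketch, the flush triggers can be tuned like this in the 3.x API (the values here are arbitrary):

        // given an open IndexWriter "writer": flush when 100 documents are
        // buffered or the RAM buffer exceeds 32 MB, whichever comes first;
        // segment merges then follow the configured MergePolicy.
        writer.SetMaxBufferedDocs(100);
        writer.SetRAMBufferSizeMB(32.0);
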
Merges temporarily consume space in the directory. The amount of space required is up to 1X the size of all segments being merged, when no readers/searchers are open against the index, and up to 2X the size of all segments being merged when readers/searchers are open against the index (see Optimize() for details). The sequence of primitive merge operations performed is governed by the merge policy.

Note that each term in the document can be no longer than 16383 characters, otherwise an IllegalArgumentException will be thrown.

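For example (a hedged sketch; the oversized value is purely illustrative):

        // A single un-analyzed term of 20,000 characters exceeds the
        // 16,383-character limit, so adding this document throws.
        var doc = new Document();
        doc.Add(new Field("id", new string('a', 20000), Field.Store.NO, Field.Index.NOT_ANALYZED));
        // writer.AddDocument(doc); // would throw: term too long
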
Note that it's possible to create an invalid Unicode string if a UTF-16 surrogate pair is malformed. In this case, the invalid characters are silently replaced with the Unicode replacement character U+FFFD.

NOTE: if this method hits an OutOfMemoryException you should immediately close the writer. See above for details.

public AddDocument ( Lucene.Net.Documents.Document doc ) : void
    doc: Lucene.Net.Documents.Document (the document to add)
    returns: void
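
Putting the notes above together, a minimal usage sketch in the style of the examples below (the directory and analyzer are placeholder choices):

        var dir = new RAMDirectory();
        var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            var doc = new Document();
            doc.Add(new Field("title", "hello world", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Commit();
        }
        catch (System.OutOfMemoryException)
        {
            // per the note above: close immediately; pending changes are not committed
            writer.Close();
            throw;
        }
        writer.Close();
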
Example #1
        public void SetUp()
        {

            var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            var doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
Example #2
        public void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.AddDocument(doc);
            }
            w.Close();

            // If buffer size is small enough to cause a flush, errors ensue...
            w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            w.SetMaxBufferedDocs(2);

            Term pkTerm = new Term("pk", "");
            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                String value = i.ToString();
                doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.UpdateDocument(pkTerm.CreateTerm(value), doc);
            }
            w.Rollback();

            IndexReader r = IndexReader.Open(dir, true);
            Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
            r.Close();
            dir.Close();
        }
Example #3
        /// <summary>
        /// Indexes the given passage collection
        /// </summary>
        /// <param name="passage">Collection of passages to index</param>
        public void IndexText(PassageCollection passage)
        {
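            // Note: one Document instance is reused; per-passage fields are added,
            // the document is indexed, then those fields are removed so the final
            // query-level document can be built from the same instance.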
            Lucene.Net.Documents.Document doc = new Document();
            foreach (Passage p in passage.passages)
            {
                Lucene.Net.Documents.Field is_selected  = new Field("is_selected", p.is_selected.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Lucene.Net.Documents.Field url          = new Field("url", p.url, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Lucene.Net.Documents.Field passage_text = new Field("passage_text", p.passage_text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Lucene.Net.Documents.Field passage_ID   = new Field("passage_ID", p.passage_ID.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                doc.Add(is_selected);
                doc.Add(url);
                doc.Add(passage_text);
                doc.Add(passage_ID);
                writer.AddDocument(doc);
                doc.RemoveField("is_selected");
                doc.RemoveField("url");
                doc.RemoveField("passage_text");
                doc.RemoveField("passage_ID");
            }

            Lucene.Net.Documents.Field query_id   = new Field("query_id", passage.query_id.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field query_type = new Field("query_type", passage.query_type, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Field query      = new Field("query", passage.query.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            foreach (string s in passage.answers)
            {
                Lucene.Net.Documents.Field answers = new Field("answers", s, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                doc.Add(answers);
            }

            doc.Add(query_id);
            doc.Add(query_type);
            doc.Add(query);
            writer.AddDocument(doc);
        }
Example #4
        public void TestReadersWriters()
        {
            Directory dir;
            
            using(dir = new RAMDirectory())
            {
                Document doc;
                IndexWriter writer;
                IndexReader reader;

                using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Field field = new Field("name", "value", Field.Store.YES,Field.Index.ANALYZED);
                    doc = new Document();
                    doc.Add(field);
                    writer.AddDocument(doc);
                    writer.Commit();

                    using (reader = writer.GetReader())
                    {
                        IndexReader r1 = reader.Reopen();
                    }

                    Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here");
                }
                
                Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here");

                Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory");
            }
            Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory");
        }
Example #5
        public override void SetUp()
        {
            base.SetUp();
            store = NewDirectory();
            IndexWriter writer = new IndexWriter(store, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            Document doc;

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Tom", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Jerry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("zzz", "bar", Field.Store.YES));
            writer.AddDocument(doc);

            writer.ForceMerge(1);
            writer.Dispose();
        }
Example #6
        void Index()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Close();
        }
Example #7
        /// <summary>
        /// Set up a new index in RAM with three test phrases and the supplied Analyzer.
        /// </summary>
        /// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
        public override void SetUp()
        {
            base.SetUp();
            analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
            directory = NewDirectory();
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            Document doc;
            doc = new Document();
            doc.Add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "just another test sentence", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
            writer.AddDocument(doc);

            writer.Dispose();

            reader = DirectoryReader.Open(directory);
            searcher = NewSearcher(reader);
        }
Example #8
        public virtual void TestDateCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long day = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs cost less than if they had only been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
Example #9
        /// <summary>
        /// index 1 document and commit.
        /// prepare for crashing.
        /// index 1 more document, and upon commit, creation of segments_2 will crash.
        /// </summary>
        private void IndexAndCrashOnCreateOutputSegments2()
        {
            Directory realDirectory = FSDirectory.Open(Path);
            CrashAfterCreateOutput crashAfterCreateOutput = new CrashAfterCreateOutput(realDirectory);

            // NOTE: cannot use RandomIndexWriter because it
            // sometimes commits:
            IndexWriter indexWriter = new IndexWriter(crashAfterCreateOutput, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            indexWriter.AddDocument(Document);
            // writes segments_1:
            indexWriter.Commit();

            crashAfterCreateOutput.GetCrashAfterCreateOutput = "segments_2";
            indexWriter.AddDocument(Document);
            try
            {
                // tries to write segments_2 but hits fake exc:
                indexWriter.Commit();
                Assert.Fail("should have hit CrashingException");
            }
            catch (CrashingException)
            {
                // expected
            }
            // writes segments_3
            indexWriter.Dispose();
            Assert.IsFalse(SlowFileExists(realDirectory, "segments_2"));
            crashAfterCreateOutput.Dispose();
        }
Example #10
        private static void AddDocuments(IndexWriter writer) {
            var pages = PagesMetadata.Instance;
            var posts = PostsMetadata.Instance;

            foreach (var page in pages.List) {
                var doc = new Document();

                doc.Add(new Field("Url", "/" + page.Slug, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Title", page.Title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Body", new Page(page.Slug, pages).BodyWithoutHtml, Field.Store.YES, Field.Index.ANALYZED));

                writer.AddDocument(doc);
            }

            foreach (var post in posts.List) {
                var doc = new Document();

                doc.Add(new Field("Url", "/blog/" + post.Slug, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Title", post.Title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Description", post.ShortDescription, Field.Store.YES, Field.Index.ANALYZED));
                if (post.PublishDate != DateTime.MinValue)
                    doc.Add(new Field("PublishDate", post.PublishDate.ToString("dd MMMM yyyy"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                if (post.LastUpdatedDate != DateTime.MinValue)
                    doc.Add(new Field("LastUpdatedDate", post.LastUpdatedDate.ToString("dd MMMM yyyy"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Author", post.Author, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Body", new Post(post.Slug, posts).BodyWithoutHtml, Field.Store.YES, Field.Index.ANALYZED));

                writer.AddDocument(doc);
            }
        }
Example #11
        // Creates multiple fields and sections (DocID, Title, Author, Bibliographic, Abstract) for the index
        public void IndexText(string text)
        {
            if (text == "")
            {
                Lucene.Net.Documents.Field    field = new Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Lucene.Net.Documents.Document doc   = new Document();
                doc.Add(field);
                writer.AddDocument(doc);
            }

            if (!string.IsNullOrEmpty(text))
            {
                string removal = "\n";
                text = text.Replace(removal, "");

                // Delimiters
                string pattern = @"(.T)|(.A)|(.B)|(.W)";

                // Splits items by delimiters
                string[] substrings = Regex.Split(text, pattern);
                Lucene.Net.Documents.Document doc = new Document();

                // Creates new doc of multiple fields
                doc.Add(new Field("Title", substrings[2], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.Add(new Field("Author", substrings[4], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.Add(new Field("Bibliographic", substrings[6], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.Add(new Field("Abstract", substrings[8], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(doc);
            }
        }
Example #12
        public void SetUp()
        {

            IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);

            Document doc;

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
Example #13
        void Index()
        {
            var conf = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(LuceneVersion.LUCENE_CURRENT));

            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, conf); // the 3.x (analyzer, MaxFieldLength) constructor is replaced by IndexWriterConfig in 4.x

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Dispose();
        }
Example #14
        public virtual void TestBackToTheFuture()
        {
            Directory dir = NewDirectory();
            IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            Document doc = new Document();
            doc.Add(NewStringField("foo", "bar", Field.Store.NO));
            iw.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("foo", "baz", Field.Store.NO));
            iw.AddDocument(doc);

            DirectoryReader r1 = DirectoryReader.Open(iw, true);

            iw.DeleteDocuments(new Term("foo", "baz"));
            DirectoryReader r2 = DirectoryReader.Open(iw, true);

            FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r2), "foo");

            SortedSetDocValues v = FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r1), "foo");
            Assert.AreEqual(2, v.ValueCount);
            v.Document = 1;
            Assert.AreEqual(1, v.NextOrd());

            iw.Dispose();
            r1.Dispose();
            r2.Dispose();
            dir.Dispose();
        }
Example #15
        public void TestSpanRegex()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            // doc.Add(new Field("field", "the quick brown fox jumps over the lazy dog",
            // Field.Store.NO, Field.Index.ANALYZED));
            // writer.AddDocument(doc);
            // doc = new Document();
            doc.Add(new Field("field", "auto update", Field.Store.NO,
                Field.Index.ANALYZED));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new Field("field", "first auto update", Field.Store.NO,
                Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory, true);
            SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
            SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
            // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
            // true);
            int numHits = searcher.Search(sfq, null, 1000).TotalHits;
            Assert.AreEqual(1, numHits);
        }
Example #16
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc1       = new Document();
            Document     doc2       = new Document();
            Document     doc3       = new Document();
            Document     doc4       = new Document();
            Document     doc5       = new Document();

            doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
            doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
            doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
            doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
            doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc1, null);
            writer.AddDocument(doc2, null);
            writer.AddDocument(doc3, null);
            writer.AddDocument(doc4, null);
            writer.AddDocument(doc5, null);
            writer.Optimize(null);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

            //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open((Directory)indexStore, true, null);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix + "*"), null);

            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            }while (te.Next(null));

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000, null).ScoreDocs;
            Assert.AreEqual(0, result.Length);
        }
Example #17
    public void Indexer()
    {
        var directory = FSDirectory.Open(new DirectoryInfo(Dic));
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        foreach (var item in XeDal.SelectAll())
        {
            var doc = new Document();
            doc.Add(new Field("Ten", item.Ten, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("NoiDung", string.Format("{0} {1}", item.Ten, item.GioiThieu), Field.Store.YES,
                              Field.Index.ANALYZED));
            doc.Add(new Field("RowId", item.RowId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("ID", item.Id.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("Url", item.XeUrl, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        foreach (var item in NhomDal.SelectAll())
        {
            var doc = new Document();
            doc.Add(new Field("Ten", item.Ten, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("NoiDung", string.Format("{0} {1} {2}", item.Ten, item.GioiThieu, item.MoTa), Field.Store.YES,
                              Field.Index.ANALYZED));
            doc.Add(new Field("RowId", item.RowId.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("ID", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Url", item.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        foreach (var item in BinhLuanDal.SelectAll())
        {
            item.Ten = string.Format("{0} bình luận ngày {1}", item.Username, Lib.TimeDiff(item.NgayTao));
            var doc = new Document();
            doc.Add(new Field("Ten", item.Ten, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("NoiDung", string.Format("{0} {1}", item.Ten, item.NoiDung), Field.Store.YES,
                              Field.Index.ANALYZED));
            doc.Add(new Field("RowId", item.RowId.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("ID", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Url", item.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        foreach (var item in BlogDal.SelectAll())
        {
            var doc = new Document();
            doc.Add(new Field("Ten", item.Ten, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("NoiDung", string.Format("{0} {1}", item.Ten, item.NoiDung), Field.Store.YES,
                              Field.Index.ANALYZED));
            doc.Add(new Field("RowId", item.RowId.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("ID", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Url", item.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        writer.Optimize();
        writer.Commit();
        writer.Close();
    }
Example #18
        public override void  SetUp()
        {
            base.SetUp();

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.SetSimilarity(sim);

            // hed is the most important field, dek is secondary

            // d1 is an "ok" match for:  albino elephant
            {
                Document d1 = new Document();
                d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));                 //Field.Keyword("id", "d1"));
                d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("hed", "elephant"));
                d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("dek", "elephant"));
                writer.AddDocument(d1, null);
            }

            // d2 is a "good" match for:  albino elephant
            {
                Document d2 = new Document();
                d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));                 //Field.Keyword("id", "d2"));
                d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("hed", "elephant"));
                d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));                //Field.Text("dek", "albino"));
                d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("dek", "elephant"));
                writer.AddDocument(d2, null);
            }

            // d3 is a "better" match for:  albino elephant
            {
                Document d3 = new Document();
                d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));                 //Field.Keyword("id", "d3"));
                d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));                //Field.Text("hed", "albino"));
                d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("hed", "elephant"));
                writer.AddDocument(d3, null);
            }

            // d4 is the "best" match for:  albino elephant
            {
                Document d4 = new Document();
                d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));                 //Field.Keyword("id", "d4"));
                d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));                //Field.Text("hed", "albino"));
                d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED));              //Field.Text("hed", "elephant"));
                d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));                //Field.Text("dek", "albino"));
                writer.AddDocument(d4, null);
            }

            writer.Close();

            r            = IndexReader.Open(index, true, null);
            s            = new IndexSearcher(r);
            s.Similarity = sim;
        }
Example #19
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < docFields.Length; i++)
            {
                Document document = new Document();
                document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();
            searcher = new IndexSearcher(directory, true, null);

            // Make big index
            dir2 = new MockRAMDirectory(directory);

            // First multiply small test index:
            mulFactor = 1;
            int docCount = 0;

            do
            {
                Directory   copy        = new RAMDirectory(dir2, null);
                IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
                indexWriter.AddIndexesNoOptimize(null, new[] { copy });
                docCount = indexWriter.MaxDoc();
                indexWriter.Close();
                mulFactor *= 2;
            } while (docCount < 3000);

            IndexWriter w   = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
            Document    doc = new Document();

            doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            doc = new Document();
            doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            // optimize to 1 segment
            w.Optimize(null);
            reader = w.GetReader(null);
            w.Close();
            bigSearcher = new IndexSearcher(reader);
        }
Example #20
        public virtual void TestFilterIndexReader()
        {
            Directory directory = NewDirectory();

            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            Document d1 = new Document();
            d1.Add(NewTextField("default", "one two", Field.Store.YES));
            writer.AddDocument(d1);

            Document d2 = new Document();
            d2.Add(NewTextField("default", "one three", Field.Store.YES));
            writer.AddDocument(d2);

            Document d3 = new Document();
            d3.Add(NewTextField("default", "two four", Field.Store.YES));
            writer.AddDocument(d3);

            writer.Dispose();

            Directory target = NewDirectory();

            // We mess with the postings so this can fail:
            ((BaseDirectoryWrapper)target).CrossCheckTermVectorsOnClose = false;

            writer = new IndexWriter(target, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            IndexReader reader = new TestReader(DirectoryReader.Open(directory));
            writer.AddIndexes(reader);
            writer.Dispose();
            reader.Dispose();
            reader = DirectoryReader.Open(target);

            TermsEnum terms = MultiFields.GetTerms(reader, "default").Iterator(null);
            while (terms.Next() != null)
            {
                Assert.IsTrue(terms.Term().Utf8ToString().IndexOf('e') != -1);
            }

            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(new BytesRef("one")));

            DocsAndPositionsEnum positions = terms.DocsAndPositions(MultiFields.GetLiveDocs(reader), null);
            while (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.IsTrue((positions.DocID() % 2) == 1);
            }

            reader.Dispose();
            directory.Dispose();
            target.Dispose();
        }
Example #21
        public virtual void TestMixedMerge()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2)));
            Document d = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
            d.Add(f1);

            // this field will NOT have norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
            d.Add(f2);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // now we add another document which has norms for field f2 and not for
            // f1, and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reversed: now f1 omits norms and f2 has them
            d.Add(NewField("f1", "this field has norms", customType));

            d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
Example #22
        /// <summary>
        /// Indexes the passages from the collection file under the given path
        /// </summary>
        /// <param name="resourcePath">Directory containing collection.json</param>
        public void IndexText(string resourcePath)
        {
            FileStream   fs = new FileStream(resourcePath + "/collection.json", FileMode.Open, FileAccess.Read);
            StreamReader sr = new StreamReader(fs);

            //Data jsonToObj = (JsonConvert.DeserializeObject<List<Data>>(sr.ReadToEnd()))[0];
            JArray jArr = JArray.Parse(sr.ReadToEnd());

            foreach (var item in jArr)
            {
                JToken passages = item[PASSAGES];
                string queryId  = item[TEXT_FN_QUERY_ID].ToString();
                foreach (var p in passages)
                {
                    string text = p[TEXT_FN_PASS_TEXT].ToString();
                    string url  = p[TEXT_FN_URL].ToString();
                    string id   = p[TEXT_FN_PASS_ID].ToString();
                    Lucene.Net.Documents.Field field1 = new Field(TEXT_FN_PASS_TEXT, text, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO); // Baseline requirement
                    Lucene.Net.Documents.Field field2 = new Field(TEXT_FN_URL, url, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO);        // Baseline requirement
                    Lucene.Net.Documents.Field field3 = new Field(TEXT_FN_PASS_ID, id, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
                    Lucene.Net.Documents.Field field4 = new Field(TEXT_FN_QUERY_ID, queryId, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

                    Lucene.Net.Documents.Document doc = new Document();
                    doc.Add(field1);
                    doc.Add(field2);
                    doc.Add(field3);
                    doc.Add(field4);

                    writer.AddDocument(doc);
                }
            }

            sr.Close();
        }
Example #23
        public virtual void  TestFarsi()
        {
            /* build an index */
            RAMDirectory farsiIndex = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc        = new Document();

            doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc, null);

            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)farsiIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);
            Query         q      = new TermQuery(new Term("body", "body"));

            // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
            // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
            // characters properly.
            System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi is not supported).
            int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;

            Assert.AreEqual(0, numHits, "The index Term should not be included.");

            numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
            Assert.AreEqual(1, numHits, "The index Term should be included.");
            search.Close();
        }
Example #24
        private void  Add(System.String s, IndexWriter writer)
        {
            Document doc = new Document();

            doc.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
        }
Example #25
		public virtual void  TestMultiValueSource()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
			Document doc = new Document();
			Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(f);
			
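			// Committing after every added document forces multiple segments,
			// which the GetSequentialSubReaders assertion below depends on.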
			for (int i = 0; i < 17; i++)
			{
				f.SetValue("" + i);
				w.AddDocument(doc);
				w.Commit();
			}
			
			IndexReader r = w.GetReader();
			w.Close();
			
			Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);
			
			ValueSource s1 = new IntFieldSource("field");
			DocValues v1 = s1.GetValues(r);
			DocValues v2 = new MultiValueSource(s1).GetValues(r);
			
			for (int i = 0; i < r.MaxDoc(); i++)
			{
				Assert.AreEqual(v1.IntVal(i), i);
				Assert.AreEqual(v2.IntVal(i), i);
			}
			
			Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
			
			r.Close();
			dir.Close();
		}
Example #26
        public virtual void  TestDanish()
        {
            /* build an index */
            RAMDirectory danishIndex = new RAMDirectory();
            IndexWriter  writer      = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

            // Danish collation orders the words below in the given order
            // (example taken from TestSort.testInternationalSort() ).
            System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
            for (int docnum = 0; docnum < words.Length; ++docnum)
            {
                Document doc = new Document();
                doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)danishIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);

            System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da-DK").CompareInfo;

            // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
            // but Danish collation does.
            ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should be included.", 1, result.Length);

            result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should not be included.", 0, result.Length);
            search.Close();
        }
Example #27
        public override void  SetUp()
        {
            base.SetUp();
            System.String tempDir = System.IO.Path.GetTempPath();
            if (tempDir == null)
            {
                throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
            }
            indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

            Directory   dir    = FSDirectory.Open(indexDir);
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            // add some documents
            Document doc = null;

            for (int i = 0; i < docsToAdd; i++)
            {
                doc = new Document();
                doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            Assert.AreEqual(docsToAdd, writer.MaxDoc());
            writer.Close();
            dir.Close();
        }
Example #28
        internal virtual void  BuildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen)
        {
            IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            iw.SetMaxBufferedDocs(10);
            for (int j = 0; j < nDocs; j++)
            {
                Document d       = new Document();
                int      nFields = r.Next(maxFields);
                for (int i = 0; i < nFields; i++)
                {
                    int flen = r.Next(maxFieldLen);
                    System.Text.StringBuilder sb = new System.Text.StringBuilder("^ ");
                    while (sb.Length < flen)
                    {
                        sb.Append(' ').Append(words[r.Next(words.Length)]);
                    }
                    sb.Append(" $");
                    Field.Store store = Field.Store.YES;                     // make random later
                    Field.Index index = Field.Index.ANALYZED;                // make random later
                    d.Add(new Field("f" + i, sb.ToString(), store, index));
                }
                iw.AddDocument(d, null);
            }
            iw.Close();
        }
Example #29
		public override void  SetUp()
		{
			base.SetUp();
			Document doc;
			
			RAMDirectory rd1 = new RAMDirectory();
			IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			doc = new Document();
			doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED));
			iw1.AddDocument(doc);
			
			iw1.Close();
			RAMDirectory rd2 = new RAMDirectory();
			IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			doc = new Document();
			doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED));
			doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
			iw2.AddDocument(doc);
			
			iw2.Close();
			
			this.ir1 = IndexReader.Open(rd1, true);
			this.ir2 = IndexReader.Open(rd2, true);
		}
Example #30
        public virtual void  TestCompressionTools()
        {
            IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
            IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

            Document doc = new Document();

            doc.Add(binaryFldCompressed);
            doc.Add(stringFldCompressed);

            /* add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.AddDocument(doc, null);
            writer.Close();

            /* open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open((Directory)dir, false, null);
            Document    docFromReader = reader.Document(0, null);

            Assert.IsTrue(docFromReader != null);

            /* fetch the binary compressed field and compare its content with the original one */
            System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
            Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
            Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

            reader.Close();
            dir.Close();
        }
Example #31
        public void IndexFile(string filePath)
        {
            PropertyDescriptors descriptors = new PropertyDescriptors();
            descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
            Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
            IndexWriter iw = new IndexWriter(_idxDir, a, create);
            iw.SetUseCompoundFile(true);

            AdDataStream adStream = new AdDataStream(filePath);
            adStream.LoadData();
            foreach (Advert ad in adStream.FetchAd())
            {
                Document doc = new Document();
                foreach (string s in ad.GetDictionary().Keys)
                {
                    string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                    doc.Add(Field.Text(s, temp));

                }
                iw.AddDocument(doc);
                if (_updateCallback != null)
                {
                    _updateCallback("Added Document: " + ad["Title"]);

                }
            }
            iw.Optimize();
            iw.Close();
        }
Example #32
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            long         theLong   = System.Int64.MaxValue;
            double       theDouble = System.Double.MaxValue;
            sbyte        theByte   = (sbyte)System.SByte.MaxValue;
            short        theShort  = System.Int16.MaxValue;
            int          theInt    = System.Int32.MaxValue;
            float        theFloat  = System.Single.MaxValue;

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            writer.Close();
            reader = IndexReader.Open((Directory)directory, true, null);
        }
Example #33
        public void testMissingTerms()
        {
            String fieldName = "field1";
            Directory rd = new RAMDirectory();
            var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < 100; i++)
            {
                var doc = new Document();
                int term = i * 10; // terms are multiples of 10
                doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED));
                w.AddDocument(doc);
            }
            IndexReader reader = w.GetReader();
            w.Close();

            TermsFilter tf = new TermsFilter();
            tf.AddTerm(new Term(fieldName, "19"));
            FixedBitSet bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(0, bits.Cardinality(), "Must match nothing");

            tf.AddTerm(new Term(fieldName, "20"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(1, bits.Cardinality(), "Must match 1");

            tf.AddTerm(new Term(fieldName, "10"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

            tf.AddTerm(new Term(fieldName, "00"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

            reader.Close();
            rd.Close();
        }
Example #34
        public override void  SetUp()
        {
            base.SetUp();


            System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
                doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));                     //Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));                     //Field.Text("data",data[i]));
                }
                writer.AddDocument(doc, null);
            }

            writer.Optimize(null);
            writer.Close();

            r = IndexReader.Open(index, true, null);
            s = new IndexSearcher(r);

            //System.out.println("Set up " + getName());
        }
Example #35
 public override void  SetUp()
 {
     base.SetUp();
     IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     //writer.setUseCompoundFile(true);
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++)
     {
         Document doc = new Document();
         Field.TermVector termVector;
         int mod3 = i % 3;
         int mod2 = i % 2;
         if (mod2 == 0 && mod3 == 0)
         {
             termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
         }
         else if (mod2 == 0)
         {
             termVector = Field.TermVector.WITH_POSITIONS;
         }
         else if (mod3 == 0)
         {
             termVector = Field.TermVector.WITH_OFFSETS;
         }
         else
         {
             termVector = Field.TermVector.YES;
         }
         doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
         writer.AddDocument(doc);
     }
     writer.Close();
     searcher = new IndexSearcher(directory, true);
 }
Example #36
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory d = new RAMDirectory();
            IndexWriter  w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);

            for (int i = 0; i < numDeletedDocs; i++)
            {
                w.AddDocument(new Document(), null);
            }
            w.Commit(null);
            w.DeleteDocuments(null, new MatchAllDocsQuery());
            w.Commit(null);

            if (0 < numDeletedDocs)
            {
                Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
            w.Close();
            IndexReader r = IndexReader.Open((Directory)d, true, null);

            Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
            r.Close();
            return(d);
        }
Example #37
        //creates and indexes a document from a FileToIndex
        public void BuildIndex(FileToIndex file)
        {
            using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
            {
                using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    //check if a document with this Id already exists; if so, delete it first

                    var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
                    idxw.DeleteDocuments(searchQuery);
                    //creation
                    Document doc = new Document();
                    doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//аналайзер разбивает строки на слова
                    doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
                    //write the document to the index
                    idxw.AddDocument(doc);
                    //commit and optimize; the enclosing using block disposes the writer
                    idxw.Commit();

                    idxw.Optimize();

                }
            }
        }
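A note on the delete-then-add pattern inside BuildIndex: IndexWriter.UpdateDocument(Term, Document) performs the same replacement atomically, so the two steps can be collapsed into one call, as in this sketch over the same idxw/doc variables:

                    // atomically deletes any document matching the Id term and adds the new one
                    idxw.UpdateDocument(new Term("Id", file.Id.ToString()), doc);
                    idxw.Commit();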
Example #38
        public virtual void TestAddBinaryTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more doc values than we expect
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
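For contrast, a minimal sketch of the accepted usage under the same fixture (placed before iwriter.Dispose()): a document may carry at most one binary doc-values entry per field name, so a single BinaryDocValuesField succeeds.

            Document ok = new Document();
            ok.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            iwriter.AddDocument(ok); // a single "dv" value per document is accepted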
Example #39
		private Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = NewRandom();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Document doc = new Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("Failed to build test index", e);
			}
			return dir;
		}
Example #40

        private static void IndexIndicator(IndicatorMetadata indicatorMetadata,
            IEnumerable<IndicatorMetadataTextProperty> properties, IndexWriter writer)
        {
            Document doc = new Document();
            doc.Add(new Field("id", indicatorMetadata.IndicatorId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

            var text = indicatorMetadata.Descriptive;

            StringBuilder sb = new StringBuilder();
            foreach (var indicatorMetadataTextProperty in properties)
            {
                var key = indicatorMetadataTextProperty.ColumnName;

                if (text.ContainsKey(key))
                {
                    sb.Append(text[key]);
                    sb.Append(" ");
                }
            }

            doc.Add(new Field("IndicatorText",
                  sb.ToString().ToLower(), Field.Store.NO,
                  Field.Index.ANALYZED));

            writer.AddDocument(doc);
        }
Example #41
 private static void AddTextToIndex(int txts, string text, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
     doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.TOKENIZED));
     writer.AddDocument(doc);
 }
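Field.Index.UN_TOKENIZED and TOKENIZED are the pre-2.4 enum names; Lucene 2.4 renamed them to NOT_ANALYZED and ANALYZED. The same method with the newer names, as a sketch:

     private static void AddTextToIndex(int txts, string text, IndexWriter writer)
     {
         Document doc = new Document();
         doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
         doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.ANALYZED));
         writer.AddDocument(doc);
     }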
Example #42
		private static Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = new System.Random((System.Int32) (BASE_SEED + 42));
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("Failed to build test index", e);
			}
			return dir;
		}
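The three-argument IndexWriter constructor used here is the pre-MaxFieldLength form, marked obsolete in later 2.x releases; an equivalent sketch of the writer line with the newer overload:

				IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);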
Example #43
 /// <summary>
 /// 创建索引文档
 /// </summary>
 /// <param name="dic"></param>
 public void AddLuceneIndex(Dictionary<string, string> dic) {
     //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var analyzer = GetAnalyzer();
     using (var directory = GetLuceneDirectory())
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) {
         var doc = new Document();
         foreach (KeyValuePair<string, string> pair in dic) {
             // add new index entry
             //Field.Store.YES: controls whether the original value is stored;
             //only stored fields can later be retrieved with doc.Get("number")
             //Field.Index.NOT_ANALYZED: indexed as-is, without tokenization
             //todo:boost
             if (NotAnalyzeFields.Exists(one => one == pair.Key)) {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
             else {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
             }
         }
         //doc.Boost
         writer.AddDocument(doc);
         writer.Commit();
         writer.Optimize();
         analyzer.Close();
     }
 }
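Creating a directory and writer per call is expensive: every IndexWriter construction acquires the write lock and re-reads the segment metadata. A hedged sketch of a reusable writer, keeping the example's GetAnalyzer/GetLuceneDirectory helpers:

 private static readonly object _writerLock = new object();
 private static IndexWriter _writer;

 private IndexWriter GetWriter() {
     // lazily create one writer and reuse it across AddLuceneIndex calls;
     // IndexWriter is thread-safe, so concurrent AddDocument calls are fine
     lock (_writerLock) {
         if (_writer == null) {
             _writer = new IndexWriter(GetLuceneDirectory(), GetAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
         }
         return _writer;
     }
 }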
Example #44
        public void CreateIndex(Analyzer analyzer)
        {
            FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
            foreach (string file in files)
            {
                string name = new FileInfo(file).Name;
                string content = File.ReadAllText(file);

                Document doc = new Document();
                doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                indexWriter.AddDocument(doc);

                Console.WriteLine("{0} - {1}", file, name);
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            Console.WriteLine("File count: {0}", files.Length);
        }
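One hardening note on the example above: if File.ReadAllText throws, indexWriter is never disposed and the index's write.lock stays held. A sketch of the same loop with the writer in a using block:

            using (var indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // ... build and add one Document per file, as above ...
                indexWriter.Optimize();
            } // Dispose() runs even when an exception is thrown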
Example #45
 /// <summary>
 /// Indexes a given string into the index
 /// </summary>
 /// <param name="text">The text to index</param>
 public void IndexText(string text)
 {
     Lucene.Net.Documents.Field    field = new Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
     Lucene.Net.Documents.Document doc   = new Document();
     doc.Add(field);
     writer.AddDocument(doc);
 }
Example #46
        public void MrsJones()
        {
            using (var dir = new RAMDirectory())
            using (var analyzer = new LowerCaseKeywordAnalyzer())
            {
                using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var document = new Lucene.Net.Documents.Document();
                    document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
                    writer.AddDocument(document);
                }

                var searcher = new IndexSearcher(dir, true);

                var termEnum = searcher.IndexReader.Terms();
                while (termEnum.Next())
                {
                    var buffer = termEnum.Term.Text;
                    Console.WriteLine(buffer);
                }

                var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
                var query = queryParser.Parse("Name:\"MRS. S*\"");
                Console.WriteLine(query);
                var result = searcher.Search(query, 10);

                Assert.NotEqual(0, result.TotalHits);
            }
        }
Example #47
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
 /// <summary>
 /// 
 /// </summary>
 /// <param name="p"></param>
 /// <param name="writer"></param>
 private static void AddDocumentToIndex(Product p, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("Name",
                        p.Name,
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Origin",
                        p.Origin.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Price",
                        p.Price.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     writer.AddDocument(doc);
 }
Example #49

        // TODO: read the JSON files, select the correct fields, and add them to the writer
        public void IndexJsonFile(String path)
        {
            String         jsonPath   = path + "/collection.json";
            JsonSerializer serializer = new JsonSerializer();

            using (FileStream s = File.Open(jsonPath, FileMode.Open))
                using (StreamReader sr = new StreamReader(s))
                    using (JsonReader reader = new JsonTextReader(sr))
                    {
                        while (reader.Read())
                        {
                            // deserialize only when there's "{" character in the stream
                            if (reader.TokenType == JsonToken.StartObject)
                            {
                                //can't index json file here
                                var      p  = serializer.Deserialize <object>(reader);
                                Field    f1 = new Field("Test", p.ToString(), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                                Document d1 = new Document();
                                d1.Add(f1);
                                writer.AddDocument(d1);
                            }
                        }
                    }


            //loop the directory and get the json file

            /*
             *          int count = 0;
             *          foreach (System.IO.FileInfo file in files)
             *          {
             *              count++;
             *              if(count > 1)
             *              {
             *                  MessageBox.Show("More than one collection,system will only index the first collection");
             *                  break;//need to be changed
             *              }
             *              String jsonPath = file.Name;
             *              JsonSerializer serializer = new JsonSerializer();
             *              using (FileStream s = File.Open(jsonPath, FileMode.Open))
             *              using (StreamReader sr = new StreamReader(s))
             *              using (JsonReader reader = new JsonTextReader(sr))
             *              {
             *                  while (reader.Read())
             *                  {
             *                      // deserialize only when there's "{" character in the stream
             *                      if (reader.TokenType == JsonToken.StartObject)
             *                      {
             *                          var p = serializer.Deserialize<object>(reader);
             *                          Field f1 = new Field("Test", p.ToString(), Field.Store.NO,             Field.Index.ANALYZED,                               Field.TermVector.WITH_POSITIONS_OFFSETS);
             *                          Document d1 = new Document();
             *                          d1.Add(f1);
             *                          writer.AddDocument(d1);
             *                      }
             *                  }
             *              }
             *           }
             */
        }
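The "can't index json file here" comment marks the open issue: serializer.Deserialize&lt;object&gt;(reader).ToString() indexes each object as one opaque blob. A hedged sketch of selecting named properties instead, assuming Json.NET is available and that the collection objects carry a "title" property (an assumption, not part of the original):

                            if (reader.TokenType == JsonToken.StartObject)
                            {
                                // materialize just this object and pick fields by name
                                var obj = Newtonsoft.Json.Linq.JObject.Load(reader);
                                var title = (string)obj["title"] ?? string.Empty; // "title" is an assumed property name
                                Document d1 = new Document();
                                d1.Add(new Field("Title", title, Field.Store.NO, Field.Index.ANALYZED));
                                writer.AddDocument(d1);
                            }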
Example #50
        // Activity 9

        public void IndexText(string text)
        {
            // index the text as a single analyzed field with positions and offsets
            Lucene.Net.Documents.Field    field = new Lucene.Net.Documents.Field("text", text, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Document doc   = new Lucene.Net.Documents.Document();
            doc.Add(field);
            writer.AddDocument(doc);
        }
Example #51
        // LUCENE-1404
        private void  AddDoc(IndexWriter writer, System.String id, System.String text)
        {
            Document doc = new Document();

            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
        }
 /// <summary>
 /// Indexes the given text
 /// </summary>
 /// <param name="text">Text to index</param>
 public void IndexText(string text)
 {
     //Done in Week 3 Practical
     Lucene.Net.Documents.Field    field = new Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     Lucene.Net.Documents.Document doc   = new Document();
     doc.Add(field);
     writer.AddDocument(doc);
 }
Example #53
        private void IndexText(long id, string text)
        {
            Lucene.Net.Documents.Document doc = new Document();
            doc.Add(new Field(QID_FN, id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            doc.Add(new Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

            writer.AddDocument(doc);
        }
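Note that Field.Index.NO makes the id retrievable from a search hit but not searchable; if lookups by id are also needed, the field can be indexed untokenized instead, as in this one-line sketch:

            doc.Add(new Field(QID_FN, id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));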
Example #54
 public void BuildIndex(string sBody, string sFileName, string sLastWriteTime)
 {
     Lucene.Net.Documents.Document doc = new Document();
     doc.Add(new Field("Body", sBody + " " + sFileName.Replace(@"\", " ").Replace(".", " "), Field.Store.YES, Field.Index.ANALYZED));
     doc.Add(new Field("FileName", sFileName + string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED));
     doc.Add(new Field("LastWriteTime", sLastWriteTime, Field.Store.YES, Field.Index.NOT_ANALYZED));
     writer.AddDocument(doc);
 }
Example #55
        public void AddTxt(String txt)
        {
            Document doc  = new Document();
            Field    fTxt = new Field("text", txt, Field.Store.YES, Field.Index.ANALYZED);

            doc.Add(fTxt);
            writer.AddDocument(doc);
        }
Example #56
        public void DoWork(object obj) // runs under Parallel.ForEach
        {
            if (obj == null)
            {
                return;
            }
            FileMeta fileMeta = (FileMeta)obj;

            if (string.IsNullOrEmpty(fileMeta.FullName))
            {
                return;
            }

            string sPath = MyConfig.GetMailFolder();

            try
            {
                using (var msg = new MsgReader.Outlook.Storage.Message(fileMeta.FullName))
                {
                    //msg.Type == MsgReader.Outlook.MessageType.Email
                    //msg.Type == MsgReader.Outlook.MessageType.AppointmentRequest
                    //msg.Type == MsgReader.Outlook.MessageType.EmailNonDeliveryReport
                    // etc
                    Document doc = new Document();

                    doc.Add(new Field("SentOn", msg.GetSentOnEx(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("ReceivedOn", msg.GetReceivedOnEx(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    doc.Add(new Field("Subject", msg.Subject.EmptyIfNullEx(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Body", msg.BodyText.EmptyIfNullEx(), Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("SenderEmailAddress", msg.Sender.Email.EmptyIfNullEx(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("SenderName", msg.Sender.DisplayName.EmptyIfNullEx(), Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("To", msg.GetEmailToEx(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("CC", msg.GetEmailCcEx(), Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("Categories", msg.GetCategoryEx(), Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("FlagRequest", msg.GetFlagEx(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    doc.Add(new Field("Importance", msg.GetImportanceEx(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    doc.Add(new Field("AttachmentNames", msg.GetAttachmentNames().EmptyIfNullEx(), Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("FullFileName", fileMeta.FileNameWithoutPath, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("LastWriteTime", fileMeta.LastWriteTime, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    Writer.AddDocument(doc);
                    //  ConStackLog.Push(msg.Subject);
                }
            }
            catch (Exception ex)
            {
                ConStackLog.Push(ex.Message + "|" + fileMeta.FullName);
            }
        }
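A usage sketch for the method above; fileMetas and indexer are assumed names, and Writer is the shared IndexWriter the example writes to. IndexWriter is thread-safe, so one writer can service all concurrent DoWork calls:

            // each .msg file is parsed and indexed on a thread-pool thread
            Parallel.ForEach(fileMetas, fm => indexer.DoWork(fm));
            indexer.Writer.Commit(); // publish all documents once the loop finishes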
Example #57
 /// <summary>
 /// Indexes the given text
 /// </summary>
 /// <param name="text">Text to index</param>
 public void IndexText(string url, string passage)
 {
     Lucene.Net.Documents.Field    field_URL_text = new Field(TEXT_URL, url, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
     Lucene.Net.Documents.Field    field_Text     = new Field(TEXT_PASSAGE, passage, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
     Lucene.Net.Documents.Document doc            = new Document();
     doc.Add(field_URL_text);
     doc.Add(field_Text);
     writer.AddDocument(doc);
 }
Example #58
        public void AddToIndex(Passage passage) // Indexes an entry
        {
            // Add all text to one field
            Field    field    = new Field(TEXT_FN, "‰" + passage.url + "‰" + passage.passage_text + "‰" + passage.passage_id, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO);
            Document document = new Document();

            document.Add(field);
            writer.AddDocument(document);
        }
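A retrieval sketch for the ‰-delimited layout above, assuming a searcher over the same index and a ScoreDoc hit: because the stored string starts with the delimiter, Split leaves an empty element at index 0.

            string[] parts = searcher.Doc(hit.Doc).Get(TEXT_FN).Split('‰');
            string url = parts[1], passageText = parts[2], passageId = parts[3];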
Example #59
 /// <summary>
 /// Add the text to the index
 /// </summary>
 /// <param name="text">The text to index</param>
 public void IndexText(string text)
 {
     //System.Console.WriteLine("Indexing " + text);
     Lucene.Net.Documents.Field field = new Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     //  Lucene.Net.Documents.Field field = new Field("Text", text, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     Lucene.Net.Documents.Document doc = new Document();
     doc.Add(field);
     writer.AddDocument(doc);
 }
Example #60
        private void  AddDoc(System.String text, IndexWriter iw, float boost)
        {
            Document doc = new Document();
            Field    f   = new Field("key", text, Field.Store.YES, Field.Index.ANALYZED);

            f.Boost = boost;
            doc.Add(f);
            iw.AddDocument(doc, null);
        }