Example #1
        public void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.AddDocument(doc);
            }
            w.Close();

            // If buffer size is small enough to cause a flush, errors ensue...
            w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            w.SetMaxBufferedDocs(2);

            Term pkTerm = new Term("pk", "");
            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                String value = i.ToString();
                doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.UpdateDocument(pkTerm.CreateTerm(value), doc);
            }
            w.Rollback();

            IndexReader r = IndexReader.Open(dir, true);
            Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
            r.Close();
            dir.Close();
        }
Example #2
 // ... has multiple qualifications
 private Document MakeQualification(string qualification, int year)
 {
     Document job = new Document();
     job.Add(NewStringField("qualification", qualification, Field.Store.YES));
     job.Add(new IntField("year", year, Field.Store.NO));
     return job;
 }
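
The year value above is indexed numerically (IntField) but not stored, which makes it range-searchable rather than retrievable. A minimal sketch of querying it, assuming the same pre-rename Lucene.NET 4.x API that provides IntField and NewStringField, and a hypothetical IndexSearcher named searcher opened over this index:

 // Hedged sketch: match qualifications earned between 2005 and 2010 (inclusive).
 Query yearRange = NumericRangeQuery.NewIntRange("year", 2005, 2010, true, true);
 TopDocs hits = searcher.Search(yearRange, 10);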
Example #3
        /// <summary>
        /// Converts a BarThread into a <see cref="Lucene.Net.Documents.Document"/>
        /// </summary>
        /// <param name="barThread">the post entity</param>
        /// <returns>Lucene.Net.Documents.Document</returns>
        public static Document Convert(BarThread barThread)
        {
            Document doc = new Document();

            //Index the basic post information
            doc.Add(new Field(BarIndexDocument.SectionId, barThread.SectionId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(BarIndexDocument.ThreadId, barThread.ThreadId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(BarIndexDocument.PostId, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(BarIndexDocument.Subject, barThread.Subject.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(BarIndexDocument.Body, HtmlUtility.StripHtml(barThread.GetBody(), true, false).ToLower(), Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field(BarIndexDocument.Author, barThread.Author, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(BarIndexDocument.IsPost, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(BarIndexDocument.DateCreated, DateTools.DateToString(barThread.DateCreated, DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(BarIndexDocument.TenantTypeId, barThread.TenantTypeId, Field.Store.YES, Field.Index.NOT_ANALYZED));

            //Index the post's tags
            TagService tagService = new TagService(TenantTypeIds.Instance().BarThread());

            IEnumerable<ItemInTag> itemInTags = tagService.GetItemInTagsOfItem(barThread.ThreadId);
            foreach (ItemInTag itemInTag in itemInTags)
            {
                doc.Add(new Field(BarIndexDocument.Tag, itemInTag.TagName.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
            }

            return doc;
        }
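
DateCreated above is indexed as a DateTools string at DAY resolution, so date filtering reduces to a lexicographic term range. A minimal sketch under that assumption, using the same 3.x-style API the Field flags above come from (the year 2013 range is purely illustrative):

            // Hedged sketch: select threads created during 2013.
            string from = DateTools.DateToString(new DateTime(2013, 1, 1), DateTools.Resolution.DAY);
            string to = DateTools.DateToString(new DateTime(2013, 12, 31), DateTools.Resolution.DAY);
            Query dateRange = new TermRangeQuery(BarIndexDocument.DateCreated, from, to, true, true);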
Example #4
        public void TestReverse()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(NewStringField("value", "foo", Field.Store.NO));
            doc.Add(NewStringField("value", "bar", Field.Store.NO));
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("value", "baz", Field.Store.NO));
            doc.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortedSetSortField("value", true));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, td.TotalHits);
            // 'bar' comes before 'baz'
            assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));

            ir.Dispose();
            dir.Dispose();
        }
Example #5
        public virtual void TestNGramPrefixGridLosAngeles()
        {
            SpatialContext ctx = SpatialContext.GEO;
            TermQueryPrefixTreeStrategy prefixGridStrategy = new TermQueryPrefixTreeStrategy(new QuadPrefixTree(ctx), "geo");

            Spatial4n.Core.Shapes.IShape point = ctx.MakePoint(-118.243680, 34.052230);

            Document losAngeles = new Document();
            losAngeles.Add(new StringField("name", "Los Angeles", Field.Store.YES));
            foreach (IndexableField field in prefixGridStrategy.CreateIndexableFields(point))
            {
                losAngeles.Add(field);
            }
            losAngeles.Add(new StoredField(prefixGridStrategy.FieldName, point.toString()));//just for diagnostics

            addDocumentsAndCommit(Arrays.AsList(losAngeles));

            // This won't work with simple spatial context...
            SpatialArgsParser spatialArgsParser = new SpatialArgsParser();
            // TODO... use a non polygon query
            //    SpatialArgs spatialArgs = spatialArgsParser.parse(
            //        "Intersects(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))",
            //        new SimpleSpatialContext());

            //    Query query = prefixGridStrategy.makeQuery(spatialArgs, fieldInfo);
            //    SearchResults searchResults = executeQuery(query, 1);
            //    assertEquals(1, searchResults.numFound);
        }
Example #6
        /// <summary>
        /// Converts a MicroblogEntity into a <see cref="Lucene.Net.Documents.Document"/>
        /// </summary>
        /// <param name="microblog">the microblog entity</param>
        /// <returns>Lucene.Net.Documents.Document</returns>
        public static Document Convert(MicroblogEntity microblog)
        {
            Document doc = new Document();
            //Index the basic microblog information
            doc.Add(new Field(MicroblogIndexDocument.MicroblogId, microblog.MicroblogId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            if (microblog.OriginalMicroblog != null)
            {
                doc.Add(new Field(MicroblogIndexDocument.Body, HtmlUtility.StripHtml(microblog.Body, true, false).ToLower() + HtmlUtility.StripHtml(microblog.OriginalMicroblog.Body, true, false).ToLower(), Field.Store.NO, Field.Index.ANALYZED));
            }
            else
            {
                doc.Add(new Field(MicroblogIndexDocument.Body, HtmlUtility.StripHtml(microblog.Body, true, false).ToLower(), Field.Store.NO, Field.Index.ANALYZED));
            }
            doc.Add(new Field(MicroblogIndexDocument.DateCreated, DateTools.DateToString(microblog.DateCreated, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(MicroblogIndexDocument.HasMusic, microblog.HasMusic ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(MicroblogIndexDocument.HasPhoto, microblog.HasPhoto ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(MicroblogIndexDocument.HasVideo, microblog.HasVideo ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(MicroblogIndexDocument.IsOriginality, microblog.ForwardedMicroblogId == 0 ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(MicroblogIndexDocument.TenantTypeId, microblog.TenantTypeId, Field.Store.YES, Field.Index.NOT_ANALYZED));

            TagService tagService = new TagService(TenantTypeIds.Instance().Microblog());

            IEnumerable<ItemInTag> itemInTags = tagService.GetItemInTagsOfItem(microblog.MicroblogId);
            foreach (ItemInTag itemInTag in itemInTags)
            {
                doc.Add(new Field(MicroblogIndexDocument.Topic, itemInTag.TagName.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
            }

            return doc;
        }
Example #7
        public void MyTestMethod_index()
        {
            string strIndexDir = @"D:\Index";

            Lucene.Net.Store.Directory indexDir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(strIndexDir));
            Analyzer std = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); //Version parameter is used for backward compatibility. Stop words can also be passed to avoid indexing certain words


            using (IndexWriter idxw = new IndexWriter(indexDir, std, true, IndexWriter.MaxFieldLength.UNLIMITED)) //Create an Index writer object.
            {
                Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

                //Read the text to index from disk.
                var file = System.IO.File.ReadAllText(@"d:\test.txt");
                Lucene.Net.Documents.Field fldText = new Lucene.Net.Documents.Field("text", file,
                                                                                    Lucene.Net.Documents.Field.Store.YES,
                                                                                    Lucene.Net.Documents.Field.Index.ANALYZED,
                                                                                    Lucene.Net.Documents.Field.TermVector.YES);


                doc.Add(fldText);

                doc.Add(new Field("addtime", System.DateTime.Now.ToString(), Lucene.Net.Documents.Field.Store.YES,
                                  Field.Index.ANALYZED, Field.TermVector.YES));

                //write the document to the index
                idxw.AddDocument(doc);
                //optimize and close the writer
                idxw.Optimize();
            }
            Console.WriteLine("Indexing Done");
        }
Example #8
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().toString(), ft));
                doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append(
                    Random().toString()).toString(), ft));
                doc.Add(new Field(_classFieldName, Random().toString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
Example #9
 private static void AddTextToIndex(int txts, string text, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
     doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.TOKENIZED));
     writer.AddDocument(doc);
 }
Example #10
        public virtual void  TestPositionIncrementGap()
        {
            Analyzer analyzer = new AnonymousClassAnalyzer(this);

            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();

            doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
            writer.Commit();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq;

            Assert.AreEqual(2, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(502, termPositions.NextPosition());
        }
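
The 502 in the final assertion comes from the analyzer's position increment gap: the first "repeated" token sits at position 0, "one" at position 1, and the first token of the second field value lands at 1 + gap + 1. A minimal sketch of what AnonymousClassAnalyzer presumably overrides, assuming a gap of 500 (hypothetical, but the only value consistent with the asserted positions):

        // Hedged sketch: an analyzer that inserts a positional gap of 500
        // between successive values of the same field.
        class GapAnalyzer : WhitespaceAnalyzer
        {
            public override int GetPositionIncrementGap(string fieldName)
            {
                return 500; // "repeated"(0), "one"(1), then second "repeated" at 1 + 500 + 1 = 502
            }
        }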
Example #11
        // TODO: refactor call interface: way too many parameters to be legible.
        public void AddDocumentMetadata(bool is_deleted, string fingerprint, string title, string author, string year, string comment, string tag, string annotation, string bibtex, Utilities.BibTex.Parsing.BibTexItem bibtex_item)
        {
            Lucene.Net.Documents.Document document = null;

            // Create the document only if it is not to be deleted
            if (!is_deleted)
            {
                document = new Lucene.Net.Documents.Document();
                document.Add(new Field("fingerprint", fingerprint, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.Add(new Field("page", "0", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

                StringBuilder content_sb = new StringBuilder();

                AddDocumentMetadata_SB(document, content_sb, "title", title);
                AddDocumentMetadata_SB(document, content_sb, "author", author);
                AddDocumentMetadata_SB(document, content_sb, "year", year);
                AddDocumentMetadata_SB(document, content_sb, "comment", comment);
                AddDocumentMetadata_SB(document, content_sb, "tag", tag);
                AddDocumentMetadata_SB(document, content_sb, "annotation", annotation);
                AddDocumentMetadata_SB(document, content_sb, "bibtex", bibtex);

                AddDocumentMetadata_BibTex(document, bibtex_item);

                string content = content_sb.ToString();
                document.Add(new Field("content", content, Field.Store.NO, Field.Index.ANALYZED));
            }

            AddDocumentPage_INTERNAL(fingerprint, 0, document);
        }
Example #12
 /// <summary>
 /// 
 /// </summary>
 /// <param name="p"></param>
 /// <param name="writer"></param>
 private static void AddDocumentToIndex(Product p, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("Name",
                        p.Name,
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Origin",
                        p.Origin.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Price",
                        p.Price.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     writer.AddDocument(doc);
 }
Example #13
 protected override void AddSpecialFields(Document document, Item item)
 {
     Assert.ArgumentNotNull(document, "document");
     Assert.ArgumentNotNull(item, "item");
     document.Add(this.CreateTextField(BuiltinFields.Name, item.Name));
     document.Add(this.CreateDataField(BuiltinFields.Name, item.Name));
     this.DetectRemovalFilterAndProcess(document, item, "DisplayName", BuiltinFields.Name, (itm) => item.Appearance.DisplayName);
     this.DetectRemovalFilterValueField(document, item, "Icon", BuiltinFields.Icon, itm => itm.Appearance.Icon);
     this.DetectRemovalFilterAndProcess(document, item, "Creator", BuiltinFields.Creator, itm => itm.Statistics.CreatedBy);
     this.DetectRemovalFilterAndProcess(document, item, "Editor", BuiltinFields.Editor, itm => itm.Statistics.UpdatedBy);
     this.DetectRemovalFilterAndProcess(document, item, "AllTemplates", BuiltinFields.AllTemplates, this.GetAllTemplates);
     this.DetectRemovalFilterAndProcess(document, item, "TemplateName", BuiltinFields.TemplateName, itm => itm.TemplateName);
     if (this.DetectRemoval("Hidden"))
     {
         if (this.IsHidden(item))
         {
             this.DetectRemovalFilterValueField(document, item, "Hidden", BuiltinFields.Hidden, itm => "1");
         }
     }
     this.DetectRemovalFilterValueField(document, item, "Created", BuiltinFields.Created, itm => item[FieldIDs.Created]);
     this.DetectRemovalFilterValueField(document, item, "Updated", BuiltinFields.Updated, itm => item[FieldIDs.Updated]);
     this.DetectRemovalFilterAndProcess(document, item, "Path", BuiltinFields.Path, this.GetItemPath);
     this.DetectRemovalFilterAndProcess(document, item, "Links", BuiltinFields.Links, this.GetItemLinks);
     var tags = this.Tags;
     if (tags.Length > 0)
     {
         document.Add(this.CreateTextField(BuiltinFields.Tags, tags));
         document.Add(this.CreateDataField(BuiltinFields.Tags, tags));
     }
 }
Example #14
        public void CreateSearchIndex()
        {
            directory = new RAMDirectory();
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            LookupTable = new Dictionary<string, BaseContent>();
            foreach (BaseContent p in Service.PoIs.ToList())
            {
                var document = new Document();
                document.Add(new Field("id", p.Id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                string all = p.Name + " ";
                foreach (MetaInfo mi in p.EffectiveMetaInfo)
                {
                    string value;
                    if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value)) continue;
                    document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED));
                    all += value + " ";
                }
                document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED));

                LookupTable[p.Id.ToString()] = p;
                ixw.AddDocument(document);
            }
            ixw.Commit();
        }
Example #15
 public static Document readTXT(string path)
 {
     Document doc = new Document();
     doc.Add(new Field("Path", path, Field.Store.YES, Field.Index.ANALYZED));
     doc.Add(new Field("Content" , readText(path) , Field.Store.YES, Field.Index.ANALYZED)) ;
     return doc;
 }
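
The readText helper is not shown. A minimal stand-in, assuming the files are plain UTF-8 text:

 // Hedged stand-in for the readText helper used above.
 private static string readText(string path)
 {
     return System.IO.File.ReadAllText(path);
 }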
Example #16
        /*
         * Method that indexes a FILE
         */
        private static void BuildIndexFiles(string file, StandardAnalyzer analyzer, FSDirectory indexDir, IndexWriter indexWriter)
        {
            StringBuilder toText = new StringBuilder();
            LDocument     document;

            switch (getExtension(file))
            {
            case ".docx":
                toText = WordToText(file);
                break;

            case ".pdf":
                toText = PdfToText(file);
                break;

            case ".txt":
                toText = TxtToText(file);
                break;
            }

            // File Indexing
            document = new LDocument();

            document.Add(new Field("Filename", file, Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("Path", file, Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("Content", toText.ToString(), Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(document);


            indexWriter.Optimize();
            indexWriter.Flush(false, false, false);
        }
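
WordToText, PdfToText, and TxtToText are not shown; the first two would need format-specific extraction libraries. A minimal stand-in for the plain-text case only, under that assumption:

        // Hedged stand-in for TxtToText: read the whole file into the StringBuilder
        // that BuildIndexFiles feeds into the "Content" field.
        private static StringBuilder TxtToText(string file)
        {
            return new StringBuilder(System.IO.File.ReadAllText(file));
        }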
Example #17
        public void CreateIndex(Analyzer analayer) 
        {
            FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter = new IndexWriter(fsDir, analayer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
            foreach (string file in files)
            {
                string name = new FileInfo(file).Name;
                string content = File.ReadAllText(file);

                Document doc = new Document();
                doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                indexWriter.AddDocument(doc);

                Console.WriteLine("{0} - {1}", file, name);
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            Console.WriteLine("File count: {0}", files.Length);
        }
Example #18
        private static void IndexIndicator(IndicatorMetadata indicatorMetadata,
            IEnumerable<IndicatorMetadataTextProperty> properties, IndexWriter writer)
        {
            Document doc = new Document();
            doc.Add(new Field("id", indicatorMetadata.IndicatorId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

            var text = indicatorMetadata.Descriptive;

            StringBuilder sb = new StringBuilder();
            foreach (var indicatorMetadataTextProperty in properties)
            {
                var key = indicatorMetadataTextProperty.ColumnName;

                if (text.ContainsKey(key))
                {
                    sb.Append(text[key]);
                    sb.Append(" ");
                }
            }

            doc.Add(new Field("IndicatorText",
                  sb.ToString().ToLower(), Field.Store.NO,
                  Field.Index.ANALYZED));

            writer.AddDocument(doc);
        }
Example #19
        public static void IndexTopics(CSETWebEntities entity, IndexWriter writer)
        {
            foreach (CATALOG_RECOMMENDATIONS_DATA data in entity.CATALOG_RECOMMENDATIONS_DATA)
            {
                Lucene.Net.Documents.Document lucDoc = new Lucene.Net.Documents.Document();

                string text = "";
                text += " " + data.Heading + " " + data.Requirement + " " + data.Supplemental_Guidance + " " + data.Enhancement;

                lucDoc.Add(new Field(FieldNames.SHORT_NAME, data.Topic_Name, Field.Store.YES, Field.Index.ANALYZED));
                lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
                lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, ResourceTypeEnum.Catalog_Recommendation.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                lucDoc.Add(new Field(FieldNames.DOC_ID, data.Data_Id.ToString(), Field.Store.YES, Field.Index.NO));
                writer.AddDocument(lucDoc);
            }

            foreach (PROCUREMENT_LANGUAGE_DATA data in entity.PROCUREMENT_LANGUAGE_DATA)
            {
                Lucene.Net.Documents.Document lucDoc = new Lucene.Net.Documents.Document();

                string text = "";
                text += " " + data.Basis + " " + data.Language_Guidance + " " + data.Procurement_Language + " " + data.Fatmeasures + " " + data.Satmeasures + " " + data.Maintenance_Guidance;

                lucDoc.Add(new Field(FieldNames.SHORT_NAME, data.Topic_Name, Field.Store.YES, Field.Index.ANALYZED));
                lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
                lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, ResourceTypeEnum.Procurement_Language.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                lucDoc.Add(new Field(FieldNames.DOC_ID, data.Procurement_Id.ToString(), Field.Store.YES, Field.Index.NO));
                writer.AddDocument(lucDoc);
            }
        }
Example #20
        public virtual void  TestMixedTermVectorSettingsSameField()
        {
            Document doc = new Document();

            // f1 first without tv then with tv
            doc.Add(new Field("f1", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));
            doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            // f2 first with tv then without tv
            doc.Add(new Field("f2", "v1", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO));

            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true,
                                                 IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Close();

            _TestUtil.CheckIndex(dir);

            IndexReader reader = IndexReader.Open(dir, true);
            // f1
            ITermFreqVector tfv1 = reader.GetTermFreqVector(0, "f1");

            Assert.IsNotNull(tfv1);
            Assert.AreEqual(2, tfv1.GetTerms().Length, "the 'with_tv' setting should rule!");
            // f2
            ITermFreqVector tfv2 = reader.GetTermFreqVector(0, "f2");

            Assert.IsNotNull(tfv2);
            Assert.AreEqual(2, tfv2.GetTerms().Length, "the 'with_tv' setting should rule!");
        }
Example #21
        public void CreateIndex()
        {

            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter
              (dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            try
            {
                for (int docid = 0; docid < NUM_DOCS; docid++)
                {
                    Document d = new Document();
                    d.Add(NewStringField("docid", "" + docid, Field.Store.YES));
                    d.Add(NewStringField("never_load", "fail", Field.Store.YES));
                    foreach (string f in FIELDS)
                    {
                        for (int val = 0; val < NUM_VALUES; val++)
                        {
                            d.Add(NewStringField(f, docid + "_" + f + "_" + val, Field.Store.YES));
                        }
                    }
                    d.Add(NewStringField("load_later", "yes", Field.Store.YES));
                    writer.AddDocument(d);
                }
            }
            finally
            {
                writer.Dispose();
            }
        }
Example #22
        public virtual void  TestLUCENE_1590()
        {
            Document doc = new Document();

            // f1 has no norms
            doc.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO));
            // f2 has no TF
            Field f = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED);

            f.OmitTermFreqAndPositions = true;
            doc.Add(f);
            doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO));

            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            writer.Optimize(); // be sure to have a single segment
            writer.Close();

            _TestUtil.CheckIndex(dir);

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir);
            FieldInfos    fi     = reader.FieldInfos();

            // f1
            Assert.IsFalse(reader.HasNorms("f1"), "f1 should have no norms");
            Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1");
            // f2
            Assert.IsTrue(reader.HasNorms("f2"), "f2 should have norms");
            Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2");
        }
Example #23
        private IndexWriter InitIndex(IConcurrentMergeScheduler scheduler, Random random, MockDirectoryWrapper dir, bool initialCommit)
        {
            dir.LockFactory = NoLockFactory.DoNoLockFactory;

            scheduler.SetSuppressExceptions();

            IndexWriter writer = new IndexWriter(dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
                .SetMaxBufferedDocs(10)
                .SetMergeScheduler(scheduler));

            if (initialCommit)
            {
                writer.Commit();
            }

            Document doc = new Document();
            doc.Add(NewTextField("content", "aaa", Field.Store.NO));
            doc.Add(NewTextField("id", "0", Field.Store.NO));
            for (int i = 0; i < 157; i++)
            {
                writer.AddDocument(doc);
            }

            return writer;
        }
Example #24
        public void IndexDocuments(IEnumerable <Document> documents)
        {
            try
            {
                var  analyzer         = new StandardAnalyzer(Version.LUCENE_30);
                bool createIndexFiles = !LuceneDirectory.FileExists("segments.gen");
                using (var writer = new IndexWriter(LuceneDirectory, analyzer, createIndexFiles, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    try
                    {
                        foreach (var document in documents.Where(d => d.FileExtension == ".pdf"))
                        {
                            string documentBody = GetPlainTextFromDocument(document);

                            var doc = new LuceneDocument();

                            doc.Add(new Field("FileID", document.FileID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                            doc.Add(new Field("Title", document.Title, Field.Store.YES, Field.Index.ANALYZED));
                            doc.Add(new Field("Body", documentBody, Field.Store.YES, Field.Index.ANALYZED));

                            writer.AddDocument(doc);
                        }

                        writer.Optimize();
                    }
                    catch { }
                    finally
                    {
                        analyzer.Close();
                    }
                }
            }
            catch { }
        }
Example #25
        private static void AddDocuments(IndexWriter writer) {
            var pages = PagesMetadata.Instance;
            var posts = PostsMetadata.Instance;

            foreach (var page in pages.List) {
                var doc = new Document();

                doc.Add(new Field("Url", "/" + page.Slug, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Title", page.Title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Body", new Page(page.Slug, pages).BodyWithoutHtml, Field.Store.YES, Field.Index.ANALYZED));

                writer.AddDocument(doc);
            }

            foreach (var post in posts.List) {
                var doc = new Document();

                doc.Add(new Field("Url", "/blog/" + post.Slug, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Title", post.Title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Description", post.ShortDescription, Field.Store.YES, Field.Index.ANALYZED));
                if (post.PublishDate != DateTime.MinValue)
                    doc.Add(new Field("PublishDate", post.PublishDate.ToString("dd MMMM yyyy"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                if (post.LastUpdatedDate != DateTime.MinValue)
                    doc.Add(new Field("LastUpdatedDate", post.LastUpdatedDate.ToString("dd MMMM yyyy"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Author", post.Author, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Body", new Post(post.Slug, posts).BodyWithoutHtml, Field.Store.YES, Field.Index.ANALYZED));

                writer.AddDocument(doc);
            }
        }
Example #26
        public override void WriteEndVersion(Process process, AbstractConnection input, Entity entity, bool force = false) {
            if (entity.Updates + entity.Inserts <= 0 && !force)
                return;

            var versionType = entity.Version == null ? "string" : entity.Version.SimpleType;
            var end = entity.End ?? new DefaultFactory(Logger).Convert(entity.End, versionType);

            using (var dir = LuceneDirectoryFactory.Create(this, TflBatchEntity(entity.ProcessName))) {
                using (var writer = new IndexWriter(dir, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED)) {
                    var doc = new Document();
                    doc.Add(new NumericField("id", global::Lucene.Net.Documents.Field.Store.YES, true).SetIntValue(entity.TflBatchId));
                    doc.Add(new global::Lucene.Net.Documents.Field("process", entity.ProcessName, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("connection", input.Name, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("entity", entity.Alias, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("updates", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Updates));
                    doc.Add(new NumericField("inserts", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Inserts));
                    doc.Add(new NumericField("deletes", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Deletes));
                    doc.Add(LuceneWriter.CreateField("version", versionType, new SearchType { Analyzer = "keyword" }, end));
                    doc.Add(new global::Lucene.Net.Documents.Field("version_type", versionType, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("tflupdate", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
                    writer.AddDocument(doc);
                    writer.Commit();
                    writer.Optimize();
                }
            }
        }
Example #27
        public virtual void TestBadPrefixTreePrune()
        {

            trie = new QuadPrefixTree(ctx, 12);
            TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");
            Document doc = new Document();
            doc.Add(new TextField("id", "1", Field.Store.YES));

            IShape area = ctx.MakeRectangle(-122.82, -122.78, 48.54, 48.56);

            Field[] fields = strategy.CreateIndexableFields(area, 0.025);
            foreach (Field field in fields)
            {
                doc.Add(field);
            }
            AddDocument(doc);

            IPoint upperleft = ctx.MakePoint(-122.88, 48.54);
            IPoint lowerright = ctx.MakePoint(-122.82, 48.62);

            Query query = strategy.MakeQuery(new SpatialArgs(SpatialOperation.Intersects, ctx.MakeRectangle(upperleft, lowerright)));

            Commit();

            TopDocs search = indexSearcher.Search(query, 10);
            ScoreDoc[] scoreDocs = search.ScoreDocs;
            foreach (ScoreDoc scoreDoc in scoreDocs)
            {
                Console.WriteLine(indexSearcher.Doc(scoreDoc.Doc));
            }

            assertEquals(1, search.TotalHits);
        }
Example #28
 /// <summary>
 /// Creates an index document
 /// </summary>
 /// <param name="dic"></param>
 public void AddLuceneIndex(Dictionary<string, string> dic) {
     //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var analyzer = GetAnalyzer();
     using (var directory = GetLuceneDirectory())
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) {
         var doc = new Document();
         foreach (KeyValuePair<string, string> pair in dic) {
             // add new index entry
             //Field.Store.YES: controls whether the original value is stored.
             //Only fields stored with Field.Store.YES can later be read back with doc.Get("number").
             //Field.Index.NOT_ANALYZED: index the value without tokenizing it.
             //todo:boost
             if (NotAnalyzeFields.Exists(one => one == pair.Key)) {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
             else {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
             }
         }
         //doc.Boost
         writer.AddDocument(doc);
         writer.Commit();
         writer.Optimize();
         analyzer.Close();
     }
 }
Example #29
        public void CreateIndex()
        {
            IProductService productService = new ProductService();
            int count = productService.GetProductCount(string.Empty);
            var data = productService.GetProducts(count, 1, string.Empty);

            //Use the multi-file index format. The default is true, which builds a compound
            //index file structure; it is set to false here, for analysis, to produce a multi-file structure.
            //this.indexWriter.SetUseCompoundFile(false);

            foreach (var productInfo in data)
            {
                var doc = new Document();
                var field1 = new Field("title", productInfo.Title, Field.Store.YES, Field.Index.ANALYZED);
                // Add the field to the document
                doc.Add(field1);
                field1 = new Field("Category", productInfo.CategoryName, Field.Store.YES, Field.Index.ANALYZED);
                doc.Add(field1);
                field1 = new Field("Desc", productInfo.Desc??"", Field.Store.YES, Field.Index.ANALYZED);
                doc.Add(field1);
                this.indexWriter.AddDocument(doc);
            }

            // Optimize the index structure
            this.indexWriter.Optimize();

            this.indexWriter.Commit();
            // Close the writer
            this.indexWriter.Close();
        }
Example #30
        public Engine()
        {
            var directory = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);

            using (var indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
            {
                for (int i = 0; i < 10000; i++)
                {
                    Console.Write(".");
                    var document = new Document();
                    document.Add(new Field("Id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("Name", "Name" + i.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.AddDocument(document);
                }
            }

            Console.ReadKey();

            var queryParser = new QueryParser(Version.LUCENE_30, "Name", analyzer);
            var query = queryParser.Parse("Name37~");

            IndexReader indexReader = IndexReader.Open(directory, true);
            var searcher = new IndexSearcher(indexReader);

            TopDocs resultDocs = searcher.Search(query, indexReader.MaxDoc);
        }
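
The TopDocs returned by the fuzzy query above are computed but never read. A minimal sketch of consuming them with the same 3.0 API:

            // Hedged sketch: print the stored fields of each hit.
            foreach (ScoreDoc scoreDoc in resultDocs.ScoreDocs)
            {
                Document hit = searcher.Doc(scoreDoc.Doc);
                Console.WriteLine("{0}: {1}", hit.Get("Id"), hit.Get("Name"));
            }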
Example #31
        public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost)
        {
            DateTime date = (DateTime) value;

            int year = date.Year;
            int month = date.Month;
            int day = date.Day;

            // set year
            Field field = new Field(name + ".year", year.ToString(), store, index);
            if (boost != null)
            {
                field.SetBoost(boost.Value);
            }
            document.Add(field);

            // set month and pad it if necessary
            field = new Field(name + ".month", month.ToString("D2"), store, index);
            if (boost != null)
            {
                field.SetBoost(boost.Value);
            }
            document.Add(field);

            // set day and pad it if necessary
            field = new Field(name + ".day", day.ToString("D2"), store, index);
            if (boost != null)
            {
                field.SetBoost(boost.Value);
            }
            document.Add(field);

        }
Example #32
        public Document BuildRecord()
        {
            var doc = new Document();

            var numericField = new NumericField("DatabaseID", Field.Store.YES, false);
            numericField.SetIntValue(Email.ID);
            doc.Add(numericField);

            var field = new Field("UniqueID", UniqueID, Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field("Title", Title, Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field("Description", Description, Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field("Type", Type, Field.Store.YES, Field.Index.ANALYZED);
            doc.Add(field);

               /* field = new Field("Name", EventDescription.Name, Field.Store.YES, Field.Index.ANALYZED);
            doc.Add(field);*/

            return doc;
        }
Example #33
 static void AddDocument(int id, string title, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
     doc.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));
     writer.AddDocument(doc);
 }
Example #34
        public virtual void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter rw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
                rw.AddDocument(doc);
            }
            rw.Dispose();

            // If buffer size is small enough to cause a flush, errors ensue...
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                string value = Convert.ToString(i);
                doc.Add(NewStringField("pk", value, Field.Store.YES));
                doc.Add(NewStringField("text", "foo", Field.Store.YES));
                w.UpdateDocument(new Term("pk", value), doc);
            }
            w.Rollback();

            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
            r.Dispose();
            dir.Dispose();
        }
Example #35
        public void AddOrUpdateDocuments(params CmsDocument[] documents)
        {
            DeleteDocuments(documents);
            using (var writer = new IndexWriter(_Directory, _Analyzer, false, new IndexWriter.MaxFieldLength(1024 * 1024 * 4)))
            {
                foreach (var document in documents)
                {
                    if (document.Id == Guid.Empty)
                        throw new ArgumentOutOfRangeException("Attempt to index transient document: " + document.Title);

                    var doc = new Document();
                    doc.Add(new Field(CmsDocumentField.Id.ToString(), document.Id.ToString("b"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    if (!String.IsNullOrEmpty(document.Title))
                        doc.Add(new Field(CmsDocumentField.Title.ToString(), document.Title, Field.Store.YES, Field.Index.ANALYZED));
                    foreach (var tag in document.Tags)
                    {
                        doc.Add(new Field(CmsDocumentField.Tag.ToString(), tag, Field.Store.YES, Field.Index.ANALYZED));
                    }
                    foreach (var partValue in document.Parts.Select(p => p.Value))
                    {
                        if(!String.IsNullOrEmpty(partValue))
                            doc.Add(new Field(CmsDocumentField.Value.ToString(), partValue, Field.Store.NO, Field.Index.ANALYZED));
                    }
                    writer.AddDocument(doc);
                }
                writer.Flush(true, true, true);
            }
        }
Example #36
        public void TestMax()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
            doc.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(doc);
            IndexReader ir = writer.Reader;
            writer.Dispose();

            // slow wrapper does not support random access ordinals (there is no need for that!)
            IndexSearcher searcher = NewSearcher(ir, false);

            Sort sort = new Sort(new SortedSetSortField("value", false, Selector.MAX));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, td.TotalHits);
            // 'baz' comes before 'foo'
            assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
            assertNoFieldCaches();

            ir.Dispose();
            dir.Dispose();
        }
Example #37
        public void CreateIndex(List<ISearchEntity> CreateEntities)
        {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(ConfigElement.IndexDirectory, analyzer, true);
            //Third parameter: whether to re-create the index. True always clears and rebuilds it; false adds incrementally to the existing index.

            foreach (ISearchEntity IndexEntity in CreateEntities)
            {
                ProductModel product = (ProductModel)IndexEntity;

                Document doc = new Document();

                doc.Add(new Field("productid", Convert.ToString(product.EntityIdentity), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("productname", Convert.ToString(product.ProductName), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("cateid", Convert.ToString(product.CategoryID), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("catepath", Convert.ToString(product.CategoryPath), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("keywords", Convert.ToString(product.Keywords), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("description", Convert.ToString(product.Description), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("price", Convert.ToString(product.Price), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("createtime", Convert.ToString(product.CreateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("updatetime", Convert.ToString(product.UpdateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("mainimage", Convert.ToString(product.ProductImage), Field.Store.YES, Field.Index.UN_TOKENIZED));

                writer.AddDocument(doc);
                Console.WriteLine("created index for {0}:{1}", product.EntityIdentity, product.ProductName);
            }

            writer.Optimize();
            writer.Close();
        }
Example #38
        private void  AddDoc(IndexWriter iw, int i)
        {
            Document   d = new Document();
            IFieldable f;
            int        scoreAndID = i + 1;

            f           = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes
            f.OmitNorms = true;
            d.Add(f);

            f           = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search
            f.OmitNorms = true;
            d.Add(f);

            f           = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
            f.OmitNorms = true;
            d.Add(f);

            f           = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
            f.OmitNorms = true;
            d.Add(f);

            iw.AddDocument(d);
            Log("added: " + d);
        }
Example #39
 private void AddDoc(IndexWriter writer, String name, String id)
 {
     Document doc = new Document();
     doc.Add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
     doc.Add(new Field("id", id, Field.Store.YES, Field.Index.ANALYZED));
     writer.AddDocument(doc);
 }
Example #40
    // This method indexes the given text.
    private static void AddToIndex(int id, string text, IndexWriter writer)
    {
        Term term = new Term("id", id.ToString());

        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("mainText", text, Field.Store.YES, Field.Index.ANALYZED));
        // UpdateDocument deletes any existing document matching the id term and then
        // adds doc, so a separate AddDocument call is not needed.
        writer.UpdateDocument(term, doc);
    }
Example #41
        public void CreateIndex(Analyzer analayer)
        {
            FSDirectory fsDir         = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter   = new IndexWriter(fsDir, analayer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
            Stopwatch   stopWatch     = Stopwatch.StartNew();
            int         analyzedCount = 0;

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, this._fileSearchPattern, SearchOption.AllDirectories);

            //Count how many file pages need to be indexed
            int totalPages = GetTotalPages(files);

            WriteLog("Total pages statistics takes {0}ms", stopWatch.ElapsedMilliseconds);

            stopWatch.Restart();

            TextAbsorber textAbsorber = new TextAbsorber();

            //Start indexing
            foreach (string pdfFile in files)
            {
                var fileInfo = new FileInfo(pdfFile);
                var fileName = fileInfo.Name;
                Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(pdfFile);

                WriteLog("Current file is {0}", pdfFile);

                //Note: PDF page numbers start at 1
                for (int i = 1; i <= pdfDocument.Pages.Count; i++)
                {
                    Page page = pdfDocument.Pages[i];
                    page.Accept(textAbsorber);
                    string pageContent = textAbsorber.Text;

                    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
                    doc.Add(new Field(LuceneConfig.Field_Path, pdfFile, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field(LuceneConfig.Field_FileName, fileName, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field(LuceneConfig.Field_PageNumber, i.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field(LuceneConfig.Field_ContentByPage, pageContent, Field.Store.NO, Field.Index.ANALYZED));

                    indexWriter.AddDocument(doc);

                    analyzedCount++;

                    RaiseProgressChanged(analyzedCount * 100 / totalPages);
                }
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            stopWatch.Stop();
            Console.WriteLine("All completed. It takes {0}ms", stopWatch.Elapsed);
        }
Example #42
        private void  AddNoProxDoc(IndexWriter writer)
        {
            Document doc = new Document();
            Field    f   = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);

            f.OmitTermFreqAndPositions = true;
            doc.Add(f);
            f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
            f.OmitTermFreqAndPositions = true;
            doc.Add(f);
            writer.AddDocument(doc);
        }
Example #43
        private void  AddDoc(IndexWriter writer, int id)
        {
            Document doc = new Document();

            doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("id", System.Convert.ToString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            /* This was used in 2.9 to generate an index with compressed field:
             *          if (id % 2 == 0)
             *          {
             *                  doc.Add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
             *                  doc.Add(new Field("compressedSize", System.Convert.ToString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
             *          }
             *          else
             *          {
             *                  doc.Add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
             *                  doc.Add(new Field("compressedSize", System.Convert.ToString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
             *          }*/
            // Add numeric fields, to test if flex preserves encoding
            doc.Add(new NumericField("trieInt", 4).SetIntValue(id));
            doc.Add(new NumericField("trieLong", 4).SetLongValue(id));
            writer.AddDocument(doc);
        }
Example #44
        /// <summary>
        /// Adds a single item to the index
        /// </summary>
        /// <param name="data"></param>
        /// <param name="writer"></param>
        private static void AddToLuceneIndex(SampleData data, IndexWriter writer)
        {
            var searchQuery = new TermQuery(new Term("Id", data.Id.ToString()));

            writer.DeleteDocuments(searchQuery);

            var doc = new Lucene.Net.Documents.Document();

            doc.Add(new Field("Id", data.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Name", data.Name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", data.Description, Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
        }
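The delete-then-add pair above is the standard Lucene upsert pattern; `IndexWriter.UpdateDocument` performs the same two steps as a single atomic call. A sketch of the equivalent, keyed on the same field:

            // Equivalent atomic upsert: deletes any document matching the "Id" term,
            // then adds the new document.
            writer.UpdateDocument(new Term("Id", data.Id.ToString()), doc);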
Ejemplo n.º 45
        public void AddDocumentPage(bool is_deleted, string fingerprint, int page, string content)
        {
            Lucene.Net.Documents.Document document = null;

            // Create the document only if it is not to be deleted
            if (!is_deleted)
            {
                document = new Lucene.Net.Documents.Document();
                document.Add(new Field("fingerprint", fingerprint, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.Add(new Field("page", Convert.ToString(page), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.Add(new Field("content", content, Field.Store.NO, Field.Index.ANALYZED));
            }

            AddDocumentPage_INTERNAL(fingerprint, page, document);
        }
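`AddDocumentPage_INTERNAL` is not part of this snippet. A plausible sketch, assuming it replaces any existing entry for the fingerprint/page pair and treats a null document as a pure delete (the writer field name here is hypothetical):

        // Hypothetical sketch of AddDocumentPage_INTERNAL: delete any previous entry
        // keyed on (fingerprint, page), then add the replacement if one was built.
        private void AddDocumentPage_INTERNAL(string fingerprint, int page, Lucene.Net.Documents.Document document)
        {
            var key = new BooleanQuery();
            key.Add(new TermQuery(new Term("fingerprint", fingerprint)), Occur.MUST);
            key.Add(new TermQuery(new Term("page", Convert.ToString(page))), Occur.MUST);
            index_writer.DeleteDocuments(key);   // index_writer: assumed IndexWriter field

            if (document != null)
            {
                index_writer.AddDocument(document);
            }
        }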
Ejemplo n.º 46
        public Document Create(SearchItem searchItem)
        {
            var document = new Lucene.Net.Documents.Document();

            foreach (var keyword in searchItem.Keywords)
            {
                document.Add(this.CreateKeywordField(Constants.Search.Keyword, keyword));
            }

            document.Add(this.CreateStoredField(Constants.Search.EntityID, searchItem.EntityID.ToString()));
            document.Add(this.CreateIndexedTextField(Constants.Search.Abstract, searchItem.Abstract));
            document.Add(this.CreateIndexedTextField(Constants.Search.Text, searchItem.Text));

            return document;
        }
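The `Create*Field` helpers are not shown in this example. Plausible sketches, assuming keywords need exact matching, EntityID is retrieval-only, and the text fields are analyzed for full-text search:

        // Hypothetical helpers matching the calls above.
        private Field CreateKeywordField(string name, string value)
        {
            return new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);   // exact-match term
        }

        private Field CreateStoredField(string name, string value)
        {
            return new Field(name, value, Field.Store.YES, Field.Index.NO);             // retrievable only
        }

        private Field CreateIndexedTextField(string name, string value)
        {
            return new Field(name, value, Field.Store.YES, Field.Index.ANALYZED);       // full-text searchable
        }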
Ejemplo n.º 47
        public virtual void  TestTokenReuse()
        {
            Analyzer analyzer = new AnonymousClassAnalyzer1(this);

            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();

            doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
            writer.Commit();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq;

            Assert.AreEqual(3, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(true, termPositions.IsPayloadAvailable);
            Assert.AreEqual(6, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable);
            Assert.AreEqual(7, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable);
        }
Ejemplo n.º 48
 // Indexing...
 public void IndexText(List <Collection> collections)
 {
     foreach (Collection c in collections)
     {
         Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
         // Index each stored field of the collection with positions and offsets
         Lucene.Net.Documents.Field field_DocID = new Lucene.Net.Documents.Field("DocID", c.DocID, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
         doc.Add(field_DocID);
         Lucene.Net.Documents.Field field_Title = new Lucene.Net.Documents.Field("Title", c.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
         doc.Add(field_Title);
         doc.Add(new Lucene.Net.Documents.Field("Author", c.Author, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
         doc.Add(new Lucene.Net.Documents.Field("Bibliographic", c.Bibliographic, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
         doc.Add(new Lucene.Net.Documents.Field("Words", c.Words, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
         writer.AddDocument(doc);
     }
 }
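Since every field above is analyzed, the index can be queried with a standard query parser. A sketch of the search side (assuming `dir` and `analyzer` are the directory and analyzer behind `writer`, and the writer has been closed or committed first):

     // Sketch: free-text search over the "Words" field built above.
     var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Words", analyzer);
     var searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
     var hits = searcher.Search(parser.Parse("information retrieval"), 10);
     foreach (var scoreDoc in hits.ScoreDocs)
     {
         var hit = searcher.Doc(scoreDoc.Doc);
         Console.WriteLine("{0}: {1}", hit.Get("DocID"), hit.Get("Title"));
     }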
Ejemplo n.º 49
 /// <summary> Adds the fields above to a document </summary>
 /// <param name="doc">The document to write
 /// </param>
 public static void  SetupDoc(Document doc)
 {
     for (int i = 0; i < Fields.Length; i++)
     {
         doc.Add(Fields[i]);
     }
 }
Ejemplo n.º 50
        public void MrsJones()
        {
            var dir      = new RAMDirectory();
            var analyzer = new LowerCaseKeywordAnalyzer();
            var writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            var document = new Lucene.Net.Documents.Document();

            document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            writer.AddDocument(document);

            writer.Close(true);


            var searcher = new IndexSearcher(dir, true);

            var termEnum = searcher.GetIndexReader().Terms();

            while (termEnum.Next())
            {
                var buffer = termEnum.Term().Text();
                Console.WriteLine(buffer);
            }

            var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
            var query       = queryParser.Parse("Name:\"MRS. S*\"");

            Console.WriteLine(query);
            var result = searcher.Search(query, 10);

            Assert.NotEqual(0, result.TotalHits);
        }
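`LowerCaseKeywordAnalyzer` comes from the code base under test and is not shown here. One plausible implementation is a `KeywordTokenizer` (the whole value as a single token) followed by a `LowerCaseFilter`, which would index "MRS. SHABA" as the single term "mrs. shaba" seen in the term dump:

        // Plausible sketch of LowerCaseKeywordAnalyzer: one token per value, lowercased.
        public sealed class LowerCaseKeywordAnalyzer : Lucene.Net.Analysis.Analyzer
        {
            public override Lucene.Net.Analysis.TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
            {
                return new Lucene.Net.Analysis.LowerCaseFilter(
                    new Lucene.Net.Analysis.KeywordTokenizer(reader));
            }
        }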
Ejemplo n.º 51
        void Index()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Close();
        }
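The four documents above make a handy miniature corpus. A quick sanity check (a sketch, reusing the same `dir` field): "a" should match the first three documents and not "x y z":

            // Sketch: verify that the term "a" occurs in exactly three documents.
            var searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            var hits = searcher.Search(
                new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "a")), 10);
            System.Diagnostics.Debug.Assert(hits.TotalHits == 3);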
Ejemplo n.º 52
        public virtual void  TestPreAnalyzedField()
        {
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();

            doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));

            writer.AddDocument(doc);
            writer.Commit();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq);
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq);
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq);
            Assert.AreEqual(2, termPositions.NextPosition());
        }
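`AnonymousClassTokenStream` is not shown; from the assertions (term1 at position 0, term2 at positions 1 and 3, term3 at position 2) it must emit term1, term2, term3, term2 with default position increments of 1. A minimal sketch of such a stream, assuming Lucene.Net 3.x attribute and dispose APIs:

        // Sketch: a pre-analyzed stream emitting term1, term2, term3, term2
        // at positions 0..3, consistent with the assertions above.
        private sealed class PreAnalyzedTokenStream : TokenStream
        {
            private static readonly string[] Tokens = { "term1", "term2", "term3", "term2" };
            private readonly ITermAttribute termAtt;
            private int index;

            public PreAnalyzedTokenStream()
            {
                termAtt = AddAttribute<ITermAttribute>();
            }

            public override bool IncrementToken()
            {
                if (index == Tokens.Length) return false;
                ClearAttributes();                     // resets position increment to 1
                termAtt.SetTermBuffer(Tokens[index++]);
                return true;
            }

            protected override void Dispose(bool disposing) { }
        }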
Ejemplo n.º 53
        // Activity 9

        public void IndexText(string text)
        {
            // Build an analyzed field with positions and offsets for the raw text
            Lucene.Net.Documents.Field    field = new Lucene.Net.Documents.Field("text", text, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            Lucene.Net.Documents.Document doc   = new Lucene.Net.Documents.Document();
            doc.Add(field);
            writer.AddDocument(doc);
        }
Ejemplo n.º 54
        private void EnsureWriterHasChanges()
        {
            var doc   = new Lucene.Net.Documents.Document();
            var field = new Lucene.Net.Documents.Field("Path", "/root/indexing_writinggapandgettingunprocessedactivitiesswithgap/fake", LucField.Store.YES, LucField.Index.NOT_ANALYZED, LucField.TermVector.NO);

            doc.Add(field);
            LuceneManager._writer.AddDocument(doc);
        }
Ejemplo n.º 55
        public void constructor_should_convert_document_and_scoredoc_to_properties_and_parse_createdon_date()
        {
            // Arrange
            LuceneDocument document = new LuceneDocument();

            document.Add(CreateField("id", "123"));
            document.Add(CreateField("title", "the title"));
            document.Add(CreateField("contentsummary", "the summary"));
            document.Add(CreateField("tags", "tag1 tag2"));
            document.Add(CreateField("createdby", "gandhi"));
            document.Add(CreateField("contentlength", "999"));
            document.Add(CreateField("createdon", DateTime.Today.ToString()));

            ScoreDoc scoreDoc = new ScoreDoc(0, 9.50f);

            // Act
            SearchResultViewModel model = new SearchResultViewModel(document, scoreDoc);

            // Assert
            Assert.That(model.Id, Is.EqualTo(123));
            Assert.That(model.Title, Is.EqualTo("the title"));
            Assert.That(model.ContentSummary, Is.EqualTo("the summary"));
            Assert.That(model.Tags, Is.EqualTo("tag1 tag2"));
            Assert.That(model.CreatedBy, Is.EqualTo("gandhi"));
            Assert.That(model.ContentLength, Is.EqualTo(999));
            Assert.That(model.CreatedOn, Is.EqualTo(DateTime.Today));             // only the date should be parsed
            Assert.That(model.Score, Is.EqualTo(9.50f));
        }
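`CreateField` is not part of this test listing. A plausible one-line helper, assuming stored analyzed fields are what the view model reads back:

            // Hypothetical helper behind the Arrange block above.
            private static Field CreateField(string name, string value)
            {
                return new Field(name, value, Field.Store.YES, Field.Index.ANALYZED);
            }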
Ejemplo n.º 56
        /// <summary>
        /// Indexes the document.
        /// </summary>
        /// <typeparam name="T"></typeparam>
        /// <param name="document">The document.</param>
        /// <param name="indexName">Name of the index.</param>
        /// <param name="mappingType">Type of the mapping.</param>
        public override void IndexDocument <T>(T document, string indexName = null, string mappingType = null)
        {
            try
            {
                Type documentType = document.GetType();
                if (indexName == null)
                {
                    indexName = documentType.Name.ToLower();
                }

                if (mappingType == null)
                {
                    mappingType = documentType.Name.ToLower();
                }

                if (!_indexes.ContainsKey(mappingType))
                {
                    CreateIndex(documentType);
                }

                var index = _indexes[mappingType];

                Document doc = new Document();
                foreach (var typeMappingProperty in index.MappingProperties.Values)
                {
                    TextField textField = new TextField(typeMappingProperty.Name, documentType.GetProperty(typeMappingProperty.Name).GetValue(document, null).ToStringSafe().ToLower(), global::Lucene.Net.Documents.Field.Store.YES);
                    textField.Boost = typeMappingProperty.Boost;
                    doc.Add(textField);
                }

                IndexModelBase docIndexModelBase = document as IndexModelBase;
                string         indexValue        = LuceneID(mappingType, docIndexModelBase.Id);
                doc.AddStringField("type", mappingType, global::Lucene.Net.Documents.Field.Store.YES);
                doc.AddStringField("id", docIndexModelBase.Id.ToString(), global::Lucene.Net.Documents.Field.Store.YES);
                doc.AddStringField("index", indexValue, global::Lucene.Net.Documents.Field.Store.YES);

                // Stores all the properties as JSON to retrieve object on lookup.
                doc.AddStoredField("JSON", document.ToJson());

                // Use the analyzer in fieldAnalyzers if that field is in that dictionary, otherwise use StandardAnalyzer.
                var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer: new StandardAnalyzer(_matchVersion, new CharArraySet(_matchVersion, 0, true)), fieldAnalyzers: index.FieldAnalyzers);

                OpenWriter();
                lock ( _lockWriter )
                {
                    if (_writer != null)
                    {
                        _writer.UpdateDocument(new Term("index", indexValue), doc, analyzer);     // Must specify analyzer because the default analyzer that is specified in indexWriterConfig is null.
                    }
                }
            }
            catch (Exception ex)
            {
                HttpContext context2 = HttpContext.Current;
                ExceptionLogService.LogException(ex, context2);
            }
        }
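Because the whole model is stored as JSON, a lookup only needs the "index" term plus that stored field. A sketch of the read side using the Lucene.Net 4.8 reader API (`_directory` is an assumed name for whatever Directory backs `_writer`):

            // Sketch: fetch a document back by its "index" key; the caller can then
            // read doc.Get("JSON") and deserialize it into the model type.
            private Document GetByIndexValue(string indexValue)
            {
                using (var reader = DirectoryReader.Open(_directory))   // _directory: assumed field
                {
                    var searcher = new IndexSearcher(reader);
                    var hits = searcher.Search(new TermQuery(new Term("index", indexValue)), 1);
                    return hits.TotalHits > 0 ? searcher.Doc(hits.ScoreDocs[0].Doc) : null;
                }
            }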
Ejemplo n.º 57
            private static void _addToLuceneIndex(SampleData sampleData, IndexWriter writer)
            {
                // remove older index entry
                var searchQuery = new Lucene.Net.Search.TermQuery(new Term("Id", sampleData.Id.ToString()));

                writer.DeleteDocuments(searchQuery);

                // add new index entry
                var doc = new Lucene.Net.Documents.Document();

                // add lucene fields mapped to db fields
                doc.Add(new Field("Id", sampleData.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Name", sampleData.Name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Description", sampleData.Description, Field.Store.YES, Field.Index.ANALYZED));

                // add entry to index
                writer.AddDocument(doc);
            }
Ejemplo n.º 58
        public Lucene.Net.Documents.Document CreateDocWith(string fileContent)
        {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            string[] tags = { ".I", "\n.T\n", "\n.A\n", "\n.B\n", "\n.W\n" };
            string[] sections = fileContent.Split(tags, StringSplitOptions.None);

            // Section layout after the split: [0] doc id, [1] title, [2] author,
            // [3] bibliographic info (.B), [4] abstract (.W, which repeats the title).

            // Store (rather than drop) the doc id: a field created with Store.NO and
            // Index.NO is rejected by Lucene, so the original combination threw.
            doc.Add(new Field(DOCID_FN, sections[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(TITLE_FN, sections[1], Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(AUTHOR_FN, sections[2], Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(BIBLIOGRAPHICINFORMATION_FN, sections[3], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(ABSTRACT_FN,
                              sections[4].Replace(sections[1] + "\n", ""),             // strip the repeated title from the abstract
                              Field.Store.YES, Field.Index.ANALYZED));
            return doc;
        }
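A worked example of the split, with a hypothetical input (note that if the text began with the ".I" marker itself, `Split` would produce a leading empty entry and shift every index by one):

        // Hypothetical input and the sections it produces:
        var sample = "1\n.T\nsome title\n.A\nsome author\n.B\nsome biblio\n.W\nsome title\nabstract text";
        var exampleDoc = CreateDocWith(sample);
        // sections: [0]="1"  [1]="some title"  [2]="some author"
        //           [3]="some biblio"  [4]="some title\nabstract text" (title stripped)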
Ejemplo n.º 59
        private SegmentInfo IndexDoc(IndexWriter writer, System.String fileName)
        {
            System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
            Document           doc  = FileDocument.Document(file);

            doc.Add(new Field("contents", new System.IO.StreamReader(file.FullName)));
            writer.AddDocument(doc);
            writer.Commit();
            return writer.NewestSegment();
        }
Ejemplo n.º 60
        public void CreateIndex()
        {
            var indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(Settings.IndexLocation));
            var stdAnalyzer    = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            var startTime      = DateTime.Now;

            Console.WriteLine("Indexing Started at " + startTime.ToString());
            try
            {
                using (var indexWriter = new IndexWriter(indexDirectory, stdAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var files = System.IO.Directory.GetFiles(Settings.DataFileLocation, "*.pdf", System.IO.SearchOption.AllDirectories);

                    foreach (var file in files)
                    {
                        using (var reader = new iTextSharp.text.pdf.PdfReader(file))
                        {
                            var totPages = reader.NumberOfPages;
                            for (int pageNo = 1; pageNo <= totPages; pageNo++)
                            {
                                // Index each page as its own document; appending into a
                                // shared StringBuilder made every page's "content" include
                                // all preceding pages as well.
                                var pageText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNo);
                                var document = new Lucene.Net.Documents.Document();
                                document.Add(new Field("file", file, Field.Store.YES, Field.Index.ANALYZED));
                                document.Add(new Field("pageno", pageNo.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                                document.Add(new Field("content", pageText, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                                indexWriter.AddDocument(document);
                            }
                        }
                        }
                    }

                    // Optimize once, after all files are indexed; optimizing per document
                    // rewrites the whole index each time and is extremely slow.
                    indexWriter.Optimize();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Failed to Index {0}", ex.StackTrace.ToString());
            }
            var endTime = DateTime.Now;

            Console.WriteLine("Indexing Completed at " + endTime.ToString());
        }
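With the file name, page number, and content all stored, hits can be reported per page. A sketch of the matching search side, assuming the same index location and analyzer version as above:

            // Sketch: page-level search over the index built by CreateIndex().
            var searchDir = FSDirectory.Open(new System.IO.DirectoryInfo(Settings.IndexLocation));
            var searcher = new IndexSearcher(searchDir, true);
            var parser = new Lucene.Net.QueryParsers.QueryParser(
                Lucene.Net.Util.Version.LUCENE_29, "content", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            var hits = searcher.Search(parser.Parse("lucene"), 10);
            foreach (var scoreDoc in hits.ScoreDocs)
            {
                var hit = searcher.Doc(scoreDoc.Doc);
                Console.WriteLine("{0} (page {1})", hit.Get("file"), hit.Get("pageno"));
            }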