/// <summary>
        /// Converts <paramref name="doc"/> into a Lucene document and appends it to the index
        /// stored in <c>_directory</c>.
        /// </summary>
        /// <remarks>
        /// Fields whose value is null or whitespace are skipped. Nothing is written when
        /// <paramref name="doc"/> has no fields. <c>!_created</c> is passed as the writer's
        /// create flag, and <c>_created</c> is set once a document has been written.
        /// </remarks>
        /// <param name="doc">The document to index.</param>
        /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
        /// <exception cref="ArgumentException">A field has a null, empty or whitespace name.</exception>
        public void Save(SearchDocument doc)
        {
            if (doc == null) throw new ArgumentNullException(nameof(doc));

            if (doc.Fields == null || doc.Fields.Count == 0)
                return;

            var document = new Document();
            foreach (var docField in doc.Fields)
            {
                // ArgumentException (still an Exception for existing catch blocks) pinpoints bad input.
                if (string.IsNullOrWhiteSpace(docField.FieldName))
                    throw new ArgumentException("Field name cannot be empty");

                if (string.IsNullOrWhiteSpace(docField.Value))
                    continue;

                var field = new Field(docField.FieldName, docField.Value, Field.Store.YES, Field.Index.ANALYZED);
                document.Add(field);
            }

            // The analyzer is disposable as well; release it together with the writer.
            using (var analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30))
            using (var writer = new IndexWriter(_directory, analyzer, !_created, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.AddDocument(document);
            }

            _created = true;
        }
Beispiel #2
0
        /// <summary>
        /// Writes one document (id, title, username) per entry of <paramref name="contents"/> to the
        /// file-system index at <c>IndexPath</c>, then optimizes the index and disposes the writer.
        /// </summary>
        /// <param name="contents">The items to index.</param>
        /// <returns>
        /// The writer that was used. It has already been disposed when this method returns,
        /// so callers cannot add further documents through it.
        /// </returns>
        /// <remarks>
        /// Failures are not propagated (best effort); they are reported through
        /// <see cref="System.Diagnostics.Trace"/> instead of being dropped silently.
        /// </remarks>
        public static IndexWriter CreateIndex(Content[] contents)
        {
            var v = Lucene.Net.Util.Version.LUCENE_30;
            var l = Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED;
            var d = FSDirectory.Open(new DirectoryInfo(IndexPath));

            IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(v), l);

            try
            {
                try
                {
                    foreach (var item in contents)
                    {
                        Document doc = new Document();

                        Field id = new Field("id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                        Field title = new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
                        Field username = new Field("username", item.User.UserName, Field.Store.YES, Field.Index.ANALYZED);
                        doc.Add(id);
                        doc.Add(title);
                        doc.Add(username);
                        writer.AddDocument(doc);
                    }
                    writer.Optimize();
                }
                finally
                {
                    // Dispose on every path so a failed run does not leave the writer open.
                    writer.Dispose();
                }
            }
            catch (System.Exception ex)
            {
                // Keep the original no-throw contract, but make the failure observable.
                System.Diagnostics.Trace.TraceError("CreateIndex failed: {0}", ex);
            }

            return writer;
        }
        /// <summary>
        /// Checks the dispose behaviour of reader, writer and directory: each one must reject
        /// use after its <c>using</c> block has ended, while the enclosing objects stay open.
        /// </summary>
        public void TestReadersWriters()
        {
            // Declared outside the using blocks so they can still be referenced after disposal.
            Directory dir;
            
            using(dir = new RAMDirectory())
            {
                Document doc;
                IndexWriter writer;
                IndexReader reader;

                using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Field field = new Field("name", "value", Field.Store.YES,Field.Index.ANALYZED);
                    doc = new Document();
                    doc.Add(field);
                    writer.AddDocument(doc);
                    writer.Commit();

                    using (reader = writer.GetReader())
                    {
                        // Reopen must succeed while the reader is still open; the result is not used.
                        IndexReader r1 = reader.Reopen();
                    }

                    // The reader's using block has ended, so Reopen is expected to throw.
                    Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here");
                }
                
                // The writer's using block has ended, so adding a document is expected to throw.
                Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here");

                // The directory is still inside its own using block and must be open.
                Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory");
            }
            // After the outermost using block the directory must be closed too.
            Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory");
        }
Beispiel #4
0
        /// <summary>
        /// Adds four documents to the index in <c>dir</c>, each with a single analyzed,
        /// non-stored field named "field", and closes the writer afterwards.
        /// </summary>
        void Index()
        {
            // One entry per document, in insertion order.
            string[] contents = { "a b c d", "a b a d", "a b e f", "x y z" };

            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                foreach (string content in contents)
                {
                    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
                    Lucene.Net.Documents.Field f = new Lucene.Net.Documents.Field("field", content, Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
                    doc.Add(f);
                    wr.AddDocument(doc);
                }
            }
            finally
            {
                // Close even when adding a document fails, so the writer is not left open.
                wr.Close();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Creates a <c>LuceneField</c> from the given settings and applies the boost to it.
        /// </summary>
        /// <param name="name">Name of the field.</param>
        /// <param name="value">Text value of the field.</param>
        /// <param name="storageType">Store option passed to the field constructor.</param>
        /// <param name="indexType">Index option passed to the field constructor.</param>
        /// <param name="vectorType">Term vector option passed to the field constructor.</param>
        /// <param name="boost">Boost set on the created field via <c>SetBoost</c>.</param>
        /// <returns>The newly created field, typed as <c>AbstractField</c>.</returns>
        protected AbstractField CreateField(string name, string value, LuceneField.Store storageType, LuceneField.Index indexType, LuceneField.TermVector vectorType, float boost)
        {
            var boostedField = new LuceneField(name, value, storageType, indexType, vectorType);
            boostedField.SetBoost(boost);

            return boostedField;
        }
        /// <summary>
        /// Builds a Lucene document from this instance's members.
        /// </summary>
        /// <remarks>
        /// The hash field is indexed but not stored; name and package name are stored and analyzed;
        /// the remaining fields are stored only. Hash and name carry their configured boosts.
        /// </remarks>
        /// <returns>A new document containing the eight fields, in the order they are added below.</returns>
        public Document ToDocument()
        {
            var result = new Document();

            result.Add(new Field(ExtensionHashField, ModelHelpers.GetMD5Hash(ToString()), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)
            {
                Boost = ExtensionHashFieldBoost
            });
            result.Add(new Field(ExtensionNameField, Name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)
            {
                Boost = ExtensionNameFieldBoost
            });

            // Stored-only fields: retrievable from a hit, not searchable.
            result.Add(new Field(ExtensionFullNameField, FullName, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            result.Add(new Field(ExtensionNamespaceField, Namespace, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            result.Add(new Field(ExtensionAssemblyNameField, AssemblyName, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            result.Add(new Field(ExtensionPackageNameField, PackageName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));

            result.Add(new Field(ExtensionPackageVersionField, PackageVersion, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            result.Add(new Field(ExtensionTargetFrameworksField, GetTargetFrameworksString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            return result;
        }
Beispiel #7
0
        /// <summary>
        /// Creates <c>document</c> on first use and fills it with one empty field per entry
        /// returned by <c>DocumentBuilder.GetFields(TypeName)</c>. Does nothing when the
        /// document already exists.
        /// </summary>
        private void EnsureDocument()
        {
            if (document != null)
            {
                return;
            }

            document = new Document();
            foreach (var definition in DocumentBuilder.GetFields(TypeName))
            {
                var kind = definition.FieldType;
                bool isNumeric = kind == FieldType.Int
                    || kind == FieldType.Float
                    || kind == FieldType.Long
                    || kind == FieldType.Double;

                AbstractField luceneField;
                if (isNumeric)
                {
                    // Numeric fields take a bool "index" flag: any index mode above zero means indexed.
                    luceneField = new LN.Documents.NumericField(definition.FieldName, (LN.Documents.Field.Store)((int)definition.StoreMode), (int)definition.IndexMode > 0);
                }
                else
                {
                    // Strings, dates and any other type become plain fields with an empty value.
                    luceneField = new LN.Documents.Field(definition.FieldName, string.Empty, (LN.Documents.Field.Store)((int)definition.StoreMode), (LN.Documents.Field.Index)((int)definition.IndexMode));
                }

                luceneField.Boost = definition.Boost;
                document.Add(luceneField);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Adds the searchable fields of <c>photo</c> (title, owner, owner id, description)
        /// to <paramref name="document"/>.
        /// </summary>
        /// <param name="document">The document to populate.</param>
        /// <returns>The same <paramref name="document"/> instance.</returns>
        protected override Document BuildLuceneDocument(Document document)
        {
            // NOTE(review): DEBUG and non-DEBUG builds currently use the same store setting.
            Field.Store storeContent = Field.Store.YES;
            #if DEBUG
            storeContent = Field.Store.YES;
            #endif

            var title = photo.Title;
            document.Add(new Field("title", title, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            // Un-analyzed copy of the title.
            document.Add(new Field("title_sort", title, storeContent, Field.Index.NOT_ANALYZED_NO_NORMS));

            document.Add(new Field("owner", photo.Owner ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            document.Add(new Field("owner_id", photo.OwnerId.ToString(), storeContent, Field.Index.NOT_ANALYZED_NO_NORMS));
            // Index-only copy of the owner (never stored).
            document.Add(new Field("owner_index", photo.Owner ?? String.Empty, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            document.Add(new Field("description", photo.Description ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            return document;
        }
        /// <summary>
        /// Adds one test document to <paramref name="iw"/>. The value <c>i + 1</c> is used as the
        /// document id and as the value of the int and float fields; all fields omit norms.
        /// </summary>
        /// <param name="iw">Writer that receives the document.</param>
        /// <param name="i">Zero-based document number.</param>
        private void  AddDoc(IndexWriter iw, int i)
        {
            int scoreAndID = i + 1;
            Document document = new Document();

            // for debug purposes
            IFieldable idField = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED);
            idField.OmitNorms = true;
            document.Add(idField);

            // for regular search
            IFieldable textField = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.ANALYZED);
            textField.OmitNorms = true;
            document.Add(textField);

            // for function scoring
            IFieldable intField = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED);
            intField.OmitNorms = true;
            document.Add(intField);

            // for function scoring
            IFieldable floatField = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED);
            floatField.OmitNorms = true;
            document.Add(floatField);

            iw.AddDocument(document);
            Log("added: " + document);
        }
Beispiel #10
0
		/// <summary>
		/// Resolves the Lucene index mode for field <paramref name="name"/> of an index definition.
		/// </summary>
		/// <remarks>
		/// Lookup order: the field's own entry in <c>Indexes</c>, then the entry for
		/// <c>Constants.AllFields</c>. Without either, a configured analyzer (for the field or for
		/// all fields) yields <c>ANALYZED</c>; otherwise <paramref name="defaultIndex"/> is used,
		/// falling back to <c>ANALYZED_NO_NORMS</c>.
		/// </remarks>
		/// <param name="self">The index definition to inspect.</param>
		/// <param name="name">The field name.</param>
		/// <param name="defaultIndex">Index mode used when nothing specific is configured; may be null.</param>
		/// <returns>The index mode to use for the field.</returns>
		/// <exception cref="ArgumentOutOfRangeException">The configured value is not a known <c>FieldIndexing</c> member.</exception>
		public static Field.Index GetIndex(this IndexDefinition self, string name, Field.Index? defaultIndex)
		{
			var fallback = defaultIndex ?? Field.Index.ANALYZED_NO_NORMS;
			if (self.Indexes == null)
				return fallback;

			FieldIndexing indexing;
			bool hasExplicitSetting = self.Indexes.TryGetValue(name, out indexing)
				|| self.Indexes.TryGetValue(Constants.AllFields, out indexing);

			if (!hasExplicitSetting)
			{
				string unused;
				bool hasCustomAnalyzer = self.Analyzers.TryGetValue(name, out unused)
					|| self.Analyzers.TryGetValue(Constants.AllFields, out unused);

				// if there is a custom analyzer, the value should be analyzed
				return hasCustomAnalyzer ? Field.Index.ANALYZED : fallback;
			}

			switch (indexing)
			{
				case FieldIndexing.No:
					return Field.Index.NO;
				case FieldIndexing.Analyzed:
					return Field.Index.ANALYZED_NO_NORMS;
				case FieldIndexing.NotAnalyzed:
					return Field.Index.NOT_ANALYZED_NO_NORMS;
				case FieldIndexing.Default:
					return fallback;
				default:
					throw new ArgumentOutOfRangeException();
			}
		}
        /// <summary>
        /// Indexes one document in which "f1" and "f2" each appear twice (once indexed, once
        /// stored only) and checks that the norms and omit-term-frequency flags match the
        /// indexed instance after the index has been optimized to a single segment.
        /// </summary>
        public virtual void  TestLUCENE_1590()
        {
            var document = new Document();

            // f1 has no norms
            document.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            document.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO));

            // f2 has no TF
            var noTermFreqField = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED)
            {
                OmitTermFreqAndPositions = true
            };
            document.Add(noTermFreqField);
            document.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO));

            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
            IndexWriter indexWriter = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            indexWriter.AddDocument(document);
            indexWriter.Optimize(); // be sure to have a single segment
            indexWriter.Close();

            _TestUtil.CheckIndex(dir);

            SegmentReader segmentReader = SegmentReader.GetOnlySegmentReader(dir);
            FieldInfos fieldInfos = segmentReader.FieldInfos();

            // f1
            Assert.IsFalse(segmentReader.HasNorms("f1"), "f1 should have no norms");
            Assert.IsFalse(fieldInfos.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1");

            // f2
            Assert.IsTrue(segmentReader.HasNorms("f2"), "f2 should have norms");
            Assert.IsTrue(fieldInfos.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2");
        }
        /// <summary>
        /// Adds a field to <paramref name="doc"/> unless <paramref name="propertyValue"/> is
        /// null, empty or whitespace.
        /// </summary>
        /// <param name="doc">The document that receives the field.</param>
        /// <param name="propertyName">Name of the field.</param>
        /// <param name="propertyValue">Value of the field; blank values are ignored.</param>
        /// <param name="fieldStore">Store option for the field.</param>
        /// <param name="fieldIndex">Index option for the field.</param>
        internal static void AddField(this Document doc, string propertyName, string propertyValue, Field.Store fieldStore, Field.Index fieldIndex)
        {
            if (!string.IsNullOrWhiteSpace(propertyValue))
            {
                var field = new Field(propertyName, propertyValue, fieldStore, fieldIndex);
                doc.Add(field);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Builds a Lucene document from a <c>SampleData</c> record.
        /// </summary>
        /// <remarks>
        /// Only "Id" is stored; "ArtNo", "Name" and "NameWithWiteSpace" are indexed for search
        /// only. "ArtNo" gets the highest boost (2), the two name fields the lowest (0.1).
        /// </remarks>
        /// <param name="obj">The record to convert.</param>
        /// <returns>A new document with the four fields.</returns>
        private static Document CreateDocument(SampleData obj)
        {
            var document = new Document();

            var artNoField = new Field("ArtNo", obj.ArtNo, Field.Store.NO, Field.Index.ANALYZED);
            artNoField.SetBoost(2F);
            document.Add(artNoField);

            var idField = new Field("Id", obj.Id.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED);
            idField.SetBoost(1);
            document.Add(idField);

            var nameField = new Field("Name", obj.Name, Field.Store.NO, Field.Index.ANALYZED);
            nameField.SetBoost(.1F);
            document.Add(nameField);

            // Second copy of the name, passed through RemoveSymbols(" ") before indexing.
            var compactNameField = new Field("NameWithWiteSpace", obj.Name.RemoveSymbols(" "), Field.Store.NO, Field.Index.ANALYZED);
            compactNameField.SetBoost(.1F);
            document.Add(compactNameField);

            return document;
        }
		/// <summary>
		/// Indexes 17 documents with a commit after each one, asserts that the reader has more
		/// than one sub-reader, and checks that <c>IntFieldSource</c> and <c>MultiValueSource</c>
		/// both return the document number as the value of "field".
		/// </summary>
		public virtual void  TestMultiValueSource()
		{
			Directory directory = new MockRAMDirectory();
			IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

			// A single document/field pair is reused; only the field value changes per iteration.
			Document document = new Document();
			Field valueField = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			document.Add(valueField);

			for (int docNumber = 0; docNumber < 17; docNumber++)
			{
				valueField.SetValue("" + docNumber);
				indexWriter.AddDocument(document);
				indexWriter.Commit();
			}

			IndexReader indexReader = indexWriter.GetReader();
			indexWriter.Close();

			Assert.IsTrue(indexReader.GetSequentialSubReaders().Length > 1);

			ValueSource plainSource = new IntFieldSource("field");
			DocValues plainValues = plainSource.GetValues(indexReader);
			DocValues multiValues = new MultiValueSource(plainSource).GetValues(indexReader);

			for (int docId = 0; docId < indexReader.MaxDoc(); docId++)
			{
				Assert.AreEqual(plainValues.IntVal(docId), docId);
				Assert.AreEqual(multiValues.IntVal(docId), docId);
			}

			Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();

			indexReader.Close();
			directory.Close();
		}
 /// <summary>
 /// Adds a token-stream field for <paramref name="term"/> to <paramref name="doc"/>; the
 /// stream is created from the term text and carries <paramref name="meta"/> as meta data.
 /// </summary>
 /// <param name="doc">The document that receives the field.</param>
 /// <param name="term">Supplies the field name and the text for the token stream.</param>
 /// <param name="meta">Meta data handed to the token stream.</param>
 private void AddMetaDataField(Document doc, Term term, int[] meta)
 {
     var stream = new IntMetaDataTokenStream(term.Text);
     stream.SetMetaData(meta);

     doc.Add(new Field(term.Field, stream));
 }
        /// <summary>
        /// Loads all products from <c>ProductService</c>, writes one document (title, category,
        /// description) per product through <c>indexWriter</c>, then optimizes, commits and
        /// closes the writer.
        /// </summary>
        /// <remarks>
        /// The writer is closed even when indexing fails, so it is not left open after an error.
        /// </remarks>
        public void CreateIndex()
        {
            IProductService productService = new ProductService();
            int count = productService.GetProductCount(string.Empty);
            var data = productService.GetProducts(count, 1, string.Empty);

            // Switches to the multi-file index format. The default (true) builds a compound index
            // file; setting it to false, for analysis purposes, produces a multi-file index structure.
            //this.indexWriter.SetUseCompoundFile(false);

            try
            {
                foreach (var productInfo in data)
                {
                    var doc = new Document();

                    // Add the fields to the document.
                    doc.Add(new Field("title", productInfo.Title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Category", productInfo.CategoryName, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Desc", productInfo.Desc??"", Field.Store.YES, Field.Index.ANALYZED));

                    this.indexWriter.AddDocument(doc);
                }

                // Optimize the index structure.
                this.indexWriter.Optimize();

                this.indexWriter.Commit();
            }
            finally
            {
                // Close the writer on every path, including failures above.
                this.indexWriter.Close();
            }
        }
        /// <summary>
        /// Splits a <see cref="DateTime"/> value into three fields named
        /// <c>name + ".year"</c>, <c>name + ".month"</c> and <c>name + ".day"</c> and adds them
        /// to <paramref name="document"/>. Month and day are zero-padded to two digits.
        /// </summary>
        /// <param name="name">Base name of the fields.</param>
        /// <param name="value">The value to index; it is cast to <see cref="DateTime"/>.</param>
        /// <param name="document">The document that receives the fields.</param>
        /// <param name="store">Store option applied to all three fields.</param>
        /// <param name="index">Index option applied to all three fields.</param>
        /// <param name="boost">Optional boost applied to all three fields when not null.</param>
        public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost)
        {
            DateTime date = (DateTime) value;

            AddDatePart(document, name + ".year", date.Year.ToString(), store, index, boost);
            // "D2" pads single-digit months and days with a leading zero.
            AddDatePart(document, name + ".month", date.Month.ToString("D2"), store, index, boost);
            AddDatePart(document, name + ".day", date.Day.ToString("D2"), store, index, boost);

            // The method is fully implemented; it must not end by throwing NotImplementedException.
        }

        /// <summary>Creates one date-part field, applies the optional boost and adds it to the document.</summary>
        private static void AddDatePart(Document document, string fieldName, string text, Field.Store store, Field.Index index, float? boost)
        {
            Field field = new Field(fieldName, text, store, index);
            if (boost != null)
            {
                field.SetBoost(boost.Value);
            }
            document.Add(field);
        }
        /// <summary>
        /// Builds the Lucene document for this record: a stored, non-indexed numeric
        /// "DatabaseID" taken from <c>Email.ID</c> plus the stored text fields "UniqueID",
        /// "Title", "Description" (not analyzed) and "Type" (analyzed).
        /// </summary>
        /// <returns>A new document with the five fields.</returns>
        public Document BuildRecord()
        {
            var record = new Document();

            var databaseId = new NumericField("DatabaseID", Field.Store.YES, false);
            databaseId.SetIntValue(Email.ID);
            record.Add(databaseId);

            record.Add(new Field("UniqueID", UniqueID, Field.Store.YES, Field.Index.NOT_ANALYZED));
            record.Add(new Field("Title", Title, Field.Store.YES, Field.Index.NOT_ANALYZED));
            record.Add(new Field("Description", Description, Field.Store.YES, Field.Index.NOT_ANALYZED));
            record.Add(new Field("Type", Type, Field.Store.YES, Field.Index.ANALYZED));

            // Disabled in the original code:
            // record.Add(new Field("Name", EventDescription.Name, Field.Store.YES, Field.Index.ANALYZED));

            return record;
        }
Beispiel #19
0
        /// <summary>
        /// Creates a Lucene field from an XML field definition and a data object.
        /// </summary>
        /// <remarks>
        /// The node must carry the attributes "name", "datasource", "store" and "index".
        /// "store" and "index" are parsed (ignoring case) as <c>Field.Store</c> and
        /// <c>Field.Index</c> members; the field value is read with <c>getData</c>.
        /// </remarks>
        /// <param name="data">The object the field value is read from.</param>
        /// <param name="node">The XML definition of the field.</param>
        /// <returns>The configured field; its name is the lower-cased "name" attribute.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="node"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// A required attribute is missing, or "store"/"index" does not name a valid option.
        /// </exception>
        public static Field CreateInstance(object data, XmlNode node)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            String name = GetRequiredAttribute(node, "name").ToLower();
            String datasource = GetRequiredAttribute(node, "datasource");
            String store = GetRequiredAttribute(node, "store");
            String index = GetRequiredAttribute(node, "index");

            // A failed parse used to fall back silently to the enum's default member; reject it instead.
            Lucene.Net.Documents.Field.Store st;
            if (!Enum.TryParse<Lucene.Net.Documents.Field.Store>(store, true, out st))
            {
                throw new ArgumentException("Invalid 'store' value '" + store + "' for field '" + name + "'.", "node");
            }

            Lucene.Net.Documents.Field.Index idx;
            if (!Enum.TryParse<Lucene.Net.Documents.Field.Index>(index, true, out idx))
            {
                throw new ArgumentException("Invalid 'index' value '" + index + "' for field '" + name + "'.", "node");
            }

            String value = getData(data, datasource);

            return new Field(name, value, st, idx);
        }

        /// <summary>Returns the value of a mandatory attribute, or throws when the node does not have it.</summary>
        private static String GetRequiredAttribute(XmlNode node, String attributeName)
        {
            // Attributes is null for node types that cannot carry attributes.
            XmlAttribute attribute = node.Attributes == null ? null : node.Attributes[attributeName];
            if (attribute == null)
            {
                throw new ArgumentException("Field definition is missing the '" + attributeName + "' attribute.", "node");
            }

            return attribute.Value;
        }
        /// <summary>
        /// Checks that a reader obtained from the writer keeps returning the deleted document,
        /// and reports itself as not current, until it is reopened after the delete.
        /// </summary>
        public void Test_IndexReader_IsCurrent()
        {
            RAMDirectory ramDir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(ramDir, new KeywordAnalyzer(), true, new IndexWriter.MaxFieldLength(1000));
            Field field = new Field("TEST", "mytest", Field.Store.YES, Field.Index.ANALYZED);
            Document doc = new Document();
            doc.Add(field);
            writer.AddDocument(doc);

            // Reader taken from the writer before the delete below.
            IndexReader reader = writer.GetReader();

            writer.DeleteDocuments(new Lucene.Net.Index.Term("TEST", "mytest"));

            // The delete happened after the reader was obtained, so the reader is out of date.
            Assert.IsFalse(reader.IsCurrent());

            // The existing reader must still find the document.
            int resCount1 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits;
            Assert.AreEqual(1, resCount1);

            writer.Commit();

            // Committing does not refresh an already opened reader.
            Assert.IsFalse(reader.IsCurrent());

            int resCount2 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits;
            Assert.AreEqual(1, resCount2, "Reopen not invoked yet, resultCount must still be 1.");

            // Only the reopened reader is expected to be current and to reflect the delete.
            reader = reader.Reopen();
            Assert.IsTrue(reader.IsCurrent());

            int resCount3 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")), 100).TotalHits;
            Assert.AreEqual(0, resCount3, "After reopen, resultCount must be 0.");

            reader.Close();
            writer.Dispose();
        }
		/// <summary>
		/// Lazily produces the Lucene fields for every property of <paramref name="document"/>
		/// except the one named <c>Constants.DocumentIdFieldName</c>.
		/// </summary>
		/// <param name="document">The JSON object whose properties are indexed.</param>
		/// <param name="defaultStorage">Store option passed on to <c>CreateFields</c>.</param>
		/// <returns>The fields created for all remaining properties, in property order.</returns>
		public IEnumerable<AbstractField> Index(RavenJObject document, Field.Store defaultStorage)
		{
			return document
				.Where(entry => entry.Key != Constants.DocumentIdFieldName)
				.SelectMany(entry => CreateFields(entry.Key, GetPropertyValue(entry.Value), defaultStorage));
		}
        /// <summary>
        /// Adds the searchable fields of <c>author</c> (name, first and last name, biography, id)
        /// to <paramref name="document"/>.
        /// </summary>
        /// <param name="document">The document to populate.</param>
        /// <returns>The same <paramref name="document"/> instance.</returns>
        protected override Document BuildLuceneDocument(Document document)
        {
            // NOTE(review): DEBUG and non-DEBUG builds currently use the same store setting.
            Field.Store storeContent = Field.Store.YES;
            #if DEBUG
            storeContent = Field.Store.YES;
            #endif

            var fullName = author.Name;
            document.Add(new Field("author_name", fullName, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            // Un-analyzed copy of the name.
            document.Add(new Field("author_sort", fullName, storeContent, Field.Index.NOT_ANALYZED_NO_NORMS));

            document.Add(new Field("author_firstName", author.FirstName, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            document.Add(new Field("author_lastName", author.LastName, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            document.Add(new Field("biography", author.Biography ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            document.Add(new Field("author_id", author.ElanId.ToString(), storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            return document;
        }
        /// <summary>
        /// Adds a single field to <paramref name="document"/> with explicit control over case
        /// handling, storage and indexing. More configurable, and more verbose, than the other
        /// helper methods.
        /// </summary>
        /// <param name="document">The document that receives the field.</param>
        /// <param name="fieldName">The name of the field to add.</param>
        /// <param name="value">The value of the field; when null, nothing is added.</param>
        /// <param name="caseSensitive">When false, the value is lower-cased before it is added.</param>
        /// <param name="store">Store option; <c>Field.Store.NO</c> is used when this is null.</param>
        /// <param name="index">Index option; <c>Field.Index.ANALYZED</c> is used when this is null.</param>
        /// <returns>The input document, to allow chaining.</returns>
        public static Document AddField(this Document document, string fieldName, string value, bool caseSensitive, Field.Store store, Field.Index index)
        {
            if (value == null)
            {
                return document;
            }

            // Fall back to the defaults for options that were not supplied.
            if (store == null)
            {
                store = Field.Store.NO;
            }

            if (index == null)
            {
                index = Field.Index.ANALYZED;
            }

            string fieldValue = caseSensitive ? value : value.ToLower();
            document.Add(new Field(fieldName, fieldValue, store, index));

            return document;
        }
Beispiel #24
0
        /// <summary>
        /// Creates a fresh index in <c>D:\Index</c> containing one document with a "text" field
        /// and an "addtime" field, optimizes it and prints a completion message.
        /// </summary>
        /// <remarks>
        /// Directory, analyzer and writer are all disposed when the method finishes, including
        /// when indexing fails.
        /// </remarks>
        public void MyTestMethod_index()
        {
            string strIndexDir = @"D:\Index";

            using (Lucene.Net.Store.Directory indexDir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(strIndexDir)))
            // Version parameter is used for backward compatibility. Stop words can also be passed to avoid indexing certain words
            using (Analyzer std = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            // Create an index writer; "true" creates a new index.
            using (IndexWriter idxw = new IndexWriter(indexDir, std, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

                // NOTE(review): "file" is not declared in this method (its declaration is commented
                // out below); presumably it is a member holding the text to index - confirm.
                //var file = System.IO.File.ReadAllText(
                //    @"d:\test.txt");
                Lucene.Net.Documents.Field fldText = new Lucene.Net.Documents.Field("text", file, Lucene.Net.Documents.Field.Store.YES,
                                                                                    Lucene.Net.Documents.Field.Index.
                                                                                    ANALYZED,
                                                                                    Lucene.Net.Documents.Field.
                                                                                    TermVector.YES);

                doc.Add(fldText);

                doc.Add(new Field("addtime", System.DateTime.Now.ToString(), Lucene.Net.Documents.Field.Store.YES,
                                  Field.Index.ANALYZED, Field.TermVector.YES));

                //write the document to the index
                idxw.AddDocument(doc);
                //optimize; the writer is closed by the using block
                idxw.Optimize();
            }
            Console.WriteLine("Indexing Done");
        }
 /// <summary>
 /// Index customization: when the item has a "criteria" field, adds an extra "xCriteria"
 /// field whose value is the criteria string with commas replaced by spaces.
 /// </summary>
 /// <param name="sender">The event source (not used).</param>
 /// <param name="e">Event data with the item's fields and the Lucene document being written.</param>
 private void InsertExternalFields(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e)
 {
     if (e.Fields.ContainsKey("criteria"))
     {
         var criteria = e.Fields["criteria"];
         var spaceSeparated = criteria.Replace(',', ' ');

         e.Document.Add(new Field("xCriteria", spaceSeparated, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO));
     }
 }
		/// <summary>
		/// Lazily produces the Lucene fields for every property of <paramref name="val"/>
		/// except the one named <c>Constants.DocumentIdFieldName</c>.
		/// </summary>
		/// <param name="val">The object whose property values are indexed.</param>
		/// <param name="properties">Descriptors of the properties to read from <paramref name="val"/>.</param>
		/// <param name="defaultStorage">Store option passed on to <c>CreateFields</c>.</param>
		/// <returns>The fields created for all remaining properties, in descriptor order.</returns>
		public IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, Field.Store defaultStorage)
		{
			return properties
				.Cast<PropertyDescriptor>()
				.Where(descriptor => descriptor.Name != Constants.DocumentIdFieldName)
				.SelectMany(descriptor => CreateFields(descriptor.Name, descriptor.GetValue(val), defaultStorage));
		}
 /// <summary>
 /// Adds one token-stream field per entry of <paramref name="sections"/> to
 /// <paramref name="doc"/>. Each section is tokenized with <c>analyzer</c> and wrapped in a
 /// <c>SectionTokenStream</c> that receives the section's zero-based position.
 /// </summary>
 /// <param name="doc">The document that receives the fields.</param>
 /// <param name="fieldName">Name shared by all created fields.</param>
 /// <param name="sections">The section texts, in order.</param>
 private void AddTextField(Document doc, string fieldName, string[] sections)
 {
     int sectionNumber = 0;
     foreach (string sectionText in sections)
     {
         var reader = new System.IO.StringReader(sectionText);
         var tokens = analyzer.TokenStream(fieldName, reader);

         doc.Add(new Field(fieldName, new SectionTokenStream(tokens, sectionNumber)));
         sectionNumber++;
     }
 }
        /// <summary>
        /// Adds a document with a single fixed "Path" field to <c>LuceneManager._writer</c>, so
        /// that the writer has something to write.
        /// </summary>
        private void EnsureWriterHasChanges()
        {
            var placeholder = new Lucene.Net.Documents.Document();
            placeholder.Add(new Lucene.Net.Documents.Field(
                "Path",
                "/root/indexing_writinggapandgettingunprocessedactivitiesswithgap/fake",
                LucField.Store.YES,
                LucField.Index.NOT_ANALYZED,
                LucField.TermVector.NO));

            LuceneManager._writer.AddDocument(placeholder);
        }
        /// <summary>
        /// Adds a field to <paramref name="document"/> unless <paramref name="value"/> is null or empty.
        /// </summary>
        /// <param name="document">The document that receives the field.</param>
        /// <param name="name">Name of the field.</param>
        /// <param name="value">Value of the field; null or empty values are ignored.</param>
        /// <param name="store">Store option for the field.</param>
        /// <param name="index">Index option for the field.</param>
        /// <returns>The input document, to allow chaining.</returns>
        public static Document AddField(this Document document, string name, string value, Field.Store store, Field.Index index)
        {
            if (!String.IsNullOrEmpty(value))
            {
                var field = new Field(name, value, store, index);
                document.Add(field);
            }

            return document;
        }
Beispiel #30
0
        // Activity 9

        /// <summary>
        /// Indexes <paramref name="text"/> as a new document with a single analyzed, non-stored
        /// "text" field that keeps term vectors with positions and offsets.
        /// </summary>
        /// <param name="text">The text to index.</param>
        public void IndexText(string text)
        {
            var document = new Lucene.Net.Documents.Document();
            document.Add(new Lucene.Net.Documents.Field("text", text, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            writer.AddDocument(document);
        }
Beispiel #31
0
 /// <summary>
 /// Creates a field description from its name, value, type and Lucene options.
 /// </summary>
 /// <param name="name">Assigned to <c>Name</c>.</param>
 /// <param name="value">Assigned to <c>Value</c>.</param>
 /// <param name="type">Assigned to <c>Type</c>.</param>
 /// <param name="store">Assigned to <c>Store</c>.</param>
 /// <param name="index">Assigned to <c>Index</c>.</param>
 /// <param name="termVector">Assigned to <c>TermVector</c>.</param>
 public IndexFieldInfo(string name, string value, FieldInfoType type, Field.Store store, Field.Index index, Field.TermVector termVector)
 {
     // Identity and content of the field.
     this.Name = name;
     this.Value = value;
     this.Type = type;

     // Lucene options.
     this.Store = store;
     this.Index = index;
     this.TermVector = termVector;
 }
 /// <summary>
 /// Adds one un-analyzed, non-stored field named <paramref name="name"/> per entry of
 /// <paramref name="vals"/>; norms, term frequencies and positions are omitted.
 /// </summary>
 /// <param name="doc">The document that receives the fields.</param>
 /// <param name="name">Name shared by all created fields.</param>
 /// <param name="vals">The values to add, one field each.</param>
 private void AddMetaDataField(Document doc, string name, string[] vals)
 {
     for (int position = 0; position < vals.Length; position++)
     {
         var metaField = new Field(name, vals[position], Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)
         {
             OmitTermFreqAndPositions = true
         };
         doc.Add(metaField);
     }
 }
 /// <summary>
 /// Creates a document that contains the given fields in array order.
 /// </summary>
 /// <param name="fields">The fields to add.</param>
 /// <returns>A new document holding all of <paramref name="fields"/>.</returns>
 protected internal static Document Doc(Field[] fields)
 {
     Document result = new Document();
     foreach (Field field in fields)
     {
         result.Add(field);
     }

     return result;
 }
		/// <summary>
		/// Lazily produces the Lucene fields for every property of <paramref name="val"/>
		/// except the one named <c>Constants.DocumentIdFieldName</c>.
		/// </summary>
		/// <param name="val">The object whose property values are indexed.</param>
		/// <param name="properties">Descriptors of the properties to read from <paramref name="val"/>.</param>
		/// <param name="indexDefinition">Index definition passed on to <c>CreateFields</c>.</param>
		/// <param name="defaultStorage">Store option passed on to <c>CreateFields</c>.</param>
		/// <returns>The fields created for all remaining properties, in descriptor order.</returns>
		public static IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, IndexDefinition indexDefinition, Field.Store defaultStorage)
		{
			return properties
				.Cast<PropertyDescriptor>()
				.Where(descriptor => descriptor.Name != Constants.DocumentIdFieldName)
				.SelectMany(descriptor => CreateFields(descriptor.Name, descriptor.GetValue(val), indexDefinition, defaultStorage));
		}
		/// <summary>
		/// Lazily produces the Lucene fields for every property of <paramref name="document"/>
		/// except the one named <c>Constants.DocumentIdFieldName</c>.
		/// </summary>
		/// <param name="document">The JSON object whose properties are indexed.</param>
		/// <param name="defaultStorage">Store option passed on to <c>CreateFields</c>.</param>
		/// <returns>The fields created for all remaining properties, in property order.</returns>
		public IEnumerable<AbstractField> Index(RavenJObject document, Field.Store defaultStorage)
		{
			return document
				.Where(entry => entry.Key != Constants.DocumentIdFieldName)
				.SelectMany(entry => CreateFields(entry.Key, GetPropertyValue(entry.Value), defaultStorage));
		}
 /// <summary>
 /// Lazily produces one Lucene field per property of <paramref name="val"/>, skipping the
 /// "__document_id" property and every property whose value is null.
 /// </summary>
 /// <param name="val">The object whose property values are indexed.</param>
 /// <param name="properties">Descriptors of the properties to read from <paramref name="val"/>.</param>
 /// <param name="indexDefinition">Index definition passed on to <c>Createfield</c>.</param>
 /// <param name="defaultStorage">Store option passed on to <c>Createfield</c>.</param>
 /// <returns>The created fields, in descriptor order.</returns>
 public static IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, IndexDefinition indexDefinition, Field.Store defaultStorage)
 {
     return properties
         .Cast<PropertyDescriptor>()
         .Where(descriptor => descriptor.Name != "__document_id")
         .Select(descriptor => new { descriptor.Name, Value = descriptor.GetValue(val) })
         .Where(entry => entry.Value != null)
         .Select(entry => Createfield(entry.Name, entry.Value, indexDefinition, defaultStorage));
 }
        /// <summary>
        /// Lazily produces the Lucene fields for every property of <paramref name="document"/>
        /// except the one named <c>Constants.DocumentIdFieldName</c>.
        /// </summary>
        /// <param name="document">The JSON object whose properties are indexed.</param>
        /// <param name="indexDefinition">Index definition passed on to <c>CreateFields</c>.</param>
        /// <param name="defaultStorage">Store option passed on to <c>CreateFields</c>.</param>
        /// <returns>The fields created for all remaining properties, in property order.</returns>
        public static IEnumerable<AbstractField> Index(JObject document, IndexDefinition indexDefinition, Field.Store defaultStorage)
        {
            return document
                .Cast<JProperty>()
                .Where(jsonProperty => jsonProperty.Name != Constants.DocumentIdFieldName)
                .SelectMany(jsonProperty => CreateFields(jsonProperty.Name, GetPropertyValue(jsonProperty), indexDefinition, defaultStorage));
        }
 /// <summary>
 /// Lazily produces one index field per JSON property of <paramref name="document"/>, skipping the
 /// "__document_id" property and any property for which <c>GetPropertyValue</c> returns null.
 /// </summary>
 /// <param name="document">JSON object whose properties are indexed.</param>
 /// <param name="indexDefinition">Passed through to <c>Createfield</c>.</param>
 /// <param name="defaultStorage">Passed through to <c>Createfield</c>.</param>
 /// <returns>The field created by <c>Createfield</c> for each remaining property.</returns>
 public static IEnumerable<AbstractField> Index(JObject document, IndexDefinition indexDefinition, Field.Store defaultStorage)
 {
     return document
         .Cast<JProperty>()
         .Where(jsonProperty => jsonProperty.Name != "__document_id")
         // Resolve each value exactly once and carry it alongside the property name.
         .Select(jsonProperty => new { jsonProperty.Name, Value = GetPropertyValue(jsonProperty) })
         .Where(pair => pair.Value != null)
         .Select(pair => Createfield(pair.Name, pair.Value, indexDefinition, defaultStorage));
 }
Beispiel #39
0
		/// <summary>
		/// Initializes the field name and the stored / compressed / indexed / tokenized / norms flags
		/// from the supplied store and index options, then applies the term vector option.
		/// </summary>
		/// <param name="name">Field name; it is interned before being kept.</param>
		/// <param name="store">One of Field.Store.YES, Field.Store.COMPRESS or Field.Store.NO.</param>
		/// <param name="index">One of Field.Index.NO, TOKENIZED, UN_TOKENIZED or NO_NORMS.</param>
		/// <param name="termVector">Handed to <c>SetStoreTermVector</c>.</param>
		/// <exception cref="System.NullReferenceException"><paramref name="name"/> is null.</exception>
		/// <exception cref="System.ArgumentException"><paramref name="store"/> or <paramref name="index"/> is not one of the recognised values.</exception>
		protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
		{
			if (name == null)
				throw new System.NullReferenceException("name cannot be null");
			this.name = String.Intern(name); // field names are interned

			// YES and COMPRESS both store the value; only COMPRESS marks it as compressed.
			bool stored = store == Field.Store.YES || store == Field.Store.COMPRESS;
			if (!stored && store != Field.Store.NO)
			{
				throw new System.ArgumentException("unknown store parameter " + store);
			}
			this.isStored = stored;
			this.isCompressed = store == Field.Store.COMPRESS;

			// Every option except NO indexes the value; only TOKENIZED runs it through tokenization.
			bool tokenized = index == Field.Index.TOKENIZED;
			bool withoutNorms = index == Field.Index.NO_NORMS;
			bool indexed = tokenized || withoutNorms || index == Field.Index.UN_TOKENIZED;
			if (!indexed && index != Field.Index.NO)
			{
				throw new System.ArgumentException("unknown index parameter " + index);
			}
			this.isIndexed = indexed;
			this.isTokenized = tokenized;
			if (withoutNorms)
			{
				// omitNorms is only ever switched on here; the other options leave it untouched.
				this.omitNorms = true;
			}

			this.isBinary = false;

			SetStoreTermVector(termVector);
		}
Beispiel #40
0
        /// <summary>
        /// Indexes a single stored, tokenized field holding <c>TEST_STRING</c> through
        /// <c>TestAnalyzer</c> into an in-memory index.
        /// </summary>
        public void Test_Index_ReusableStringReader()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true);
            try
            {
                Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
                Lucene.Net.Documents.Field    f1  = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED);
                doc.Add(f1);
                wr.AddDocument(doc);
            }
            finally
            {
                // Close the writer even when indexing throws, so it is never left open.
                wr.Close();
            }
        }
Beispiel #41
0
        /// <summary>
        /// Indexes a single stored, analyzed field holding <c>TEST_STRING</c> through
        /// <c>TestAnalyzer</c> into an in-memory index.
        /// </summary>
        public void Test_Index_ReusableStringReader()
        {
            // The using block disposes the writer even when indexing throws.
            using (Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
                Lucene.Net.Documents.Field    f1  = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
                doc.Add(f1);
                wr.AddDocument(doc);
            }
        }
Beispiel #42
0
        /// <summary>
        /// Adds one document to <paramref name="writer"/> whose two fields both have
        /// <c>OmitTermFreqAndPositions</c> switched on.
        /// </summary>
        /// <param name="writer">Writer that receives the document.</param>
        private void  AddNoProxDoc(IndexWriter writer)
        {
            Document noProxDoc = new Document();

            // Stored and analyzed field.
            noProxDoc.Add(new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED)
            {
                OmitTermFreqAndPositions = true
            });

            // Stored but not indexed field.
            noProxDoc.Add(new Field("content4", "aaa", Field.Store.YES, Field.Index.NO)
            {
                OmitTermFreqAndPositions = true
            });

            writer.AddDocument(noProxDoc);
        }
 // Indexing...
 /// <summary>
 /// Adds one Lucene document per collection entry to <c>writer</c>, with the fields
 /// DocID, Title, Author, Bibliographic and Words.
 /// </summary>
 /// <param name="collections">Entries to index.</param>
 public void IndexText(List <Collection> collections)
 {
     // Every field is stored, analyzed and keeps term vectors with positions and offsets.
     Field.Store stored = Field.Store.YES;
     Field.Index analyzed = Field.Index.ANALYZED;
     Field.TermVector vectors = Field.TermVector.WITH_POSITIONS_OFFSETS;

     foreach (Collection entry in collections)
     {
         Lucene.Net.Documents.Document luceneDoc = new Lucene.Net.Documents.Document();
         luceneDoc.Add(new Lucene.Net.Documents.Field("DocID", entry.DocID, stored, analyzed, vectors));
         luceneDoc.Add(new Lucene.Net.Documents.Field("Title", entry.Title, stored, analyzed, vectors));
         luceneDoc.Add(new Lucene.Net.Documents.Field("Author", entry.Author, stored, analyzed, vectors));
         luceneDoc.Add(new Lucene.Net.Documents.Field("Bibliographic", entry.Bibliographic, stored, analyzed, vectors));
         luceneDoc.Add(new Lucene.Net.Documents.Field("Words", entry.Words, stored, analyzed, vectors));
         writer.AddDocument(luceneDoc);
     }
 }
Beispiel #44
0
        /// <summary>
        /// Recreates the index in <c>LUCENENET_100_Dir</c> with two documents, each holding an analyzed
        /// "field1" text and a non-analyzed "field2" value.
        /// </summary>
        void LUCENENET_100_CreateIndex()
        {
            // The using block disposes the writer even when adding a document throws.
            using (Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                Lucene.Net.Documents.Field    f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
                Lucene.Net.Documents.Field    f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED);
                Lucene.Net.Documents.Document d  = new Lucene.Net.Documents.Document();
                d.Add(f1);
                d.Add(f2);
                w.AddDocument(d);

                f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
                f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED);
                d  = new Lucene.Net.Documents.Document();
                d.Add(f1);
                d.Add(f2);
                w.AddDocument(d);
            }
        }
Beispiel #45
0
        /// <summary>
        /// Recreates the index in <c>LUCENENET_100_Dir</c> with two documents, each holding a tokenized
        /// "field1" text and an untokenized "field2" value.
        /// </summary>
        void LUCENENET_100_CreateIndex()
        {
            Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), true);
            try
            {
                Lucene.Net.Documents.Field    f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED);
                Lucene.Net.Documents.Field    f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
                Lucene.Net.Documents.Document d  = new Lucene.Net.Documents.Document();
                d.Add(f1);
                d.Add(f2);
                w.AddDocument(d);

                f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED);
                f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
                d  = new Lucene.Net.Documents.Document();
                d.Add(f1);
                d.Add(f2);
                w.AddDocument(d);
            }
            finally
            {
                // Close the writer even when adding a document throws, so it is never left open.
                w.Close();
            }
        }
Beispiel #46
0
        /// <summary>
        /// Creates the Lucene field for <paramref name="value"/>, choosing the field class and index mode
        /// from the declared type name of <paramref name="field"/> and its search settings.
        /// </summary>
        /// <param name="field">Supplies the alias, the declared type name and the store / index / norms / analyzer settings.</param>
        /// <param name="value">Value to index; it is cast or converted according to the declared type name.</param>
        /// <returns>A numeric field for the integer and floating point type names, otherwise a regular Lucene field.</returns>
        public static AbstractField CreateField(FieldSearchType field, object value)
        {
            var searchType = field.SearchType;
            var storage    = searchType.Store ? LuceneField.Store.YES : LuceneField.Store.NO;

            switch (field.Field.Type)
            {
            // Small integer types are widened to Int32 through Convert.
            case "byte":
            case "short":
            case "int16":
                return new NumericField(field.Alias, storage, searchType.Index).SetIntValue(Convert.ToInt32(value));

            case "int":
            case "int32":
                return new NumericField(field.Alias, storage, searchType.Index).SetIntValue((int)value);

            case "int64":
            case "long":
                return new NumericField(field.Alias, storage, searchType.Index).SetLongValue((long)value);

            case "double":
                return new NumericField(field.Alias, storage, searchType.Index).SetDoubleValue((double)value);

            case "decimal":
                // Decimals are indexed as text formatted with the field's DecimalFormat.
                return new LuceneField(
                    field.Alias,
                    ((decimal)value).ToString(field.Field.DecimalFormat),
                    storage,
                    searchType.Index ? LuceneField.Index.NOT_ANALYZED_NO_NORMS : LuceneField.Index.NO,
                    LuceneField.TermVector.NO);

            case "float":
            case "single":
                return new NumericField(field.Alias, storage, searchType.Index).SetFloatValue((float)value);

            case "bool":
            case "boolean":
                return new LuceneField(field.Alias, (bool)value ? "1" : "0", storage, LuceneField.Index.NOT_ANALYZED_NO_NORMS);

            case "datetime":
                return new LuceneField(
                    field.Alias,
                    DateTools.DateToString((DateTime)value, DateTools.Resolution.MILLISECOND),
                    storage,
                    searchType.Index ? LuceneField.Index.NOT_ANALYZED_NO_NORMS : LuceneField.Index.NO,
                    LuceneField.TermVector.NO);

            case "rowversion":
            case "byte[]":
                // Indexed byte arrays become a hex string; otherwise the raw bytes are kept as a binary field.
                return searchType.Index
                    ? new LuceneField(field.Alias, Utility.BytesToHexString((byte[])value), storage, LuceneField.Index.NOT_ANALYZED_NO_NORMS)
                    : new LuceneField(field.Alias, (byte[])value, storage);

            case "string":
            {
                LuceneField.Index stringIndex;
                if (searchType.Index)
                {
                    if (searchType.Analyzer.Equals("keyword"))
                    {
                        stringIndex = searchType.Norms ? LuceneField.Index.NOT_ANALYZED : LuceneField.Index.NOT_ANALYZED_NO_NORMS;
                    }
                    else
                    {
                        stringIndex = searchType.Norms ? LuceneField.Index.ANALYZED : LuceneField.Index.ANALYZED_NO_NORMS;
                    }
                }
                else
                {
                    stringIndex = LuceneField.Index.NO;
                }

                return new LuceneField(field.Alias, value.ToString(), storage, stringIndex);
            }

            default:
            {
                // Unknown type names are indexed as their un-analyzed string representation.
                LuceneField.Index fallbackIndex;
                if (searchType.Index)
                {
                    fallbackIndex = searchType.Norms ? LuceneField.Index.NOT_ANALYZED : LuceneField.Index.NOT_ANALYZED_NO_NORMS;
                }
                else
                {
                    fallbackIndex = LuceneField.Index.NO;
                }

                return new LuceneField(field.Alias, value.ToString(), storage, fallbackIndex);
            }
            }
        }
Beispiel #47
0
        /// <summary>
        /// Creates the shared test fields, sorts them into the lookup collections according to their
        /// indexed / stored / term vector / norms / term frequency / lazy flags, and records the
        /// expected value for every field name.
        /// </summary>
        static DocHelper()
        {
            textField1     = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textField2     = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            textField3     = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED) { OmitNorms = true };
            keyField       = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
            noNormsField   = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            noTFField      = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED) { OmitTermFreqAndPositions = true };
            unIndField     = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
            unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
            unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            lazyField      = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textUtfField1  = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textUtfField2  = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

            // Binary lazy field.
            // NOTE(review): the IOException handler looks like a leftover of a port - confirm before removing.
            try
            {
                LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
            }
            catch (System.IO.IOException)
            {
            }
            lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);

            // Large lazy field: the same sentence repeated 10000 times.
            System.Text.StringBuilder largeText = new System.Text.StringBuilder();
            for (int repeat = 0; repeat < 10000; repeat++)
            {
                largeText.Append("Lazily loading lengths of language in lieu of laughing ");
            }
            LARGE_LAZY_FIELD_TEXT = largeText.ToString();
            largeLazyField        = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);

            // The two lazy fields occupy the last two slots of the array.
            fields = new Field[] { textField1, textField2, textField3, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField };

            // Register every field in each collection that matches its flags.
            foreach (IFieldable f in fields)
            {
                Add(all, f);
                Add(f.IsIndexed ? indexed : unindexed, f);
                if (f.IsTermVectorStored)
                {
                    Add(termvector, f);
                }
                if (f.IsIndexed && !f.IsTermVectorStored)
                {
                    Add(notermvector, f);
                }
                Add(f.IsStored ? stored : unstored, f);
                if (f.OmitNorms)
                {
                    Add(noNorms, f);
                }
                if (f.OmitTermFreqAndPositions)
                {
                    Add(noTf, f);
                }
                if (f.IsLazy)
                {
                    Add(lazy, f);
                }
            }

            // Expected value for every field name.
            nameValues = new System.Collections.Hashtable();
            nameValues[TEXT_FIELD_1_KEY]      = FIELD_1_TEXT;
            nameValues[TEXT_FIELD_2_KEY]      = FIELD_2_TEXT;
            nameValues[TEXT_FIELD_3_KEY]      = FIELD_3_TEXT;
            nameValues[KEYWORD_FIELD_KEY]     = KEYWORD_TEXT;
            nameValues[NO_NORMS_KEY]          = NO_NORMS_TEXT;
            nameValues[NO_TF_KEY]             = NO_TF_TEXT;
            nameValues[UNINDEXED_FIELD_KEY]   = UNINDEXED_FIELD_TEXT;
            nameValues[UNSTORED_FIELD_1_KEY]  = UNSTORED_1_FIELD_TEXT;
            nameValues[UNSTORED_FIELD_2_KEY]  = UNSTORED_2_FIELD_TEXT;
            nameValues[LAZY_FIELD_KEY]        = LAZY_FIELD_TEXT;
            nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
            nameValues[LARGE_LAZY_FIELD_KEY]  = LARGE_LAZY_FIELD_TEXT;
            nameValues[TEXT_FIELD_UTF1_KEY]   = FIELD_UTF1_TEXT;
            nameValues[TEXT_FIELD_UTF2_KEY]   = FIELD_UTF2_TEXT;
        }
        /// <summary>
        /// Builds the Lucene document for a ticket from its id, title, details, tags and the text of all
        /// its comments, applying a per-field boost and boosting the whole document when the ticket's
        /// current status is not "Closed".
        /// </summary>
        /// <param name="ticket">Ticket to convert.</param>
        /// <returns>The populated document.</returns>
        private Document CreateIndexDocuementForTicket(Ticket ticket)
        {
            var doc = new Document();

            // Join every comment text, one per line, into a single value.
            var commentTexts = ticket.TicketComments.Select(comment => comment.Comment);
            StringBuilder commentBuilder = new StringBuilder();
            foreach (var text in commentTexts)
            {
                commentBuilder.AppendLine(text);
            }
            var commentText = commentBuilder.ToString();

            // Id: stored only, so it can be read back from a hit but is not searchable.
            var idField = new Lucene.Net.Documents.Field(
                "ticketid",
                ticket.TicketId.ToString(),
                Lucene.Net.Documents.Field.Store.YES,
                Lucene.Net.Documents.Field.Index.NO,
                Lucene.Net.Documents.Field.TermVector.NO);
            doc.Add(idField);

            var titleField = new Lucene.Net.Documents.Field(
                "title",
                ticket.Title ?? string.Empty,
                Lucene.Net.Documents.Field.Store.YES,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            titleField.SetBoost(1.5F);
            doc.Add(titleField);

            var detailsField = new Lucene.Net.Documents.Field(
                "details",
                ticket.Details ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            detailsField.SetBoost(1F);
            doc.Add(detailsField);

            var tagsField = new Lucene.Net.Documents.Field(
                "tags",
                ticket.TagList ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.NO);
            tagsField.SetBoost(2F);
            doc.Add(tagsField);

            var commentsField = new Lucene.Net.Documents.Field(
                "comments",
                commentText ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            commentsField.SetBoost(.8F);
            doc.Add(commentsField);

            // Tickets that are not closed get a document-level boost.
            if (ticket.CurrentStatus != "Closed")
            {
                doc.SetBoost(10F);
            }
            return(doc);
        }
Beispiel #49
0
        /// <summary>
        /// Creates the shared test fields, sorts them into the lookup collections according to their
        /// indexed / stored / term vector / norms / term frequency / lazy flags, and records the
        /// expected value for every field name.
        /// </summary>
        static DocHelper()
        {
            TextField1     = new Field(TextField1Key, Field1Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            TextField2     = new Field(TextField2Key, Field2Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            TextField3     = new Field(TextField3Key, Field3Text, Field.Store.YES, Field.Index.ANALYZED) { OmitNorms = true };
            KeyField       = new Field(KeywordFieldKey, KeywordText, Field.Store.YES, Field.Index.NOT_ANALYZED);
            NoNormsField   = new Field(NoNormsKey, NoNormsText, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            NoTfField      = new Field(NoTfKey, NoTfText, Field.Store.YES, Field.Index.ANALYZED) { OmitTermFreqAndPositions = true };
            UnIndField     = new Field(UnindexedFieldKey, UnindexedFieldText, Field.Store.YES, Field.Index.NO);
            UnStoredField1 = new Field(UnstoredField1Key, Unstored1FieldText, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
            UnStoredField2 = new Field(UnstoredField2Key, Unstored2FieldText, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            LazyField      = new Field(LazyFieldKey, LazyFieldText, Field.Store.YES, Field.Index.ANALYZED);
            TextUtfField1  = new Field(TextFieldUtf1Key, FieldUtf1Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            TextUtfField2  = new Field(TextFieldUtf2Key, FieldUtf2Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

            // Binary lazy field.
            // NOTE(review): the IOException handler looks like a leftover of a port - confirm before removing.
            try
            {
                LazyFieldBinaryBytes = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
            }
            catch (System.IO.IOException)
            {
            }
            LazyFieldBinary = new Field(LazyFieldBinaryKey, LazyFieldBinaryBytes, Field.Store.YES);

            // Large lazy field: the same sentence repeated 10000 times.
            System.Text.StringBuilder largeText = new System.Text.StringBuilder();
            for (int repeat = 0; repeat < 10000; repeat++)
            {
                largeText.Append("Lazily loading lengths of language in lieu of laughing ");
            }
            LargeLazyFieldText = largeText.ToString();
            LargeLazyField     = new Field(LargeLazyFieldKey, LargeLazyFieldText, Field.Store.YES, Field.Index.ANALYZED);

            // The two lazy fields occupy the last two slots of the array.
            Fields = new Field[] { TextField1, TextField2, TextField3, KeyField, NoNormsField, NoTfField, UnIndField, UnStoredField1, UnStoredField2, TextUtfField1, TextUtfField2, LazyField, LazyFieldBinary, LargeLazyField };

            // Register every field in each collection that matches its flags.
            foreach (IFieldable f in Fields)
            {
                Add(All, f);
                Add(f.IsIndexed ? Indexed : Unindexed, f);
                if (f.IsTermVectorStored)
                {
                    Add(Termvector, f);
                }
                if (f.IsIndexed && !f.IsTermVectorStored)
                {
                    Add(Notermvector, f);
                }
                Add(f.IsStored ? Stored : Unstored, f);
                if (f.OmitNorms)
                {
                    Add(NoNorms, f);
                }
                if (f.OmitTermFreqAndPositions)
                {
                    Add(NoTf, f);
                }
                if (f.IsLazy)
                {
                    Add(Lazy, f);
                }
            }

            // Expected value for every field name.
            NameValues = new System.Collections.Hashtable();
            NameValues[TextField1Key]      = Field1Text;
            NameValues[TextField2Key]      = Field2Text;
            NameValues[TextField3Key]      = Field3Text;
            NameValues[KeywordFieldKey]    = KeywordText;
            NameValues[NoNormsKey]         = NoNormsText;
            NameValues[NoTfKey]            = NoTfText;
            NameValues[UnindexedFieldKey]  = UnindexedFieldText;
            NameValues[UnstoredField1Key]  = Unstored1FieldText;
            NameValues[UnstoredField2Key]  = Unstored2FieldText;
            NameValues[LazyFieldKey]       = LazyFieldText;
            NameValues[LazyFieldBinaryKey] = LazyFieldBinaryBytes;
            NameValues[LargeLazyFieldKey]  = LargeLazyFieldText;
            NameValues[TextFieldUtf1Key]   = FieldUtf1Text;
            NameValues[TextFieldUtf2Key]   = FieldUtf2Text;
        }
Beispiel #50
0
        /// <summary>
        /// Converts a WCF document DTO into a Lucene document, mapping each field's index, store and
        /// term vector settings onto the corresponding Lucene options.
        /// </summary>
        /// <param name="document">DTO whose fields are converted in order.</param>
        /// <returns>A new Lucene document containing one Lucene field per DTO field.</returns>
        /// <exception cref="ArgumentOutOfRangeException">A field carries an unknown index, store or term vector type.</exception>
        /// <exception cref="NotImplementedException">A field is neither a string, a date nor a numeric DTO.</exception>
        public static Lucene.Net.Documents.Document ConvertToLuceneDocument(DocumentDto document)
        {
            var luceneDocument = new Lucene.Net.Documents.Document();

            foreach (var field in document.Fields)
            {
                Lucene.Net.Documents.Field.Index indexType;
                switch (field.Index)
                {
                case FieldIndexType.NotIndexed:
                    indexType = Field.Index.NO;
                    break;

                case FieldIndexType.Analyzed:
                    indexType = Field.Index.ANALYZED;
                    break;

                case FieldIndexType.AnalyzedNoNorms:
                    indexType = Field.Index.ANALYZED_NO_NORMS;
                    break;

                case FieldIndexType.NotAnalyzed:
                    indexType = Field.Index.NOT_ANALYZED;
                    break;

                case FieldIndexType.NotAnalyzedNoNorms:
                    indexType = Field.Index.NOT_ANALYZED_NO_NORMS;
                    break;

                default:
                    // Use the (paramName, actualValue, message) overload: the single-string
                    // overload would treat the message as the parameter name.
                    throw new ArgumentOutOfRangeException("document", field.Index, "Unknown or invalid field index type: " + field.Index);
                }

                Lucene.Net.Documents.Field.Store storeType;
                switch (field.Store)
                {
                case FieldStorageType.Stored:
                    storeType = Field.Store.YES;
                    break;

                case FieldStorageType.NotStored:
                    storeType = Field.Store.NO;
                    break;

                default:
                    throw new ArgumentOutOfRangeException("document", field.Store, "Unknown or invalid field store type: " + field.Store);
                }

                Lucene.Net.Documents.Field.TermVector termVectorType;
                switch (field.TermVector)
                {
                case FieldTermVectorType.Yes:
                    termVectorType = Field.TermVector.YES;
                    break;

                case FieldTermVectorType.WithOffsets:
                    termVectorType = Field.TermVector.WITH_OFFSETS;
                    break;

                case FieldTermVectorType.WithPositions:
                    termVectorType = Field.TermVector.WITH_POSITIONS;
                    break;

                case FieldTermVectorType.WithPositionsOffsets:
                    termVectorType = Field.TermVector.WITH_POSITIONS_OFFSETS;
                    break;

                case FieldTermVectorType.No:
                    termVectorType = Field.TermVector.NO;
                    break;

                default:
                    throw new ArgumentOutOfRangeException("document", field.TermVector, "Unknown or invalid field term vector type: " + field.TermVector);
                }

                // Resolve the concrete DTO type once instead of testing with "is" and casting again.
                var stringField  = field as StringFieldDto;
                var dateField    = field as DateFieldDto;
                var numericField = field as NumericFieldDto;

                IFieldable luceneField;

                if (stringField != null)
                {
                    luceneField = new Lucene.Net.Documents.Field(stringField.Name, true, stringField.Value,
                                                                 storeType, indexType, termVectorType);
                }
                else if (dateField != null)
                {
                    // Dates are indexed as a non-analyzed string with millisecond resolution,
                    // regardless of the index type requested on the DTO.
                    var dateString = DateTools.DateToString(dateField.Value, DateTools.Resolution.MILLISECOND);
                    luceneField = new Field(dateField.Name, dateString, storeType, Field.Index.NOT_ANALYZED, termVectorType);
                }
                else if (numericField != null)
                {
                    // NOTE(review): no numeric value is assigned to the created field here - confirm
                    // whether NumericFieldDto carries a value that should be applied to it.
                    luceneField = new Lucene.Net.Documents.NumericField(numericField.Name, numericField.PrecisionStep,
                                                                        storeType,
                                                                        field.Index != FieldIndexType.NotIndexed);
                }
                else
                {
                    throw new NotImplementedException();
                }

                if (field.Boost.HasValue)
                {
                    luceneField.Boost = field.Boost.Value;
                }

                luceneDocument.Add(luceneField);
            }

            return(luceneDocument);
        }
Beispiel #51
0
        /// <summary>
        /// Opens the index in <paramref name="dirName"/> read-only, checks the stored fields of the
        /// first 35 documents and verifies the hits of several term queries.
        /// </summary>
        /// <param name="dirName">Index directory name; resolved through <c>FullDir</c>.</param>
        /// <param name="oldName">Name of the old index; its prefix selects which assertions apply.</param>
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            try
            {
                IndexSearcher searcher = new IndexSearcher(dir, true);
                try
                {
                    IndexReader reader = searcher.IndexReader;

                    _TestUtil.CheckIndex(dir);

                    // Names starting with "19." to "22." mark indices older than 2.3, which skip the
                    // field checks below. Computed once with an ordinal (culture-independent) comparison.
                    bool isPre23Index = oldName.StartsWith("19.", System.StringComparison.Ordinal)
                                        || oldName.StartsWith("20.", System.StringComparison.Ordinal)
                                        || oldName.StartsWith("21.", System.StringComparison.Ordinal)
                                        || oldName.StartsWith("22.", System.StringComparison.Ordinal);

                    for (int i = 0; i < 35; i++)
                    {
                        if (reader.IsDeleted(i))
                        {
                            // Only ID 7 is deleted
                            Assert.AreEqual(7, i);
                            continue;
                        }

                        Document d      = reader.Document(i);
                        var      fields = d.GetFields();
                        if (isPre23Index || d.GetField("content3") != null)
                        {
                            continue;
                        }

                        int numFields = oldName.StartsWith("29.", System.StringComparison.Ordinal) ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        Field f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = (Field)d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = (Field)d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = (Field)d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = (Field)d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

                    // First document should be #21 since its norm was
                    // increased:
                    Document d2 = searcher.Doc(hits[0].Doc);

                    Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

                    TestHits(hits, 34, searcher.IndexReader);

                    if (!isPre23Index)
                    {
                        // Test on indices >= 2.3
                        hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                        Assert.AreEqual(34, hits.Length);
                        hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                        Assert.AreEqual(34, hits.Length);
                        hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                        Assert.AreEqual(34, hits.Length);
                    }
                }
                finally
                {
                    // Close the searcher even when an assertion fails.
                    searcher.Close();
                }
            }
            finally
            {
                // Close the directory even when opening the searcher or an assertion fails.
                dir.Close();
            }
        }
        /// <summary>
        /// Indexes every file found directly inside the given directory.
        /// </summary>
        /// <remarks>
        /// Each file is read in full. The title is taken from between the first <c>.T</c> and
        /// <c>.A</c> markers, the author from between the first <c>.A</c> and <c>.B</c> markers,
        /// and a title-sized span is removed from the start of the <c>.W</c> section before the
        /// whole text is indexed. Files that lack the <c>.T</c>, <c>.A</c> or <c>.W</c> marker
        /// lines (LF line endings), or whose markers are ordered so that the span to remove would
        /// be invalid, are reported on the console and skipped instead of aborting the run.
        /// </remarks>
        /// <param name="path">Directory whose files are indexed.</param>
        public void IndexText(string path)
        {
            System.IO.DirectoryInfo root  = new System.IO.DirectoryInfo(path); // Directory to scan
            System.IO.FileInfo[]    files = null;                              // Stays null when the listing fails

            // Get all files in the directory; listing failures are reported and nothing is indexed.
            try
            {
                files = root.GetFiles("*.*");
            }
            catch (UnauthorizedAccessException e)
            {
                System.Console.WriteLine(e.Message);
            }
            catch (System.IO.DirectoryNotFoundException e)
            {
                Console.WriteLine(e.Message);
            }

            if (files == null)
            {
                return;
            }

            foreach (System.IO.FileInfo fil in files)
            {
                string name = fil.FullName; // Full path of the file

                // Read the whole file; the using block releases the handle even if parsing below fails.
                string text;
                using (StreamReader reader = new StreamReader(name))
                {
                    text = reader.ReadToEnd();
                }

                // Marker lookups are ordinal: they are fixed ASCII tokens, not linguistic text.
                int indexT = text.IndexOf(".T", StringComparison.Ordinal); // Title marker
                int indexA = text.IndexOf(".A", StringComparison.Ordinal); // Author marker
                int indexB = text.IndexOf(".B", StringComparison.Ordinal); // Bibliography marker

                int titleLine  = text.IndexOf(".T\n", StringComparison.Ordinal); // Title marker line
                int authorLine = text.IndexOf(".A\n", StringComparison.Ordinal); // Author marker line
                int startWords = text.IndexOf(".W\n", StringComparison.Ordinal); // Words marker line

                // Span that is cut out of the Words section: from the newline after ".T"
                // up to (not including) the character just before ".A".
                int startTitle    = titleLine + 2;
                int startAbstract = authorLine - 1;
                int lengthOfTitle = startAbstract - startTitle;

                // titleLine/authorLine >= 0 also guarantee indexT/indexA >= 0 and that the
                // Substring start offsets below stay inside the text.
                bool wellFormed = titleLine >= 0
                                  && authorLine >= 0
                                  && startWords >= 0
                                  && lengthOfTitle >= 0
                                  && startWords + 2 + lengthOfTitle <= text.Length;
                if (!wellFormed)
                {
                    Console.WriteLine("Skipping " + name + ": missing or misplaced .T/.A/.W markers");
                    continue;
                }

                // A non-positive length (e.g. no ".B" marker) yields an empty string.
                string title  = text.Substring(indexT + 3, Math.Max(0, indexA - 1 - (indexT + 3)));
                string author = text.Substring(indexA + 3, Math.Max(0, indexB - 1 - (indexA + 3)));

                // Remove the title from the Words section.
                text = text.Remove(startWords + 2, lengthOfTitle);

                // Indexing with the fields
                Lucene.Net.Documents.Document doc = new Document();
                doc.Add(new Lucene.Net.Documents.Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS));
                Lucene.Net.Documents.Field titleFieldWithBoost  = new Lucene.Net.Documents.Field(TEXT_FN_TITLE, title, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS);
                Lucene.Net.Documents.Field authorFieldWithBoost = new Lucene.Net.Documents.Field(TEXT_FN_AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS);

                // Field-level boosting: author is weighted 3, title 4 (the default is 1).
                // NOTE(review): both fields are created with ANALYZED_NO_NORMS; index-time boosts
                // are presumably carried by norms, so these values may have no effect - confirm
                // against the Lucene.Net version in use before relying on them.
                authorFieldWithBoost.Boost = 3;
                titleFieldWithBoost.Boost  = 4;
                doc.Add(titleFieldWithBoost);
                doc.Add(authorFieldWithBoost);

                writer.AddDocument(doc); // Add document
            }
        }
Beispiel #53
0
        /// <summary>
        /// Indexes every <c>*.txt</c> file found directly inside the given directory.
        /// </summary>
        /// <remarks>
        /// A file is indexed only when it contains the <c>.I </c>, <c>.T</c>, <c>.A</c>,
        /// <c>.B</c> and <c>.W</c> markers (the last four each followed by CRLF). Repeated
        /// markers are collapsed to their first occurrence, the title and author are extracted,
        /// and a title-sized span is removed from the start of the <c>.W</c> section before the
        /// text is indexed. Files that do not satisfy this format are added to <c>exFile</c>,
        /// and the resulting list is shown in a message box once all files were processed.
        /// </remarks>
        /// <param name="path">Directory whose <c>*.txt</c> files are indexed.</param>
        public void IndexText(string path)
        {
            System.IO.DirectoryInfo root  = new System.IO.DirectoryInfo(path); // Directory to scan
            System.IO.FileInfo[]    files = null;                              // Stays null when the listing fails

            // Get all text files in the directory; listing failures are reported and nothing is indexed.
            try
            {
                files = root.GetFiles("*.txt");
            }
            catch (UnauthorizedAccessException e)
            {
                System.Console.WriteLine(e.Message);
            }
            catch (System.IO.DirectoryNotFoundException e)
            {
                Console.WriteLine(e.Message);
            }

            if (files == null)
            {
                return;
            }

            foreach (System.IO.FileInfo fi in files)
            {
                string name = fi.Name; // File name without directory
                Console.WriteLine("Adding doc " + name + " to Index");

                // Read the whole file; the using block releases the handle on every path,
                // including excluded files and parsing failures.
                string text;
                using (StreamReader reader = new StreamReader(fi.FullName))
                {
                    text = reader.ReadToEnd();
                }

                // Count literal marker occurrences. The markers are escaped because an
                // unescaped '.' would match any character (e.g. "NASA\r\n" for ".A\r\n").
                int countI = Regex.Matches(text, Regex.Escape(".I ")).Count;
                int countA = Regex.Matches(text, Regex.Escape(".A\r\n")).Count;
                int countB = Regex.Matches(text, Regex.Escape(".B\r\n")).Count;
                int countT = Regex.Matches(text, Regex.Escape(".T\r\n")).Count;
                int countW = Regex.Matches(text, Regex.Escape(".W\r\n")).Count;

                Console.WriteLine("This Length is:  " + countI);
                Console.WriteLine("This Length is:  " + countA);
                Console.WriteLine("This Length is:  " + countB);
                Console.WriteLine("This Length is:  " + countT);
                Console.WriteLine("This Length is:  " + countW);

                bool wellFormed = countI > 0 && countA > 0 && countB > 0 && countT > 0 && countW > 0;
                if (wellFormed)
                {
                    // Keep only the first occurrence of every marker.
                    text = KeepFirstOccurrence(text, ".I ", " ");
                    text = KeepFirstOccurrence(text, ".T\r\n", "");
                    text = KeepFirstOccurrence(text, ".A\r\n", "");
                    text = KeepFirstOccurrence(text, ".B\r\n", "");
                    text = KeepFirstOccurrence(text, ".W\r\n", "");

                    // Locate the markers in the normalized text.
                    int indexT = text.IndexOf(".T\r\n", StringComparison.Ordinal); // Title
                    int indexA = text.IndexOf(".A\r\n", StringComparison.Ordinal); // Author
                    int indexB = text.IndexOf(".B\r\n", StringComparison.Ordinal); // Bibliography
                    int indexW = text.IndexOf(".W\r\n", StringComparison.Ordinal); // Words

                    // Span that is cut out of the Words section: from two characters past
                    // the start of ".T" up to (not including) the character just before ".A".
                    int startTitle    = indexT + 2;
                    int startAbstract = indexA - 1;
                    int lengthOfTitle = startAbstract - startTitle;

                    // Re-validate: the span must be removable, otherwise Remove would throw.
                    wellFormed = indexT >= 0
                                 && indexA >= 0
                                 && indexB >= 0
                                 && indexW >= 0
                                 && lengthOfTitle >= 0
                                 && indexW + 2 + lengthOfTitle <= text.Length;

                    if (wellFormed)
                    {
                        // A non-positive length (markers out of order) yields an empty string.
                        string title  = text.Substring(indexT + 3, Math.Max(0, indexA - 1 - (indexT + 3)));
                        string author = text.Substring(indexA + 3, Math.Max(0, indexB - 1 - (indexA + 3)));

                        // Remove the title from the Words section.
                        text = text.Remove(indexW + 2, lengthOfTitle);

                        // Indexing by using the fields
                        Lucene.Net.Documents.Document doc         = new Document();
                        Lucene.Net.Documents.Field    titleField  = new Lucene.Net.Documents.Field(TEXT_FN_TITLE, title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                        Lucene.Net.Documents.Field    authorField = new Lucene.Net.Documents.Field(TEXT_FN_AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                        doc.Add(new Lucene.Net.Documents.Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                        doc.Add(titleField);
                        doc.Add(authorField);
                        writer.AddDocument(doc); // Add document
                    }
                }

                if (!wellFormed)
                {
                    // Remember the file so it can be reported once the run is finished.
                    Console.WriteLine(name);
                    exFile.Add(name);
                }
            }

            var message = string.Join(Environment.NewLine, exFile);
            MessageBox.Show("The following files are excluded from the index because of the incorrect format:\n" + message);
        }

        /// <summary>
        /// Returns <paramref name="text"/> with every occurrence of <paramref name="marker"/>
        /// after the first one replaced by <paramref name="replacement"/>.
        /// </summary>
        /// <param name="text">Text to normalize.</param>
        /// <param name="marker">Literal marker to look for (ordinal comparison).</param>
        /// <param name="replacement">Text substituted for each repeated marker.</param>
        /// <returns>The normalized text, or the input unchanged when the marker is absent.</returns>
        private static string KeepFirstOccurrence(string text, string marker, string replacement)
        {
            int first = text.IndexOf(marker, StringComparison.Ordinal);
            if (first < 0)
            {
                return text;
            }

            // Everything up to and including the first marker is kept verbatim;
            // only the remainder is subject to replacement.
            int split = first + marker.Length;
            return text.Substring(0, split) + text.Substring(split).Replace(marker, replacement);
        }