public void Save(SearchDocument doc) { if (doc == null) throw new ArgumentNullException(nameof(doc)); if (doc.Fields == null || doc.Fields.Count == 0) return; var document = new Document(); foreach (var docField in doc.Fields) { if (string.IsNullOrWhiteSpace(docField.FieldName)) throw new ArgumentException("Field name cannot be empty", nameof(doc)); if (string.IsNullOrWhiteSpace(docField.Value)) continue; var field = new Field(docField.FieldName, docField.Value, Field.Store.YES, Field.Index.ANALYZED); document.Add(field); } var writer = new IndexWriter( _directory, new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30), !_created, IndexWriter.MaxFieldLength.UNLIMITED); using (writer) { writer.AddDocument(document); } _created = true; }
public static IndexWriter CreateIndex(Content[] contents) { var v = Lucene.Net.Util.Version.LUCENE_30; var l = Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED; var d = FSDirectory.Open(new DirectoryInfo(IndexPath)); IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(v), l); try { foreach (var item in contents) { Document doc = new Document(); Field id = new Field("id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED); Field title = new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED); Field username = new Field("username", item.User.UserName, Field.Store.YES, Field.Index.ANALYZED); doc.Add(id); doc.Add(title); doc.Add(username); writer.AddDocument(doc); } writer.Optimize(); } finally { writer.Dispose(); /* always release the write lock; don't swallow indexing failures in an empty catch */ } return writer; }
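// Hedged usage sketch (not from the original corpus): a minimal query against the index that CreateIndex
// writes to IndexPath above. It assumes the same "title" field and Lucene.Net 3.0 APIs, plus usings for
// Lucene.Net.Search, Lucene.Net.QueryParsers and System.Linq; the method name and result shape are illustrative only.
public static string[] SearchTitles(string queryText)
{
    var version = Lucene.Net.Util.Version.LUCENE_30;
    using (var dir = FSDirectory.Open(new DirectoryInfo(IndexPath)))
    using (var searcher = new IndexSearcher(dir, true)) // open the index read-only
    {
        var parser = new QueryParser(version, "title", new StandardAnalyzer(version));
        ScoreDoc[] hits = searcher.Search(parser.Parse(queryText), 10).ScoreDocs;
        return hits.Select(h => searcher.Doc(h.Doc).Get("title")).ToArray();
    }
}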
public void TestReadersWriters() { Directory dir; using(dir = new RAMDirectory()) { Document doc; IndexWriter writer; IndexReader reader; using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED)) { Field field = new Field("name", "value", Field.Store.YES,Field.Index.ANALYZED); doc = new Document(); doc.Add(field); writer.AddDocument(doc); writer.Commit(); using (reader = writer.GetReader()) { IndexReader r1 = reader.Reopen(); } Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here"); } Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here"); Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory"); } Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory"); }
void Index() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED); Lucene.Net.Documents.Document doc = null; Lucene.Net.Documents.Field f = null; doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); wr.Close(); }
/// <summary> /// Create a Field /// </summary> /// <param name="name"> /// The name. /// </param> /// <param name="value"> /// The value. /// </param> /// <param name="storageType"> /// The storage type. /// </param> /// <param name="indexType"> /// The index type. /// </param> /// <param name="vectorType"> /// The vector type. /// </param> /// <param name="boost"> /// The boost. /// </param> /// <returns> /// Abstract Field /// </returns> protected AbstractField CreateField(string name, string value, LuceneField.Store storageType, LuceneField.Index indexType, LuceneField.TermVector vectorType, float boost) { var field = new LuceneField(name, value, storageType, indexType, vectorType); field.SetBoost(boost); return(field); }
public Document ToDocument() { var document = new Document(); var hash = new Field(ExtensionHashField, ModelHelpers.GetMD5Hash(ToString()), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); hash.Boost = ExtensionHashFieldBoost; var name = new Field(ExtensionNameField, Name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); name.Boost = ExtensionNameFieldBoost; var fullName = new Field(ExtensionFullNameField, FullName, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); var ns = new Field(ExtensionNamespaceField, Namespace, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); var assemblyName = new Field(ExtensionAssemblyNameField, AssemblyName, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); var packageName = new Field(ExtensionPackageNameField, PackageName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); var packageVersion = new Field(ExtensionPackageVersionField, PackageVersion, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); var targetFrameworks = new Field(ExtensionTargetFrameworksField, GetTargetFrameworksString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO); document.Add(hash); document.Add(name); document.Add(fullName); document.Add(ns); document.Add(assemblyName); document.Add(packageName); document.Add(packageVersion); document.Add(targetFrameworks); return document; }
private void EnsureDocument() { if (document == null) { document = new Document(); var fields = DocumentBuilder.GetFields(TypeName); foreach (var f in fields) { AbstractField af = null; switch (f.FieldType) { case FieldType.Int: case FieldType.Float: case FieldType.Long: case FieldType.Double: af = new LN.Documents.NumericField(f.FieldName, (LN.Documents.Field.Store)((int)f.StoreMode), (int)f.IndexMode > 0); break; case FieldType.String: case FieldType.DateTime: default: af = new LN.Documents.Field(f.FieldName, string.Empty, (LN.Documents.Field.Store)((int)f.StoreMode), (LN.Documents.Field.Index)((int)f.IndexMode)); break; } af.Boost = f.Boost; document.Add(af); } } }
protected override Document BuildLuceneDocument(Document document) { Field.Store storeContent = Field.Store.YES; #if DEBUG storeContent = Field.Store.YES; #endif var fieldTitle = new Field("title", photo.Title, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldTitle); var fieldTitleSort = new Field("title_sort", photo.Title, storeContent, Field.Index.NOT_ANALYZED_NO_NORMS); document.Add(fieldTitleSort); var fieldAuthor = new Field("owner", photo.Owner ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldAuthor); var ownerId = new Field("owner_id", photo.OwnerId.ToString(), storeContent, Field.Index.NOT_ANALYZED_NO_NORMS); document.Add(ownerId); var fieldAuthorIndex = new Field("owner_index", photo.Owner ?? String.Empty, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldAuthorIndex); var fieldDescription = new Field("description", photo.Description ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldDescription); return document; }
private void AddDoc(IndexWriter iw, int i) { Document d = new Document(); IFieldable f; int scoreAndID = i + 1; f = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes f.OmitNorms = true; d.Add(f); f = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search f.OmitNorms = true; d.Add(f); f = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring f.OmitNorms = true; d.Add(f); f = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring f.OmitNorms = true; d.Add(f); iw.AddDocument(d); Log("added: " + d); }
public static Field.Index GetIndex(this IndexDefinition self, string name, Field.Index? defaultIndex) { if (self.Indexes == null) return defaultIndex ?? Field.Index.ANALYZED_NO_NORMS; FieldIndexing value; if (self.Indexes.TryGetValue(name, out value) == false) { if (self.Indexes.TryGetValue(Constants.AllFields, out value) == false) { string ignored; if (self.Analyzers.TryGetValue(name, out ignored) || self.Analyzers.TryGetValue(Constants.AllFields, out ignored)) { return Field.Index.ANALYZED; // if there is a custom analyzer, the value should be analyzed } return defaultIndex ?? Field.Index.ANALYZED_NO_NORMS; } } switch (value) { case FieldIndexing.No: return Field.Index.NO; case FieldIndexing.Analyzed: return Field.Index.ANALYZED_NO_NORMS; case FieldIndexing.NotAnalyzed: return Field.Index.NOT_ANALYZED_NO_NORMS; case FieldIndexing.Default: return defaultIndex ?? Field.Index.ANALYZED_NO_NORMS; default: throw new ArgumentOutOfRangeException(); } }
public virtual void TestLUCENE_1590() { Document doc = new Document(); // f1 has no norms doc.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS)); doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO)); // f2 has no TF Field f = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED); f.OmitTermFreqAndPositions = true; doc.Add(f); doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO)); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); writer.Optimize(); // be sure to have a single segment writer.Close(); _TestUtil.CheckIndex(dir); SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir); FieldInfos fi = reader.FieldInfos(); // f1 Assert.IsFalse(reader.HasNorms("f1"), "f1 should have no norms"); Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1"); // f2 Assert.IsTrue(reader.HasNorms("f2"), "f2 should have norms"); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2"); }
internal static void AddField(this Document doc, string propertyName, string propertyValue, Field.Store fieldStore, Field.Index fieldIndex) { if (string.IsNullOrWhiteSpace(propertyValue)) return; doc.Add(new Field(propertyName, propertyValue, fieldStore, fieldIndex)); }
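// Hedged usage sketch: how the AddField extension above might be called while composing a document.
// The "product" object and its Title/Sku properties are hypothetical stand-ins, not part of the original code.
var doc = new Document();
doc.AddField("title", product.Title, Field.Store.YES, Field.Index.ANALYZED);
doc.AddField("sku", product.Sku, Field.Store.YES, Field.Index.NOT_ANALYZED); // silently skipped if Sku is null or whitespace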
/// <summary> /// Create a Lucene document from SampleData /// </summary> /// <param name="obj">The SampleData entity to map</param> /// <returns>The populated Lucene document</returns> private static Document CreateDocument(SampleData obj) { // add lucene fields mapped to db fields var doc = new Document(); Field f = new Field("ArtNo", obj.ArtNo, Field.Store.NO, Field.Index.ANALYZED); f.SetBoost(2F); doc.Add(f); f = new Field("Id", obj.Id.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED); f.SetBoost(1); doc.Add(f); f = new Field("Name", obj.Name, Field.Store.NO, Field.Index.ANALYZED); f.SetBoost(.1F); doc.Add(f); f = new Field("NameWithWiteSpace", obj.Name.RemoveSymbols(" "), Field.Store.NO, Field.Index.ANALYZED); f.SetBoost(.1F); doc.Add(f); return doc; }
public virtual void TestMultiValueSource() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED); doc.Add(f); for (int i = 0; i < 17; i++) { f.SetValue("" + i); w.AddDocument(doc); w.Commit(); } IndexReader r = w.GetReader(); w.Close(); Assert.IsTrue(r.GetSequentialSubReaders().Length > 1); ValueSource s1 = new IntFieldSource("field"); DocValues v1 = s1.GetValues(r); DocValues v2 = new MultiValueSource(s1).GetValues(r); for (int i = 0; i < r.MaxDoc(); i++) { Assert.AreEqual(v1.IntVal(i), i); Assert.AreEqual(v2.IntVal(i), i); } Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches(); r.Close(); dir.Close(); }
private void AddMetaDataField(Document doc, Term term, int[] meta) { IntMetaDataTokenStream tokenStream = new IntMetaDataTokenStream(term.Text); tokenStream.SetMetaData(meta); Field field = new Field(term.Field, tokenStream); doc.Add(field); }
public void CreateIndex() { IProductService productService = new ProductService(); int count = productService.GetProductCount(string.Empty); var data = productService.GetProducts(count, 1, string.Empty); // Use the multi-file index layout: by default this is true and a compound-file index structure is built; for analysis it is set to false here so the index is written as separate files //this.indexWriter.SetUseCompoundFile(false); foreach (var productInfo in data) { var doc = new Document(); var field1 = new Field("title", productInfo.Title, Field.Store.YES, Field.Index.ANALYZED); // add the field to the document doc.Add(field1); field1 = new Field("Category", productInfo.CategoryName, Field.Store.YES, Field.Index.ANALYZED); doc.Add(field1); field1 = new Field("Desc", productInfo.Desc ?? "", Field.Store.YES, Field.Index.ANALYZED); doc.Add(field1); this.indexWriter.AddDocument(doc); } // optimize the index structure this.indexWriter.Optimize(); this.indexWriter.Commit(); // close the writer this.indexWriter.Close(); }
public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost) { DateTime date = (DateTime) value; int year = date.Year; int month = date.Month; int day = date.Day; // set year Field field = new Field(name + ".year", year.ToString(), store, index); if (boost != null) { field.SetBoost(boost.Value); } document.Add(field); // set month and pad it if necessary field = new Field(name + ".month", month.ToString("D2"), store, index); if (boost != null) { field.SetBoost(boost.Value); } document.Add(field); // set day and pad it if necessary field = new Field(name + ".day", day.ToString("D2"), store, index); if (boost != null) { field.SetBoost(boost.Value); } document.Add(field); }
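// Hedged usage sketch: because month and day are zero-padded ("D2") above, the indexed strings sort the same
// way as the numbers, so a plain lexicographic term range behaves like a numeric range. The "published" field
// prefix is illustrative only, not from the original bridge.
var monthRange = new TermRangeQuery("published.month", "01", "06", true, true); // January through June, inclusive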
public Document BuildRecord() { var doc = new Document(); var numericField = new NumericField("DatabaseID", Field.Store.YES, false); numericField.SetIntValue(Email.ID); doc.Add(numericField); var field = new Field("UniqueID", UniqueID, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Title", Title, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Description", Description, Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(field); field = new Field("Type", Type, Field.Store.YES, Field.Index.ANALYZED); doc.Add(field); /* field = new Field("Name", EventDescription.Name, Field.Store.YES, Field.Index.ANALYZED); doc.Add(field);*/ return doc; }
public static Field CreateInstance(object data, XmlNode node) { if (data == null) { throw new ArgumentNullException("data"); } if (node == null) { throw new ArgumentNullException("node"); } String name = node.Attributes["name"].Value.ToLower(); String datasource = node.Attributes["datasource"].Value; String store = node.Attributes["store"].Value; String index = node.Attributes["index"].Value; Lucene.Net.Documents.Field.Store st; Enum.TryParse<Lucene.Net.Documents.Field.Store>(store,out st); Lucene.Net.Documents.Field.Index idx ; Enum.TryParse<Lucene.Net.Documents.Field.Index>(index,out idx); String value = getData(data,datasource); //name = name.ToLower(); Field ret = new Field(name,value,st,idx); return ret; }
public void Test_IndexReader_IsCurrent() { RAMDirectory ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, new KeywordAnalyzer(), true, new IndexWriter.MaxFieldLength(1000)); Field field = new Field("TEST", "mytest", Field.Store.YES, Field.Index.ANALYZED); Document doc = new Document(); doc.Add(field); writer.AddDocument(doc); IndexReader reader = writer.GetReader(); writer.DeleteDocuments(new Lucene.Net.Index.Term("TEST", "mytest")); Assert.IsFalse(reader.IsCurrent()); int resCount1 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits; Assert.AreEqual(1, resCount1); writer.Commit(); Assert.IsFalse(reader.IsCurrent()); int resCount2 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits; Assert.AreEqual(1, resCount2, "Reopen not invoked yet, resultCount must still be 1."); reader = reader.Reopen(); Assert.IsTrue(reader.IsCurrent()); int resCount3 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")), 100).TotalHits; Assert.AreEqual(0, resCount3, "After reopen, resultCount must be 0."); reader.Close(); writer.Dispose(); }
public IEnumerable<AbstractField> Index(RavenJObject document, Field.Store defaultStorage) { return from property in document where property.Key != Constants.DocumentIdFieldName from field in CreateFields(property.Key, GetPropertyValue(property.Value), defaultStorage) select field; }
protected override Document BuildLuceneDocument(Document document) { Field.Store storeContent = Field.Store.YES; #if DEBUG storeContent = Field.Store.YES; #endif var name = new Field("author_name", author.Name, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(name); var fieldNameSort = new Field("author_sort", author.Name, storeContent, Field.Index.NOT_ANALYZED_NO_NORMS); document.Add(fieldNameSort); var fieldName = new Field("author_firstName", author.FirstName, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldName); var lastName = new Field("author_lastName", author.LastName, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(lastName); var fieldBiography = new Field("biography", author.Biography ?? String.Empty, storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldBiography); var fieldAuthorId = new Field("author_id", author.ElanId.ToString(), storeContent, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(fieldAuthorId); return document; }
/// <summary> /// Adds a field to the index; a bit more configurable than the other helper methods, but more verbose as a consequence. /// </summary> /// <param name="document">The document to add the field to</param> /// <param name="fieldName">The name of the field to add</param> /// <param name="value">The value of the field</param> /// <param name="caseSensitive">Whether to keep the value in its original case (otherwise it is lower-cased)</param> /// <param name="store">A Field.Store value denoting whether to store the value in the index - storing allows retrieval of the original value from the index</param> /// <param name="index">The type of indexing to apply to the field</param> /// <returns>The input document object</returns> public static Document AddField(this Document document, string fieldName, string value, bool caseSensitive, Field.Store store, Field.Index index) { if (value == null) { return document; } if (store == null) { store = Field.Store.NO; } if (!caseSensitive) { value = value.ToLower(); } if (index == null) { index = Field.Index.ANALYZED; } Field field = new Field(fieldName, value, store, index); document.Add(field); return document; }
public void MyTestMethod_index() { string strIndexDir = @"D:\Index"; Lucene.Net.Store.Directory indexDir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(strIndexDir)); Analyzer std = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); //Version parameter is used for backward compatibility. Stop words can also be passed to avoid indexing certain words using (IndexWriter idxw = new IndexWriter(indexDir, std, true, IndexWriter.MaxFieldLength.UNLIMITED)) //Create an Index writer object. { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); var file = System.IO.File.ReadAllText(@"d:\test.txt"); /* read the text to index; without this the "file" variable used below is undefined */ Lucene.Net.Documents.Field fldText = new Lucene.Net.Documents.Field("text", file, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES); doc.Add(fldText); doc.Add(new Field("addtime", System.DateTime.Now.ToString(), Lucene.Net.Documents.Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); //write the document to the index idxw.AddDocument(doc); //optimize and close the writer idxw.Optimize(); } Console.WriteLine("Indexing Done"); }
/// <summary> /// Index customization: adds external fields to the document /// </summary> /// <param name="sender"></param> /// <param name="e"></param> private void InsertExternalFields(object sender, Examine.LuceneEngine.DocumentWritingEventArgs e) { if (!e.Fields.ContainsKey("criteria")) return; var criteriaString = e.Fields["criteria"]; var xCriteriaField = new Field("xCriteria", criteriaString.Replace(',', ' '), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO); e.Document.Add(xCriteriaField); }
public IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, Field.Store defaultStorage) { return from property in properties.Cast<PropertyDescriptor>() where property.Name != Constants.DocumentIdFieldName from field in CreateFields(property.Name, property.GetValue(val), defaultStorage) select field; }
private void AddTextField(Document doc, string fieldName, string[] sections) { for (int i = 0; i < sections.Length; i++) { Field field = new Field(fieldName, new SectionTokenStream(analyzer.TokenStream(fieldName, new System.IO.StringReader(sections[i])), i)); doc.Add(field); } }
private void EnsureWriterHasChanges() { var doc = new Lucene.Net.Documents.Document(); var field = new Lucene.Net.Documents.Field("Path", "/root/indexing_writinggapandgettingunprocessedactivitiesswithgap/fake", LucField.Store.YES, LucField.Index.NOT_ANALYZED, LucField.TermVector.NO); doc.Add(field); LuceneManager._writer.AddDocument(doc); }
public static Document AddField(this Document document, string name, string value, Field.Store store, Field.Index index) { if (String.IsNullOrEmpty(value)) return document; document.Add(new Field(name, value, store, index)); return document; }
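// Hedged usage sketch: because the extension above returns the document, calls can be chained fluently.
// "item" and the field names here are illustrative only, and null/empty values are simply skipped.
var doc = new Document()
    .AddField("id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)
    .AddField("body", item.Body, Field.Store.NO, Field.Index.ANALYZED);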
// Activity 9 public void IndexText(string text) { // TODO: Enter code to index text Lucene.Net.Documents.Field field = new Lucene.Net.Documents.Field("text", text, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(field); writer.AddDocument(doc); }
public IndexFieldInfo(string name, string value, FieldInfoType type, Field.Store store, Field.Index index, Field.TermVector termVector) { Name = name; Value = value; Type = type; Store = store; Index = index; TermVector = termVector; }
private void AddMetaDataField(Document doc, string name, string[] vals) { foreach (string val in vals) { Field field = new Field(name, val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); field.OmitTermFreqAndPositions = (true); doc.Add(field); } }
protected internal static Document Doc(Field[] fields) { Document doc = new Document(); for (int i = 0; i < fields.Length; i++) { doc.Add(fields[i]); } return doc; }
public static IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, IndexDefinition indexDefinition, Field.Store defaultStorage) { return (from property in properties.Cast<PropertyDescriptor>() let name = property.Name where name != Constants.DocumentIdFieldName let value = property.GetValue(val) from field in CreateFields(name, value, indexDefinition, defaultStorage) select field); }
public IEnumerable<AbstractField> Index(RavenJObject document, Field.Store defaultStorage) { return (from property in document let name = property.Key where name != Constants.DocumentIdFieldName let value = GetPropertyValue(property.Value) from field in CreateFields(name, value, defaultStorage) select field); }
public static IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, IndexDefinition indexDefinition, Field.Store defaultStorage) { return (from property in properties.Cast<PropertyDescriptor>() let name = property.Name where name != "__document_id" let value = property.GetValue(val) where value != null select Createfield(name, value, indexDefinition, defaultStorage)); }
public static IEnumerable<AbstractField> Index(JObject document, IndexDefinition indexDefinition, Field.Store defaultStorage) { return (from property in document.Cast<JProperty>() let name = property.Name where name != Constants.DocumentIdFieldName let value = GetPropertyValue(property) from field in CreateFields(name, value, indexDefinition, defaultStorage) select field); }
public static IEnumerable<AbstractField> Index(JObject document, IndexDefinition indexDefinition, Field.Store defaultStorage) { return (from property in document.Cast<JProperty>() let name = property.Name where name != "__document_id" let value = GetPropertyValue(property) where value != null select Createfield(name, value, indexDefinition, defaultStorage)); }
protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector) { if (name == null) throw new System.NullReferenceException("name cannot be null"); this.name = String.Intern(name); // field names are interned if (store == Field.Store.YES) { this.isStored = true; this.isCompressed = false; } else if (store == Field.Store.COMPRESS) { this.isStored = true; this.isCompressed = true; } else if (store == Field.Store.NO) { this.isStored = false; this.isCompressed = false; } else { throw new System.ArgumentException("unknown store parameter " + store); } if (index == Field.Index.NO) { this.isIndexed = false; this.isTokenized = false; } else if (index == Field.Index.TOKENIZED) { this.isIndexed = true; this.isTokenized = true; } else if (index == Field.Index.UN_TOKENIZED) { this.isIndexed = true; this.isTokenized = false; } else if (index == Field.Index.NO_NORMS) { this.isIndexed = true; this.isTokenized = false; this.omitNorms = true; } else { throw new System.ArgumentException("unknown index parameter " + index); } this.isBinary = false; SetStoreTermVector(termVector); }
public void Test_Index_ReusableStringReader() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); doc.Add(f1); wr.AddDocument(doc); wr.Close(); }
public void Test_Index_ReusableStringReader() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f1); wr.AddDocument(doc); wr.Dispose(); }
private void AddNoProxDoc(IndexWriter writer) { Document doc = new Document(); Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED); f.OmitTermFreqAndPositions = true; doc.Add(f); f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO); f.OmitTermFreqAndPositions = true; doc.Add(f); writer.AddDocument(doc); }
// Indexing... public void IndexText(List <Collection> collections) { foreach (Collection c in collections) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); // TODO: Enter code to index text Lucene.Net.Documents.Field field_DocID = new Lucene.Net.Documents.Field("DocID", c.DocID, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.Add(field_DocID); Lucene.Net.Documents.Field field_Title = new Lucene.Net.Documents.Field("Title", c.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.Add(field_Title); doc.Add(new Lucene.Net.Documents.Field("Author", c.Author, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.Add(new Lucene.Net.Documents.Field("Bibliographic", c.Bibliographic, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.Add(new Lucene.Net.Documents.Field("Words", c.Words, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); } }
void LUCENENET_100_CreateIndex() { Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED); Lucene.Net.Documents.Field f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED); Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED); f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED); d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); w.Dispose(); }
void LUCENENET_100_CreateIndex() { Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), true); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); Lucene.Net.Documents.Field f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED); Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED); d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); w.Close(); }
public static AbstractField CreateField(FieldSearchType field, object value) { var s = field.SearchType.Store ? LuceneField.Store.YES : LuceneField.Store.NO; AbstractField abstractField; switch (field.Field.Type) { case "byte": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetIntValue(Convert.ToInt32(value)); break; case "short": case "int16": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetIntValue(Convert.ToInt32(value)); break; case "int": case "int32": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetIntValue((int)value); break; case "int64": case "long": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetLongValue((long)value); break; case "double": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetDoubleValue((double)value); break; case "decimal": abstractField = new LuceneField(field.Alias, ((decimal)value).ToString(field.Field.DecimalFormat), s, field.SearchType.Index ? LuceneField.Index.NOT_ANALYZED_NO_NORMS : LuceneField.Index.NO, LuceneField.TermVector.NO); break; case "float": case "single": abstractField = new NumericField(field.Alias, s, field.SearchType.Index).SetFloatValue((float)value); break; case "bool": case "boolean": abstractField = new LuceneField(field.Alias, (bool)value ? "1" : "0", s, LuceneField.Index.NOT_ANALYZED_NO_NORMS); break; case "datetime": abstractField = new LuceneField(field.Alias, DateTools.DateToString((DateTime)value, DateTools.Resolution.MILLISECOND), s, field.SearchType.Index ? LuceneField.Index.NOT_ANALYZED_NO_NORMS : LuceneField.Index.NO, LuceneField.TermVector.NO); break; case "rowversion": case "byte[]": abstractField = field.SearchType.Index ? new LuceneField(field.Alias, Utility.BytesToHexString((byte[])value), s, LuceneField.Index.NOT_ANALYZED_NO_NORMS) : new LuceneField(field.Alias, (byte[])value, s); break; case "string": var iString = field.SearchType.Index ? ( field.SearchType.Analyzer.Equals("keyword") ? (field.SearchType.Norms ? LuceneField.Index.NOT_ANALYZED : LuceneField.Index.NOT_ANALYZED_NO_NORMS) : (field.SearchType.Norms ? LuceneField.Index.ANALYZED : LuceneField.Index.ANALYZED_NO_NORMS) ) : LuceneField.Index.NO; abstractField = new LuceneField(field.Alias, value.ToString(), s, iString); break; default: var i = field.SearchType.Index ? (field.SearchType.Norms ? LuceneField.Index.NOT_ANALYZED : LuceneField.Index.NOT_ANALYZED_NO_NORMS) : LuceneField.Index.NO; abstractField = new LuceneField(field.Alias, value.ToString(), s, i); break; } return(abstractField); }
static DocHelper() { textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED); { textField3.OmitNorms = true; } keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED); noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED); { noTFField.OmitTermFreqAndPositions = true; } unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO); unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); fields = new Field[] { textField1, textField2, textField3, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField }; { //Initialize the large Lazy Field System.Text.StringBuilder buffer = new System.Text.StringBuilder(); for (int i = 0; i < 10000; i++) { buffer.Append("Lazily loading lengths of language in lieu of laughing "); } try { LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes"); } catch (System.IO.IOException) { } lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES); fields[fields.Length - 2] = lazyFieldBinary; LARGE_LAZY_FIELD_TEXT = buffer.ToString(); largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); fields[fields.Length - 1] = largeLazyField; for (int i = 0; i < fields.Length; i++) { IFieldable f = fields[i]; Add(all, f); if (f.IsIndexed) { Add(indexed, f); } else { Add(unindexed, f); } if (f.IsTermVectorStored) { Add(termvector, f); } if (f.IsIndexed && !f.IsTermVectorStored) { Add(notermvector, f); } if (f.IsStored) { Add(stored, f); } else { Add(unstored, f); } if (f.OmitNorms) { Add(noNorms, f); } if (f.OmitTermFreqAndPositions) { Add(noTf, f); } if (f.IsLazy) { Add(lazy, f); } } } { nameValues = new System.Collections.Hashtable(); nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT; nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT; nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT; nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT; nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT; nameValues[NO_TF_KEY] = NO_TF_TEXT; nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT; nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT; nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT; nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT; nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES; nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT; nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT; 
nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT; } }
private Document CreateIndexDocuementForTicket(Ticket ticket) { var doc = new Document(); var commentTexts = (from c in ticket.TicketComments select c.Comment); StringBuilder sb = new StringBuilder(); foreach (var c in commentTexts) { sb.AppendLine(c); } var commentText = sb.ToString(); Lucene.Net.Documents.Field idField = new Lucene.Net.Documents.Field ( "ticketid", ticket.TicketId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO, Lucene.Net.Documents.Field.TermVector.NO ); Lucene.Net.Documents.Field titleField = new Lucene.Net.Documents.Field ( "title", ticket.Title ?? string.Empty, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES ); titleField.SetBoost(1.5F); Lucene.Net.Documents.Field detailsField = new Lucene.Net.Documents.Field ( "details", ticket.Details ?? string.Empty, Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES ); detailsField.SetBoost(1F); Lucene.Net.Documents.Field tagsField = new Lucene.Net.Documents.Field ( "tags", ticket.TagList ?? string.Empty, Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.NO ); tagsField.SetBoost(2F); Lucene.Net.Documents.Field commentsField = new Lucene.Net.Documents.Field ( "comments", commentText ?? string.Empty, Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES ); commentsField.SetBoost(.8F); doc.Add(idField); doc.Add(titleField); doc.Add(detailsField); doc.Add(tagsField); doc.Add(commentsField); if (ticket.CurrentStatus != "Closed") { doc.SetBoost(10F); } return(doc); }
static DocHelper() { TextField1 = new Field(TextField1Key, Field1Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); TextField2 = new Field(TextField2Key, Field2Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); TextField3 = new Field(TextField3Key, Field3Text, Field.Store.YES, Field.Index.ANALYZED); { TextField3.OmitNorms = true; } KeyField = new Field(KeywordFieldKey, KeywordText, Field.Store.YES, Field.Index.NOT_ANALYZED); NoNormsField = new Field(NoNormsKey, NoNormsText, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); NoTfField = new Field(NoTfKey, NoTfText, Field.Store.YES, Field.Index.ANALYZED); { NoTfField.OmitTermFreqAndPositions = true; } UnIndField = new Field(UnindexedFieldKey, UnindexedFieldText, Field.Store.YES, Field.Index.NO); UnStoredField1 = new Field(UnstoredField1Key, Unstored1FieldText, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); UnStoredField2 = new Field(UnstoredField2Key, Unstored2FieldText, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); LazyField = new Field(LazyFieldKey, LazyFieldText, Field.Store.YES, Field.Index.ANALYZED); TextUtfField1 = new Field(TextFieldUtf1Key, FieldUtf1Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); TextUtfField2 = new Field(TextFieldUtf2Key, FieldUtf2Text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); Fields = new Field[] { TextField1, TextField2, TextField3, KeyField, NoNormsField, NoTfField, UnIndField, UnStoredField1, UnStoredField2, TextUtfField1, TextUtfField2, LazyField, LazyFieldBinary, LargeLazyField }; { //Initialize the large Lazy Field System.Text.StringBuilder buffer = new System.Text.StringBuilder(); for (int i = 0; i < 10000; i++) { buffer.Append("Lazily loading lengths of language in lieu of laughing "); } try { LazyFieldBinaryBytes = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes"); } catch (System.IO.IOException) { } LazyFieldBinary = new Field(LazyFieldBinaryKey, LazyFieldBinaryBytes, Field.Store.YES); Fields[Fields.Length - 2] = LazyFieldBinary; LargeLazyFieldText = buffer.ToString(); LargeLazyField = new Field(LargeLazyFieldKey, LargeLazyFieldText, Field.Store.YES, Field.Index.ANALYZED); Fields[Fields.Length - 1] = LargeLazyField; for (int i = 0; i < Fields.Length; i++) { IFieldable f = Fields[i]; Add(All, f); if (f.IsIndexed) { Add(Indexed, f); } else { Add(Unindexed, f); } if (f.IsTermVectorStored) { Add(Termvector, f); } if (f.IsIndexed && !f.IsTermVectorStored) { Add(Notermvector, f); } if (f.IsStored) { Add(Stored, f); } else { Add(Unstored, f); } if (f.OmitNorms) { Add(NoNorms, f); } if (f.OmitTermFreqAndPositions) { Add(NoTf, f); } if (f.IsLazy) { Add(Lazy, f); } } } { NameValues = new System.Collections.Hashtable(); NameValues[TextField1Key] = Field1Text; NameValues[TextField2Key] = Field2Text; NameValues[TextField3Key] = Field3Text; NameValues[KeywordFieldKey] = KeywordText; NameValues[NoNormsKey] = NoNormsText; NameValues[NoTfKey] = NoTfText; NameValues[UnindexedFieldKey] = UnindexedFieldText; NameValues[UnstoredField1Key] = Unstored1FieldText; NameValues[UnstoredField2Key] = Unstored2FieldText; NameValues[LazyFieldKey] = LazyFieldText; NameValues[LazyFieldBinaryKey] = LazyFieldBinaryBytes; NameValues[LargeLazyFieldKey] = LargeLazyFieldText; NameValues[TextFieldUtf1Key] = FieldUtf1Text; NameValues[TextFieldUtf2Key] = FieldUtf2Text; } }
public static Lucene.Net.Documents.Document ConvertToLuceneDocument(DocumentDto document) { //Convert WCF document to Lucene document var luceneDocument = new Lucene.Net.Documents.Document(); foreach (var field in document.Fields) { Lucene.Net.Documents.Field.Index indexType; switch (field.Index) { case FieldIndexType.NotIndexed: indexType = Field.Index.NO; break; case FieldIndexType.Analyzed: indexType = Field.Index.ANALYZED; break; case FieldIndexType.AnalyzedNoNorms: indexType = Field.Index.ANALYZED_NO_NORMS; break; case FieldIndexType.NotAnalyzed: indexType = Field.Index.NOT_ANALYZED; break; case FieldIndexType.NotAnalyzedNoNorms: indexType = Field.Index.NOT_ANALYZED_NO_NORMS; break; default: throw new ArgumentOutOfRangeException("Unknown or invalid field index type: " + field.Index); } Lucene.Net.Documents.Field.Store storeType; switch (field.Store) { case FieldStorageType.Stored: storeType = Field.Store.YES; break; case FieldStorageType.NotStored: storeType = Field.Store.NO; break; default: throw new ArgumentOutOfRangeException("Unknown or invalid field store type: " + field.Store); } Lucene.Net.Documents.Field.TermVector termVectorType; switch (field.TermVector) { case FieldTermVectorType.Yes: termVectorType = Field.TermVector.YES; break; case FieldTermVectorType.WithOffsets: termVectorType = Field.TermVector.WITH_OFFSETS; break; case FieldTermVectorType.WithPositions: termVectorType = Field.TermVector.WITH_POSITIONS; break; case FieldTermVectorType.WithPositionsOffsets: termVectorType = Field.TermVector.WITH_POSITIONS_OFFSETS; break; case FieldTermVectorType.No: termVectorType = Field.TermVector.NO; break; default: throw new ArgumentOutOfRangeException("Unknown or invalid field term vector type: " + field.TermVector); } IFieldable luceneField; if (field is StringFieldDto) { var stringField = field as StringFieldDto; luceneField = new Lucene.Net.Documents.Field(stringField.Name, true, stringField.Value, storeType, indexType, termVectorType); } else if (field is DateFieldDto) { var dateField = field as DateFieldDto; var dateString = DateTools.DateToString(dateField.Value, DateTools.Resolution.MILLISECOND); luceneField = new Field(dateField.Name, dateString, storeType, Field.Index.NOT_ANALYZED, termVectorType); } else if (field is NumericFieldDto) { var numericField = field as NumericFieldDto; luceneField = new Lucene.Net.Documents.NumericField(numericField.Name, numericField.PrecisionStep, storeType, field.Index != FieldIndexType.NotIndexed); } else { throw new NotImplementedException(); } if (field.Boost.HasValue) { luceneField.Boost = field.Boost.Value; } luceneDocument.Add(luceneField); } return(luceneDocument); }
public virtual void searchIndex(System.String dirName, System.String oldName) { //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer()); //Query query = parser.parse("handle:1"); dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); IndexSearcher searcher = new IndexSearcher(dir, true); IndexReader reader = searcher.IndexReader; _TestUtil.CheckIndex(dir); for (int i = 0; i < 35; i++) { if (!reader.IsDeleted(i)) { Document d = reader.Document(i); var fields = d.GetFields(); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { if (d.GetField("content3") == null) { int numFields = oldName.StartsWith("29.") ? 7 : 5; Assert.AreEqual(numFields, fields.Count); Field f = d.GetField("id"); Assert.AreEqual("" + i, f.StringValue); f = (Field)d.GetField("utf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("autf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("content2"); Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue); f = (Field)d.GetField("fie\u2C77ld"); Assert.AreEqual("field with non-ascii name", f.StringValue); } } } // Only ID 7 is deleted else { Assert.AreEqual(7, i); } } ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; // First document should be #21 since it's norm was // increased: Document d2 = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first"); TestHits(hits, 34, searcher.IndexReader); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { // Test on indices >= 2.3 hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); } searcher.Close(); dir.Close(); }
/// <summary> /// Indexes every file found under the given directory path into the index /// </summary> /// <param name="path">The directory containing the files to index</param> public void IndexText(string path) { System.IO.DirectoryInfo root = new System.IO.DirectoryInfo(path); // Create DirectoryInfo object System.IO.FileInfo[] files = null; // Create FileInfo array // Get all files in the directory try { files = root.GetFiles("*.*"); // Access each file in the directory } catch (UnauthorizedAccessException e) { System.Console.WriteLine(e.Message); } catch (System.IO.DirectoryNotFoundException e) { Console.WriteLine(e.Message); } if (files != null) { foreach (System.IO.FileInfo fil in files) { string name = fil.FullName; // Get file name StreamReader reader = new StreamReader(name); // Create a reader string text = reader.ReadToEnd(); // Read the whole text int indexT = text.IndexOf(".T"); // Get title starting index int indexA = text.IndexOf(".A"); // Get author starting index int indexB = text.IndexOf(".B"); // Get bibliography starting index string title = text.Substring(indexT + 3, ((indexA - 1 - (indexT + 3)) > 0) ? (indexA - 1 - (indexT + 3)) : 0); // Get title string string author = text.Substring(indexA + 3, ((indexB - 1 - (indexA + 3)) > 0) ? (indexB - 1 - (indexA + 3)) : 0); // Get author string //This section is focused on removing the title from the abstract int startTitle = text.IndexOf(".T\n") + 2; // Get title starting index int startAbstract = text.IndexOf(".A\n") - 1; // Get index before author starting int startWords = text.IndexOf(".W\n"); // Get Words starting index int lengthOfTitle = startAbstract - startTitle; //Calculate length of title text = text.Remove(startWords + 2, lengthOfTitle); //Remove title from Words section. // Indexing with the fields Lucene.Net.Documents.Document doc = new Document(); // Create document doc.Add(new Lucene.Net.Documents.Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS)); // Full text field Lucene.Net.Documents.Field titleFieldWithBoost = new Lucene.Net.Documents.Field(TEXT_FN_TITLE, title, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS); // Title field Lucene.Net.Documents.Field authorFieldWithBoost = new Lucene.Net.Documents.Field(TEXT_FN_AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.WITH_POSITIONS_OFFSETS); // Author field // Field-level boosting of the search: the default boost is 1, so an unchanged field carries no extra weight authorFieldWithBoost.Boost = 3; // boost author matches titleFieldWithBoost.Boost = 4; // boost title matches even more doc.Add(titleFieldWithBoost); doc.Add(authorFieldWithBoost); //doc.Add(new Lucene.Net.Documents.Field(TEXT_FN_AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS, Field.TermVector.NO)); //For Field Author writer.AddDocument(doc); // Add document reader.Close(); } } }
/// <summary> /// Indexes every .txt file found under the given directory path into the index /// </summary> /// <param name="path">The directory containing the files to index</param> public void IndexText(string path) { System.IO.DirectoryInfo root = new System.IO.DirectoryInfo(path); // Create DirectoryInfo object System.IO.FileInfo[] files = null; // Create FileInfo array // Get all files in the directory try { files = root.GetFiles("*.txt"); } catch (UnauthorizedAccessException e) { System.Console.WriteLine(e.Message); } catch (System.IO.DirectoryNotFoundException e) { Console.WriteLine(e.Message); } if (files != null) { foreach (System.IO.FileInfo fi in files) { string name = fi.Name; // Get file name Console.WriteLine("Adding doc " + name + " to Index"); StreamReader reader = new StreamReader(fi.FullName); // Create a reader string text = reader.ReadToEnd(); // Read the whole text Regex rxi = new Regex(".I ", RegexOptions.Compiled); // Regexes matching the .I/.A/.B/.T/.W section markers Regex rxa = new Regex(".A\r\n", RegexOptions.Compiled); Regex rxb = new Regex(".B\r\n", RegexOptions.Compiled); Regex rxt = new Regex(".T\r\n", RegexOptions.Compiled); Regex rxw = new Regex(".W\r\n", RegexOptions.Compiled); MatchCollection abst_i = rxi.Matches(text); MatchCollection abst_a = rxa.Matches(text); MatchCollection abst_b = rxb.Matches(text); MatchCollection abst_t = rxt.Matches(text); MatchCollection abst_w = rxw.Matches(text); Console.WriteLine("This Length is: " + abst_i.Count); Console.WriteLine("This Length is: " + abst_a.Count); Console.WriteLine("This Length is: " + abst_b.Count); Console.WriteLine("This Length is: " + abst_t.Count); Console.WriteLine("This Length is: " + abst_w.Count); if (abst_i.Count > 0 && abst_a.Count > 0 && abst_b.Count > 0 && abst_t.Count > 0 && abst_w.Count > 0) { int indexI = text.IndexOf(".I "); // Get ID starting index if (abst_i.Count > 1) // When having more than 1 .I { text = text.Substring(0, indexI + 3) + text.Substring(indexI + 3).Replace(".I ", " "); // Remove the others except the first one } int indexT = text.IndexOf(".T\r\n"); // Get title starting index if (abst_t.Count > 1) // When having more than 1 .T { text = text.Substring(0, indexT + 3) + text.Substring(indexT + 3).Replace(".T\r\n", ""); // Remove the others except the first one } int indexA = text.IndexOf(".A\r\n"); // Get author starting index if (abst_a.Count > 1) // When having more than 1 .A { text = text.Substring(0, indexA + 3) + text.Substring(indexA + 3).Replace(".A\r\n", ""); // Remove the others except the first one } int indexB = text.IndexOf(".B\r\n"); // Get bibliography starting index if (abst_b.Count > 1) // When having more than 1 .B { text = text.Substring(0, indexB + 3) + text.Substring(indexB + 3).Replace(".B\r\n", ""); // Remove the others except the first one } int indexW = text.IndexOf(".W\r\n"); // Get abstract starting index if (abst_w.Count > 1) // When having more than 1 .W { text = text.Substring(0, indexW + 3) + text.Substring(indexW + 3).Replace(".W\r\n", ""); // Remove the others except the first one, using the \r\n form so duplicates matched by rxw are actually removed } indexA = text.IndexOf(".A\r\n"); // Get again the index just in case it has been changed indexB = text.IndexOf(".B\r\n"); // Get again the index just in case it has been changed string title = text.Substring(indexT + 3, ((indexA - 1 - (indexT + 3)) > 0) ? (indexA - 1 - (indexT + 3)) : 0); // Get title string string author = text.Substring(indexA + 3, ((indexB - 1 - (indexA + 3)) > 0) ?
(indexB - 1 - (indexA + 3)) : 0); // Get author string //This section is focused on removing the title from the abstract int startTitle = text.IndexOf(".T\r\n") + 2; // Get title starting index int startAbstract = text.IndexOf(".A\r\n") - 1; // Get index before author starting int startWords = text.IndexOf(".W\r\n"); // Get Words Starting index int lengthOfTitle = startAbstract - startTitle; //Calculate length of title text = text.Remove(startWords + 2, lengthOfTitle); //Remove title from Words section. // Indexing by using the fields Lucene.Net.Documents.Document doc = new Document(); // Create document Lucene.Net.Documents.Field titleField = new Lucene.Net.Documents.Field(TEXT_FN_TITLE, title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); //Indexing field title Lucene.Net.Documents.Field authorField = new Lucene.Net.Documents.Field(TEXT_FN_AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); //Indexing field author doc.Add(new Lucene.Net.Documents.Field(TEXT_FN, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); //indexing field text doc.Add(titleField); doc.Add(authorField); writer.AddDocument(doc); // Add document reader.Close(); } else { Console.WriteLine(name); exFile.Add(name); } } var message = string.Join(Environment.NewLine, exFile); MessageBox.Show("The following files are excluded from the index because of the incorrect format:\n" + message); } }