/// <summary>
/// Field bridge that splits a DateTime value into separate "year", "month" and
/// "day" fields (month/day zero-padded to two digits) and adds them to the document.
/// </summary>
/// <param name="name">Base field name; ".year"/".month"/".day" are appended.</param>
/// <param name="value">Must be a DateTime; an InvalidCastException is thrown otherwise.</param>
/// <param name="document">Target Lucene document.</param>
/// <param name="store">Storage option applied to all three sub-fields.</param>
/// <param name="index">Index option applied to all three sub-fields.</param>
/// <param name="boost">Optional boost applied to each sub-field when non-null.</param>
public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost)
{
    DateTime date = (DateTime) value;

    // Fix: removed the trailing "throw new NotImplementedException();" that made
    // this method always throw after it had already added all three fields.
    AddDatePart(name + ".year", date.Year.ToString(), document, store, index, boost);
    // Month and day are zero-padded so lexicographic order matches chronological order.
    AddDatePart(name + ".month", date.Month.ToString("D2"), document, store, index, boost);
    AddDatePart(name + ".day", date.Day.ToString("D2"), document, store, index, boost);
}

/// <summary>Creates one sub-field, applies the optional boost, and adds it to the document.</summary>
private static void AddDatePart(string name, string value, Document document, Field.Store store, Field.Index index, float? boost)
{
    Field field = new Field(name, value, store, index);
    if (boost != null)
    {
        field.SetBoost(boost.Value);
    }
    document.Add(field);
}
/// <summary>
/// Builds a Lucene field with the requested storage, indexing and term-vector
/// options, and applies the given boost before returning it.
/// </summary>
/// <param name="name">The field name.</param>
/// <param name="value">The field value.</param>
/// <param name="storageType">How (or whether) the raw value is stored.</param>
/// <param name="indexType">How the value is indexed.</param>
/// <param name="vectorType">Term-vector storage option.</param>
/// <param name="boost">Per-field boost factor.</param>
/// <returns>The configured field as an <see cref="AbstractField"/>.</returns>
protected AbstractField CreateField(string name, string value, LuceneField.Store storageType, LuceneField.Index indexType, LuceneField.TermVector vectorType, float boost)
{
    var created = new LuceneField(name, value, storageType, indexType, vectorType);
    created.SetBoost(boost);
    return created;
}
/// <summary>
/// Maps a SampleData record onto a Lucene document; each searchable field
/// carries its own boost (ArtNo highest, names lowest).
/// </summary>
/// <param name="obj">The record to index.</param>
/// <returns>The populated document.</returns>
private static Document CreateDocument(SampleData obj)
{
    var doc = new Document();

    var artNoField = new Field("ArtNo", obj.ArtNo, Field.Store.NO, Field.Index.ANALYZED);
    artNoField.SetBoost(2F);
    doc.Add(artNoField);

    var idField = new Field("Id", obj.Id.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED);
    idField.SetBoost(1);
    doc.Add(idField);

    var nameField = new Field("Name", obj.Name, Field.Store.NO, Field.Index.ANALYZED);
    nameField.SetBoost(.1F);
    doc.Add(nameField);

    var spacedNameField = new Field("NameWithWiteSpace", obj.Name.RemoveSymbols(" "), Field.Store.NO, Field.Index.ANALYZED);
    spacedNameField.SetBoost(.1F);
    doc.Add(spacedNameField);

    return doc;
}
/// <summary>
/// Truncating string bridge: indexes only the first half of the string value.
/// Empty strings produce no field at all.
/// </summary>
public void Set(String name, Object value, Document document, Field.Store store, Field.Index index, float? boost)
{
    String indexedString = (String) value;
    // Do not add fields on empty strings, seems a sensible default in most situations.
    if (!StringHelper.IsNotEmpty(indexedString))
    {
        return;
    }
    // Only the first half of the string is indexed (deliberate truncation).
    String truncated = indexedString.Substring(0, indexedString.Length / 2);
    Field field = new Field(name, truncated, store, index);
    if (boost.HasValue)
    {
        field.SetBoost(boost.Value);
    }
    document.Add(field);
}
/// <summary>
/// Field bridge that delegates string conversion to the configured
/// stringBridge and indexes the result. Empty strings produce no field.
/// </summary>
public void Set(String name, Object value, Document document, Field.Store store, Field.Index index, float? boost)
{
    String indexedString = stringBridge.ObjectToString(value);
    // Do not add fields on empty strings, seems a sensible default in most situations.
    // TODO if Store, probably also save empty ones
    if (!StringHelper.IsNotEmpty(indexedString))
    {
        return;
    }
    Field field = new Field(name, indexedString, store, index);
    if (boost.HasValue)
    {
        field.SetBoost(boost.Value);
    }
    document.Add(field);
}
/// <summary>
/// Class bridge that concatenates a department's branch and network
/// (separated by sepChar) into a single indexed field.
/// </summary>
public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost)
{
    // The field name comes from the ClassBridge annotation's name attribute here;
    // that is not a requirement — it could equally be hard-coded below.
    Department dep = (Department) value;
    string branch = dep.Branch ?? string.Empty;
    string network = dep.Network ?? string.Empty;

    Field field = new Field(name, branch + sepChar + network, store, index);
    if (boost.HasValue)
    {
        field.SetBoost(boost.Value);
    }
    document.Add(field);
}
/// <summary>
/// Verifies that field and document boosts combine: four documents with
/// effective boosts 1, 2, 3 and 4 must score in strictly increasing order.
/// </summary>
public virtual void TestDocBoost_Renamed_Method()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    Fieldable plainField = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Fieldable boostedField = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    boostedField.SetBoost(2.0f);

    var docs = new Lucene.Net.Documents.Document[4];
    for (int n = 0; n < docs.Length; n++)
    {
        docs[n] = new Lucene.Net.Documents.Document();
    }
    docs[2].SetBoost(3.0f);
    docs[3].SetBoost(2.0f);
    docs[0].Add(plainField);   // effective boost = 1
    docs[1].Add(boostedField); // effective boost = 2
    docs[2].Add(plainField);   // effective boost = 3
    docs[3].Add(boostedField); // effective boost = 4

    foreach (var d in docs)
    {
        writer.AddDocument(d);
    }
    writer.Optimize();
    writer.Close();

    float[] scores = new float[4];
    new IndexSearcher(store).Search(new TermQuery(new Term("field", "word")), new AnonymousClassHitCollector(scores, this));

    float previous = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > previous);
        previous = scores[i];
    }
}
/// <summary>
/// Class bridge that indexes the equipment description looked up from the
/// department's manufacturer code in the equips table. When there is no
/// manufacturer, no field is added.
/// </summary>
public void Set(string name, object value, Document document, Field.Store store, Field.Index index, float? boost)
{
    // In this particular class the name of the new field was passed
    // from the name field of the ClassBridge Annotation. This is not
    // a requirement. It just works that way in this instance. The
    // actual name could be supplied by hard coding it below.
    Departments deps = (Departments) value;
    string manufacturer = deps.Manufacturer;

    // Fix: the original added a null Field to the document when Manufacturer
    // was null (the else-branch never created one), crashing in document.Add.
    // Now the field is simply skipped in that case.
    if (manufacturer != null)
    {
        // NOTE(review): assumes every manufacturer code has an entry in equips — confirm.
        string fieldValue = (string) equips[manufacturer];
        Field field = new Field(name, fieldValue, store, index);
        if (boost != null)
        {
            field.SetBoost(boost.Value);
        }
        document.Add(field);
    }
}
/// <summary>
/// Adds a product (商品) document to the index. The product name and exact
/// model fields are boosted; indexing failures are silently ignored.
/// </summary>
/// <param name="writer">Open index writer.</param>
/// <param name="model">Product entity to index.</param>
public static void AddIndex(IndexWriter writer, 商品 model)
{
    try
    {
        Document doc = new Document();
        doc.Add(new Field("NumId", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // stored + indexed verbatim

        Field nameField = new Field("Name", model.商品信息.商品名, Field.Store.YES, Field.Index.ANALYZED);
        nameField.SetBoost(5F); // extra weight for the product name
        doc.Add(nameField);

        Field modelField = new Field("ExactModel", model.商品信息.精确型号, Field.Store.YES, Field.Index.ANALYZED);
        modelField.SetBoost(4F); // extra weight for the exact model number
        doc.Add(modelField);

        doc.Add(new Field("Description", model.商品数据.商品简介, Field.Store.YES, Field.Index.NOT_ANALYZED));

        // Use the first product picture, or a placeholder when there is none.
        string picture = model.商品信息.商品图片.Count > 0 ? model.商品信息.商品图片[0] : "/images/noimage.jpg";
        doc.Add(new Field("Pic", picture, Field.Store.YES, Field.Index.NOT_ANALYZED));

        doc.Add(new Field("Price", model.销售信息.价格.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("Company", model.商品信息.所属供应商.用户ID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("Attribute", JsonConvert.SerializeObject(model.商品数据.商品属性), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("AddTime", model.基本数据.修改时间.ToString("yyyy-MM-dd"), Field.Store.YES, Field.Index.NOT_ANALYZED));

        writer.AddDocument(doc);
    }
    catch
    {
        // Deliberate best-effort: a failure to index one product is swallowed.
        return;
    }
}
/// <summary>
/// Rebuilds the Lucene document for a package and replaces any existing
/// index entry with the same key. Searchable fields carry per-field boosts;
/// Key/IsLatestStable/PublishedDate/DownloadCount support filtering and sorting.
/// </summary>
/// <param name="package">Snapshot of the package to (re-)index.</param>
private static void UpdatePackage(PackageIndexEntity package)
{
    string key = package.Key.ToString(CultureInfo.InvariantCulture);
    var document = new Document();

    // Exact, untokenized id match — boosted highest so exact hits rank first.
    var field = new Field("Id-Exact", package.Id.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED);
    field.SetBoost(2.5f);
    document.Add(field);

    field = new Field("Description", package.Description, Field.Store.NO, Field.Index.ANALYZED);
    field.SetBoost(0.1f);
    document.Add(field);

    var tokenizedId = TokenizeId(package.Id);
    foreach (var idToken in tokenizedId)
    {
        field = new Field("Id", idToken, Field.Store.NO, Field.Index.ANALYZED);
        field.SetBoost(1.2f);
        document.Add(field);
    }

    // If an element does not have a Title, then add all the tokenized Id components as Title.
    // Lucene's StandardTokenizer does not tokenize items of the format a.b.c which does not
    // play well with things like "xunit.net". We will feed it values that are already tokenized.
    var titleTokens = String.IsNullOrEmpty(package.Title)
        ? tokenizedId
        : package.Title.Split(idSeparators, StringSplitOptions.RemoveEmptyEntries);
    foreach (var idToken in titleTokens)
    {
        document.Add(new Field("Title", idToken, Field.Store.NO, Field.Index.ANALYZED));
    }

    if (!String.IsNullOrEmpty(package.Tags))
    {
        field = new Field("Tags", package.Tags, Field.Store.NO, Field.Index.ANALYZED);
        field.SetBoost(0.8f);
        document.Add(field);
    }

    document.Add(new Field("Author", package.Authors, Field.Store.NO, Field.Index.ANALYZED));

    // Fields meant for filtering and sorting
    document.Add(new Field("Key", key, Field.Store.YES, Field.Index.NO));
    document.Add(new Field("IsLatestStable", package.IsLatestStable.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
    // Fix: format the tick count with the invariant culture so the sortable string
    // cannot vary with the thread culture — consistent with DownloadCount below.
    document.Add(new Field("PublishedDate", package.Published.Ticks.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
    document.Add(new Field("DownloadCount", package.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));

    string displayName = String.IsNullOrEmpty(package.Title) ? package.Id : package.Title;
    document.Add(new Field("DisplayName", displayName.ToLower(CultureInfo.CurrentCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));

    indexWriter.UpdateDocument(new Term("Key", key), document);
}
// Rebuilds the field list for a reconstructed document: entries that are already
// Field objects (stored fields) are reused as-is; all other entries are term
// arrays from which a surrogate field value is re-synthesized.
private void ReconstructFields(Hashtable doc)
{
    fields = new ArrayList(doc.Count + 5);
    fieldsReconstructed = new ArrayList(doc.Count + 5);
    // NOTE(review): i is incremented per key but never otherwise used here.
    int i = 0;
    foreach (string key in doc.Keys)
    {
        Object t = doc[key];
        // create/reconstruct fields
        if (typeof(Field).IsInstanceOfType(t))
        {
            // Stored field: keep the original Field object; mark as not reconstructed.
            fields.Add(t);
            fieldsReconstructed.Add(false);
        }
        else
        {
            fieldsReconstructed.Add(true);
            GrowableStringArray terms = (GrowableStringArray)doc[key];
            StringBuilder sb = new StringBuilder();
            if (terms != null)
            {
                String sNull = "null";
                // k counts consecutive missing (null) term positions;
                // m counts emitted words so a newline is inserted every 10th word.
                int k = 0, m = 0;
                for (int j = 0; j < terms.Size(); j++)
                {
                    if (terms.Get(j) == null)
                        k++;
                    else
                    {
                        if (sb.Length > 0)
                            sb.Append(' ');
                        if (k > 0)
                        {
                            // Collapse a run of k missing positions into a "null_k" placeholder.
                            sb.Append(sNull + "_" + k + " ");
                            k = 0;
                            m++;
                        }
                        sb.Append(terms.Get(j));
                        m++;
                        if (m % 10 == 0)
                            sb.Append('\n');
                    }
                }
            }
            Field newField = new Field(key, sb.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO);
            // The original per-field boost is not recoverable; reuse the document boost.
            newField.SetBoost(document.GetBoost());
            fields.Add(newField);
        }
        i++;
    }
}
/// <summary>
/// Creates a Lucene field with the given storage, index and term-vector
/// options, applying the supplied boost.
/// </summary>
/// <param name="name">Field name.</param>
/// <param name="value">Field value.</param>
/// <param name="storageType">Storage option.</param>
/// <param name="indexType">Indexing option.</param>
/// <param name="vectorType">Term-vector option.</param>
/// <param name="boost">Boost factor applied to the field.</param>
/// <returns>The configured field.</returns>
protected AbstractField CreateField(string name, string value, LuceneField.Store storageType, LuceneField.Index indexType, LuceneField.TermVector vectorType, float boost)
{
    var result = new LuceneField(name, value, storageType, indexType, vectorType);
    result.SetBoost(boost);
    return result;
}
/// <summary>
/// Adds one document (id, title, content, date) to the index;
/// the title field receives extra weight. Failures are silently ignored.
/// </summary>
/// <param name="writer">Open index writer.</param>
/// <param name="id">Record identifier (stored, not analyzed).</param>
/// <param name="title">Title text (analyzed, boosted).</param>
/// <param name="content">Body text (analyzed).</param>
/// <param name="date">Date string (stored, not analyzed).</param>
private void AddIndex(IndexWriter writer, string id, string title, string content, string date)
{
    try
    {
        Document doc = new Document();
        doc.Add(new Field("NumId", id, Field.Store.YES, Field.Index.NOT_ANALYZED)); // stored + indexed verbatim

        Field titleField = new Field("Title", title, Field.Store.YES, Field.Index.ANALYZED);
        titleField.SetBoost(5F); // give the title extra weight
        doc.Add(titleField);

        doc.Add(new Field("Content", content, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AddTime", date, Field.Store.YES, Field.Index.NOT_ANALYZED));

        writer.AddDocument(doc);
    }
    catch
    {
        // Deliberate best-effort: a failed document is skipped silently.
        return;
    }
}
/// <summary>
/// Creates the index in the specified path, using the corpusReader object
/// as the documents feed.
/// </summary>
/// <param name="corpusReader">Source of wiki documents; its events drive the indexing.</param>
/// <param name="indexPath">Filesystem directory where the index is written.</param>
public void CreateIndex(WikiDumpReader corpusReader, string indexPath)
{
    cr = corpusReader;
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    // Create a fresh index (create = true) with no field-length cap.
    writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    // Non-compound files while bulk indexing; switched back on before closing below.
    writer.SetUseCompoundFile(false);

    // This will be called whenever a document is read by the provided ICorpusReader
    corpusReader.OnDocument += corpusDoc =>
    {
        if (corpusReader.AbortReading)
            return;

        // Blaaaah that's ugly. Make sure parsing doesn't stick us in an infinite loop:
        // give the HTML conversion at most 2 minutes, otherwise treat content as empty.
        var t = Task.Factory.StartNew(() => corpusDoc.AsHtml());
        var timeout = t.Wait(TimeSpan.FromMinutes(2)); // true means it finished within the window
        var content = timeout ? t.Result : string.Empty;

        // skip blank documents, they are worthless to us (even though they have a title we could index)
        if (string.IsNullOrEmpty(content))
            return;

        // Create a new index document
        var doc = new Document();
        doc.Add(new Field("Id", corpusDoc.Id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

        // Add title field, boosted so title matches outrank body matches.
        var titleField = new Field("Title", corpusDoc.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        titleField.SetBoost(3.0f);
        doc.Add(titleField);

        doc.Add(new Field("Content", content, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

        writer.AddDocument(doc);
    };

    // Progress reporting, marshalled to the UI thread via Invoke.
    corpusReader.OnProgress += (percentage, status, isRunning) =>
    {
        var pi = new ProgressInfo { IsStillRunning = true, Status = string.Format("{0} ({1}%)", status, percentage) };
        Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(percentage, pi));
    };

    // Execute corpus reading, which will trigger indexing for each document found
    corpusReader.Read();
    cr = null;

    // Clean up and close
    writer.SetUseCompoundFile(true);
    writer.Optimize();
    writer.Close();
    writer = null;

    var pi1 = new ProgressInfo { IsStillRunning = false, Status = "Ready" };
    Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(100, pi1));
}
/// <summary>
/// Creates a Lucene field.
/// </summary>
/// <param name="fieldKey">Field name</param>
/// <param name="fieldValue">Field value</param>
/// <param name="storeType">Storage option</param>
/// <param name="indexType">Index type</param>
/// <param name="boost">Boosting parameter</param>
/// <returns>Fieldable Type</returns>
private static Fieldable CreateField(string fieldKey, string fieldValue, Field.Store storeType, Field.Index indexType, float boost)
{
    var created = new Field(fieldKey, fieldValue, storeType, indexType);
    created.SetBoost(boost);
    return created;
}
/// <summary>
/// Builds a Document from an IndexNode: analyzed name fields with per-key
/// boosts, stored-verbatim value fields, and numeric int/long fields.
/// (The method name keeps its historical "Doucment" spelling — callers depend on it.)
/// </summary>
/// <param name="node">Index node describing all fields for one document.</param>
/// <returns>The populated document.</returns>
public static Document CreateDoucment(IndexNode node)
{
    Document doc = new Document();

    foreach (KeyValuePair<string, string> name in node.IndexNames)
    {
        var field = new Field(name.Key, name.Value, Field.Store.YES, Field.Index.ANALYZED);

        // "name" keeps the full configured boost; other keys are progressively de-boosted.
        var boost = SettingCache.Boost;
        if (name.Key == "catalog")
            boost = boost - 4;
        else if (name.Key == "act")
            boost = boost - 8;
        else if (name.Key == "area")
            boost = boost - 16;

        field.SetBoost(boost);
        doc.Add(field);
    }

    foreach (KeyValuePair<string, string> value in node.IndexValues)
    {
        doc.Add(new Field(value.Key, value.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }

    if (node.IndexNumerices != null)
    {
        foreach (KeyValuePair<string, int> numeric in node.IndexNumerices)
        {
            doc.Add(new NumericField(numeric.Key, Field.Store.YES, true).SetIntValue(numeric.Value));
        }
    }

    if (node.IndexLongs != null)
    {
        foreach (var numeric in node.IndexLongs)
        {
            doc.Add(new NumericField(numeric.Key, Field.Store.YES, true).SetLongValue(numeric.Value));
        }
    }

    return doc;
}
/// <summary>
/// Demo: builds one document containing a boosted title field, writes it to
/// the index, optimizes, and closes the writer.
/// </summary>
public void CreateIndex()
{
    var doc = new Document();

    // Field.Store controls whether the original (pre-analysis) value is kept;
    // Field.Index controls tokenisation; Field.TermVector controls whether term
    // vectors (and optionally positions/offsets) are stored per document.
    var titleField = new Field("title", "笑傲江湖", Field.Store.YES, Field.Index.ANALYZED);
    titleField.SetBoost(1.1f); // per-field boost
    doc.Add(titleField);

    doc.SetBoost(2); // per-document boost (default is 1.0)

    this.indexWriter.AddDocument(doc);
    this.indexWriter.Optimize(); // compact the index structure
    this.indexWriter.Close();    // flush and release the writer
}
/// <summary>
/// Builds the Lucene document for a ticket: id (stored only), boosted title,
/// details, tags and the concatenated comment text. Tickets that are not
/// closed receive a document-level boost so they rank above closed ones.
/// </summary>
/// <param name="ticket">Ticket to index.</param>
/// <returns>The populated document.</returns>
private Document CreateIndexDocuementForTicket(Ticket ticket)
{
    var doc = new Document();

    // Concatenate all comment bodies into a single searchable blob.
    var sb = new StringBuilder();
    foreach (var comment in ticket.TicketComments.Select(c => c.Comment))
    {
        sb.AppendLine(comment);
    }
    var commentText = sb.ToString();

    var idField = new Lucene.Net.Documents.Field(
        "ticketid",
        ticket.TicketId.ToString(),
        Lucene.Net.Documents.Field.Store.YES,
        Lucene.Net.Documents.Field.Index.NO,
        Lucene.Net.Documents.Field.TermVector.NO);

    var titleField = new Lucene.Net.Documents.Field(
        "title",
        ticket.Title ?? string.Empty,
        Lucene.Net.Documents.Field.Store.YES,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    titleField.SetBoost(1.5F);

    var detailsField = new Lucene.Net.Documents.Field(
        "details",
        ticket.Details ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    detailsField.SetBoost(1F);

    var tagsField = new Lucene.Net.Documents.Field(
        "tags",
        ticket.TagList ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.NO);
    tagsField.SetBoost(2F);

    var commentsField = new Lucene.Net.Documents.Field(
        "comments",
        commentText ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    commentsField.SetBoost(.8F);

    doc.Add(idField);
    doc.Add(titleField);
    doc.Add(detailsField);
    doc.Add(tagsField);
    doc.Add(commentsField);

    // Open tickets outrank closed ones.
    if (ticket.CurrentStatus != "Closed")
    {
        doc.SetBoost(10F);
    }

    return doc;
}
/// <summary>
/// Maps a SampleData record onto a Lucene document; ArtNo gets the highest
/// boost and the name variants the lowest.
/// </summary>
/// <param name="obj">The record to index.</param>
/// <returns>The populated document.</returns>
private static Document CreateDocument(SampleData obj)
{
    var doc = new Document();

    var artNoField = new Field("ArtNo", obj.ArtNo, Field.Store.NO, Field.Index.ANALYZED);
    artNoField.SetBoost(2F);
    doc.Add(artNoField);

    var idField = new Field("Id", obj.Id.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED);
    idField.SetBoost(1);
    doc.Add(idField);

    var nameField = new Field("Name", obj.Name, Field.Store.NO, Field.Index.ANALYZED);
    nameField.SetBoost(.1F);
    doc.Add(nameField);

    var spacedNameField = new Field("NameWithWiteSpace", obj.Name.RemoveSymbols(" "), Field.Store.NO, Field.Index.ANALYZED);
    spacedNameField.SetBoost(.1F);
    doc.Add(spacedNameField);

    var extendedNameField = new Field("NameWithWiteSpaceExt", obj.Name.RemoveSymbols(" ").RemoveSymvolsExt(" "), Field.Store.NO, Field.Index.ANALYZED);
    extendedNameField.SetBoost(.1F);
    doc.Add(extendedNameField);

    return doc;
}
/// <summary>
/// Prompts for a folder and indexes every *.txt file in it: the file name
/// (minus extension) becomes a boosted "title" field, the file text becomes
/// "content", and the full path is stored for display.
/// </summary>
private void btnIndexAddFolder_Click(object sender, EventArgs e)
{
    using (new BusyObject(this))
    {
        string hspellPath = SelectProjectFolder("Select a path to add to the index", null);
        if (hspellPath == null)
            return;

        // Directory.GetFiles never returns null, so the old "files != null" check was dead.
        string[] files = System.IO.Directory.GetFiles(hspellPath, "*.txt");

        Directory indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(tempPath));
        try
        {
            IndexWriter writer = new IndexWriter(indexDirectory, analyzer, false, new IndexWriter.MaxFieldLength(int.MaxValue));
            try
            {
                foreach (string f in files)
                {
                    Document doc = new Document();
                    string text = System.IO.File.ReadAllText(f);
                    string title = f.Substring(f.LastIndexOf(System.IO.Path.DirectorySeparatorChar) + 1).Replace(".txt", "");

                    Fieldable titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
                    titleField.SetBoost(5.0f); // file name outranks body text
                    doc.Add(titleField);

                    doc.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("path", f, Field.Store.YES, Field.Index.NO)); // stored only, for display

                    writer.AddDocument(doc);
                }
            }
            finally
            {
                // Fix: previously the writer (and the index lock it holds) leaked if
                // reading or indexing a file threw; now it is always closed.
                writer.Close();
            }
        }
        finally
        {
            indexDirectory.Close();
        }
    }
}
/// <summary>
/// Creates the index document for a ticket. The title, details, tags and
/// combined comment text are indexed with individual boosts; tickets whose
/// status is not "Closed" get a document-level boost.
/// </summary>
/// <param name="ticket">Ticket to index.</param>
/// <returns>The populated document.</returns>
private Document CreateIndexDocuementForTicket(Ticket ticket)
{
    var doc = new Document();

    // Fold every comment into one searchable text block.
    var commentBuilder = new StringBuilder();
    foreach (var c in from tc in ticket.TicketComments select tc.Comment)
    {
        commentBuilder.AppendLine(c);
    }
    var commentText = commentBuilder.ToString();

    var idField = new Lucene.Net.Documents.Field(
        "ticketid",
        ticket.TicketId.ToString(),
        Lucene.Net.Documents.Field.Store.YES,
        Lucene.Net.Documents.Field.Index.NO,
        Lucene.Net.Documents.Field.TermVector.NO);
    doc.Add(idField);

    var titleField = new Lucene.Net.Documents.Field(
        "title",
        ticket.Title ?? string.Empty,
        Lucene.Net.Documents.Field.Store.YES,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    titleField.SetBoost(1.5F);
    doc.Add(titleField);

    var detailsField = new Lucene.Net.Documents.Field(
        "details",
        ticket.Details ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    detailsField.SetBoost(1F);
    doc.Add(detailsField);

    var tagsField = new Lucene.Net.Documents.Field(
        "tags",
        ticket.TagList ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.NO);
    tagsField.SetBoost(2F);
    doc.Add(tagsField);

    var commentsField = new Lucene.Net.Documents.Field(
        "comments",
        commentText ?? string.Empty,
        Lucene.Net.Documents.Field.Store.NO,
        Lucene.Net.Documents.Field.Index.ANALYZED,
        Lucene.Net.Documents.Field.TermVector.YES);
    commentsField.SetBoost(.8F);
    doc.Add(commentsField);

    // Rank open tickets above closed ones.
    if (ticket.CurrentStatus != "Closed")
    {
        doc.SetBoost(10F);
    }

    return doc;
}
/// <summary>
/// Builds the complete Lucene document for a package — searchable fields
/// (Id/Title in several tokenizations, Description, Tags, Authors/Owners),
/// stored-only data fields (to avoid SQL round-trips at search time), and
/// filter/sort fields — then appends it to the index.
/// </summary>
private void AddPackage(Package package)
{
    var document = new Document();

    // Exact, untokenized id — boosted highest so exact matches win.
    var field = new Field("Id-Exact", package.PackageRegistration.Id.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED);
    field.SetBoost(2.5f);
    document.Add(field);

    // Store description so we can show them in search results
    field = new Field("Description", package.Description, Field.Store.YES, Field.Index.ANALYZED);
    field.SetBoost(0.1f);
    document.Add(field);

    // We store the Id/Title field in multiple ways, so that it's possible to match using multiple
    // styles of search
    // Note: no matter which way we store it, it will also be processed by the Analyzer later.

    // Style 1: As-Is Id, no tokenizing (so you can search using dot or dash-joined terms)
    // Boost this one
    // NOTE(review): despite the comment above, no SetBoost is called here, so this
    // field keeps the default boost of 1.0 — confirm whether a boost was intended.
    field = new Field("Id", package.PackageRegistration.Id, Field.Store.NO, Field.Index.ANALYZED);
    document.Add(field);

    // Style 2: dot+dash tokenized (so you can search using undotted terms)
    field = new Field("Id", SplitId(package.PackageRegistration.Id), Field.Store.NO, Field.Index.ANALYZED);
    field.SetBoost(0.8f);
    document.Add(field);

    // Style 3: camel-case tokenized (so you can search using parts of the camelCasedWord).
    // De-boosted since matches are less likely to be meaningful
    field = new Field("Id", CamelSplitId(package.PackageRegistration.Id), Field.Store.NO, Field.Index.ANALYZED);
    field.SetBoost(0.25f);
    document.Add(field);

    // If an element does not have a Title, fall back to Id, same as the website.
    var workingTitle = String.IsNullOrEmpty(package.Title) ? package.PackageRegistration.Id : package.Title;

    // As-Is (stored for search results)
    field = new Field("Title", workingTitle, Field.Store.YES, Field.Index.ANALYZED);
    field.SetBoost(0.9f);
    document.Add(field);

    // no need to store dot+dash tokenized - we'll handle this in the analyzer
    field = new Field("Title", SplitId(workingTitle), Field.Store.NO, Field.Index.ANALYZED);
    field.SetBoost(0.8f);
    document.Add(field);

    // camel-case tokenized
    field = new Field("Title", CamelSplitId(workingTitle), Field.Store.NO, Field.Index.ANALYZED);
    field.SetBoost(0.5f);
    document.Add(field);

    if (!String.IsNullOrEmpty(package.Tags))
    {
        // Store tags so we can show them in search results
        field = new Field("Tags", package.Tags, Field.Store.YES, Field.Index.ANALYZED);
        field.SetBoost(0.8f);
        document.Add(field);
    }

    // note Authors and Dependencies have flattened representations in the data model.
    document.Add(new Field("Authors", package.FlattenedAuthors.ToStringSafe(), Field.Store.NO, Field.Index.ANALYZED));
    document.Add(new Field("FlattenedAuthors", package.FlattenedAuthors.ToStringSafe(), Field.Store.YES, Field.Index.NO));

    // Fields for storing data to avoid hitting SQL while doing searches
    if (!String.IsNullOrEmpty(package.IconUrl))
    {
        document.Add(new Field("IconUrl", package.IconUrl, Field.Store.YES, Field.Index.NO));
    }

    if (package.PackageRegistration.Owners.AnySafe())
    {
        string flattenedOwners = String.Join(";", package.PackageRegistration.Owners.Select(o => o.Username));
        document.Add(new Field("Owners", flattenedOwners, Field.Store.NO, Field.Index.ANALYZED));
        document.Add(new Field("FlattenedOwners", flattenedOwners, Field.Store.YES, Field.Index.NO));
    }

    document.Add(new Field("Copyright", package.Copyright.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Created", package.Created.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("FlattenedDependencies", package.FlattenedDependencies.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Hash", package.Hash.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("HashAlgorithm", package.HashAlgorithm.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Id-Original", package.PackageRegistration.Id, Field.Store.YES, Field.Index.NO));
    document.Add(new Field("LastUpdated", package.LastUpdated.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Language", package.Language.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("LicenseUrl", package.LicenseUrl.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Key", package.Key.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Version", package.Version.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("VersionDownloadCount", package.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("PackageFileSize", package.PackageFileSize.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("ProjectUrl", package.ProjectUrl.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Published", package.Published.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("ReleaseNotes", package.ReleaseNotes.ToStringSafe(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("RequiresLicenseAcceptance", package.RequiresLicenseAcceptance.ToString(), Field.Store.YES, Field.Index.NO));
    document.Add(new Field("Summary", package.Summary.ToStringSafe(), Field.Store.YES, Field.Index.NO));

    if (package.SupportedFrameworks.AnySafe())
    {
        string joinedFrameworks = string.Join(";", package.SupportedFrameworks.Select(f => f.FrameworkName));
        document.Add(new Field("JoinedSupportedFrameworks", joinedFrameworks, Field.Store.YES, Field.Index.NO));
    }

    // Fields meant for filtering, also storing data to avoid hitting SQL while doing searches
    document.Add(new Field("IsLatest", package.IsLatest.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.Add(new Field("IsLatestStable", package.IsLatestStable.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Note: Used to identify index records for updates
    document.Add(new Field("PackageRegistrationKey", package.PackageRegistrationKey.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Fields meant for filtering, sorting
    document.Add(new Field("PublishedDate", package.Published.Ticks.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
    document.Add(
        new Field("DownloadCount", package.PackageRegistration.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED));

    string displayName = String.IsNullOrEmpty(package.Title) ? package.PackageRegistration.Id : package.Title;
    document.Add(new Field("DisplayName", displayName.ToLower(CultureInfo.CurrentCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));

    _indexWriter.AddDocument(document);
}
/// <summary>
/// Adds an announcement (公告) document to the index; the title field is
/// boosted above the rest. Indexing failures are silently ignored.
/// </summary>
/// <param name="writer">Open index writer.</param>
/// <param name="model">Announcement entity to index.</param>
private void GG_AddIndex(IndexWriter writer, 公告 model)
{
    try
    {
        Document doc = new Document();
        doc.Add(new Field("NumId", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // stored + indexed verbatim

        Field titleField = new Field("Title", model.内容主体.标题, Field.Store.YES, Field.Index.ANALYZED);
        titleField.SetBoost(5F); // weight the title above other fields
        doc.Add(titleField);

        doc.Add(new Field("Content", model.内容主体.内容, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AdClass", model.公告信息.公告类别.ToString(), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AdFeature", model.公告信息.公告性质.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("AdPro", model.公告信息.所属地域.省份, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AdCity", model.公告信息.所属地域.城市, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AdArea", model.公告信息.所属地域.区县, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AdHy", model.公告信息.一级分类, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("AddTime", model.内容主体.发布时间.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        writer.AddDocument(doc);
    }
    catch
    {
        // Deliberate best-effort: swallow indexing failures for a single announcement.
        return;
    }
}
/// <summary>
/// Maps one LuceneModel record onto a Lucene document (one record = one
/// document) and writes it via the supplied IndexWriter. All values are
/// stored as strings; the title is analyzed with positions/offsets and
/// boosted by TitleBoost.
/// </summary>
/// <param name="model">Record to index.</param>
/// <param name="writer">Open index writer.</param>
private void InsertData(LuceneModel model, IndexWriter writer)
{
    var document = new Document();

    // Store.YES keeps the raw value so it can be read back with document.Get;
    // NOT_ANALYZED indexes it verbatim (no tokenisation) for exact filtering.
    document.Add(new Field("id", model.ID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.Add(new Field("type", model.Type.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    var titleField = new Field("title", model.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    titleField.SetBoost(TitleBoost);
    document.Add(titleField);

    // ANALYZED + WITH_POSITIONS_OFFSETS: tokenised, keeping term positions and
    // offsets so phrase queries and highlighting work; COMPRESS shrinks long bodies.
    if (!string.IsNullOrEmpty(model.Content))
    {
        document.Add(new Field("content", model.Content, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    }

    // NOTE(review): "hh" is the 12-hour clock — possibly "HH" was intended; existing
    // index entries depend on this format, so confirm before changing it.
    document.Add(
        new Field("createtime", model.CreateTime.ToString("yyyyMMddhhmmss"), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

    if (!string.IsNullOrEmpty(model.Images))
    {
        document.Add(new Field("images", model.Images, Field.Store.YES, Field.Index.NO));
    }

    if (!string.IsNullOrEmpty(model.Tags))
    {
        document.Add(new Field("tags", model.Tags, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    }

    document.Add(new Field("clickcount", model.ClickCount.ToString(), Field.Store.YES, Field.Index.NO));

    writer.AddDocument(document);
}
/// <summary>
/// Builds the full search document for a supplier (供应商) — region, contacts, industry,
/// certification/annual-check flags, product counts and advertised ("show") products —
/// and adds it to the index. Any exception silently skips this one record (best-effort
/// bulk indexing; the outer catch swallows everything).
/// </summary>
/// <param name="writer">Open Lucene index writer the document is added to.</param>
/// <param name="model">Supplier entity to index.</param>
private void AddIndex_gys(IndexWriter writer, 供应商 model)
{
    try
    {
        Document doc = new Document();
        // Primary key: stored for retrieval; indexed as one exact (untokenized) token.
        doc.Add(new Field("NumId", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Region fields (province / city / district) are analyzed and boosted 3x so
        // location matches rank above plain content matches.
        if (model.所属地域.省份 != null)
        {
            Field f = new Field("Province", model.所属地域.省份, Field.Store.YES, Field.Index.ANALYZED); // province
            f.SetBoost(3F);
            doc.Add(f);
        }
        if (model.所属地域.城市 != null)
        {
            Field f = new Field("City", model.所属地域.城市, Field.Store.YES, Field.Index.ANALYZED); // city
            f.SetBoost(3F);
            doc.Add(f);
        }
        if (model.所属地域.区县 != null)
        {
            Field f = new Field("Area", model.所属地域.区县, Field.Store.YES, Field.Index.ANALYZED); // district/county
            f.SetBoost(3F);
            doc.Add(f);
        }
        // Contact landline; stored, untokenized. (Original comment said "注册地址"/registered
        // address — likely a copy-paste leftover.)
        if (model.企业联系人信息 != null && model.企业联系人信息.联系人固定电话 != null)
        {
            doc.Add(new Field("Telephone", model.企业联系人信息.联系人固定电话, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        // Contact person name; stored, untokenized.
        if (model.企业联系人信息 != null && model.企业联系人信息.联系人姓名 != null)
        {
            doc.Add(new Field("P_Name", model.企业联系人信息.联系人姓名, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        // Industry classification, analyzed + boosted like the region fields.
        if (model.企业基本信息 != null && model.企业基本信息.所属行业 != null)
        {
            Field f = new Field("Industry", model.企业基本信息.所属行业, Field.Store.YES, Field.Index.ANALYZED);
            f.SetBoost(3F);
            doc.Add(f);
        }
        // Earlier variant indexed only the top-level category (一级分类); kept for reference.
        //if (model.可提供产品类别列表 != null && model.可提供产品类别列表.Any())
        //{
        //    var pro_industry = "";
        //    foreach (var item in model.可提供产品类别列表)
        //    {
        //        pro_industry += item.一级分类 + ";";
        //    }
        //    Field f = new Field("Pro_Industry", pro_industry, Field.Store.YES, Field.Index.ANALYZED);//可提供产品分类
        //    doc.Add(f);
        //}
        // Product categories the supplier can offer: every second-level category (二级分类)
        // is concatenated into one ";"-separated analyzed field.
        if (model.可提供产品类别列表 != null && model.可提供产品类别列表.Any())
        {
            var pro_industry = "";
            foreach (var item in model.可提供产品类别列表)
            {
                foreach (var it in item.二级分类)
                {
                    pro_industry += it + ";";
                }
            }
            Field f = new Field("Pro_Industry", pro_industry, Field.Store.YES, Field.Index.ANALYZED); // offerable product categories
            doc.Add(f);
        }
        // Company name: the main search target, strongest boost (5x).
        if (model.企业基本信息.企业名称 != null)
        {
            Field f = new Field("Name", model.企业基本信息.企业名称, Field.Store.YES, Field.Index.ANALYZED);
            f.SetBoost(5F);
            doc.Add(f);
        }
        // Certification level as an integer string. (ToString on 认证级别 is redundant —
        // it is already a string — but harmless.)
        var 认证级别 = ((int)model.供应商用户信息.认证级别).ToString();
        doc.Add(new Field("Rzjb", 认证级别.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Icon flag string, comma-separated in fixed order:
        // annual check, emergency supplier, contract supplier, then warehousing level.
        var 图标 = "";
        // "1" when the annual-check list contains an entry for the current year.
        if (model.供应商用户信息.年检列表 != null && model.供应商用户信息.年检列表.Any()
            && model.供应商用户信息.年检列表.ContainsKey(DateTime.Now.Year.ToString()))
        {
            图标 += "1,";
        }
        else
        {
            图标 += "0,";
        }
        if (model.供应商用户信息.应急供应商) // emergency supplier flag
        {
            图标 += "1,";
        }
        else
        {
            图标 += "0,";
        }
        if (model.供应商用户信息.协议供应商) // contract supplier flag
        {
            图标 += "1,";
        }
        else
        {
            图标 += "0,";
        }
        图标 += ((int)model.供应商用户信息.入库级别).ToString(); // warehousing level (no trailing comma)
        Field f1 = new Field("Level_Flage", 图标, Field.Store.YES, Field.Index.NOT_ANALYZED);
        doc.Add(f1);
        // Employee head-count, stored untokenized.
        doc.Add(new Field("People_Count", model.企业基本信息.员工人数.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Total count of this supplier's approved products (审核通过).
        var pro_count = 商品管理.计数供应商商品(model.Id, 0, 0, Query<商品>.EQ(o => o.审核数据.审核状态, 审核状态.审核通过), false);
        doc.Add(new Field("Pro_Count", pro_count.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Number of historical bid participations.
        var history_count = model.历史参标记录.Count();
        doc.Add(new Field("History_Count", history_count.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Qualification certificate: path of the first scan of the first certificate, or "".
        var zzzs = model.资质证书列表.Any() && model.资质证书列表[0].资质证书电子扫描件.Any()
                   && !string.IsNullOrWhiteSpace(model.资质证书列表[0].资质证书电子扫描件[0].路径)
                   ? model.资质证书列表[0].资质证书电子扫描件[0].路径
                   : "";
        doc.Add(new Field("Zzzs_Pic", zzzs, Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Factory/equipment picture: last uploaded supplier picture, or "".
        var gyspic = model.供应商用户信息.供应商图片.Any() ?
            model.供应商用户信息.供应商图片.Last() : "";
        doc.Add(new Field("Gys_Pic", gyspic, Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Advertised ("show") products for the right-hand display panel, pre-rendered as a
        // delimited string: per product "pic****name****price****id****||||".
        var showprostr = "";
        var showpropic = new List<商品链接>();
        // NOTE(review): 增值服务记录 is not declared in this method — presumably a
        // class-level field holding the supplier's value-added-service records; confirm.
        if (增值服务记录 != null)
        {
            // Active (unexpired) promotion services determine how many products to show:
            // B1-2 slot or business membership => 6; B1-1 slot or standard membership => 3.
            var 广告商品B1_1 = 增值服务记录.已开通的服务.Where(o => o.所申请项目名.Contains("企业推广服务B1-1位置") && o.结束时间 > DateTime.Now);
            var 广告商品B1_2 = 增值服务记录.已开通的服务.Where(o => o.所申请项目名.Contains("企业推广服务B1-2位置") && o.结束时间 > DateTime.Now);
            var 商务会员 = 增值服务记录.已开通的服务.Where(o => o.所申请项目名.Contains("商务会员") && o.结束时间 > DateTime.Now);
            var 标准会员 = 增值服务记录.已开通的服务.Where(o => o.所申请项目名.Contains("标准会员") && o.结束时间 > DateTime.Now);
            var 广告商品数 = (广告商品B1_2.Any() || 商务会员.Any()) ? 6 : (广告商品B1_1.Any() || 标准会员.Any()) ? 3 : 0;
            if (广告商品数 == 6)
            {
                if (model.供应商用户信息.广告商品.ContainsKey("企业推广服务B1-2位置"))
                {
                    showpropic = model.供应商用户信息.广告商品["企业推广服务B1-2位置"].Select(o => o.商品).ToList();
                }
                else if (model.供应商用户信息.广告商品.ContainsKey("商务会员"))
                {
                    showpropic = model.供应商用户信息.广告商品["商务会员"].Select(o => o.商品).ToList();
                }
            }
            if (广告商品数 == 3)
            {
                if (model.供应商用户信息.广告商品.ContainsKey("企业推广服务B1-1位置"))
                {
                    showpropic = model.供应商用户信息.广告商品["企业推广服务B1-1位置"].Select(o => o.商品).ToList();
                }
                else if (model.供应商用户信息.广告商品.ContainsKey("标准会员"))
                {
                    showpropic = model.供应商用户信息.广告商品["标准会员"].Select(o => o.商品).ToList();
                }
            }
            // If fewer products were hand-picked than the quota, pad with the first
            // approved products (up to 3/6 total).
            if (广告商品数 > 0 && showpropic.Count < 广告商品数)
            {
                var sp = 商品管理.查询供应商商品(model.Id, 0, 广告商品数 - showpropic.Count, Query<商品>.EQ(o => o.审核数据.审核状态, 审核状态.审核通过), includeDisabled: false);
                foreach (var s in sp)
                {
                    // NOTE(review): only 商品ID is set here, yet the display loop below
                    // dereferences itemlist.商品 — for these padded entries that property
                    // looks unset (possible null). Any NRE is hidden by the outer catch,
                    // silently dropping the whole supplier from the index. Verify.
                    商品链接 p = new 商品链接();
                    p.商品ID = s.Id;
                    showpropic.Add(p);
                }
            }
            if (showpropic.Any())
            {
                foreach (var itemlist in showpropic)
                {
                    var item = itemlist.商品;
                    // picture (fallback placeholder when the product has no image)
                    if (item.商品信息.商品图片.Any())
                    {
                        showprostr += item.商品信息.商品图片[0] + "****";
                    }
                    else
                    {
                        showprostr += "/Images/noimage.jpg****";
                    }
                    // name
                    showprostr += item.商品信息.商品名 + "****";
                    // price
                    showprostr += item.销售信息.价格 + "****";
                    // id, then the "||||" record separator
                    showprostr += item.Id + "****||||";
                }
            }
        }
        doc.Add(new Field("Show_Product", showprostr, Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Business type "type/subtype", stored untokenized.
        doc.Add(new Field("Management",
            model.企业基本信息.经营类型 + "/" + model.企业基本信息.经营子类型, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    catch
    {
        // Best-effort indexing: a record that fails for any reason is skipped silently.
        return;
    }
}
/// <summary>
/// Builds a lightweight search document for a supplier (供应商) — id, region, contacts,
/// industry, name, annual-check and storage flags — and adds it to the index.
/// Any exception silently skips this one record so a bad entity cannot abort a bulk run.
/// </summary>
/// <param name="writer">Open Lucene index writer the document is added to.</param>
/// <param name="model">Supplier entity to index.</param>
public static void AddIndex_gys(IndexWriter writer, 供应商 model)
{
    try
    {
        var document = new Document();

        // Primary key: stored for retrieval, indexed as one exact token.
        document.Add(new Field("NumId", model.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // Region fields are analyzed and boosted 3x so location matches rank higher.
        if (model.所属地域.省份 != null)
        {
            var provinceField = new Field("Province", model.所属地域.省份, Field.Store.YES, Field.Index.ANALYZED);
            provinceField.SetBoost(3F);
            document.Add(provinceField);
        }
        if (model.所属地域.城市 != null)
        {
            var cityField = new Field("City", model.所属地域.城市, Field.Store.YES, Field.Index.ANALYZED);
            cityField.SetBoost(3F);
            document.Add(cityField);
        }
        if (model.所属地域.区县 != null)
        {
            var areaField = new Field("Area", model.所属地域.区县, Field.Store.YES, Field.Index.ANALYZED);
            areaField.SetBoost(3F);
            document.Add(areaField);
        }

        // Contact details: stored untokenized, no full-text search.
        if (model.企业联系人信息 != null && model.企业联系人信息.联系人固定电话 != null)
        {
            document.Add(new Field("Telephone", model.企业联系人信息.联系人固定电话, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        if (model.企业联系人信息 != null && model.企业联系人信息.联系人姓名 != null)
        {
            document.Add(new Field("P_Name", model.企业联系人信息.联系人姓名, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }

        // Industry classification, boosted like the region fields. (An earlier variant
        // that read 可提供产品类别列表[0].一级分类 instead was disabled upstream.)
        if (model.企业基本信息 != null && model.企业基本信息.所属行业 != null)
        {
            var industryField = new Field("Industry", model.企业基本信息.所属行业, Field.Store.YES, Field.Index.ANALYZED);
            industryField.SetBoost(3F);
            document.Add(industryField);
        }

        // Company name is the main search target and gets the strongest boost.
        if (model.企业基本信息.企业名称 != null)
        {
            var nameField = new Field("Name", model.企业基本信息.企业名称, Field.Store.YES, Field.Index.ANALYZED);
            nameField.SetBoost(5F);
            document.Add(nameField);
        }

        // "1" when the supplier has an annual-check entry for the current year, else "0".
        var passedYearCheck = model.供应商用户信息.年检列表 != null
                              && model.供应商用户信息.年检列表.Any()
                              && model.供应商用户信息.年检列表.ContainsKey(DateTime.Now.Year.ToString());
        document.Add(new Field("YearCheck", passedYearCheck ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));

        // Storage flag is currently hard-coded to "1"; the conditional variant that read
        // model.供应商用户信息.已入库 was disabled upstream.
        document.Add(new Field("Storage", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

        writer.AddDocument(document);
    }
    catch
    {
        // Intentionally best-effort: skip records that fail to index.
        return;
    }
}
/// <summary>
/// Indexes an entire corpus into a new index named after <paramref name="corpusName"/>,
/// publishing progress through <c>IndexingStatus</c>. Only one indexing run may be active
/// at a time; a call made while another run is in progress returns immediately.
/// </summary>
/// <param name="corpusReader">Corpus source; raises OnDocument per document and OnProgress for status.</param>
/// <param name="corpusName">Name of the corpus / target index directory.</param>
public void BeginIndexing(ICorpusReader corpusReader, string corpusName)
{
    // Re-entrancy guard. NOTE(review): this check-then-set is not thread-safe; confirm
    // callers invoke BeginIndexing from a single thread.
    if (IndexingStatus != null)
        return; // there's already an indexing process running

    // Init
    IndexingStatus = new IndexingProgressInfo
    {
        IndexName = corpusName,
        Percentage = 0,
        Status = "Launching",
        IsRunning = true,
    };

    // Create a morphologic analyzer to be used for indexing by default; keeping the
    // original token too allows non-morphologic searches against the same index.
    var morphIndexingAnalyzer = new HtmlMorphAnalyzer(MorphAnalyzer);
    morphIndexingAnalyzer.alwaysSaveMarkedOriginal = true;
    var indexingAnalyzer = new PerFieldAnalyzerWrapper(morphIndexingAnalyzer);

    // Allow for one field each to be indexed using StandardAnalyzer, for comparison.
    indexingAnalyzer.AddAnalyzer("TitleDefault", new HtmlStandardAnalyzer());
    indexingAnalyzer.AddAnalyzer("ContentDefault", new HtmlStandardAnalyzer());

    // Create the indexer
    var indexPath = Path.Combine(IndexesStoragePath, corpusName);
    var writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)),
        indexingAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    // BUG FIX: previously none of the work below was protected. If corpusReader.Read()
    // (or optimization) threw, the writer was never closed — leaving the index directory
    // write-locked — and IndexingStatus stayed non-null forever, so the guard at the top
    // permanently blocked every future indexing run. The try/finally guarantees cleanup.
    try
    {
        writer.SetUseCompoundFile(false);

        // Called for every document the corpus reader produces.
        corpusReader.OnDocument += corpusDoc =>
        {
            var content = corpusDoc.AsHtml();

            // Skip blank documents — worthless to us even though they have a title.
            if (string.IsNullOrEmpty(content))
                return;

            var doc = new Document();
            doc.Add(new Field("Id", corpusDoc.Id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

            // Title, boosted: one morph-analyzed copy and one StandardAnalyzer copy.
            var titleField = new Field("Title", corpusDoc.Title, Field.Store.COMPRESS,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            titleField.SetBoost(3.0f);
            doc.Add(titleField);
            titleField = new Field("TitleDefault", corpusDoc.Title, Field.Store.NO,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            titleField.SetBoost(3.0f);
            doc.Add(titleField);

            // Two versions of content — one analyzed by HebMorph, one by StandardAnalyzer.
            doc.Add(new Field("Content", content, Field.Store.COMPRESS,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("ContentDefault", content, Field.Store.NO,
                Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            writer.AddDocument(doc);
        };

        // Progress reporting
        corpusReader.OnProgress += (percentage, status, isRunning) =>
        {
            IndexingStatus = new IndexingProgressInfo
            {
                IndexName = corpusName,
                Percentage = percentage,
                Status = status,
                IsRunning = isRunning,
            };
        };

        // Execute corpus reading, which triggers indexing for each document found.
        corpusReader.Read();

        // Wrap up and optimize.
        IndexingStatus = new IndexingProgressInfo
        {
            IndexName = corpusName,
            Percentage = 100,
            Status = "Optimizing index",
            IsRunning = true,
        };
        writer.SetUseCompoundFile(true);
        writer.Optimize();
    }
    finally
    {
        // Always release the index lock and clear the status so another run can start.
        writer.Close();
        UpdateIndexesList();
        IndexingStatus = null;
    }
}
/// <summary>
/// Indexes one document's chapters and paragraphs: each chapter with content becomes one
/// Lucene document (Type="chapter"), and each paragraph with content becomes one Lucene
/// document (Type="paragraph"), regardless of whether its chapter had content itself.
/// Finishes by optimizing, flushing and closing the shared Writer.
/// </summary>
/// <param name="documentId">Database id of the document to index.</param>
public void Index(int documentId)
{
    using (var db = new SkybraryEntities())
    {
        // Load the document plus all chapters/paragraphs (with their definitions) up front.
        var sourceDocument = db.DocumentSet.Where(d => d.Id == documentId).First();
        var chapterList = (from c in db.ChapterSet
                               .Include("Content.ContentDefinitions.Definition.Content")
                               .Include("Paragraphs.Content.ContentDefinitions.Definition.Content")
                           where c.Document.Id == documentId
                           select c).ToList();

        foreach (var chapter in chapterList)
        {
            // Chapter-level document, only when the chapter body itself has content.
            if (chapter.HasContent())
            {
                var chapterTitle = new Field("Title", chapter.Title, Field.Store.YES,
                    Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                chapterTitle.SetBoost(1.1F); // title matches rank slightly above content matches

                var chapterDoc = new Lucene.Net.Documents.Document();
                chapterDoc.Add(new Field("IcaoId", sourceDocument.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                chapterDoc.Add(new Field("DocumentTitle", sourceDocument.Title, Field.Store.YES, Field.Index.NO));
                chapterDoc.Add(new Field("Type", "chapter", Field.Store.YES, Field.Index.NO));
                chapterDoc.Add(new Field("Id", chapter.Id.ToString(), Field.Store.YES, Field.Index.NO));
                chapterDoc.Add(new Field("OrderInDocument", chapter.OrderInDocument.ToString(), Field.Store.YES, Field.Index.NO));
                chapterDoc.Add(chapterTitle);
                chapterDoc.Add(new Field("Content", chapter.Content.ToSearchIndex(), Field.Store.YES,
                    Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                Writer.AddDocument(chapterDoc);
            }

            // Paragraphs are indexed independently of the chapter-content check above.
            foreach (var paragraph in chapter.Paragraphs)
            {
                if (!paragraph.HasContent())
                    continue;

                // Paragraph title is prefixed with its number, e.g. "2.1 Scope".
                var paragraphTitle = new Field("Title", paragraph.Number + " " + paragraph.Title,
                    Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                paragraphTitle.SetBoost(1.1F);

                var paragraphDoc = new Lucene.Net.Documents.Document();
                paragraphDoc.Add(new Field("IcaoId", sourceDocument.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                paragraphDoc.Add(new Field("DocumentTitle", sourceDocument.Title, Field.Store.YES, Field.Index.NO));
                paragraphDoc.Add(new Field("Type", "paragraph", Field.Store.YES, Field.Index.NO));
                paragraphDoc.Add(new Field("Id", paragraph.Id.ToString(), Field.Store.YES, Field.Index.NO));
                paragraphDoc.Add(new Field("Chapter", chapter.Title, Field.Store.YES, Field.Index.NO));
                paragraphDoc.Add(new Field("OrderInChapter", paragraph.OrderInChapter.ToString(), Field.Store.YES, Field.Index.NO));
                paragraphDoc.Add(paragraphTitle);
                paragraphDoc.Add(new Field("Content", paragraph.Content.ToSearchIndex(), Field.Store.YES,
                    Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                Writer.AddDocument(paragraphDoc);
            }
        }

        // Finalize the index once, after every chapter/paragraph has been added.
        Writer.Optimize();
        Writer.Flush();
        Writer.Close();
    }
}