/// <summary>
/// Converts a <see cref="MarcellDocument"/> into a Lucene document carrying its metadata fields.
/// </summary>
/// <param name="sourceDocument">Document whose properties are copied into Lucene fields.</param>
/// <returns>A new <see cref="LuceneDocument"/> populated from <paramref name="sourceDocument"/>.</returns>
public static LuceneDocument ToLucene(this MarcellDocument sourceDocument)
{
    var luceneDoc = new LuceneDocument();

    // Identifiers: "Id" falls back to the "N"-formatted internal id when no explicit Id is set.
    string internalId = sourceDocument.InternalId.ToString("N");
    luceneDoc.AddStringField("Id", sourceDocument.Id ?? internalId, Field.Store.YES);
    luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

    // Dates attached to the document.
    luceneDoc.AddDateField("ApprovalDate", sourceDocument.ApprovalDate);
    luceneDoc.AddDateField("DocumentDate", sourceDocument.DocumentDate);
    luceneDoc.AddDateField("EffectiveDate", sourceDocument.EffectiveDate);

    // Document-level similarity data (consolidated tokens and topics).
    luceneDoc.AddStringList("DocumentToken", sourceDocument.DocumentSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("DocumentTopic", sourceDocument.DocumentSimilarityData.ConsolidatedTopics);

    luceneDoc.AddInt32Field("TokenCount", sourceDocument.TokenCount, Field.Store.YES);

    // Descriptive metadata; a missing value is written as an empty string.
    luceneDoc.AddTextField("DocumentType", sourceDocument.DocumentType ?? string.Empty, Field.Store.YES);
    luceneDoc.AddTextField("OriginalType", sourceDocument.OriginalType ?? string.Empty, Field.Store.YES);
    luceneDoc.AddTextField("Issuer", sourceDocument.Issuer ?? string.Empty, Field.Store.YES);
    luceneDoc.AddStringField("Language", sourceDocument.Language, Field.Store.YES);
    luceneDoc.AddStringField("Url", sourceDocument.Url ?? string.Empty, Field.Store.YES);

    luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceDocument.RecognitionQuality);
    luceneDoc.AddBoolField("IsStructured", sourceDocument.IsStructured);

    // The file name field is only written when a file name is present.
    string fileName = sourceDocument.FileName;
    if (fileName != null)
    {
        luceneDoc.AddStringField("FileName", fileName, Field.Store.YES);
    }

    return luceneDoc;
}
/// <summary>
/// Converts a <see cref="Section"/> into a Lucene document, optionally linking it to its parent document.
/// </summary>
/// <param name="sourceSection">Section whose properties are copied into Lucene fields.</param>
/// <param name="parentDocument">Owning document; when null, no "ParentDocumentId" field is written.</param>
/// <returns>A new <see cref="LuceneDocument"/> populated from <paramref name="sourceSection"/>.</returns>
public static LuceneDocument ToLucene(this Section sourceSection, MarcellDocument parentDocument)
{
    var luceneDoc = new LuceneDocument();

    // Identifiers: "Id" falls back to the "N"-formatted internal id when no explicit Id is set.
    string internalId = sourceSection.InternalId.ToString("N");
    luceneDoc.AddStringField("Id", sourceSection.Id ?? internalId, Field.Store.YES);
    luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

    // Similarity data at document and section level.
    luceneDoc.AddStringList("DocumentToken", sourceSection.DocumentSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("DocumentTopic", sourceSection.DocumentSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("SectionToken", sourceSection.SectionSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("SectionTopic", sourceSection.SectionSimilarityData.ConsolidatedTopics);

    luceneDoc.AddInt32Field("TokenCount", sourceSection.TokenCount, Field.Store.YES);
    luceneDoc.AddStringField("Language", sourceSection.Language, Field.Store.YES);
    luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceSection.RecognitionQuality);
    luceneDoc.AddStringField("Type", sourceSection.Type.ToString(), Field.Store.YES);

    // A missing section text is written as an empty string.
    luceneDoc.AddTextField("Text", sourceSection.Text ?? string.Empty, Field.Store.YES);

    // Reference to the parent document, when one was supplied.
    if (parentDocument != null)
    {
        string parentDocumentId = parentDocument.InternalId.ToString("N");
        luceneDoc.AddStringField("ParentDocumentId", parentDocumentId, Field.Store.YES);
    }

    return luceneDoc;
}
/// <summary>
/// Adds a sequence of strings to the document as one text field, with the items joined by single spaces.
/// </summary>
/// <param name="doc">Document that receives the field.</param>
/// <param name="name">Name of the field to add.</param>
/// <param name="stringList">Values to join; when null, nothing is added.</param>
/// <param name="store">Store option forwarded to <c>AddTextField</c>; defaults to <c>Field.Store.YES</c>.</param>
/// <returns>The value returned by <c>AddTextField</c>, or null when <paramref name="stringList"/> is null.</returns>
private static Field AddStringList(this LuceneDocument doc, string name, IEnumerable<string> stringList, Field.Store store = Field.Store.YES)
{
    if (stringList != null)
    {
        string joinedValues = string.Join(" ", stringList);
        return doc.AddTextField(name, joinedValues, store);
    }

    // No list supplied: the document is left untouched.
    return null;
}
/// <summary>
/// Converts a <see cref="Sentence"/> into a Lucene document, optionally linking it to its parent
/// document, section and paragraph.
/// </summary>
/// <param name="sourceSentence">Sentence whose properties are copied into Lucene fields.</param>
/// <param name="parentDocument">Owning document; when null, no "ParentDocumentId" field is written.</param>
/// <param name="parentSection">Owning section; when null, no "ParentSectionId" field is written.</param>
/// <param name="parentParagraph">Owning paragraph; when null, no "ParentParagraphId" field is written.</param>
/// <returns>A new <see cref="LuceneDocument"/> populated from <paramref name="sourceSentence"/>.</returns>
public static LuceneDocument ToLucene(this Sentence sourceSentence, MarcellDocument parentDocument, Section parentSection, Paragraph parentParagraph)
{
    var luceneDoc = new LuceneDocument();

    // Identifiers: "Id" falls back to the "N"-formatted internal id when no explicit Id is set.
    string internalId = sourceSentence.InternalId.ToString("N");
    luceneDoc.AddStringField("Id", sourceSentence.Id ?? internalId, Field.Store.YES);
    luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

    // Similarity data at document, section, paragraph and sentence level.
    luceneDoc.AddStringList("DocumentToken", sourceSentence.DocumentSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("DocumentTopic", sourceSentence.DocumentSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("SectionToken", sourceSentence.SectionSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("SectionTopic", sourceSentence.SectionSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("ParagraphToken", sourceSentence.ParagraphSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("ParagraphTopic", sourceSentence.ParagraphSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("SentenceToken", sourceSentence.SentenceSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("SentenceTopic", sourceSentence.SentenceSimilarityData.ConsolidatedTopics);

    luceneDoc.AddInt32Field("TokenCount", sourceSentence.TokenCount, Field.Store.YES);
    luceneDoc.AddStringField("Language", sourceSentence.Language, Field.Store.YES);
    luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceSentence.RecognitionQuality);
    luceneDoc.AddInt32Field("Order", sourceSentence.Order, Field.Store.YES);

    // A missing sentence text is written as an empty string.
    string sentenceText = sourceSentence.Text ?? string.Empty;
    luceneDoc.AddTextField("Text", sentenceText, Field.Store.YES);

    // Reference to the parent document, when one was supplied.
    if (parentDocument != null)
    {
        luceneDoc.AddStringField("ParentDocumentId", parentDocument.InternalId.ToString("N"), Field.Store.YES);
    }

    // Reference to the parent section, when one was supplied.
    if (parentSection != null)
    {
        luceneDoc.AddStringField("ParentSectionId", parentSection.InternalId.ToString("N"), Field.Store.YES);
    }

    // Reference to the parent paragraph, when one was supplied.
    if (parentParagraph != null)
    {
        luceneDoc.AddStringField("ParentParagraphId", parentParagraph.InternalId.ToString("N"), Field.Store.YES);
    }

    // Entities of all tokens, flattened into one list per vocabulary; passed with Field.Store.NO.
    var euroVocEntities = sourceSentence.Tokens.SelectMany(token => token.EuroVocEntities);
    luceneDoc.AddStringList("ContainedTokenEV", euroVocEntities, Field.Store.NO);

    var iateEntities = sourceSentence.Tokens.SelectMany(token => token.IateEntities);
    luceneDoc.AddStringList("ContainedTokenIATE", iateEntities, Field.Store.NO);

    return luceneDoc;
}
/// <summary>
/// Converts a <see cref="Paragraph"/> into a Lucene document, optionally linking it to its parent
/// document and section.
/// </summary>
/// <param name="sourceParagraph">Paragraph whose properties are copied into Lucene fields.</param>
/// <param name="parentDocument">Owning document; when null, no "ParentDocumentId" field is written.</param>
/// <param name="parentSection">Owning section; when null, no "ParentSectionId" field is written.</param>
/// <returns>A new <see cref="LuceneDocument"/> populated from <paramref name="sourceParagraph"/>.</returns>
public static LuceneDocument ToLucene(this Paragraph sourceParagraph, MarcellDocument parentDocument, Section parentSection)
{
    var luceneDoc = new LuceneDocument();

    // Identifiers: "Id" falls back to the "N"-formatted internal id when no explicit Id is set.
    string internalId = sourceParagraph.InternalId.ToString("N");
    luceneDoc.AddStringField("Id", sourceParagraph.Id ?? internalId, Field.Store.YES);
    luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

    // Similarity data at document, section and paragraph level.
    luceneDoc.AddStringList("DocumentToken", sourceParagraph.DocumentSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("DocumentTopic", sourceParagraph.DocumentSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("SectionToken", sourceParagraph.SectionSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("SectionTopic", sourceParagraph.SectionSimilarityData.ConsolidatedTopics);
    luceneDoc.AddStringList("ParagraphToken", sourceParagraph.ParagraphSimilarityData.ConsolidatedTokens);
    luceneDoc.AddStringList("ParagraphTopic", sourceParagraph.ParagraphSimilarityData.ConsolidatedTopics);

    luceneDoc.AddInt32Field("TokenCount", sourceParagraph.TokenCount, Field.Store.YES);
    luceneDoc.AddStringField("Language", sourceParagraph.Language, Field.Store.YES);
    luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceParagraph.RecognitionQuality);
    luceneDoc.AddInt32Field("Order", sourceParagraph.Order, Field.Store.YES);

    // Paragraph classification and numbering; missing numbers are written as empty strings.
    luceneDoc.AddStringField("ParagraphType", sourceParagraph.ParagraphType.ToString(), Field.Store.YES);
    luceneDoc.AddStringField("ParagraphNumber", sourceParagraph.ParagraphNumber ?? string.Empty, Field.Store.YES);
    luceneDoc.AddStringField("PointNumber", sourceParagraph.PointNumber ?? string.Empty, Field.Store.YES);

    // A missing paragraph text is written as an empty string.
    luceneDoc.AddTextField("Text", sourceParagraph.Text ?? string.Empty, Field.Store.YES);

    // Reference to the parent document, when one was supplied.
    if (parentDocument != null)
    {
        luceneDoc.AddStringField("ParentDocumentId", parentDocument.InternalId.ToString("N"), Field.Store.YES);
    }

    // Reference to the parent section, when one was supplied.
    if (parentSection != null)
    {
        luceneDoc.AddStringField("ParentSectionId", parentSection.InternalId.ToString("N"), Field.Store.YES);
    }

    return luceneDoc;
}