/// <summary>
        /// Converts a <see cref="MarcellDocument"/> into the Lucene document that represents it in the index.
        /// </summary>
        /// <param name="sourceDocument">Document whose identifiers, dates, similarity data and metadata are copied into index fields.</param>
        /// <returns>A newly created Lucene document holding the document-level fields.</returns>
        public static LuceneDocument ToLucene(this MarcellDocument sourceDocument)
        {
            var luceneDoc = new LuceneDocument();
            var internalId = sourceDocument.InternalId.ToString("N");

            // Identity: the external Id falls back to the internal id when it is missing.
            luceneDoc.AddStringField("Id", sourceDocument.Id ?? internalId, Field.Store.YES);
            luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

            // Dates attached to the document.
            luceneDoc.AddDateField("ApprovalDate", sourceDocument.ApprovalDate);
            luceneDoc.AddDateField("DocumentDate", sourceDocument.DocumentDate);
            luceneDoc.AddDateField("EffectiveDate", sourceDocument.EffectiveDate);

            // Document-level similarity data.
            var documentSimilarity = sourceDocument.DocumentSimilarityData;
            luceneDoc.AddStringList("DocumentToken", documentSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("DocumentTopic", documentSimilarity.ConsolidatedTopics);

            // Descriptive metadata; optional texts are written as empty strings when absent.
            luceneDoc.AddInt32Field("TokenCount", sourceDocument.TokenCount, Field.Store.YES);
            luceneDoc.AddTextField("DocumentType", sourceDocument.DocumentType ?? string.Empty, Field.Store.YES);
            luceneDoc.AddTextField("OriginalType", sourceDocument.OriginalType ?? string.Empty, Field.Store.YES);
            luceneDoc.AddTextField("Issuer", sourceDocument.Issuer ?? string.Empty, Field.Store.YES);
            luceneDoc.AddStringField("Language", sourceDocument.Language, Field.Store.YES);
            luceneDoc.AddStringField("Url", sourceDocument.Url ?? string.Empty, Field.Store.YES);
            luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceDocument.RecognitionQuality);
            luceneDoc.AddBoolField("IsStructured", sourceDocument.IsStructured);

            // Unlike the other optional values, the file name field is left out entirely when there is none.
            var fileName = sourceDocument.FileName;
            if (fileName != null)
            {
                luceneDoc.AddStringField("FileName", fileName, Field.Store.YES);
            }

            return luceneDoc;
        }
        /// <summary>
        /// Converts a <see cref="Section"/> into the Lucene document that represents it in the index.
        /// </summary>
        /// <param name="sourceSection">Section whose identifiers, similarity data, type and text are copied into index fields.</param>
        /// <param name="parentDocument">Document the section belongs to; when not <c>null</c>, its internal id is stored as "ParentDocumentId".</param>
        /// <returns>A newly created Lucene document holding the section-level fields.</returns>
        public static LuceneDocument ToLucene(this Section sourceSection, MarcellDocument parentDocument)
        {
            var luceneDoc = new LuceneDocument();
            var internalId = sourceSection.InternalId.ToString("N");

            // Identity: the external Id falls back to the internal id when it is missing.
            luceneDoc.AddStringField("Id", sourceSection.Id ?? internalId, Field.Store.YES);
            luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

            // Similarity data at document level, then at section level.
            var documentSimilarity = sourceSection.DocumentSimilarityData;
            luceneDoc.AddStringList("DocumentToken", documentSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("DocumentTopic", documentSimilarity.ConsolidatedTopics);

            var sectionSimilarity = sourceSection.SectionSimilarityData;
            luceneDoc.AddStringList("SectionToken", sectionSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("SectionTopic", sectionSimilarity.ConsolidatedTopics);

            // Section metadata.
            luceneDoc.AddInt32Field("TokenCount", sourceSection.TokenCount, Field.Store.YES);
            luceneDoc.AddStringField("Language", sourceSection.Language, Field.Store.YES);
            luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceSection.RecognitionQuality);
            luceneDoc.AddStringField("Type", sourceSection.Type.ToString(), Field.Store.YES);

            // A missing text is written as an empty string.
            luceneDoc.AddTextField("Text", sourceSection.Text ?? string.Empty, Field.Store.YES);

            // Link back to the owning document, when one was supplied.
            if (parentDocument != null)
            {
                luceneDoc.AddStringField("ParentDocumentId", parentDocument.InternalId.ToString("N"), Field.Store.YES);
            }

            return luceneDoc;
        }
Esempio n. 3
0
        /// <summary>
        /// Indexes the document under a key derived from its mapping type and id, using
        /// <c>UpdateDocument</c> so the entry is written against that key.
        /// </summary>
        /// <remarks>
        /// Failures are never propagated to the caller: any exception raised while building or writing
        /// the entry is handed to <c>ExceptionLogService.LogException</c> and the method returns normally.
        /// If the writer is still unavailable after <c>OpenWriter()</c>, nothing is written.
        /// </remarks>
        /// <typeparam name="T">Type of the document being indexed.</typeparam>
        /// <param name="document">The document. Its runtime type must derive from <c>IndexModelBase</c>, otherwise an error is logged and nothing is indexed.</param>
        /// <param name="indexName">Name of the index. Accepted for interface compatibility; this implementation does not read it.</param>
        /// <param name="mappingType">Type of the mapping, used as the key into the index registry. Defaults to the lower-cased runtime type name of <paramref name="document"/>.</param>
        public override void IndexDocument <T>(T document, string indexName = null, string mappingType = null)
        {
            try
            {
                if (document == null)
                {
                    // Raised inside the try block on purpose: it is logged like every other failure instead of escaping.
                    throw new ArgumentNullException("document");
                }

                Type documentType = document.GetType();

                if (mappingType == null)
                {
                    mappingType = documentType.Name.ToLower();
                }

                if (!_indexes.ContainsKey(mappingType))
                {
                    CreateIndex(documentType);
                }

                var index = _indexes[mappingType];

                // The id used for the index key comes from IndexModelBase; verify the cast before doing any
                // reflection work so a wrong document type is logged with a meaningful message rather than
                // as a NullReferenceException.
                IndexModelBase docIndexModelBase = document as IndexModelBase;
                if (docIndexModelBase == null)
                {
                    throw new InvalidOperationException("Cannot index a document of type '" + documentType.FullName + "' because it does not derive from IndexModelBase.");
                }

                Document doc = new Document();
                foreach (var typeMappingProperty in index.MappingProperties.Values)
                {
                    var property = documentType.GetProperty(typeMappingProperty.Name);
                    if (property == null)
                    {
                        // GetProperty returns null when no public property has that name.
                        throw new InvalidOperationException("Mapped property '" + typeMappingProperty.Name + "' was not found on type '" + documentType.FullName + "'.");
                    }

                    // Values are lower-cased before being added as text fields.
                    string fieldValue = property.GetValue(document, null).ToStringSafe().ToLower();

                    TextField textField = new TextField(typeMappingProperty.Name, fieldValue, global::Lucene.Net.Documents.Field.Store.YES);
                    textField.Boost = typeMappingProperty.Boost;
                    doc.Add(textField);
                }

                string indexValue = LuceneID(mappingType, docIndexModelBase.Id);
                doc.AddStringField("type", mappingType, global::Lucene.Net.Documents.Field.Store.YES);
                doc.AddStringField("id", docIndexModelBase.Id.ToString(), global::Lucene.Net.Documents.Field.Store.YES);
                doc.AddStringField("index", indexValue, global::Lucene.Net.Documents.Field.Store.YES);

                // Stores all the properties as JSON to retrieve object on lookup.
                doc.AddStoredField("JSON", document.ToJson());

                // Use the analyzer in fieldAnalyzers if that field is in that dictionary, otherwise use StandardAnalyzer.
                var analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer: new StandardAnalyzer(_matchVersion, new CharArraySet(_matchVersion, 0, true)), fieldAnalyzers: index.FieldAnalyzers);

                OpenWriter();
                lock ( _lockWriter )
                {
                    if (_writer != null)
                    {
                        // Must specify analyzer because the default analyzer that is specified in indexWriterConfig is null.
                        _writer.UpdateDocument(new Term("index", indexValue), doc, analyzer);
                    }
                }
            }
            catch (Exception ex)
            {
                HttpContext context2 = HttpContext.Current;
                ExceptionLogService.LogException(ex, context2);
            }
        }
        /// <summary>
        /// Converts a <see cref="Paragraph"/> into the Lucene document that represents it in the index.
        /// </summary>
        /// <param name="sourceParagraph">Paragraph whose identifiers, similarity data, numbering and text are copied into index fields.</param>
        /// <param name="parentDocument">Document the paragraph belongs to; when not <c>null</c>, its internal id is stored as "ParentDocumentId".</param>
        /// <param name="parentSection">Section the paragraph belongs to; when not <c>null</c>, its internal id is stored as "ParentSectionId".</param>
        /// <returns>A newly created Lucene document holding the paragraph-level fields.</returns>
        public static LuceneDocument ToLucene(this Paragraph sourceParagraph, MarcellDocument parentDocument, Section parentSection)
        {
            var luceneDoc = new LuceneDocument();
            var internalId = sourceParagraph.InternalId.ToString("N");

            // Identity: the external Id falls back to the internal id when it is missing.
            luceneDoc.AddStringField("Id", sourceParagraph.Id ?? internalId, Field.Store.YES);
            luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

            // Similarity data, from the widest scope (document) down to the paragraph itself.
            var documentSimilarity = sourceParagraph.DocumentSimilarityData;
            luceneDoc.AddStringList("DocumentToken", documentSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("DocumentTopic", documentSimilarity.ConsolidatedTopics);

            var sectionSimilarity = sourceParagraph.SectionSimilarityData;
            luceneDoc.AddStringList("SectionToken", sectionSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("SectionTopic", sectionSimilarity.ConsolidatedTopics);

            var paragraphSimilarity = sourceParagraph.ParagraphSimilarityData;
            luceneDoc.AddStringList("ParagraphToken", paragraphSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("ParagraphTopic", paragraphSimilarity.ConsolidatedTopics);

            // Paragraph metadata.
            luceneDoc.AddInt32Field("TokenCount", sourceParagraph.TokenCount, Field.Store.YES);
            luceneDoc.AddStringField("Language", sourceParagraph.Language, Field.Store.YES);
            luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceParagraph.RecognitionQuality);
            luceneDoc.AddInt32Field("Order", sourceParagraph.Order, Field.Store.YES);

            // Type and numbering; missing numbers are written as empty strings.
            luceneDoc.AddStringField("ParagraphType", sourceParagraph.ParagraphType.ToString(), Field.Store.YES);
            luceneDoc.AddStringField("ParagraphNumber", sourceParagraph.ParagraphNumber ?? string.Empty, Field.Store.YES);
            luceneDoc.AddStringField("PointNumber", sourceParagraph.PointNumber ?? string.Empty, Field.Store.YES);

            // A missing text is written as an empty string.
            luceneDoc.AddTextField("Text", sourceParagraph.Text ?? string.Empty, Field.Store.YES);

            // Link back to the owning document, when one was supplied.
            if (parentDocument != null)
            {
                luceneDoc.AddStringField("ParentDocumentId", parentDocument.InternalId.ToString("N"), Field.Store.YES);
            }

            // Link back to the owning section, when one was supplied.
            if (parentSection != null)
            {
                luceneDoc.AddStringField("ParentSectionId", parentSection.InternalId.ToString("N"), Field.Store.YES);
            }

            return luceneDoc;
        }
        /// <summary>
        /// Converts a <see cref="Sentence"/> into the Lucene document that represents it in the index.
        /// </summary>
        /// <param name="sourceSentence">Sentence whose identifiers, similarity data, text and token entities are copied into index fields.</param>
        /// <param name="parentDocument">Document the sentence belongs to; when not <c>null</c>, its internal id is stored as "ParentDocumentId".</param>
        /// <param name="parentSection">Section the sentence belongs to; when not <c>null</c>, its internal id is stored as "ParentSectionId".</param>
        /// <param name="parentParagraph">Paragraph the sentence belongs to; when not <c>null</c>, its internal id is stored as "ParentParagraphId".</param>
        /// <returns>A newly created Lucene document holding the sentence-level fields.</returns>
        public static LuceneDocument ToLucene(this Sentence sourceSentence, MarcellDocument parentDocument, Section parentSection, Paragraph parentParagraph)
        {
            var luceneDoc = new LuceneDocument();
            var internalId = sourceSentence.InternalId.ToString("N");

            // Identity: the external Id falls back to the internal id when it is missing.
            luceneDoc.AddStringField("Id", sourceSentence.Id ?? internalId, Field.Store.YES);
            luceneDoc.AddStringField("InternalId", internalId, Field.Store.YES);

            // Similarity data, from the widest scope (document) down to the sentence itself.
            var documentSimilarity = sourceSentence.DocumentSimilarityData;
            luceneDoc.AddStringList("DocumentToken", documentSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("DocumentTopic", documentSimilarity.ConsolidatedTopics);

            var sectionSimilarity = sourceSentence.SectionSimilarityData;
            luceneDoc.AddStringList("SectionToken", sectionSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("SectionTopic", sectionSimilarity.ConsolidatedTopics);

            var paragraphSimilarity = sourceSentence.ParagraphSimilarityData;
            luceneDoc.AddStringList("ParagraphToken", paragraphSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("ParagraphTopic", paragraphSimilarity.ConsolidatedTopics);

            var sentenceSimilarity = sourceSentence.SentenceSimilarityData;
            luceneDoc.AddStringList("SentenceToken", sentenceSimilarity.ConsolidatedTokens);
            luceneDoc.AddStringList("SentenceTopic", sentenceSimilarity.ConsolidatedTopics);

            // Sentence metadata.
            luceneDoc.AddInt32Field("TokenCount", sourceSentence.TokenCount, Field.Store.YES);
            luceneDoc.AddStringField("Language", sourceSentence.Language, Field.Store.YES);
            luceneDoc.AddScoredDoubleField("RecognitionQuality", sourceSentence.RecognitionQuality);
            luceneDoc.AddInt32Field("Order", sourceSentence.Order, Field.Store.YES);

            // A missing text is written as an empty string.
            luceneDoc.AddTextField("Text", sourceSentence.Text ?? string.Empty, Field.Store.YES);

            // Link back to the owning document, when one was supplied.
            if (parentDocument != null)
            {
                luceneDoc.AddStringField("ParentDocumentId", parentDocument.InternalId.ToString("N"), Field.Store.YES);
            }

            // Link back to the owning section, when one was supplied.
            if (parentSection != null)
            {
                luceneDoc.AddStringField("ParentSectionId", parentSection.InternalId.ToString("N"), Field.Store.YES);
            }

            // Link back to the owning paragraph, when one was supplied.
            if (parentParagraph != null)
            {
                luceneDoc.AddStringField("ParentParagraphId", parentParagraph.InternalId.ToString("N"), Field.Store.YES);
            }

            // Entities of all tokens in the sentence, flattened into one list per vocabulary; passed with Field.Store.NO.
            luceneDoc.AddStringList("ContainedTokenEV", sourceSentence.Tokens.SelectMany(token => token.EuroVocEntities), Field.Store.NO);
            luceneDoc.AddStringList("ContainedTokenIATE", sourceSentence.Tokens.SelectMany(token => token.IateEntities), Field.Store.NO);

            return luceneDoc;
        }