Esempio n. 1
0
        /// <summary>
        /// Converts a document, together with each of its sections, paragraphs and
        /// sentences, through the respective <c>ToLucene</c> calls and passes the
        /// results to the writers looked up by the document's language.
        /// </summary>
        /// <param name="document">Document whose section/paragraph/sentence hierarchy is walked.</param>
        /// <param name="isParallel">Currently not read by this method.</param>
        private void AddDocument(MarcellDocument document, bool isParallel = false)
        {
            var sectionBatch = new List<Document>();
            var paragraphBatch = new List<Document>();
            var sentenceBatch = new List<Document>();

            // Single walk over the hierarchy, collecting one batch per level.
            foreach (var sec in document.Sections)
            {
                sectionBatch.Add(sec.ToLucene(document));

                foreach (var para in sec.Paragraphs)
                {
                    paragraphBatch.Add(para.ToLucene(document, sec));

                    foreach (var sent in para.Sentences)
                    {
                        sentenceBatch.Add(sent.ToLucene(document, sec, para));
                    }
                }
            }

            // The document-level entry is written on the calling thread before any batch task starts.
            m_documentWriter[document.Language].AddDocument(document.ToLucene());

            // One task per level; each writer is looked up inside its own task so a
            // failed lookup is reported through the task rather than thrown here.
            Task[] batchWrites =
            {
                Task.Factory.StartNew(() => { m_sectionWriter[document.Language].AddDocuments(sectionBatch); }, TaskCreationOptions.LongRunning),
                Task.Factory.StartNew(() => { m_paragraphWriter[document.Language].AddDocuments(paragraphBatch); }, TaskCreationOptions.LongRunning),
                Task.Factory.StartNew(() => { m_sentenceWriter[document.Language].AddDocuments(sentenceBatch); }, TaskCreationOptions.LongRunning),
            };

            // Block until all three batches have been handed over.
            Task.WaitAll(batchWrites);
        }
        /// <summary>
        /// Runs both parser passes over a CoNLL document and stores the first-pass
        /// result in <c>m_marcellDocument</c>.
        /// </summary>
        /// <param name="sourceDoc">Source document; it and its <c>doc</c> member must be non-null.</param>
        /// <param name="parserFactory">Factory asked for a parser using <c>sourceDoc.doc.language</c>.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="sourceDoc"/> or its <c>doc</c> member is null.
        /// </exception>
        private void ParseDocumentXml(CoNLLDocument sourceDoc, LegalTextParserFactory parserFactory)
        {
            bool isUnsupported = sourceDoc == null || sourceDoc.doc == null;
            if (isUnsupported)
            {
                throw new InvalidOperationException("Unsupported document found in corpus!");
            }

            var languageParser = parserFactory.CreateParser(sourceDoc.doc.language);
            var parsed = languageParser.ParsePass1(sourceDoc);

            // The second pass receives the first pass's output; any return value is not used here.
            languageParser.ParsePass2(parsed);

            // Assigned last, so the field is only updated when neither pass threw.
            m_marcellDocument = parsed;
        }
 /// <summary>
 /// Creates an instance around the given document.
 /// </summary>
 /// <param name="document">Stored as-is in <c>m_marcellDocument</c>; no null check is performed.</param>
 public ParsedDocument(MarcellDocument document) => m_marcellDocument = document;