/// <summary>
/// Converts the document and each of its nested sections, paragraphs and sentences
/// through their <c>ToLucene</c> methods, then passes the results to the writers
/// looked up by <c>document.Language</c>.
/// </summary>
/// <param name="document">Document whose content is added to the writers.</param>
/// <param name="isParallel">Not read by this method.</param>
private void AddDocument(MarcellDocument document, bool isParallel = false)
{
    var sectionDocs = new List<Document>();
    var paragraphDocs = new List<Document>();
    var sentenceDocs = new List<Document>();

    // Walk the section -> paragraph -> sentence hierarchy once, collecting one
    // converted entry per node into the list for its level.
    foreach (var sec in document.Sections)
    {
        sectionDocs.Add(sec.ToLucene(document));

        foreach (var para in sec.Paragraphs)
        {
            paragraphDocs.Add(para.ToLucene(document, sec));

            foreach (var sent in para.Sentences)
            {
                sentenceDocs.Add(sent.ToLucene(document, sec, para));
            }
        }
    }

    // The top-level document entry is added on the calling thread before any task starts.
    m_documentWriter[document.Language].AddDocument(document.ToLucene());

    // Each batch goes to its own writer on a separate long-running task.
    Task[] pending =
    {
        Task.Factory.StartNew(
            () => m_sectionWriter[document.Language].AddDocuments(sectionDocs),
            TaskCreationOptions.LongRunning),
        Task.Factory.StartNew(
            () => m_paragraphWriter[document.Language].AddDocuments(paragraphDocs),
            TaskCreationOptions.LongRunning),
        Task.Factory.StartNew(
            () => m_sentenceWriter[document.Language].AddDocuments(sentenceDocs),
            TaskCreationOptions.LongRunning),
    };

    // Block until all three batches have been handed to their writers.
    Task.WaitAll(pending);
}
/// <summary>
/// Runs both parser passes over <paramref name="sourceDoc"/> and stores the result
/// in <c>m_marcellDocument</c>.
/// </summary>
/// <param name="sourceDoc">Source document; it and its <c>doc</c> member must be non-null.</param>
/// <param name="parserFactory">Factory that creates the parser for <c>sourceDoc.doc.language</c>.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="sourceDoc"/> or its <c>doc</c> member is null.
/// </exception>
private void ParseDocumentXml(CoNLLDocument sourceDoc, LegalTextParserFactory parserFactory)
{
    bool isUnsupported = sourceDoc == null || sourceDoc.doc == null;
    if (isUnsupported)
    {
        throw new InvalidOperationException("Unsupported document found in corpus!");
    }

    var language = sourceDoc.doc.language;
    var legalParser = parserFactory.CreateParser(language);

    // The second pass receives the first pass's result object; that same object
    // is what gets stored, and only after the second pass has returned.
    var parsed = legalParser.ParsePass1(sourceDoc);
    legalParser.ParsePass2(parsed);

    m_marcellDocument = parsed;
}
/// <summary>
/// Creates an instance that wraps the supplied document.
/// </summary>
/// <param name="document">Document stored in <c>m_marcellDocument</c>; no validation is performed.</param>
public ParsedDocument(MarcellDocument document) => m_marcellDocument = document;