/// <summary>
/// Validates a document output. When its status is empty, the worker that
/// parses the document and invokes the other modules is started.
/// </summary>
/// <param name="doc">The document output to validate.</param>
/// <returns>
/// The stored document content when the status is OK; null when no entity
/// manager is set, when the document is still being parsed, when the worker
/// has just been started, or when the status is not recognised.
/// </returns>
public String ValidateDocumentOutput(DocumentOutput doc)
{
    if (this.EntityManager == null)
    {
        return null;
    }

    /* read the status once and dispatch on it */
    var status = doc.Status;

    /* the document has already been created: hand it back */
    if (status == DocumentManager.StatusOK)
    {
        return doc.Document;
    }

    /* an empty document: start the worker that takes care of calling the modules */
    if (status == DocumentManager.StatusEmpty)
    {
        this._StartWorker(doc);
    }

    /* parsing in progress, worker just started, or unknown status: nothing to return */
    return null;
}
/// <summary>
/// Builds an XML document with a "situations" root element that contains a
/// copy of every "extraction" element found in the summary document, then
/// stores the serialized result through the entity manager.
/// </summary>
/// <param name="summ">Document output whose content is the summary XML.</param>
/// <param name="sit">Document output whose content is the situations XML.</param>
/// <param name="time">Document output whose content is the timeline XML.</param>
/// <returns>The document output returned by the entity manager for the stored result.</returns>
private DocumentOutput _CombineTimeline(DocumentOutput summ, DocumentOutput sit, DocumentOutput time)
{
    XmlDocument root = new XmlDocument();

    /* all three inputs are parsed up front, so malformed XML fails before anything is stored */
    XmlDocument sitDoc = new XmlDocument();
    sitDoc.LoadXml(sit.Document);
    XmlDocument timeDoc = new XmlDocument();
    timeDoc.LoadXml(time.Document);
    XmlDocument summDoc = new XmlDocument();
    summDoc.LoadXml(summ.Document);

    XmlElement situations = root.CreateElement("situations");
    root.AppendChild(situations);

    /* the previously extracted information; nodes owned by another XmlDocument
     * cannot be appended directly, so a deep copy is imported into the target first */
    foreach (XmlElement elem in summDoc.GetElementsByTagName("extraction"))
    {
        situations.AppendChild(root.ImportNode(elem, true));
    }

    /* TODO(review): the "situation" elements of sitDoc and the content of timeDoc
     * are not merged into the result yet */

    String xml;
    using (MemoryStream ms = new MemoryStream())
    {
        root.Save(ms);
        xml = Encoding.UTF8.GetString(ms.ToArray());
    }

    /* NOTE(review): the result is stored under the "summary" type, the same type
     * _CombineSummary uses - confirm this is intended for the timeline */
    return this._em.AddDocumentOutput(this._doc.DocumentId, "summary", DocumentManager.StatusOK, xml);
}
/// <summary>
/// Creates a ModuleWorker for the given document and runs it on a new thread.
/// </summary>
/// <param name="doc">The document output handed to the worker.</param>
private void _StartWorker(DocumentOutput doc)
{
    /* the worker receives this instance's entity manager together with the document */
    ModuleWorker job = new ModuleWorker(this.EntityManager, doc);

    /* a fresh thread executes the worker's Run method */
    ThreadStart entryPoint = new ThreadStart(job.Run);
    Thread runner = new Thread(entryPoint);
    runner.Start();
}
/// <summary>
/// Returns the "summ" document output for the current document. An already
/// stored output is reused (and mirrored to summ.xml under <paramref name="root"/>);
/// otherwise the summary is produced from info.xml, stored through the entity
/// manager and written to summ.xml.
/// </summary>
/// <param name="root">Directory that holds info.xml and receives summ.xml.</param>
/// <returns>
/// The stored or newly created document output, or null when the summariser
/// produced no output.
/// </returns>
private DocumentOutput _GetSummary(string root)
{
    String summaryPath = root + "\\summ.xml";
    DocumentOutput doc = null;

    /* check whether the document is already stored */
    try
    {
        doc = this._em.GetDocumentOutput(this._doc.DocumentId, "summ");
        if (doc != null)
        {
            /* make sure it is also on disk; WriteAllBytes replaces the file, so a
             * shorter document does not leave stale bytes from an older, longer one */
            File.WriteAllBytes(summaryPath, Encoding.UTF8.GetBytes(doc.Document));
        }
    }
    catch
    {
        /* best effort: a failed lookup falls through to regeneration, and a failed
         * mirror write must not prevent returning the stored document */
    }

    if (doc != null)
    {
        return doc;
    }

    /* run the summarisation module over the extracted information */
    String output = null;
    using (StreamReader info = new StreamReader(File.OpenRead(root + "\\info.xml")))
    {
        XElement infoDoc = XElement.Parse(info.ReadToEnd());
        output = new Summarisation().ProcessXML(infoDoc);
    }

    if (output == null)
    {
        return null;
    }

    /* store the result */
    doc = this._em.AddDocumentOutput(this._doc.DocumentId, "summ", DocumentManager.StatusOK, output);

    /* and write it to the file as well, replacing any previous content */
    File.WriteAllBytes(summaryPath, Encoding.UTF8.GetBytes(output));

    return doc;
}
/// <summary>
/// Obtains the information and situations documents needed for the timeline.
/// </summary>
/// <param name="root">Directory passed on to the module invocations.</param>
/// <returns>
/// True when both documents were obtained; false when either call returned null.
/// Exceptions thrown by the module invocations are not caught here.
/// </returns>
private Boolean _CreateTimelineDocument(string root)
{
    /* the extracted information is needed by the situations module */
    DocumentOutput infoDoc = this._GetInformation(root);

    /* create the document with the situations */
    DocumentOutput sitDoc = this._GetSituations(root);

    /* report success only if the modules actually produced their documents */
    return infoDoc != null && sitDoc != null;
}
/// <summary>
/// Looks up the document output with the given id and type and passes it to
/// the module manager for validation.
/// </summary>
/// <param name="id">Identifier passed to the entity manager lookup.</param>
/// <param name="type">Output type passed to the entity manager lookup.</param>
/// <returns>Whatever the module manager's ValidateDocumentOutput returns for the found output.</returns>
/// <exception cref="DocumentException">
/// The lookup returned null or threw an EntityManagerException.
/// </exception>
public String GetDocument(int id, string type)
{
    const string notFound = "Documentul nu a fost gasit";

    DocumentOutput res;
    try
    {
        res = this.EntityManager.GetDocumentOutput(id, type);
    }
    catch (EntityManagerException)
    {
        throw new DocumentException(notFound);
    }

    /* a missing document is reported directly instead of throwing and
     * immediately catching an intermediate exception */
    if (res == null)
    {
        throw new DocumentException(notFound);
    }

    return this._moduleManager.ValidateDocumentOutput(res);
}
/// <summary>
/// Runs the information extraction and the summarisation for the current
/// document and combines both results into the summary document.
/// </summary>
/// <param name="root">Directory passed on to the module invocations.</param>
/// <returns>True when all three steps completed; false when any of them threw.</returns>
private Boolean _CreateSummaryDocument(string root)
{
    Boolean succeeded;

    try
    {
        /* step 1: extracted information */
        DocumentOutput extracted = this._GetInformation(root);

        /* step 2: summary text */
        DocumentOutput summarised = this._GetSummary(root);

        /* step 3: merge both into the final document */
        this._CombineSummary(extracted, summarised);

        succeeded = true;
    }
    catch
    {
        /* any failure in the steps above is reported as an unsuccessful run */
        succeeded = false;
    }

    return succeeded;
}
/// <summary>
/// Returns the "situations" document output for the current document. An
/// already stored output is reused (and mirrored to sit.xml under
/// <paramref name="root"/>); otherwise the AISituations Java module is run
/// and the sit.xml it produces is stored through the entity manager.
/// </summary>
/// <param name="root">Directory that holds info.xml and main.txt and receives sit.xml.</param>
/// <returns>
/// The stored or newly created document output, or null when the module's
/// output file could not be read or stored.
/// </returns>
private DocumentOutput _GetSituations(string root)
{
    String situationsPath = root + "\\sit.xml";
    DocumentOutput doc = null;

    /* check whether the document is already stored */
    try
    {
        doc = this._em.GetDocumentOutput(this._doc.DocumentId, "situations");
        if (doc != null)
        {
            /* make sure it is also on disk; WriteAllBytes replaces the file, so a
             * shorter document does not leave stale bytes from an older, longer one */
            File.WriteAllBytes(situationsPath, Encoding.UTF8.GetBytes(doc.Document));
        }
    }
    catch
    {
        /* best effort: a failed lookup falls through to running the module, and a
         * failed mirror write must not prevent returning the stored document */
    }

    if (doc != null)
    {
        return doc;
    }

    /* start the external process and wait for it to finish */
    ProcessStartInfo startInfo = new ProcessStartInfo("java",
        " -jar AISituations.jar -f \"" + root + "\\info.xml\" " +
        " -t \"" + root + "\\main.txt\" " +
        " -o \"" + root + "\\sit.xml\"")
    {
        WorkingDirectory = ROOT + "\\sit"
    };

    using (Process process = Process.Start(startInfo))
    {
        process.WaitForExit();
    }

    try
    {
        /* read the produced file and store it */
        using (StreamReader sr = new StreamReader(File.OpenRead(situationsPath)))
        {
            doc = this._em.AddDocumentOutput(this._doc.DocumentId, "situations", DocumentManager.StatusOK, sr.ReadToEnd());
        }
    }
    catch
    {
        /* the module left no readable output (or storing it failed): report null */
    }

    return doc;
}
/// <summary>
/// Returns the "information" document output for the current document. An
/// already stored output is reused (and mirrored to info.xml under
/// <paramref name="root"/>); otherwise the InformationExtraction Java module
/// is run and the info.xml it produces is stored through the entity manager.
/// </summary>
/// <param name="root">Directory that holds main.txt and receives info.xml.</param>
/// <returns>The stored or newly created document output.</returns>
/// <remarks>
/// Failures to start the process or to read its output file are not caught
/// here and propagate to the caller.
/// </remarks>
private DocumentOutput _GetInformation(string root)
{
    String infoPath = root + "\\info.xml";
    DocumentOutput doc = null;

    /* check whether the document is already stored */
    try
    {
        doc = this._em.GetDocumentOutput(this._doc.DocumentId, "information");
        if (doc != null)
        {
            /* make sure it is also on disk; WriteAllBytes replaces the file, so a
             * shorter document does not leave stale bytes from an older, longer one */
            File.WriteAllBytes(infoPath, Encoding.UTF8.GetBytes(doc.Document));
        }
    }
    catch
    {
        /* best effort: a failed lookup falls through to running the module, and a
         * failed mirror write must not prevent returning the stored document */
    }

    if (doc != null)
    {
        return doc;
    }

    /* start the external process and wait for it to finish */
    String args = " -Xms1500m -Xmx1500m -jar InformationExtraction.jar input=\"" + root + "\\main.txt\" " +
                  "output=\"" + root + "\\info.xml\" " +
                  " find genre characters locations dates relationships actions";

    ProcessStartInfo startInfo = new ProcessStartInfo("java", args)
    {
        WorkingDirectory = ROOT + "\\info"
    };

    using (Process infoProcess = Process.Start(startInfo))
    {
        infoProcess.WaitForExit();
    }

    /* read the produced file and store it */
    using (StreamReader sr = new StreamReader(File.OpenRead(infoPath)))
    {
        doc = this._em.AddDocumentOutput(this._doc.DocumentId, "information", DocumentManager.StatusOK, sr.ReadToEnd());
    }

    return doc;
}
/// <summary>
/// Creates a worker for the given document output.
/// </summary>
/// <param name="em">Entity manager the worker keeps for its document operations.</param>
/// <param name="doc">Document output the worker keeps as its current document.</param>
public ModuleWorker(IDocumentEntityManager em, DocumentOutput doc)
{
    _doc = doc;
    _em = em;
}
/// <summary>
/// Assembles the final summary XML from the extracted information and the
/// summary text, then stores the serialized document through the entity manager.
/// </summary>
/// <param name="info">Document output whose content is the extracted-information XML.</param>
/// <param name="summ">Document output whose content becomes the text of the inner "summary" element.</param>
/// <returns>The document output returned by the entity manager for the stored result.</returns>
private DocumentOutput _CombineSummary(DocumentOutput info, DocumentOutput summ)
{
    /* every run of characters other than ASCII letters, digits and spaces is removed */
    const string disallowed = @"[^A-Za-z0-9 ]+";

    XmlDocument root = new XmlDocument();
    XmlDocument infoDoc = new XmlDocument();
    infoDoc.LoadXml(info.Document);

    XmlElement summary = root.CreateElement("summary");
    root.AppendChild(summary);

    /* title of the document */
    XmlElement name = root.CreateElement("name");
    summary.AppendChild(name);
    name.AppendChild(root.CreateTextNode("Document title"));

    XmlElement extraction = root.CreateElement("extraction");
    summary.AppendChild(extraction);

    /* genre: only the first "genre" element is used */
    XmlElement type = root.CreateElement("type");
    extraction.AppendChild(type);
    XmlNodeList genres = infoDoc.GetElementsByTagName("genre");
    if (genres.Count > 0)
    {
        XmlElement firstGenre = (XmlElement)genres[0];
        type.AppendChild(root.CreateTextNode(firstGenre.GetAttribute("name")));
    }

    /* characters */
    XmlElement characters = root.CreateElement("characters");
    extraction.AppendChild(characters);
    foreach (XmlElement source in infoDoc.GetElementsByTagName("character"))
    {
        XmlElement target = root.CreateElement("character");
        characters.AppendChild(target);
        target.SetAttribute("name", Regex.Replace(source.InnerText, disallowed, "").Trim());

        /* when both markers are present, "secondary" is written last and wins */
        if (source.HasAttribute("main"))
        {
            target.SetAttribute("type", "main");
        }
        if (source.HasAttribute("secondary"))
        {
            target.SetAttribute("type", "secondary");
        }
    }

    /* locations (the cleaned name is not trimmed) */
    XmlElement locations = root.CreateElement("locations");
    extraction.AppendChild(locations);
    foreach (XmlElement source in infoDoc.GetElementsByTagName("location"))
    {
        XmlElement target = root.CreateElement("location");
        locations.AppendChild(target);
        target.SetAttribute("name", Regex.Replace(source.InnerText, disallowed, ""));
    }

    /* relations (both entities are trimmed, the link is not) */
    XmlElement relations = root.CreateElement("relations");
    extraction.AppendChild(relations);
    foreach (XmlElement source in infoDoc.GetElementsByTagName("relationship"))
    {
        String first = Regex.Replace(source.GetAttribute("entity1"), disallowed, "").Trim();
        String second = Regex.Replace(source.GetAttribute("entity2"), disallowed, "").Trim();
        String verb = Regex.Replace(source.GetAttribute("link"), disallowed, "");

        XmlElement target = root.CreateElement("relation");
        relations.AppendChild(target);
        target.SetAttribute("character1", first);
        target.SetAttribute("character2", second);
        target.SetAttribute("verb", verb);
    }

    /* the summary itself: an empty "summarisation" element followed by a
     * nested "summary" element that carries the text */
    XmlElement summarisation = root.CreateElement("summarisation");
    summary.AppendChild(summarisation);
    XmlElement summaryText = root.CreateElement("summary");
    summary.AppendChild(summaryText);
    summaryText.InnerText = summ.Document;

    /* serialize and store */
    String xml;
    using (MemoryStream buffer = new MemoryStream())
    {
        root.Save(buffer);
        xml = Encoding.UTF8.GetString(buffer.ToArray());
    }

    return this._em.AddDocumentOutput(this._doc.DocumentId, "summary", DocumentManager.StatusOK, xml);
}