/// <summary>
/// Produces the textual form of this object by evaluating the
/// <c>"(display {0} {1})"</c> template against this instance and a fresh
/// in-memory writer, then returning what the writer collected.
/// </summary>
/// <returns>The contents of the writer's buffer after evaluation.</returns>
public override string ToString()
{
    StringWriter sink = new StringWriter();

    // The template receives this object and the writer as its two arguments.
    "(display {0} {1})".Eval(this, sink);

    return sink.GetBuffer();
}
/// <summary>
/// Convenience overload: dumps the tree into an in-memory buffer, starting with
/// an empty <see cref="StringBuilder"/> and the snapshot id
/// <c>Snapshot.CurrentStateId</c>, and hands back that buffer.
/// </summary>
/// <returns>The buffer of the writer the dump was printed to.</returns>
public StringBuilder DumpTreeRecursively()
{
    StringWriter buffer = new StringWriter();
    PrintWriter printer = new PrintWriter(buffer, true);
    StringBuilder prefix = new StringBuilder();

    DumpTreeRecursively(
        printer,
        prefix,
        Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot.CurrentStateId);

    return buffer.GetBuffer();
}
/// <summary>
/// Renders the label-based comparison of <paramref name="goldStandard"/> against
/// <paramref name="extractorOutput"/> into a string instead of a caller-supplied writer.
/// </summary>
/// <param name="goldStandard">The gold-standard labels.</param>
/// <param name="extractorOutput">The labels produced by the extractor.</param>
/// <returns>Everything <c>PrintResultsUsingLabels</c> wrote, as a single string.</returns>
public virtual string PrintResults(IList<string> goldStandard, IList<string> extractorOutput)
{
    StringWriter buffer = new StringWriter();

    // Delegate the actual reporting; the writer only captures the text in memory.
    PrintResultsUsingLabels(new PrintWriter(buffer, true), goldStandard, extractorOutput);

    return buffer.GetBuffer().ToString();
}
/// <summary>
/// Reports how an information extractor performed: takes the sentences annotated by the
/// extractor and the same sentences carrying gold-standard annotations, and returns the
/// printed results as a string.
/// </summary>
/// <param name="goldStandard">Map whose <c>SentencesAnnotation</c> holds the gold-standard sentences.</param>
/// <param name="extractorOutput">Map whose <c>SentencesAnnotation</c> holds the extractor's sentences.</param>
/// <returns>The text written by the writer-based <c>PrintResults</c> overload.</returns>
public virtual string PrintResults(ICoreMap goldStandard, ICoreMap extractorOutput)
{
    StringWriter buffer = new StringWriter();
    PrintWriter printer = new PrintWriter(buffer, true);

    // Copy both sentence collections into fresh lists before handing them on.
    IList<ICoreMap> goldSentences = new List<ICoreMap>();
    Sharpen.Collections.AddAll(
        goldSentences,
        goldStandard.Get(typeof(CoreAnnotations.SentencesAnnotation)));

    IList<ICoreMap> outputSentences = new List<ICoreMap>();
    Sharpen.Collections.AddAll(
        outputSentences,
        extractorOutput.Get(typeof(CoreAnnotations.SentencesAnnotation)));

    PrintResults(printer, goldSentences, outputSentences);

    return buffer.GetBuffer().ToString();
}
/// <summary>
/// Runs the external HeidelTime jar over <paramref name="document"/> and stores the timex
/// annotations produced by <c>outputReader</c> on the document under
/// <c>TimeAnnotations.TimexAnnotations</c>.
/// </summary>
/// <remarks>
/// Any exception raised while preparing the input, running the process, or reading its output
/// is caught, its stack trace is printed to standard error together with the document id, and
/// the method then returns normally. The temporary input file is removed in every case.
/// </remarks>
/// <param name="document">The document to annotate.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Annotate(ICoreMap document)
{
    // Declared outside the try so the finally block can clean it up.
    File inputFile = null;
    try
    {
        //--Create Input File
        inputFile = File.CreateTempFile("heideltime", ".input");
        PrintWriter inputWriter = new PrintWriter(inputFile);
        try
        {
            PrepareHeidelTimeInput(inputWriter, document);
        }
        finally
        {
            // Close even when preparing the input fails, so the handle is not leaked.
            inputWriter.Close();
        }

        Optional<string> pubDate = GetPubDate(document);

        //--Build Command
        IList<string> args = new List<string>(Arrays.AsList(
            "java",
            "-jar", this.heideltimePath.GetPath() + "/heideltime.jar",
            "-c", this.heideltimePath.GetPath() + "/config.props",
            "-l", this.language,
            "-t", "NEWS"));
        if (pubDate.IsPresent())
        {
            args.Add("-dct");
            args.Add(pubDate.Get());
        }
        args.Add(inputFile.GetPath());

        // Run HeidelTime on the input file and capture what it prints.
        ProcessBuilder process = new ProcessBuilder(args);
        StringWriter outputWriter = new StringWriter();
        SystemUtils.Run(process, outputWriter, null);
        string output = outputWriter.GetBuffer().ToString();

        IList<ICoreMap> timexAnns = outputReader.Process(document, output);
        document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
        if (outputResults)
        {
            System.Console.Out.WriteLine(timexAnns);
        }
    }
    catch (Exception e)
    {
        // Best effort: report the failure and leave the document without new timexes.
        Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
        System.Console.Error.WriteLine("error running HeidelTime on this doc: " + document.Get(typeof(CoreAnnotations.DocIDAnnotation)));
    }
    finally
    {
        // The temp file is only needed while HeidelTime runs; do not leave it behind.
        if (inputFile != null)
        {
            inputFile.Delete();
        }
    }
}
/// <summary>
/// Runs the external HeidelTime jar over the text of <paramref name="document"/>, parses the
/// XML it prints, stores the resulting timex annotations on the document, and then assigns to
/// each sentence the timexes that fall inside it.
/// </summary>
/// <param name="document">
/// The document to annotate. Its <c>TextAnnotation</c> is written to the HeidelTime input file,
/// and it must contain a <c>CalendarAnnotation</c> or a <c>DocDateAnnotation</c> key.
/// </param>
/// <exception cref="System.ArgumentException">
/// The document contains neither a <c>CalendarAnnotation</c> nor a <c>DocDateAnnotation</c> key.
/// </exception>
/// <exception cref="System.Exception">
/// The cleaned-up HeidelTime output could not be parsed; the message carries the parse error,
/// the input that was sent, and the output that was received.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public virtual void Annotate(ICoreMap document)
{
    //--Create Input File
    File inputFile = File.CreateTempFile("heideltime", ".input");
    IElement outputXML;
    try
    {
        PrintWriter inputWriter = new PrintWriter(inputFile);
        try
        {
            inputWriter.Println(document.Get(typeof(CoreAnnotations.TextAnnotation)));
        }
        finally
        {
            // Close even when writing fails, so the handle is not leaked.
            inputWriter.Close();
        }

        //--Get Date
        // Not strictly necessary, but a document without any date is rejected up front.
        if (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation))
            && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation)))
        {
            throw new ArgumentException("CoreMap must have either a Calendar or DocDate annotation");
        }

        Calendar dateCalendar = document.Get(typeof(CoreAnnotations.CalendarAnnotation));
        string pubDate = null;
        if (dateCalendar != null)
        {
            // (case: calendar annotation)
            // NOTE(review): "%TF" is a Java Formatter pattern; .NET string.Format only understands
            // {n} placeholders, so this yields the literal text "%TF". Left unchanged because the
            // Calendar API is not visible here - confirm the intended date conversion.
            pubDate = string.Format("%TF", dateCalendar);
        }
        else
        {
            // (case: docdateannotation)
            string docDate = document.Get(typeof(CoreAnnotations.DocDateAnnotation));
            if (docDate != null)
            {
                pubDate = docDate;
            }
        }

        //--Build Command
        List<string> args = new List<string>();
        args.Add("java");
        args.Add("-jar");
        args.Add(this.heideltimePath.GetPath() + "/heideltime.jar");
        args.Add("-c");
        args.Add(this.heideltimePath.GetPath() + "/config.props");
        args.Add("-l");
        args.Add(this.language);
        args.Add("-t");
        args.Add("NEWS");
        if (pubDate != null)
        {
            args.Add("-dct");
            args.Add(pubDate);
        }
        args.Add(inputFile.GetPath());

        // Run HeidelTime on the input file and capture what it prints.
        ProcessBuilder process = new ProcessBuilder(args);
        StringWriter outputWriter = new StringWriter();
        SystemUtils.Run(process, outputWriter, null);
        string output = outputWriter.GetBuffer().ToString();

        // Drop everything after the closing DOC tag and remove the DOCTYPE declaration.
        // TODO TimeML.dtd? FileNotFoundException if we leave it in
        Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);
        output = docClose.Matcher(output).ReplaceAll("</DOC>").ReplaceAll("<!DOCTYPE TimeML SYSTEM \"TimeML.dtd\">", string.Empty);

        // Repair the two known shapes of badly nested TIMEX3 tags.
        Pattern badNestedTimex = Pattern.Compile(Pattern.Quote("<T</TIMEX3>IMEX3"));
        output = badNestedTimex.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
        Pattern badNestedTimex2 = Pattern.Compile(Pattern.Quote("<TI</TIMEX3>MEX3"));
        output = badNestedTimex2.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
        output = output.ReplaceAll("<TimeML>", string.Empty);

        // Parse the HeidelTime output.
        try
        {
            outputXML = XMLUtils.ParseElement(output);
        }
        catch (Exception ex)
        {
            // .NET composite formatting needs {n} placeholders; "%s" would be emitted verbatim
            // and the diagnostic details would be lost.
            throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
        }
    }
    finally
    {
        // Remove the temp file on success and on every failure path alike.
        inputFile.Delete();
    }

    // Get Timex annotations.
    IList<ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);
    document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
    if (outputResults)
    {
        System.Console.Out.WriteLine(timexAnns);
    }

    // Align Timex annotations to sentences; timexIndex only ever moves forward.
    int timexIndex = 0;
    foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        int sentBegin = BeginOffset(sentence);
        int sentEnd = EndOffset(sentence);

        // Skip times that start before the sentence.
        while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
        {
            ++timexIndex;
        }

        // Collect the times lying entirely within the sentence.
        int sublistBegin = timexIndex;
        while (timexIndex < timexAnns.Count
            && sentBegin <= BeginOffset(timexAnns[timexIndex])
            && EndOffset(timexAnns[timexIndex]) <= sentEnd)
        {
            ++timexIndex;
        }

        // timexIndex now sits one past the last timex inside the sentence.
        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, timexIndex));
    }
}
/// <summary>
/// Runs the external GUTime Perl script (<c>TimeTag.pl</c>) over <paramref name="document"/>,
/// parses the XML it prints, stores the resulting timex annotations on the document, and then
/// assigns to each sentence the timexes that fall inside it.
/// </summary>
/// <param name="document">
/// The document to annotate. When it contains neither a <c>CalendarAnnotation</c> nor a
/// <c>DocDateAnnotation</c> key, the script is invoked with the additional <c>-FDNW</c> flag.
/// </param>
/// <exception cref="System.Exception">
/// The cleaned-up GUTime output could not be parsed; the message carries the parse error,
/// the input that was sent, and the output that was received.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public virtual void Annotate(ICoreMap document)
{
    // Write the input file in GUTime format.
    IElement inputXML = ToInputXML(document);
    File inputFile = File.CreateTempFile("gutime", ".input");
    IElement outputXML;
    try
    {
        PrintWriter inputWriter = new PrintWriter(inputFile);
        try
        {
            inputWriter.Println(XMLUtils.NodeToString(inputXML, false));
        }
        finally
        {
            // Close even when writing fails, so the handle is not leaked.
            inputWriter.Close();
        }

        bool useFirstDate = !document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation))
            && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation));

        List<string> args = new List<string>();
        args.Add("perl");
        args.Add("-I" + this.gutimePath.GetPath());
        args.Add(new File(this.gutimePath, "TimeTag.pl").GetPath());
        if (useFirstDate)
        {
            args.Add("-FDNW");
        }
        args.Add(inputFile.GetPath());

        // Run GUTime on the input file and capture what it prints.
        ProcessBuilder process = new ProcessBuilder(args);
        StringWriter outputWriter = new StringWriter();
        SystemUtils.Run(process, outputWriter, null);
        string output = outputWriter.GetBuffer().ToString();

        // Drop everything after the closing DOC tag.
        Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);
        output = docClose.Matcher(output).ReplaceAll("</DOC>");

        // The TimeTag.pl result contains the following tags, which must be removed.
        output = output.ReplaceAll("<lex.*?>", string.Empty);
        output = output.Replace("</lex>", string.Empty);
        output = output.Replace("<NG>", string.Empty);
        output = output.Replace("</NG>", string.Empty);
        output = output.Replace("<VG>", string.Empty);
        output = output.Replace("</VG>", string.Empty);
        output = output.Replace("<s>", string.Empty);
        output = output.Replace("</s>", string.Empty);

        // Parse the GUTime output.
        try
        {
            outputXML = XMLUtils.ParseElement(output);
        }
        catch (Exception ex)
        {
            // .NET composite formatting needs {n} placeholders; "%s" would be emitted verbatim
            // and the diagnostic details would be lost.
            throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
        }
    }
    finally
    {
        // Remove the temp file on success and on every failure path alike.
        inputFile.Delete();
    }

    // Get Timex annotations.
    IList<ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);
    document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
    if (outputResults)
    {
        System.Console.Out.WriteLine(timexAnns);
    }

    // Align Timex annotations to sentences; timexIndex only ever moves forward.
    int timexIndex = 0;
    foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        int sentBegin = BeginOffset(sentence);
        int sentEnd = EndOffset(sentence);

        // Skip times that start before the sentence.
        while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
        {
            ++timexIndex;
        }

        // Collect the times lying entirely within the sentence.
        int sublistBegin = timexIndex;
        while (timexIndex < timexAnns.Count
            && sentBegin <= BeginOffset(timexAnns[timexIndex])
            && EndOffset(timexAnns[timexIndex]) <= sentEnd)
        {
            ++timexIndex;
        }

        // timexIndex now sits one past the last timex inside the sentence.
        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, timexIndex));
    }
}
/// <summary>
/// Returns the display form of this object: the <c>"(display {0} {1})"</c> template is
/// evaluated with this instance and a new in-memory writer, and the writer's buffer
/// is the result.
/// </summary>
/// <returns>Whatever the evaluation wrote into the writer.</returns>
public override string ToString()
{
    const string template = "(display {0} {1})";
    StringWriter output = new StringWriter();

    template.Eval(this, output);

    return output.GetBuffer();
}