Beispiel #1
0
        /// <summary>
        /// Builds the string form of this object by evaluating the
        /// <c>(display {0} {1})</c> expression with this instance and a new
        /// <c>StringWriter</c> as its two arguments.
        /// </summary>
        /// <returns>The buffer of the writer once the expression has been evaluated.</returns>
        public override string ToString()
        {
            const string displayExpression = "(display {0} {1})";
            var sink = new StringWriter();

            displayExpression.Eval(this, sink);
            return sink.GetBuffer();
        }
Beispiel #2
0
        /// <summary>
        /// Convenience overload: dumps the tree into an in-memory buffer instead of a
        /// caller-supplied writer.
        /// </summary>
        /// <remarks>
        /// Delegates to the three-argument overload with a <c>PrintWriter</c> wrapping a new
        /// <c>StringWriter</c>, an empty <c>StringBuilder</c> and
        /// <c>Snapshot.CurrentStateId</c>.
        /// </remarks>
        /// <returns>The buffer backing the in-memory writer after the dump has run.</returns>
        public StringBuilder DumpTreeRecursively()
        {
            StringWriter  buffer  = new StringWriter();
            PrintWriter   printer = new PrintWriter(buffer, true);
            StringBuilder builder = new StringBuilder();

            DumpTreeRecursively(printer, builder, Org.Apache.Hadoop.Hdfs.Server.Namenode.Snapshot.Snapshot.CurrentStateId);
            return buffer.GetBuffer();
        }
        /// <summary>
        /// Produces the results report for two label lists as a string rather than writing it
        /// to a caller-supplied writer.
        /// </summary>
        /// <param name="goldStandard">Gold-standard labels, forwarded unchanged to <c>PrintResultsUsingLabels</c>.</param>
        /// <param name="extractorOutput">Extractor labels, forwarded unchanged to <c>PrintResultsUsingLabels</c>.</param>
        /// <returns>Everything <c>PrintResultsUsingLabels</c> wrote to the in-memory writer.</returns>
        public virtual string PrintResults(IList <string> goldStandard, IList <string> extractorOutput)
        {
            StringWriter captured = new StringWriter();

            // The PrintWriter is only a view over the in-memory buffer read back below.
            PrintResultsUsingLabels(new PrintWriter(captured, true), goldStandard, extractorOutput);
            return captured.GetBuffer().ToString();
        }
        /// <summary>
        /// Given sentences annotated by an information extractor, and the same sentences with
        /// gold-standard annotations, returns a report on how the extraction performed.
        /// </summary>
        /// <remarks>
        /// The <c>CoreAnnotations.SentencesAnnotation</c> value of each argument is copied into a
        /// new list, and the copies are passed to the writer-based <c>PrintResults</c> overload.
        /// </remarks>
        /// <param name="goldStandard">Map whose sentences annotation holds the gold-standard sentences.</param>
        /// <param name="extractorOutput">Map whose sentences annotation holds the extractor's sentences.</param>
        /// <returns>Everything the writer-based overload wrote to the in-memory writer.</returns>
        public virtual string PrintResults(ICoreMap goldStandard, ICoreMap extractorOutput)
        {
            // Copy both sentence collections into fresh lists before handing them on.
            IList <ICoreMap> goldSentences = new List <ICoreMap>();
            Sharpen.Collections.AddAll(goldSentences, goldStandard.Get(typeof(CoreAnnotations.SentencesAnnotation)));

            IList <ICoreMap> outputSentences = new List <ICoreMap>();
            Sharpen.Collections.AddAll(outputSentences, extractorOutput.Get(typeof(CoreAnnotations.SentencesAnnotation)));

            StringWriter captured = new StringWriter();
            PrintResults(new PrintWriter(captured, true), goldSentences, outputSentences);
            return captured.GetBuffer().ToString();
        }
 /// <summary>
 /// Runs the external HeidelTime jar over <paramref name="document"/> and stores the Timex
 /// annotations produced by <c>outputReader</c> on the document under
 /// <c>TimeAnnotations.TimexAnnotations</c>.
 /// </summary>
 /// <remarks>
 /// Any exception raised while preparing the input, running the process or reading its output
 /// is printed to standard error together with the document id and is not rethrown.
 /// The temporary input file is deleted before the method returns, whether or not the run
 /// succeeded.
 /// </remarks>
 /// <param name="document">Document to annotate; it is modified in place on success.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Annotate(ICoreMap document)
 {
     // Declared outside the try so the finally block can clean it up.
     File inputFile = null;

     try
     {
         //--Create Input File
         //(create file)
         inputFile = File.CreateTempFile("heideltime", ".input");
         //(write to file)
         PrintWriter inputWriter = new PrintWriter(inputFile);
         try
         {
             PrepareHeidelTimeInput(inputWriter, document);
         }
         finally
         {
             // Close even when preparing the input fails, so the file handle is not leaked.
             inputWriter.Close();
         }
         Optional <string> pubDate = GetPubDate(document);
         //--Build Command
         IList <string> args = new List <string>(Arrays.AsList("java", "-jar", this.heideltimePath.GetPath() + "/heideltime.jar", "-c", this.heideltimePath.GetPath() + "/config.props", "-l", this.language, "-t", "NEWS"));
         if (pubDate.IsPresent())
         {
             // Only pass a document creation time when one is available.
             args.Add("-dct");
             args.Add(pubDate.Get());
         }
         args.Add(inputFile.GetPath());
         // run HeidelTime on the input file
         ProcessBuilder process      = new ProcessBuilder(args);
         StringWriter   outputWriter = new StringWriter();
         SystemUtils.Run(process, outputWriter, null);
         string           output    = outputWriter.GetBuffer().ToString();
         IList <ICoreMap> timexAnns = outputReader.Process(document, output);
         document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
         if (outputResults)
         {
             System.Console.Out.WriteLine(timexAnns);
         }
     }
     catch (Exception e)
     {
         // Best effort: report the failure and leave the document without Timex annotations.
         Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
         System.Console.Error.WriteLine("error running HeidelTime on this doc: " + document.Get(typeof(CoreAnnotations.DocIDAnnotation)));
     }
     finally
     {
         // The process output has been read into memory by now; the temp file is no longer needed.
         if (inputFile != null)
         {
             inputFile.Delete();
         }
     }
 }
Beispiel #6
0
        /// <summary>
        /// Runs the external HeidelTime jar on the document text, parses the XML it prints, and
        /// attaches the resulting Timex annotations to the document and to each of its sentences.
        /// </summary>
        /// <remarks>
        /// The temporary input file is deleted before the method returns or throws.
        /// </remarks>
        /// <param name="document">
        /// Document to annotate in place. It must contain a <c>CoreAnnotations.CalendarAnnotation</c>
        /// or a <c>CoreAnnotations.DocDateAnnotation</c> key.
        /// </param>
        /// <exception cref="System.ArgumentException">The document contains neither date annotation key.</exception>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception">
        /// The cleaned-up HeidelTime output could not be parsed; the message carries the parse
        /// error, the input file contents and the output.
        /// </exception>
        public virtual void Annotate(ICoreMap document)
        {
            //--Create Input File
            //(create file)
            File     inputFile = File.CreateTempFile("heideltime", ".input");
            IElement outputXML;

            try
            {
                //(write to file)
                PrintWriter inputWriter = new PrintWriter(inputFile);
                try
                {
                    inputWriter.Println(document.Get(typeof(CoreAnnotations.TextAnnotation)));
                }
                finally
                {
                    // Close even if writing fails so the file handle is not leaked.
                    inputWriter.Close();
                }
                //--Get Date
                //(error checks)
                if (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation)))
                {
                    throw new ArgumentException("CoreMap must have either a Calendar or DocDate annotation");
                }
                //not strictly necessary, technically...
                //(variables)
                Calendar dateCalendar = document.Get(typeof(CoreAnnotations.CalendarAnnotation));
                string   pubDate;

                if (dateCalendar != null)
                {
                    //(case: calendar annotation)
                    // NOTE(review): "%TF" is a java.util.Formatter date pattern. .NET's string.Format
                    // has no such specifier and returns the text unchanged, so "-dct" presumably
                    // receives the literal "%TF" here -- confirm how Calendar should be rendered.
                    pubDate = string.Format("%TF", dateCalendar);
                }
                else
                {
                    //(case: docdateannotation) -- stays null when the annotation value is null
                    pubDate = document.Get(typeof(CoreAnnotations.DocDateAnnotation));
                }
                //--Build Command
                List <string> args = new List <string>();

                args.Add("java");
                args.Add("-jar");
                args.Add(this.heideltimePath.GetPath() + "/heideltime.jar");
                args.Add("-c");
                args.Add(this.heideltimePath.GetPath() + "/config.props");
                args.Add("-l");
                args.Add(this.language);
                args.Add("-t");
                args.Add("NEWS");
                if (pubDate != null)
                {
                    args.Add("-dct");
                    args.Add(pubDate);
                }
                args.Add(inputFile.GetPath());
                // run HeidelTime on the input file
                ProcessBuilder process      = new ProcessBuilder(args);
                StringWriter   outputWriter = new StringWriter();

                SystemUtils.Run(process, outputWriter, null);
                string  output   = outputWriter.GetBuffer().ToString();
                // drop everything after the closing DOC tag, and the DOCTYPE declaration
                Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);

                output = docClose.Matcher(output).ReplaceAll("</DOC>").ReplaceAll("<!DOCTYPE TimeML SYSTEM \"TimeML.dtd\">", string.Empty);
                //TODO TimeML.dtd? FileNotFoundException if we leave it in
                // repair the two malformed tag sequences where a closing tag sits inside an opening tag
                Pattern badNestedTimex = Pattern.Compile(Pattern.Quote("<T</TIMEX3>IMEX3"));

                output = badNestedTimex.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
                Pattern badNestedTimex2 = Pattern.Compile(Pattern.Quote("<TI</TIMEX3>MEX3"));

                output = badNestedTimex2.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
                //output = output.replaceAll("\\n\\n<TimeML>\\n\\n","<TimeML>");
                output = output.ReplaceAll("<TimeML>", string.Empty);
                // parse the HeidelTime output
                try
                {
                    outputXML = XMLUtils.ParseElement(output);
                }
                catch (Exception ex)
                {
                    // .NET composite format items ({0}..{2}); Java-style "%s" would not insert the arguments.
                    throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
                }
            }
            finally
            {
                // Remove the temp file on success and on every failure path alike.
                inputFile.Delete();
            }
            // get Timex annotations
            IList <ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);

            document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
            if (outputResults)
            {
                System.Console.Out.WriteLine(timexAnns);
            }
            // align Timex annotations to sentences
            // (single forward pass: timexIndex is shared across sentences and never moves backwards)
            int timexIndex = 0;

            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                int sentBegin = BeginOffset(sentence);
                int sentEnd   = EndOffset(sentence);
                // skip times before the sentence
                while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
                {
                    ++timexIndex;
                }
                // determine times within the sentence
                int sublistBegin = timexIndex;
                int sublistEnd   = timexIndex;
                while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd)
                {
                    ++sublistEnd;
                    ++timexIndex;
                }
                // set the sentence timexes
                sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd));
            }
        }
        /// <summary>
        /// Runs the external GUTime Perl script (<c>TimeTag.pl</c>) on the document, parses the XML
        /// it prints, and attaches the resulting Timex annotations to the document and to each of
        /// its sentences.
        /// </summary>
        /// <remarks>
        /// The temporary input file is deleted before the method returns or throws.
        /// </remarks>
        /// <param name="document">Document to annotate in place.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception">
        /// The cleaned-up GUTime output could not be parsed; the message carries the parse error,
        /// the input file contents and the output.
        /// </exception>
        public virtual void Annotate(ICoreMap document)
        {
            // write input file in GUTime format
            IElement inputXML  = ToInputXML(document);
            File     inputFile = File.CreateTempFile("gutime", ".input");
            IElement outputXML;

            try
            {
                PrintWriter inputWriter = new PrintWriter(inputFile);
                try
                {
                    inputWriter.Println(XMLUtils.NodeToString(inputXML, false));
                }
                finally
                {
                    // Close even if writing fails so the file handle is not leaked.
                    inputWriter.Close();
                }
                // "-FDNW" is passed only when the document has neither date annotation key
                // (NOTE(review): exact meaning of the flag is defined by TimeTag.pl -- confirm there)
                bool          useFirstDate = (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation)));
                List <string> args         = new List <string>();

                args.Add("perl");
                args.Add("-I" + this.gutimePath.GetPath());
                args.Add(new File(this.gutimePath, "TimeTag.pl").GetPath());
                if (useFirstDate)
                {
                    args.Add("-FDNW");
                }
                args.Add(inputFile.GetPath());
                // run GUTime on the input file
                ProcessBuilder process      = new ProcessBuilder(args);
                StringWriter   outputWriter = new StringWriter();

                SystemUtils.Run(process, outputWriter, null);
                string  output   = outputWriter.GetBuffer().ToString();
                // drop everything after the closing DOC tag
                Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);

                output = docClose.Matcher(output).ReplaceAll("</DOC>");
                //The TimeTag.pl result file contains the following tags, which must be removed
                output = output.ReplaceAll("<lex.*?>", string.Empty);
                output = output.Replace("</lex>", string.Empty);
                output = output.Replace("<NG>", string.Empty);
                output = output.Replace("</NG>", string.Empty);
                output = output.Replace("<VG>", string.Empty);
                output = output.Replace("</VG>", string.Empty);
                output = output.Replace("<s>", string.Empty);
                output = output.Replace("</s>", string.Empty);
                // parse the GUTime output
                try
                {
                    outputXML = XMLUtils.ParseElement(output);
                }
                catch (Exception ex)
                {
                    // .NET composite format items ({0}..{2}); Java-style "%s" would not insert the arguments.
                    throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
                }
            }
            finally
            {
                // Remove the temp file on success and on every failure path alike.
                inputFile.Delete();
            }
            // get Timex annotations
            IList <ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);

            document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
            if (outputResults)
            {
                System.Console.Out.WriteLine(timexAnns);
            }
            // align Timex annotations to sentences
            // (single forward pass: timexIndex is shared across sentences and never moves backwards)
            int timexIndex = 0;

            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                int sentBegin = BeginOffset(sentence);
                int sentEnd   = EndOffset(sentence);
                // skip times before the sentence
                while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
                {
                    ++timexIndex;
                }
                // determine times within the sentence
                int sublistBegin = timexIndex;
                int sublistEnd   = timexIndex;
                while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd)
                {
                    ++sublistEnd;
                    ++timexIndex;
                }
                // set the sentence timexes
                sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd));
            }
        }
Beispiel #8
0
 /// <summary>
 /// Returns the text obtained by evaluating <c>(display {0} {1})</c> with this instance
 /// and a new <c>StringWriter</c>, then reading back the writer's buffer.
 /// </summary>
 public override string ToString()
 {
   var output = new StringWriter();

   "(display {0} {1})".Eval(this, output);

   string rendered = output.GetBuffer();
   return rendered;
 }