/// <summary>
        /// Runs the external HeidelTime jar over the document text and stores the resulting
        /// Timex annotations on the document and on each of its sentences.
        /// </summary>
        /// <remarks>
        /// The document text is written to a temporary file, HeidelTime is launched as a
        /// <c>java -jar</c> child process on that file, and its output is cleaned up and parsed
        /// as XML. The temporary file is always removed, whether or not processing succeeds.
        /// </remarks>
        /// <param name="document">
        /// The document to annotate. It must contain a Calendar or a DocDate annotation; its
        /// Text and Sentences annotations are read as well.
        /// </param>
        /// <exception cref="System.ArgumentException">
        /// The document has neither a Calendar nor a DocDate annotation.
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Annotate(ICoreMap document)
        {
            //--Check Preconditions
            // Validate before touching the file system so a rejected document never leaves a temp file behind.
            if (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation)))
            {
                throw new ArgumentException("CoreMap must have either a Calendar or DocDate annotation");
            }
            //--Get Date
            Calendar dateCalendar = document.Get(typeof(CoreAnnotations.CalendarAnnotation));
            string   pubDate      = null;

            if (dateCalendar != null)
            {
                //(case: calendar annotation)
                // NOTE(review): "%TF" is a Java Formatter specifier (ISO date); .NET string.Format does
                // not interpret it. Left as-is because the Calendar port's formatting API is not visible here.
                pubDate = string.Format("%TF", dateCalendar);
            }
            else
            {
                //(case: docdateannotation) -- may still be null, in which case no "-dct" argument is passed
                pubDate = document.Get(typeof(CoreAnnotations.DocDateAnnotation));
            }
            //--Create Input File
            IElement outputXML;
            File     inputFile = File.CreateTempFile("heideltime", ".input");

            try
            {
                //(write to file)
                PrintWriter inputWriter = new PrintWriter(inputFile);

                try
                {
                    inputWriter.Println(document.Get(typeof(CoreAnnotations.TextAnnotation)));
                }
                finally
                {
                    // close even if the write fails so the handle is not leaked
                    inputWriter.Close();
                }
                //--Build Command
                List<string> args = new List<string>();

                args.Add("java");
                args.Add("-jar");
                args.Add(this.heideltimePath.GetPath() + "/heideltime.jar");
                args.Add("-c");
                args.Add(this.heideltimePath.GetPath() + "/config.props");
                args.Add("-l");
                args.Add(this.language);
                args.Add("-t");
                args.Add("NEWS");
                if (pubDate != null)
                {
                    args.Add("-dct");
                    args.Add(pubDate);
                }
                args.Add(inputFile.GetPath());
                // run HeidelTime on the input file
                ProcessBuilder process      = new ProcessBuilder(args);
                StringWriter   outputWriter = new StringWriter();

                SystemUtils.Run(process, outputWriter, null);
                string  output   = outputWriter.GetBuffer().ToString();
                // drop everything after the closing </DOC> tag and strip the DOCTYPE declaration
                Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);

                output = docClose.Matcher(output).ReplaceAll("</DOC>").ReplaceAll("<!DOCTYPE TimeML SYSTEM \"TimeML.dtd\">", string.Empty);
                //TODO TimeML.dtd? FileNotFoundException if we leave it in
                // repair two known malformed tag interleavings so the output can be parsed
                Pattern badNestedTimex = Pattern.Compile(Pattern.Quote("<T</TIMEX3>IMEX3"));

                output = badNestedTimex.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
                Pattern badNestedTimex2 = Pattern.Compile(Pattern.Quote("<TI</TIMEX3>MEX3"));

                output = badNestedTimex2.Matcher(output).ReplaceAll("</TIMEX3><TIMEX3");
                output = output.ReplaceAll("<TimeML>", string.Empty);
                // parse the HeidelTime output
                try
                {
                    outputXML = XMLUtils.ParseElement(output);
                }
                catch (Exception ex)
                {
                    // .NET composite placeholders: the Java-style "%s" markers were emitted literally
                    // and the diagnostic arguments were silently discarded.
                    throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
                }
            }
            finally
            {
                // always remove the temp file, including when HeidelTime or parsing fails
                inputFile.Delete();
            }
            // get Timex annotations
            IList<ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);

            document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
            if (outputResults)
            {
                System.Console.Out.WriteLine(timexAnns);
            }
            // align Timex annotations to sentences
            int timexIndex = 0;

            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                int sentBegin = BeginOffset(sentence);
                int sentEnd   = EndOffset(sentence);
                // skip times before the sentence
                while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
                {
                    ++timexIndex;
                }
                // determine times within the sentence
                int sublistBegin = timexIndex;
                int sublistEnd   = timexIndex;
                while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd)
                {
                    ++sublistEnd;
                    ++timexIndex;
                }
                // set the sentence timexes
                sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd));
            }
        }
Exemple #2
0
 /// <summary>
 /// Convenience overload: converts <paramref name="element"/> to a string with
 /// <c>XMLUtils.NodeToString</c> and forwards both the string and the element to the
 /// two-argument <c>Init</c>.
 /// </summary>
 /// <param name="element">The XML element to initialize from.</param>
 private void Init(IElement element)
 {
     // Second argument is passed as false, unchanged from the original call.
     var serialized = XMLUtils.NodeToString(element, false);

     Init(serialized, element);
 }