/// <exception cref="System.IO.IOException"/>
        /// <summary>
        /// Runs the external GUTime tagger (<c>perl TimeTag.pl</c> located under <c>gutimePath</c>)
        /// over <paramref name="document"/> and stores the resulting Timex annotations both on
        /// the document and on each of its sentences.
        /// </summary>
        /// <remarks>
        /// The document is serialized to a temporary input file, which is always deleted before
        /// this method returns or throws.
        /// </remarks>
        /// <param name="document">
        /// Document to annotate. Its <c>CoreAnnotations.SentencesAnnotation</c> value is iterated,
        /// and <c>TimeAnnotations.TimexAnnotations</c> is set on the document and on every sentence.
        /// </param>
        /// <exception cref="System.Exception">
        /// Thrown when the GUTime output cannot be parsed as XML; the message carries the original
        /// error, the input file contents and the cleaned GUTime output.
        /// </exception>
        public virtual void Annotate(ICoreMap document)
        {
            // write input file in GUTime format
            IElement inputXML  = ToInputXML(document);
            File     inputFile = File.CreateTempFile("gutime", ".input");
            IElement outputXML;

            try
            {
                PrintWriter inputWriter = new PrintWriter(inputFile);

                try
                {
                    inputWriter.Println(XMLUtils.NodeToString(inputXML, false));
                }
                finally
                {
                    // release the file handle even if serialization fails
                    inputWriter.Close();
                }

                // NOTE(review): -FDNW is passed only when the document carries neither a calendar
                // nor a doc-date annotation; presumably it makes GUTime fall back to the first
                // date found in the text -- confirm against the TimeTag.pl options.
                bool useFirstDate = !document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation))
                                    && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation));
                List<string> args = new List<string>();

                args.Add("perl");
                args.Add("-I" + this.gutimePath.GetPath());
                args.Add(new File(this.gutimePath, "TimeTag.pl").GetPath());
                if (useFirstDate)
                {
                    args.Add("-FDNW");
                }
                args.Add(inputFile.GetPath());

                // run GUTime on the input file, capturing what it writes into outputWriter
                ProcessBuilder process      = new ProcessBuilder(args);
                StringWriter   outputWriter = new StringWriter();

                SystemUtils.Run(process, outputWriter, null);
                string output = outputWriter.GetBuffer().ToString();

                // drop everything that follows the closing </DOC> tag
                Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);

                output = docClose.Matcher(output).ReplaceAll("</DOC>");
                // the TimeTag.pl result contains the following tags, which must be removed
                output = output.ReplaceAll("<lex.*?>", string.Empty);
                output = output.Replace("</lex>", string.Empty);
                output = output.Replace("<NG>", string.Empty);
                output = output.Replace("</NG>", string.Empty);
                output = output.Replace("<VG>", string.Empty);
                output = output.Replace("</VG>", string.Empty);
                output = output.Replace("<s>", string.Empty);
                output = output.Replace("</s>", string.Empty);

                // parse the GUTime output
                try
                {
                    outputXML = XMLUtils.ParseElement(output);
                }
                catch (Exception ex)
                {
                    // .NET composite formatting needs {n} placeholders; Java-style %s would be
                    // emitted literally and the diagnostic details silently dropped.
                    throw new Exception(string.Format("error:\n{0}\ninput:\n{1}\noutput:\n{2}", ex, IOUtils.SlurpFile(inputFile), output), ex);
                }
            }
            finally
            {
                // runs after the catch above has read the file, so the diagnostics stay intact
                inputFile.Delete();
            }

            // get Timex annotations
            IList<ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);

            document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
            if (outputResults)
            {
                System.Console.Out.WriteLine(timexAnns);
            }

            // align Timex annotations to sentences with a single forward pass
            // NOTE(review): this assumes sentences and timexAnns are both ordered by offset -- confirm.
            int timexIndex = 0;

            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                int sentBegin = BeginOffset(sentence);
                int sentEnd   = EndOffset(sentence);
                // skip times before the sentence
                while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
                {
                    ++timexIndex;
                }
                // determine times within the sentence
                int sublistBegin = timexIndex;
                int sublistEnd   = timexIndex;
                while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd)
                {
                    ++sublistEnd;
                    ++timexIndex;
                }
                // set the sentence timexes
                sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd));
            }
        }
Exemple #2
0
 /// <summary>
 /// Initializes from an XML element by serializing it with
 /// <c>XMLUtils.NodeToString(element, false)</c> and delegating to the
 /// two-argument <c>Init</c> overload with that text and the element itself.
 /// </summary>
 /// <param name="element">XML element to initialize from.</param>
 private void Init(IElement element)
 {
     var serialized = XMLUtils.NodeToString(element, false);

     Init(serialized, element);
 }