Exemple #1
0
        /// <summary>
        /// Configures the annotator from <paramref name="props"/>: reads the resource paths and
        /// sieve lists, loads the family / gender / animacy word lists, and either loads the
        /// character map from the configured characters file or flags that the map has to be
        /// built per annotation.
        /// </summary>
        /// <param name="props">
        /// Settings source. Keys read here: <c>verbose</c>, <c>booknlpCoref</c>, <c>modelPath</c>,
        /// <c>charactersPath</c>, <c>QMSieves</c>, <c>MSSieves</c>, <c>familyWordsFile</c>,
        /// <c>animacyWordsFile</c>, <c>genderNamesFile</c> and <c>useCoref</c>.
        /// </param>
        public QuoteAttributionAnnotator(Properties props)
        {
            Verbose = PropertiesUtils.GetBool(props, "verbose", false);
            // Only created when Verbose is set; stays null otherwise.
            Timing timer = null;

            // Resource paths; a missing coref map or characters file is only reported in verbose mode.
            CorefPath = props.GetProperty("booknlpCoref", null);
            if (CorefPath == null && Verbose)
            {
                log.Err("Warning: no coreference map!");
            }
            ModelPath      = props.GetProperty("modelPath", DefaultModelPath);
            CharactersFile = props.GetProperty("charactersPath", null);
            if (CharactersFile == null && Verbose)
            {
                log.Err("Warning: no characters file!");
            }

            // Sieve configuration.
            qmSieveList = props.GetProperty("QMSieves", DefaultQmsieves);
            msSieveList = props.GetProperty("MSSieves", DefaultMssieves);
            if (Verbose)
            {
                timer = new Timing();
                log.Info("Loading QuoteAttribution coref [" + CorefPath + "]...");
                log.Info("Loading QuoteAttribution characters [" + CharactersFile + "]...");
            }

            // Word lists: each property falls back to the current value of the corresponding member.
            FamilyWordList  = props.GetProperty("familyWordsFile", FamilyWordList);
            AnimacyWordList = props.GetProperty("animacyWordsFile", AnimacyWordList);
            GenderWordList  = props.GetProperty("genderNamesFile", GenderWordList);
            familyRelations = QuoteAttributionUtils.ReadFamilyRelations(FamilyWordList);
            genderMap       = QuoteAttributionUtils.ReadGenderedNounList(GenderWordList);
            animacyList     = QuoteAttributionUtils.ReadAnimacyList(AnimacyWordList);

            // Decide on the configured characters file, not on characterMap: this constructor
            // has not assigned characterMap before this point, so testing the map itself would
            // skip loading even when a file was supplied.
            // NOTE(review): assumes characterMap has no field initializer - confirm.
            if (CharactersFile != null)
            {
                characterMap = QuoteAttributionUtils.ReadPersonMap(CharactersFile);
            }
            else
            {
                buildCharacterMapPerAnnotation = true;
            }

            // use Stanford CoreNLP coref to map mentions to canonical mentions
            useCoref = PropertiesUtils.GetBool(props, "useCoref", useCoref);

            // Guard on the timer itself so Stop is only reached when the timer was created.
            if (timer != null)
            {
                timer.Stop("done.");
            }
        }
Exemple #2
0
 /// <summary>
 /// Classifies the current contents of <c>editorPane</c>, stores the inline-XML result in
 /// <c>taggedContents</c>, highlights each tagged entity in the pane, and finally enables
 /// <c>saveTaggedAs</c>.
 /// </summary>
 /// <remarks>
 /// Content whose type is not <c>text/html</c> is highlighted by applying character
 /// attributes to the styled document; <c>text/html</c> content is highlighted by rewriting
 /// the entity tags into coloured <c>span</c> elements and setting the result as the pane text.
 /// </remarks>
 private void Extract()
 {
     log.Info("content type: " + editorPane.GetContentType());
     if (!editorPane.GetContentType().Equals("text/html"))
     {
         DefaultStyledDocument doc = (DefaultStyledDocument)editorPane.GetDocument();
         string text = null;
         try
         {
             text = doc.GetText(0, doc.GetLength());
         }
         catch (Exception e)
         {
             log.Err(e);
         }
         if (text == null)
         {
             // Reading the document failed (already logged). Fall back to empty text, as the
             // HTML branch does, instead of handing null to the classifier.
             text = string.Empty;
         }
         string labeledText = classifier.ClassifyWithInlineXML(text);
         taggedContents = labeledText;
         string  tagPattern   = BuildTagAlternation();
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = labeledText;
         Matcher m            = startPattern.Matcher(finalText);
         while (m.Find())
         {
             // Offset of the start tag; once the tag is removed this is where the entity begins.
             int start = m.Start();
             finalText = m.ReplaceFirst(string.Empty);
             m         = endPattern.Matcher(finalText);
             if (m.Find())
             {
                 // Offset of the end tag in the text that no longer holds the start tag,
                 // i.e. the exclusive end of the entity.
                 int    end = m.Start();
                 string tag = m.Group(1);
                 finalText = m.ReplaceFirst(string.Empty);
                 IAttributeSet attSet = GetAttributeSet(tag);
                 try
                 {
                     string entity = Sharpen.Runtime.Substring(finalText, start, end);
                     doc.SetCharacterAttributes(start, entity.Length, attSet, false);
                 }
                 catch (Exception ex)
                 {
                     // A failure here is logged and then terminates the whole process.
                     log.Err(ex);
                     System.Environment.Exit(-1);
                 }
                 log.Info(tag + ": " + Sharpen.Runtime.Substring(finalText, start, end));
             }
             else
             {
                 log.Info("Couldn't find end pattern!");
             }
             // Rescan from the beginning of the shortened text.
             m = startPattern.Matcher(finalText);
         }
         editorPane.Revalidate();
         editorPane.Repaint();
     }
     else
     {
         string untaggedContents = editorPane.GetText();
         if (untaggedContents == null)
         {
             untaggedContents = string.Empty;
         }
         taggedContents = classifier.ClassifyWithInlineXML(untaggedContents);
         string  tagPattern   = BuildTagAlternation();
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = taggedContents;
         Matcher m            = startPattern.Matcher(finalText);
         while (m.Find())
         {
             string tag = m.Group(1);
             Color  col = tagToColorMap[tag];
             // A tag without a mapped colour is left in place and the scan moves on.
             // NOTE(review): ReplaceFirst presumably targets the first match in the text, which
             // would no longer be the current match once an uncoloured tag has been skipped -
             // confirm that every label has an entry in tagToColorMap.
             if (col != null)
             {
                 string color  = ColorToHTML(col);
                 string newTag = "<span style=\"background-color: " + color + "; color: white\">";
                 finalText = m.ReplaceFirst(newTag);
                 // The entity text begins right after the inserted span tag.
                 int     start = m.Start() + newTag.Length;
                 Matcher m1    = endPattern.Matcher(finalText);
                 if (m1.Find(m.End()))
                 {
                     string entity = Sharpen.Runtime.Substring(finalText, start, m1.Start());
                     log.Info(tag + ": " + entity);
                 }
                 else
                 {
                     log.Warn("Failed to find end for " + tag);
                 }
                 finalText = m1.ReplaceFirst("</span>");
                 m         = startPattern.Matcher(finalText);
             }
         }
         editorPane.SetText(finalText);
         editorPane.Revalidate();
         editorPane.Repaint();
     }
     saveTaggedAs.SetEnabled(true);
 }

 /// <summary>
 /// Joins every classifier label except the background symbol with <c>|</c>, producing the
 /// alternation that is embedded in the start-tag and end-tag patterns.
 /// </summary>
 /// <returns>The <c>|</c>-separated labels; empty when there is no non-background label.</returns>
 private string BuildTagAlternation()
 {
     ICollection <string> tags = classifier.Labels();
     string        background  = classifier.BackgroundSymbol();
     StringBuilder alternation = new StringBuilder();
     foreach (string tag in tags)
     {
         if (background.Equals(tag))
         {
             continue;
         }
         if (alternation.Length > 0)
         {
             alternation.Append('|');
         }
         alternation.Append(tag);
     }
     return alternation.ToString();
 }