Esempio n. 1
0
 /// <summary>
 /// Runs the classifier over the current contents of the editor pane and highlights
 /// every labeled span it reports.
 /// </summary>
 /// <remarks>
 /// When the pane's content type is not "text/html", the document text is classified and
 /// each entity is highlighted by applying character attributes to the styled document.
 /// For "text/html", the classifier's inline tags are rewritten into colored
 /// <c>span</c> elements and the pane text is replaced with the result.
 /// Both paths store the classifier output in <c>taggedContents</c> and the input text in
 /// <c>untaggedContents</c>, and the method finishes by enabling <c>saveTaggedAs</c>.
 /// </remarks>
 /// <exception cref="Exception">
 /// Reading the document text or applying character attributes failed; the original
 /// failure is available as the inner exception.
 /// </exception>
 private void Extract()
 {
     log.Info("content type: " + editorPane.GetContentType());
     if (!editorPane.GetContentType().Equals("text/html"))
     {
         DefaultStyledDocument doc = (DefaultStyledDocument)editorPane.GetDocument();
         string text = null;
         try
         {
             text = doc.GetText(0, doc.GetLength());
         }
         catch (Exception e)
         {
             // System.Exception has no constructor taking only an exception, so wrap the
             // cause explicitly to keep both its message and its stack trace.
             throw new Exception("Failed to read the document text: " + e.Message, e);
         }
         string labeledText = classifier.ClassifyWithInlineXML(text);
         taggedContents   = labeledText;
         untaggedContents = text;

         // Build the alternation "TAG1|TAG2|..." from every label except the background symbol.
         ICollection <string> tags = classifier.Labels();
         string background         = classifier.BackgroundSymbol();
         string tagPattern         = string.Empty;
         foreach (string tag in tags)
         {
             if (background.Equals(tag))
             {
                 continue;
             }
             if (tagPattern.Length > 0)
             {
                 tagPattern += "|";
             }
             tagPattern += tag;
         }
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = labeledText;
         Matcher m            = startPattern.Matcher(finalText);
         while (m.Find())
         {
             // Strip the opening tag; "start" is then the entity's offset in the tag-free text.
             int    start   = m.Start();
             string openTag = m.Group(1);
             finalText = m.ReplaceFirst(string.Empty);
             m         = endPattern.Matcher(finalText);
             if (m.Find())
             {
                 int    end   = m.Start();
                 string tag_1 = m.Group(1);
                 finalText = m.ReplaceFirst(string.Empty);
                 IAttributeSet attSet = GetAttributeSet(tag_1);
                 try
                 {
                     string entity = Sharpen.Runtime.Substring(finalText, start, end);
                     doc.SetCharacterAttributes(start, entity.Length, attSet, false);
                 }
                 catch (Exception ex)
                 {
                     throw new Exception("Failed to highlight entity tagged " + tag_1 + ": " + ex.Message, ex);
                 }
                 log.Info(tag_1 + ": " + Sharpen.Runtime.Substring(finalText, start, end));
             }
             else
             {
                 // Previously ignored silently; report it so malformed classifier output is visible.
                 log.Info("Couldn't find end pattern for " + openTag);
             }
             // The text changed, so restart the search for the next opening tag.
             m = startPattern.Matcher(finalText);
         }
         editorPane.Revalidate();
         editorPane.Repaint();
     }
     else
     {
         untaggedContents = editorPane.GetText();
         taggedContents   = classifier.ClassifyWithInlineXML(untaggedContents);

         // Build the alternation "TAG1|TAG2|..." from every label except the background symbol.
         ICollection <string> tags = classifier.Labels();
         string background         = classifier.BackgroundSymbol();
         string tagPattern         = string.Empty;
         foreach (string tag in tags)
         {
             if (background.Equals(tag))
             {
                 continue;
             }
             if (tagPattern.Length > 0)
             {
                 tagPattern += "|";
             }
             tagPattern += tag;
         }
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = taggedContents;
         Matcher m            = startPattern.Matcher(finalText);
         while (m.Find())
         {
             // Swap the opening tag for a span colored according to the tag.
             string tag_1  = m.Group(1);
             string color  = ColorToHTML(tagToColorMap[tag_1]);
             string newTag = "<span style=\"background-color: " + color + "; color: white\">";
             finalText = m.ReplaceFirst(newTag);
             int     start = m.Start() + newTag.Length;
             Matcher m1    = endPattern.Matcher(finalText);
             // Only read the match position when a closing tag was actually found;
             // querying an unmatched matcher would throw.
             if (m1.Find(m.End()))
             {
                 string entity = Sharpen.Runtime.Substring(finalText, start, m1.Start());
                 log.Info(tag_1 + ": " + entity);
             }
             else
             {
                 log.Info("Couldn't find end pattern for " + tag_1);
             }
             finalText = m1.ReplaceFirst("</span>");
             m         = startPattern.Matcher(finalText);
         }
         editorPane.SetText(finalText);
         editorPane.Revalidate();
         editorPane.Repaint();
         log.Info(finalText);
     }
     saveTaggedAs.SetEnabled(true);
 }
Esempio n. 2
0
 /// <summary>
 /// Runs the classifier over the current contents of the editor pane and highlights
 /// every labeled span it reports.
 /// </summary>
 /// <remarks>
 /// When the pane's content type is not "text/html", the document text is classified and
 /// each entity is highlighted by applying character attributes to the styled document;
 /// a failure while applying attributes is logged and terminates the process with exit
 /// code -1. For "text/html", the classifier's inline tags are rewritten into colored
 /// <c>span</c> elements and the pane text is replaced with the result; tags that have no
 /// entry in <c>tagToColorMap</c> are left in the text unchanged.
 /// Both paths store the classifier output in <c>taggedContents</c>, and the method
 /// finishes by enabling <c>saveTaggedAs</c>.
 /// </remarks>
 private void Extract()
 {
     log.Info("content type: " + editorPane.GetContentType());
     if (!editorPane.GetContentType().Equals("text/html"))
     {
         DefaultStyledDocument doc = (DefaultStyledDocument)editorPane.GetDocument();
         string text = null;
         try
         {
             text = doc.GetText(0, doc.GetLength());
         }
         catch (Exception e)
         {
             log.Err(e);
         }
         if (text == null)
         {
             // Reading failed (already logged): classify empty text rather than passing
             // null on, mirroring the null guard used for HTML content below.
             text = string.Empty;
         }
         string labeledText = classifier.ClassifyWithInlineXML(text);
         taggedContents = labeledText;

         // Build the alternation "TAG1|TAG2|..." from every label except the background symbol.
         ICollection <string> tags = classifier.Labels();
         string        background  = classifier.BackgroundSymbol();
         StringBuilder tagPattern  = new StringBuilder();
         foreach (string tag in tags)
         {
             if (background.Equals(tag))
             {
                 continue;
             }
             if (tagPattern.Length > 0)
             {
                 tagPattern.Append('|');
             }
             tagPattern.Append(tag);
         }
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = labeledText;
         Matcher m            = startPattern.Matcher(finalText);
         while (m.Find())
         {
             // Strip the opening tag; "start" is then the entity's offset in the tag-free text.
             int start = m.Start();
             finalText = m.ReplaceFirst(string.Empty);
             m         = endPattern.Matcher(finalText);
             if (m.Find())
             {
                 int    end   = m.Start();
                 string tag_1 = m.Group(1);
                 finalText = m.ReplaceFirst(string.Empty);
                 IAttributeSet attSet = GetAttributeSet(tag_1);
                 try
                 {
                     string entity = Sharpen.Runtime.Substring(finalText, start, end);
                     doc.SetCharacterAttributes(start, entity.Length, attSet, false);
                 }
                 catch (Exception ex)
                 {
                     log.Err(ex);
                     System.Environment.Exit(-1);
                 }
                 log.Info(tag_1 + ": " + Sharpen.Runtime.Substring(finalText, start, end));
             }
             else
             {
                 log.Info("Couldn't find end pattern!");
             }
             // The text changed, so restart the search for the next opening tag.
             m = startPattern.Matcher(finalText);
         }
         editorPane.Revalidate();
         editorPane.Repaint();
     }
     else
     {
         string untaggedContents = editorPane.GetText();
         if (untaggedContents == null)
         {
             untaggedContents = string.Empty;
         }
         taggedContents = classifier.ClassifyWithInlineXML(untaggedContents);

         // Build the alternation "TAG1|TAG2|..." from every label except the background symbol.
         ICollection <string> tags = classifier.Labels();
         string        background  = classifier.BackgroundSymbol();
         StringBuilder tagPattern  = new StringBuilder();
         foreach (string tag in tags)
         {
             if (background.Equals(tag))
             {
                 continue;
             }
             if (tagPattern.Length > 0)
             {
                 tagPattern.Append('|');
             }
             tagPattern.Append(tag);
         }
         Pattern startPattern = Pattern.Compile("<(" + tagPattern + ")>");
         Pattern endPattern   = Pattern.Compile("</(" + tagPattern + ")>");
         string  finalText    = taggedContents;

         // ReplaceFirst always rewrites the first match in the whole string, which is the
         // wrong tag once an uncolored tag has been skipped. Track the search offset
         // explicitly and splice the exact matched spans instead.
         int     searchFrom = 0;
         Matcher m          = startPattern.Matcher(finalText);
         while (m.Find(searchFrom))
         {
             string tag_1    = m.Group(1);
             int    tagStart = m.Start();
             int    tagEnd   = m.End();
             Color  col      = tagToColorMap[tag_1];
             if (col == null)
             {
                 // No color for this tag: leave it in place and resume right after it.
                 searchFrom = tagEnd;
                 continue;
             }
             string color  = ColorToHTML(col);
             string newTag = "<span style=\"background-color: " + color + "; color: white\">";
             finalText = Sharpen.Runtime.Substring(finalText, 0, tagStart)
                         + newTag
                         + Sharpen.Runtime.Substring(finalText, tagEnd, finalText.Length);
             int     start = tagStart + newTag.Length;
             Matcher m1    = endPattern.Matcher(finalText);
             if (m1.Find(start))
             {
                 int    closeStart = m1.Start();
                 int    closeEnd   = m1.End();
                 string entity     = Sharpen.Runtime.Substring(finalText, start, closeStart);
                 log.Info(tag_1 + ": " + entity);
                 finalText = Sharpen.Runtime.Substring(finalText, 0, closeStart)
                             + "</span>"
                             + Sharpen.Runtime.Substring(finalText, closeEnd, finalText.Length);
             }
             else
             {
                 log.Warn("Failed to find end for " + tag_1);
             }
             // Everything before the entity is already rewritten or deliberately skipped.
             searchFrom = start;
             m          = startPattern.Matcher(finalText);
         }
         editorPane.SetText(finalText);
         editorPane.Revalidate();
         editorPane.Repaint();
     }
     saveTaggedAs.SetEnabled(true);
 }