示例#1
0
 /// <summary>Loads the default CRF or CMM classifier into <c>classifier</c>.</summary>
 /// <remarks>
 /// When loading throws, the failure is reported through <c>DisplayError</c> and the
 /// method returns without calling <c>RemoveTags</c>, <c>BuildTagPanel</c> or
 /// <c>BuildExtractButton</c>.
 /// </remarks>
 /// <param name="crf">
 /// <see langword="true"/> to load the default CRF classifier,
 /// <see langword="false"/> to load the default CMM classifier.
 /// </param>
 public virtual void LoadDefaultClassifier(bool crf)
 {
     try
     {
         if (!crf)
         {
             classifier = CMMClassifier.GetDefaultClassifier();
         }
         else
         {
             classifier = CRFClassifier.GetDefaultClassifier();
         }
     }
     catch (Exception e)
     {
         // The same short model label is used in both the dialog title and its text.
         string modelKind = crf ? "CRF" : "CMM";
         string details   = "Error loading default " + modelKind + "\nMessage: " + e.Message;
         DisplayError(modelKind + " Load Error", details);
         return;
     }
     // Only reached when the classifier was loaded without an exception.
     RemoveTags();
     BuildTagPanel();
     BuildExtractButton();
 }
示例#2
0
 /// <summary>
 /// Builds a combiner from classifier load paths, then sets up the number sequence
 /// classifier and the gazette mapping.
 /// </summary>
 /// <param name="applyNumericClassifiers">Stored in the field of the same name.</param>
 /// <param name="nerLanguage">Language that selects which number sequence classifier is created.</param>
 /// <param name="useSUTime">Stored in the field of the same name and passed to the number sequence classifier.</param>
 /// <param name="augmentRegexNER">When true, the default NER gazette mapping is read; otherwise an empty map is used.</param>
 /// <param name="nscProps">Properties passed to the base constructor and to the number sequence classifier.</param>
 /// <param name="loadPaths">Classifier paths passed to the base constructor.</param>
 /// <exception cref="System.IO.IOException"/>
 public NERClassifierCombiner(bool applyNumericClassifiers, NERClassifierCombiner.Language nerLanguage, bool useSUTime, bool augmentRegexNER, Properties nscProps, params string[] loadPaths)
     : base(nscProps, ClassifierCombiner.ExtractCombinationModeSafe(nscProps), loadPaths)
 {
     // NOTE: nscProps may contain sutime props which will not be recognized by the SeqClassifierFlags
     this.applyNumericClassifiers = applyNumericClassifiers;
     this.nerLanguage = nerLanguage;
     this.useSUTime = useSUTime;

     // Chinese gets its own number sequence classifier; every other language shares one.
     bool isChinese = nerLanguage == NERClassifierCombiner.Language.Chinese;
     if (!isChinese)
     {
         nsc = new NumberSequenceClassifier(new Properties(), useSUTime, nscProps);
     }
     else
     {
         nsc = new ChineseNumberSequenceClassifier(new Properties(), useSUTime, nscProps);
     }

     if (!augmentRegexNER)
     {
         gazetteMapping = Java.Util.Collections.EmptyMap();
     }
     else
     {
         gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
     }
 }
示例#3
0
 /// <summary>Creates a named entity recognizer server that listens on the given port.</summary>
 /// <param name="port">The port this NERServer listens on.</param>
 /// <param name="asc">The classifier which will do the tagging.</param>
 /// <param name="charset">The character set for encoding strings over the socket stream, e.g., "utf-8".</param>
 /// <exception cref="System.IO.IOException">If there is a problem creating a ServerSocket</exception>
 public NERServer(int port, AbstractSequenceClassifier asc, string charset)
 {
     ner = asc;

     // The server socket is created during construction, after the classifier is stored.
     ServerSocket serverSocket = new ServerSocket(port);
     listener = serverSocket;

     this.charset = charset;
 }
        /// <summary>Creates a number annotator configured from properties.</summary>
        /// <param name="name">Prefix used to build the background-symbol property key.</param>
        /// <param name="props">Properties supplying the background symbol and the SUTime switch.</param>
        public NumberAnnotator(string name, Properties props)
        {
            // The key is "<name>.<BackgroundSymbolProperty>"; the default symbol is used when it is absent.
            BackgroundSymbol = props.GetProperty(name + "." + BackgroundSymbolProperty, DefaultBackgroundSymbol);

            bool sutimeEnabled = PropertiesUtils.GetBool(props, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            // This constructor always turns verbose output off.
            Verbose = false;
            nsc = new NumberSequenceClassifier(sutimeEnabled);
        }
        /// <summary>
        /// Creates a Chinese segmenter annotator whose model and options come from the
        /// properties prefixed with <paramref name="name"/> followed by a dot.
        /// </summary>
        /// <remarks>
        /// The <c>name.model</c> property selects the segmentation model. Every other
        /// <c>name.*</c> property is passed to the classifier with the prefix removed.
        /// </remarks>
        /// <param name="name">Annotator name used as the property prefix.</param>
        /// <param name="props">Properties holding the annotator and newline-handling settings.</param>
        /// <exception cref="System.Exception">
        /// If no <c>name.model</c> property is present. Exceptions raised while loading
        /// the model propagate unchanged.
        /// </exception>
        public ChineseSegmenterAnnotator(string name, Properties props)
        {
            string model = null;
            // Keep only the properties that apply to this annotator
            Properties modelProps = new Properties();
            string     desiredKey = name + '.';

            foreach (string key in props.StringPropertyNames())
            {
                // Property keys are identifiers: match the prefix ordinally, not by current culture.
                if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
                {
                    continue;
                }
                // skip past name and the subsequent "."
                string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
                if (modelKey.Equals("model"))
                {
                    model = props.GetProperty(key);
                }
                else
                {
                    modelProps.SetProperty(modelKey, props.GetProperty(key));
                }
            }
            this.Verbose        = PropertiesUtils.GetBool(props, name + ".verbose", false);
            this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);
            if (model == null)
            {
                throw new Exception("Expected a property " + name + ".model");
            }
            // don't write very much, because the CRFClassifier already reports loading
            if (Verbose)
            {
                log.Info("Loading Segmentation Model ... ");
            }
            // No try/catch here: the previous handler only rethrew, and its duplicate
            // catch clause was unreachable. Load failures reach the caller as thrown.
            segmenter = CRFClassifier.GetClassifier(model, modelProps);

            string newlineBreak = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never");
            // If newlines are treated as sentence split, we need to retain them in tokenization for ssplit to make use of them
            tokenizeNewline = (!newlineBreak.Equals("never")) || bool.ValueOf(props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"));
            // record whether or not sentence splitting on two newlines ; if so, need to remove single newlines
            sentenceSplitOnTwoNewlines = newlineBreak.Equals("two");
        }
示例#6
0
 /// <summary>
 /// Builds a combiner from already-constructed classifiers, using the default NER language.
 /// </summary>
 /// <param name="applyNumericClassifiers">Stored in the field of the same name.</param>
 /// <param name="useSUTime">Stored in the field of the same name and passed to the number sequence classifier.</param>
 /// <param name="augmentRegexNER">When true, the default NER gazette mapping is read; otherwise an empty map is used.</param>
 /// <param name="classifiers">Classifiers passed to the base constructor.</param>
 public NERClassifierCombiner(bool applyNumericClassifiers, bool useSUTime, bool augmentRegexNER, params AbstractSequenceClassifier <CoreLabel>[] classifiers)
     : base(classifiers)
 {
     this.applyNumericClassifiers = applyNumericClassifiers;
     nerLanguage = NerLanguageDefault;
     this.useSUTime = useSUTime;
     nsc = new NumberSequenceClassifier(useSUTime);

     if (!augmentRegexNER)
     {
         gazetteMapping = Java.Util.Collections.EmptyMap();
     }
     else
     {
         gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
     }
 }
示例#7
0
 /// <summary>Builds a combiner whose settings are all read from properties.</summary>
 /// <param name="props">
 /// Properties passed to the base constructor and consulted for the numeric-classifier
 /// switch, the NER language, the SUTime switch and the gazette switch.
 /// </param>
 /// <exception cref="System.IO.IOException"/>
 public NERClassifierCombiner(Properties props)
     : base(props)
 {
     applyNumericClassifiers = PropertiesUtils.GetBool(props, ApplyNumericClassifiersProperty, ApplyNumericClassifiersDefault);

     string languageName = PropertiesUtils.GetString(props, NerLanguageProperty, null);
     nerLanguage = NERClassifierCombiner.Language.FromString(languageName, NerLanguageDefault);

     useSUTime = PropertiesUtils.GetBool(props, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);
     // todo [cdm 2015]: Could avoid constructing this if applyNumericClassifiers is false
     nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

     bool applyGazette = PropertiesUtils.GetBool(props, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
     if (!applyGazette)
     {
         gazetteMapping = Java.Util.Collections.EmptyMap();
     }
     else
     {
         gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
     }
 }
 /// <summary>
 /// Rebuilds <c>baseClassifiers</c> from an optional preset classifier followed by one
 /// classifier per path.
 /// </summary>
 /// <remarks>
 /// When at least one classifier ends up in the list, the background symbol of the
 /// first one is copied into this combiner's flags.
 /// </remarks>
 /// <param name="props">Properties passed to every classifier; also holds the <c>ner.usePresetNERTags</c> switch.</param>
 /// <param name="paths">Paths of the classifiers to load, in order.</param>
 /// <exception cref="System.IO.IOException"/>
 private void LoadClassifiers(Properties props, IList <string> paths)
 {
     baseClassifiers = new List <AbstractSequenceClassifier <IN> >();

     // The preset classifier, when enabled, is always first in the list.
     bool usePresetTags = PropertiesUtils.GetBool(props, "ner.usePresetNERTags", false);
     if (usePresetTags)
     {
         baseClassifiers.Add(new PresetSequenceClassifier(props));
     }

     foreach (string classifierPath in paths)
     {
         baseClassifiers.Add(LoadClassifierFromPath(props, classifierPath));
     }

     if (baseClassifiers.Count == 0)
     {
         return;
     }
     flags.backgroundSymbol = baseClassifiers[0].flags.backgroundSymbol;
 }
示例#9
0
 /// <summary>Load a CRF classifier from a resource, or the default one.</summary>
 /// <remarks>
 /// Passing <see langword="null"/> selects the default classifier. When loading throws,
 /// the failure is logged and shown through <c>DisplayError</c>, and the method returns
 /// without calling <c>RemoveTags</c> or <c>BuildTagPanel</c> and without enabling the
 /// extract controls.
 /// </remarks>
 /// <param name="resource">Classifier resource to load, or <see langword="null"/> for the default.</param>
 public virtual void LoadClassifier(string resource)
 {
     bool useDefault = resource == null;
     try
     {
         if (useDefault)
         {
             // default classifier in jar
             classifier = CRFClassifier.GetDefaultClassifier();
         }
         else
         {
             classifier = CRFClassifier.GetClassifier(resource);
         }
     }
     catch (Exception e)
     {
         // Every exception type is caught so that any load failure is reported to the user.
         string message = useDefault
             ? "Error loading default CRF"
             : ("Error loading classpath CRF: " + resource);
         log.Info(message);

         // Only the short form is logged; the dialog also gets the exception text.
         string detail = e.ToString();
         if (detail != null)
         {
             message += '\n' + detail;
         }
         DisplayError("CRF Load Error", message);
         return;
     }
     RemoveTags();
     BuildTagPanel();
     // The extract controls are enabled directly here; BuildExtractButton is not called.
     extractButton.SetEnabled(true);
     extract.SetEnabled(true);
 }
示例#10
0
        /// <summary>Builds an NERClassifierCombiner from an ObjectInputStream.</summary>
        /// <remarks>
        /// After the base constructor has consumed its part of the stream, two booleans are
        /// read in order: the SUTime switch, then the numeric-classifier switch. Each stored
        /// value is overridden when the matching property (<c>ner.useSUTime</c>,
        /// <c>ner.applyNumericClassifiers</c>) is present in <paramref name="props"/>.
        /// </remarks>
        /// <param name="ois">Stream to read the serialized combiner from.</param>
        /// <param name="props">Properties that may override the values stored on disk.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.InvalidCastException"/>
        /// <exception cref="System.TypeLoadException"/>
        public NERClassifierCombiner(ObjectInputStream ois, Properties props)
            : base(ois, props)
        {
            // SUTime switch: the property wins over the value stored on disk.
            bool   diskUseSUTime  = ois.ReadBoolean();
            string sutimeOverride = props.GetProperty("ner.useSUTime");
            useSUTime = sutimeOverride != null ? bool.Parse(sutimeOverride) : diskUseSUTime;

            // Numeric-classifier switch: same precedence, read second from the stream.
            bool   diskApplyNumericClassifiers = ois.ReadBoolean();
            string numericOverride             = props.GetProperty("ner.applyNumericClassifiers");
            applyNumericClassifiers = numericOverride != null ? bool.Parse(numericOverride) : diskApplyNumericClassifiers;

            nerLanguage = NerLanguageDefault;
            // build the nsc, note that initProps should be set by ClassifierCombiner
            nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

            bool applyGazette = PropertiesUtils.GetBool(props, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
            if (!applyGazette)
            {
                gazetteMapping = Java.Util.Collections.EmptyMap();
            }
            else
            {
                gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
            }
        }
 /// <summary>Creates a number annotator from explicit settings.</summary>
 /// <param name="backgroundSymbol">Value assigned to <c>BackgroundSymbol</c>.</param>
 /// <param name="verbose">Value assigned to <c>Verbose</c>.</param>
 /// <param name="useSUTime">Passed to the number sequence classifier that is created.</param>
 public NumberAnnotator(string backgroundSymbol, bool verbose, bool useSUTime)
 {
     BackgroundSymbol = backgroundSymbol;
     Verbose = verbose;

     NumberSequenceClassifier numberClassifier = new NumberSequenceClassifier(useSUTime);
     nsc = numberClassifier;
 }
示例#12
0
        /// <summary>
        /// Demo entry point: loads a serialized CRF classifier and prints its output in
        /// several formats, for teaching purposes.
        /// </summary>
        /// <param name="args">
        /// Optional arguments. <c>args[0]</c> replaces the built-in classifier path and
        /// <c>args[1]</c> names a file to annotate. Without a file, two hard-coded
        /// sentences are annotated instead.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            string modelPath = args.Length > 0 ? args[0] : "classifiers/english.all.3class.distsim.crf.ser.gz";
            AbstractSequenceClassifier <CoreLabel> classifier = CRFClassifier.GetClassifier(modelPath);

            if (args.Length > 1)
            {
                /* File input: shows (1) how to run NER on a String, (2) how to get the
                 * entities in the String with character offsets, and (3) how to run NER
                 * on a whole file (without loading it into a String).
                 */
                string inputPath    = args[1];
                string fileContents = IOUtils.SlurpFile(inputPath);

                PrintSlashTagged(classifier.Classify(fileContents));
                System.Console.Out.WriteLine("---");

                PrintSlashTagged(classifier.ClassifyFile(inputPath));
                System.Console.Out.WriteLine("---");

                foreach (Triple <string, int, int> span in classifier.ClassifyToCharacterOffsets(fileContents))
                {
                    System.Console.Out.WriteLine(span.First() + ": " + Sharpen.Runtime.Substring(fileContents, span.Second(), span.Third()));
                }
                System.Console.Out.WriteLine("---");

                System.Console.Out.WriteLine("Ten best entity labelings");
                IDocumentReaderAndWriter <CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
                classifier.ClassifyAndWriteAnswersKBest(inputPath, 10, readerAndWriter);
                System.Console.Out.WriteLine("---");

                System.Console.Out.WriteLine("Per-token marginalized probabilities");
                classifier.PrintProbs(inputPath, readerAndWriter);
                // -- Printing the first order (token pair) clique probabilities is also possible,
                // -- but that output is a bit overwhelming, so it is left out by default.
            }
            else
            {
                /* Hard-coded input: shows how to run the classifier on single sentences
                 * and produce several formats, including slash tags and an inline XML
                 * output format. It also shows the full contents of the CoreLabels that
                 * are constructed by the classifier.
                 */
                string[] examples = { "Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." };

                foreach (string text in examples)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(text));
                }
                System.Console.Out.WriteLine("---");

                foreach (string text in examples)
                {
                    // This one puts in spaces and newlines between tokens, so just print not println.
                    System.Console.Out.Write(classifier.ClassifyToString(text, "slashTags", false));
                }
                System.Console.Out.WriteLine("---");

                foreach (string text in examples)
                {
                    // This one is best for dealing with the output as a TSV (tab-separated column) file.
                    // The first column gives entities, the second their classes, and the third the remaining text in a document
                    System.Console.Out.Write(classifier.ClassifyToString(text, "tabbedEntities", false));
                }
                System.Console.Out.WriteLine("---");

                foreach (string text in examples)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(text));
                }
                System.Console.Out.WriteLine("---");

                foreach (string text in examples)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(text, "xml", true));
                }
                System.Console.Out.WriteLine("---");

                foreach (string text in examples)
                {
                    System.Console.Out.Write(classifier.ClassifyToString(text, "tsv", false));
                }
                System.Console.Out.WriteLine("---");

                // This gets out entities with character offsets; sentences are numbered from 1.
                for (int sentenceIndex = 0; sentenceIndex < examples.Length; sentenceIndex++)
                {
                    int sentenceNumber = sentenceIndex + 1;
                    foreach (Triple <string, int, int> trip in classifier.ClassifyToCharacterOffsets(examples[sentenceIndex]))
                    {
                        System.Console.Out.Printf("%s over character offsets [%d, %d) in sentence %d.%n", trip.First(), trip.Second(), trip.third, sentenceNumber);
                    }
                }
                System.Console.Out.WriteLine("---");

                // This prints out all the details of what is stored for each token,
                // numbering tokens continuously across both sentences.
                int tokenIndex = 0;
                foreach (string text in examples)
                {
                    foreach (IList <CoreLabel> labelledSentence in classifier.Classify(text))
                    {
                        foreach (CoreLabel label in labelledSentence)
                        {
                            System.Console.Out.Write(tokenIndex + ": ");
                            System.Console.Out.WriteLine(label.ToShorterString());
                            tokenIndex++;
                        }
                    }
                }
                System.Console.Out.WriteLine("---");
            }
        }

        /// <summary>
        /// Writes each sentence on its own line as space-terminated <c>word/answer</c> pairs.
        /// </summary>
        /// <param name="sentences">Classified sentences to print.</param>
        private static void PrintSlashTagged(IList <IList <CoreLabel> > sentences)
        {
            foreach (IList <CoreLabel> sentence in sentences)
            {
                foreach (CoreLabel token in sentence)
                {
                    System.Console.Out.Write(token.Word() + '/' + token.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
                }
                System.Console.Out.WriteLine();
            }
        }
        /// <summary>
        /// Train a Stanford NER model from a configuration file
        /// </summary>
        /// <remarks>
        /// After training, the model is serialized when its flags name a
        /// <c>serializeTo</c> target. Any exception is written to the console
        /// and reported as a <c>false</c> return value.
        /// </remarks>
        /// <param name="prop">Configuration file, read as UTF-8 properties</param>
        /// <returns>true when training completed without an exception; otherwise false.</returns>
        public bool Train(string prop)
        {
            try
            {
                java.util.Properties props = new java.util.Properties();
                InputStream st = new BufferedInputStream(new FileInputStream(prop));
                try
                {
                    props.load(new InputStreamReader(st, "utf-8"));
                }
                finally
                {
                    // Release the file handle whether or not the properties could be parsed.
                    st.close();
                }

                _crfModel = new CRFClassifier(props);
                _crfModel.train();

                string serializeTo = _crfModel.flags.serializeTo;
                if (serializeTo != null)
                {
                    _crfModel.serializeClassifier(serializeTo);
                }

                return true;
            }
            catch (Exception e)
            {
                System.Console.WriteLine("Unable to train the Stanford CRF model: " + e.ToString());
                return false;
            }
        }
 /// <summary>
 /// Create CRF model from the model file
 /// </summary>
 /// <remarks>
 /// On failure the previously assigned model, if any, is left in place, the loaded
 /// flag is cleared, and the exception message is written to the console.
 /// </remarks>
 /// <param name="crfSerializedClassifier">The model file</param>
 /// <returns>If loaded successfully, returns true else false.</returns>
 public bool LoadModel(string crfSerializedClassifier)
 {
     try
     {
         _crfModel = CRFClassifier.getClassifierNoExceptions(crfSerializedClassifier);
         _isCRFModelLoaded = true;
         return true;
     }
     catch (System.Exception e)
     {
         // Report the cause instead of discarding it, so a failed load can be diagnosed.
         System.Console.WriteLine("Unable to load the Stanford CRF Model... " + e.Message);
         _isCRFModelLoaded = false;
         return false;
     }
 }