Exemplo n.º 1
0
 /// <summary>
 /// Loads a serialized sequence classifier from <paramref name="file"/> into the
 /// <c>classifier</c> field and then refreshes the dependent UI pieces.
 /// </summary>
 /// <remarks>
 /// If loading throws, the failure is reported through <c>DisplayError</c> and the
 /// method returns without calling <c>RemoveTags</c>, <c>BuildTagPanel</c> or
 /// <c>BuildExtractButton</c>.
 /// </remarks>
 /// <param name="file">The file handed to the classifier loader.</param>
 /// <param name="crf">
 /// <c>true</c> to load through <c>CRFClassifier</c>, <c>false</c> to load through
 /// <c>CMMClassifier</c>.
 /// </param>
 public virtual void LoadClassifier(File file, bool crf)
 {
     try
     {
         if (!crf)
         {
             classifier = CMMClassifier.GetClassifier(file);
         }
         else
         {
             classifier = CRFClassifier.GetClassifier(file);
         }
     }
     catch (Exception e)
     {
         // The same label is used in both the dialog title and the message body.
         string kind    = crf ? "CRF" : "CMM";
         string title   = kind + " Load Error";
         string message = "Error loading " + kind + ": " + file.GetAbsolutePath() + "\nMessage: " + e.Message;
         DisplayError(title, message);
         return;
     }

     // Only reached when the classifier was loaded successfully.
     RemoveTags();
     BuildTagPanel();
     BuildExtractButton();
 }
        /// <summary>Loads the model from disk.</summary>
        /// <remarks>
        /// The extra arguments are read from <c>path + ".extra"</c> (class-loader resource
        /// first, file system as fallback) in this order: gazetteer location, annotations
        /// to skip, <c>useSubTypes</c>, <c>useBIO</c>. The streams are closed even when
        /// reading fails part-way through.
        /// </remarks>
        /// <param name="path">The location of model that was saved to disk</param>
        /// <param name="entityClassifier">The type passed to <c>MachineReading.MakeEntityExtractor</c> to create the extractor</param>
        /// <param name="preferDefaultGazetteer">
        /// When <c>true</c>, the serialized gazetteer location is replaced by
        /// <c>DefaultPaths.DefaultNflGazetteer</c>
        /// </param>
        /// <returns>The extractor with its classifier and extra arguments populated</returns>
        /// <exception cref="System.InvalidCastException">if model is the wrong format</exception>
        /// <exception cref="System.IO.IOException">
        /// if the model file doesn't exist or is otherwise
        /// unavailable/incomplete
        /// </exception>
        /// <exception cref="System.TypeLoadException">this would probably indicate a serious classpath problem</exception>
        public static Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor Load(string path, Type entityClassifier, bool preferDefaultGazetteer)
        {
            // load the additional arguments
            // try to load the extra file from the CLASSPATH first
            string      extraPath = path + ".extra";
            InputStream @is       = typeof(Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor).GetClassLoader().GetResourceAsStream(extraPath);

            // if not found in the CLASSPATH, load from the file system
            if (@is == null)
            {
                @is = new FileInputStream(extraPath);
            }

            string gazetteerLocation;
            ICollection<string> annotationsToSkip;
            bool useSubTypes;
            bool useBIO;
            ObjectInputStream @in = null;

            try
            {
                @in = new ObjectInputStream(@is);
                gazetteerLocation = ErasureUtils.UncheckedCast<string>(@in.ReadObject());
                if (preferDefaultGazetteer)
                {
                    gazetteerLocation = DefaultPaths.DefaultNflGazetteer;
                }
                annotationsToSkip = ErasureUtils.UncheckedCast<ICollection<string>>(@in.ReadObject());
                useSubTypes       = ErasureUtils.UncheckedCast<bool>(@in.ReadObject());
                useBIO            = ErasureUtils.UncheckedCast<bool>(@in.ReadObject());
            }
            finally
            {
                // Release both streams on every path; previously a failed read leaked them.
                try
                {
                    if (@in != null)
                    {
                        @in.Close();
                    }
                }
                finally
                {
                    @is.Close();
                }
            }

            Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor extractor = (Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor)MachineReading.MakeEntityExtractor(entityClassifier, gazetteerLocation);
            // load the CRF classifier (this works from any resource, e.g., classpath or file system)
            extractor.classifier = CRFClassifier.GetClassifier(path);
            // copy the extra arguments
            extractor.annotationsToSkip = annotationsToSkip;
            extractor.useSubTypes       = useSubTypes;
            extractor.useBIO            = useBIO;
            return(extractor);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Loads a CRF model via <c>CRFClassifier.GetClassifier</c> and stores it in the
 /// <c>classifier</c> field.
 /// </summary>
 /// <param name="filename">Model location forwarded to the loader; also used in the error message.</param>
 /// <param name="p">Properties forwarded to the loader.</param>
 /// <remarks>
 /// Any exception raised while loading is wrapped in a <c>RuntimeIOException</c>
 /// that keeps the original exception as its cause.
 /// </remarks>
 public virtual void LoadSegmenter(string filename, Properties p)
 {
     try
     {
         classifier = CRFClassifier.GetClassifier(filename, p);
     }
     catch (Exception cause)
     {
         string failure = "Failed to load segmenter " + filename;
         throw new RuntimeIOException(failure, cause);
     }
 }
        /// <summary>
        /// Builds the annotator from the properties whose keys start with
        /// <paramref name="name"/> followed by a dot.
        /// </summary>
        /// <remarks>
        /// <c>name.model</c> selects the segmentation model; every other
        /// <c>name.*</c> property is copied, with the prefix stripped, into the
        /// properties handed to <c>CRFClassifier.GetClassifier</c>. Exceptions raised
        /// by the model loader propagate to the caller unchanged.
        /// </remarks>
        /// <param name="name">Annotator name used as the property prefix.</param>
        /// <param name="props">Full property set to read the configuration from.</param>
        /// <exception cref="System.Exception">if no <c>name.model</c> property is present</exception>
        public ChineseSegmenterAnnotator(string name, Properties props)
        {
            string model = null;
            // Keep only the properties that apply to this annotator
            Properties modelProps = new Properties();
            string     desiredKey = name + '.';

            foreach (string key in props.StringPropertyNames())
            {
                // Property keys are identifiers, so compare ordinally rather than by current culture.
                if (key.StartsWith(desiredKey, System.StringComparison.Ordinal))
                {
                    // skip past name and the subsequent "."
                    string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
                    if (modelKey.Equals("model"))
                    {
                        model = props.GetProperty(key);
                    }
                    else
                    {
                        modelProps.SetProperty(modelKey, props.GetProperty(key));
                    }
                }
            }
            this.Verbose        = PropertiesUtils.GetBool(props, name + ".verbose", false);
            this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);
            if (model == null)
            {
                throw new Exception("Expected a property " + name + ".model");
            }
            // don't write very much, because the CRFClassifier already reports loading
            if (Verbose)
            {
                log.Info("Loading Segmentation Model ... ");
            }
            // The former try/catch here had two identical catch (Exception) clauses: the first
            // rethrew everything, the second was unreachable and used a non-existent
            // Exception(Exception) constructor. Letting the exception propagate is equivalent.
            segmenter = CRFClassifier.GetClassifier(model, modelProps);

            string newlineMode = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never");
            // If newlines are treated as sentence split, we need to retain them in tokenization for ssplit to make use of them
            tokenizeNewline = (!newlineMode.Equals("never")) || bool.ValueOf(props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"));
            // record whether or not sentence splitting on two newlines ; if so, need to remove single newlines
            sentenceSplitOnTwoNewlines = newlineMode.Equals("two");
        }
Exemplo n.º 5
0
 /// <summary>Loads a CRF classifier from a resource, or the default one.</summary>
 /// <remarks>
 /// Passing <see langword="null"/> for <paramref name="resource"/> selects
 /// <c>CRFClassifier.GetDefaultClassifier()</c>. When loading fails the error is
 /// logged, shown through <c>DisplayError</c>, and the method returns without
 /// touching the tag panel or enabling the extract controls.
 /// </remarks>
 /// <param name="resource">Resource name of the classifier, or <see langword="null"/> for the default.</param>
 public virtual void LoadClassifier(string resource)
 {
     bool useDefault = (resource == null);

     try
     {
         if (useDefault)
         {
             // default classifier in jar
             classifier = CRFClassifier.GetDefaultClassifier();
         }
         else
         {
             classifier = CRFClassifier.GetClassifier(resource);
         }
     }
     catch (Exception e)
     {
         // Deliberately broad: every load failure is reported to the user instead of propagating.
         string message = useDefault
             ? "Error loading default CRF"
             : ("Error loading classpath CRF: " + resource);
         log.Info(message);

         string details = e.ToString();
         if (details != null)
         {
             message += '\n' + details;
         }
         DisplayError("CRF Load Error", message);
         return;
     }

     RemoveTags();
     BuildTagPanel();
     // BuildExtractButton() is intentionally not called here; the existing controls are enabled instead.
     extractButton.SetEnabled(true);
     extract.SetEnabled(true);
 }
 /// <summary>
 /// Loads a sequence classifier from <paramref name="path"/>, trying
 /// <c>CRFClassifier</c> first and falling back to <c>CMMClassifier</c>.
 /// </summary>
 /// <typeparam name="Inn">Token type of the returned classifier.</typeparam>
 /// <param name="props">Properties forwarded to the CRF loader (the CMM fallback does not receive them).</param>
 /// <param name="path">Location of the serialized classifier.</param>
 /// <returns>The first classifier that loads successfully.</returns>
 /// <exception cref="System.IO.IOException">if neither loader can read the classifier</exception>
 public static AbstractSequenceClassifier <Inn> LoadClassifierFromPath <Inn>(Properties props, string path)
     where Inn : ICoreMap
 {
     //try loading as a CRFClassifier
     try
     {
         return(ErasureUtils.UncheckedCast(CRFClassifier.GetClassifier(path, props)));
     }
     catch (Exception crfFailure)
     {
         // Not fatal: the trace is printed and the CMM loader is tried next.
         Sharpen.Runtime.PrintStackTrace(crfFailure);
     }
     //try loading as a CMMClassifier
     try
     {
         return(ErasureUtils.UncheckedCast(CMMClassifier.GetClassifier(path)));
     }
     catch (Exception cmmFailure)
     {
         // Both loaders failed; report the CMM failure as the cause.
         throw new IOException("Couldn't load classifier from " + path, cmmFailure);
     }
 }
        /// <summary>Builds a ClassifierCombiner from its serialized form.</summary>
        /// <remarks>
        /// The stream is consumed in a fixed order: properties (for the base class),
        /// a second copy of the properties, the initial load paths, the combination
        /// mode name, the number of base classifiers, and then each base classifier.
        /// Values in <paramref name="props"/> overwrite the serialized properties.
        /// A <c>ner.combinationMode</c> entry in <paramref name="props"/> replaces the
        /// serialized mode when it names a valid mode.
        /// </remarks>
        /// <param name="ois">Stream positioned at the start of a serialized combiner.</param>
        /// <param name="props">Overriding properties; also passed to each base classifier loader.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.InvalidCastException"/>
        public ClassifierCombiner(ObjectInputStream ois, Properties props)
            : base(PropertiesUtils.OverWriteProperties((Properties)ois.ReadObject(), props))
        {
            // The serialized form carries the properties twice: one copy was consumed by the
            // base constructor above, this second copy becomes initProps.
            this.initProps     = PropertiesUtils.OverWriteProperties((Properties)ois.ReadObject(), props);
            this.initLoadPaths = (List <string>)ois.ReadObject();
            string serializedMode = (string)ois.ReadObject();

            // Prefer a valid override from props; otherwise keep the serialized mode.
            string requestedMode = props.GetProperty("ner.combinationMode");
            ClassifierCombiner.CombinationMode chosenMode;
            if (requestedMode == null)
            {
                chosenMode = ClassifierCombiner.CombinationMode.ValueOf(serializedMode);
            }
            else
            {
                try
                {
                    chosenMode = ClassifierCombiner.CombinationMode.ValueOf(requestedMode);
                }
                catch (ArgumentException)
                {
                    // the override is not a recognised mode name
                    chosenMode = ClassifierCombiner.CombinationMode.ValueOf(serializedMode);
                }
            }
            this.combinationMode = chosenMode;

            int numClassifiers = ois.ReadInt();
            this.baseClassifiers = new List <AbstractSequenceClassifier <IN> >();

            // Each entry is tried as a CRF first and as a CMM second; a double failure aborts.
            for (int loaded = 0; loaded < numClassifiers; loaded++)
            {
                try
                {
                    log.Info("loading CRF...");
                    CRFClassifier <IN> crfModel = ErasureUtils.UncheckedCast(CRFClassifier.GetClassifier(ois, props));
                    baseClassifiers.Add(crfModel);
                }
                catch (Exception)
                {
                    try
                    {
                        log.Info("loading CMM...");
                        CMMClassifier cmmModel = ErasureUtils.UncheckedCast(CMMClassifier.GetClassifier(ois, props));
                        baseClassifiers.Add(cmmModel);
                    }
                    catch (Exception cmmFailure)
                    {
                        throw new IOException("Couldn't load classifier!", cmmFailure);
                    }
                }
            }
        }
Exemplo n.º 8
0
        // end static class NERClient
        /// <summary>Starts this server on the specified port.</summary>
        /// <remarks>
        /// Starts this server on the specified port.  The classifier used can be
        /// either a default one stored in the jar file from which this code is
        /// invoked or you can specify it as a filename or as another classifier
        /// resource name, which must correspond to the name of a resource in the
        /// /classifiers/ directory of the jar file.
        /// Default port is 4465.
        /// When run in server mode, additional properties can be specified
        /// on the command line and will be passed to the model loaded.
        /// If the port is empty or is not a valid integer, the usage text is logged
        /// and the method returns without starting anything.
        /// Usage:
        /// <c>java edu.stanford.nlp.ie.NERServer [-loadClassifier fileOrResource|-client] -port portNumber</c>
        /// </remarks>
        /// <param name="args">Command-line arguments (described above)</param>
        /// <exception cref="System.Exception">If file or Java class problems with serialized classifier</exception>
        public static void Main(string[] args)
        {
            Properties props       = StringUtils.ArgsToProperties(args);
            string     loadFile    = props.GetProperty("loadClassifier");
            string     loadJarFile = props.GetProperty("loadJarClassifier");
            string     client      = props.GetProperty("client");
            string     portStr     = props.GetProperty("port", "4465");

            // so later code doesn't complain
            props.Remove("port");
            if (portStr == null || portStr.Equals(string.Empty))
            {
                log.Info(Usage);
                return;
            }
            string charset  = "utf-8";
            string encoding = props.GetProperty("encoding");

            if (encoding != null && !string.Empty.Equals(encoding))
            {
                charset = encoding;
            }

            // Convert.ToInt32 signals bad input with FormatException/OverflowException, which the
            // former catch (NumberFormatException) never intercepted; TryParse reports failure directly.
            int port;
            if (!int.TryParse(portStr, out port))
            {
                log.Info("Non-numerical port");
                log.Info(Usage);
                return;
            }
            // default output format for if no output format is specified
            if (props.GetProperty("outputFormat") == null)
            {
                props.SetProperty("outputFormat", "slashTags");
            }
            if (client != null && !client.Equals(string.Empty))
            {
                // run a test client for illustration/testing
                string host = props.GetProperty("host");
                NERServer.NERClient.CommunicateWithNERServer(host, port, charset);
            }
            else
            {
                // Precedence: -loadClassifier, then -loadJarClassifier, then the built-in default.
                AbstractSequenceClassifier asc;
                if (!StringUtils.IsNullOrEmpty(loadFile))
                {
                    asc = CRFClassifier.GetClassifier(loadFile, props);
                }
                else if (!StringUtils.IsNullOrEmpty(loadJarFile))
                {
                    asc = CRFClassifier.GetClassifier(loadJarFile, props);
                }
                else
                {
                    asc = CRFClassifier.GetDefaultClassifier(props);
                }
                new NERServer(port, asc, charset).Run();
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Reads the servlet configuration and loads every classifier found under
        /// <c>/WEB-INF/data/models</c>.
        /// </summary>
        /// <remarks>
        /// Requires the <c>outputFormat</c> and <c>preserveSpacing</c> init parameters.
        /// The first model listed becomes <c>defaultClassifier</c>. Each model is read
        /// through the servlet context (gunzipped when its name ends in <c>.gz</c>) and
        /// stored in <c>ners</c> under its file name.
        /// </remarks>
        /// <exception cref="Javax.Servlet.ServletException">
        /// if a required init parameter is missing or blank, the models directory
        /// cannot be resolved or contains no models, or a model cannot be loaded
        /// </exception>
        public override void Init()
        {
            format = GetServletConfig().GetInitParameter("outputFormat");
            if (format == null || format.Trim().IsEmpty())
            {
                throw new ServletException("Invalid outputFormat setting.");
            }
            string spacingStr = GetServletConfig().GetInitParameter("preserveSpacing");

            if (spacingStr == null || spacingStr.Trim().IsEmpty())
            {
                throw new ServletException("Invalid preserveSpacing setting.");
            }
            // Only the value "true" (ignoring case and surrounding whitespace) enables spacing.
            spacingStr = spacingStr.Trim().ToLower();
            spacing    = "true".Equals(spacingStr);

            string path = GetServletContext().GetRealPath("/WEB-INF/data/models");
            if (path == null)
            {
                // GetRealPath yields null when the container cannot map the path to the file system.
                throw new ServletException("Cannot resolve /WEB-INF/data/models to a file system path.");
            }
            string[] modelNames = new File(path).List();
            if (modelNames == null)
            {
                // List() yields null when the path is not a readable directory.
                throw new ServletException("Cannot list classifier directory. Path = " + path);
            }
            foreach (string classifier in modelNames)
            {
                classifiers.Add(classifier);
            }
            if (classifiers.Count == 0)
            {
                // Without this guard the default lookup below would fail with an index error.
                throw new ServletException("No classifiers found. Path = " + path);
            }
            // TODO: get this from somewhere more interesting?
            defaultClassifier = classifiers[0];
            foreach (string classifier_1 in classifiers)
            {
                Log(classifier_1);
            }
            ners = Generics.NewHashMap();
            foreach (string classifier_2 in classifiers)
            {
                CRFClassifier model    = null;
                string        filename = "/WEB-INF/data/models/" + classifier_2;
                InputStream   @is      = GetServletConfig().GetServletContext().GetResourceAsStream(filename);
                if (@is == null)
                {
                    throw new ServletException("File not found. Filename = " + filename);
                }
                try
                {
                    if (filename.EndsWith(".gz"))
                    {
                        @is = new BufferedInputStream(new GZIPInputStream(@is));
                    }
                    else
                    {
                        @is = new BufferedInputStream(@is);
                    }
                    model = CRFClassifier.GetClassifier(@is);
                }
                catch (IOException)
                {
                    throw new ServletException("IO problem reading classifier.");
                }
                catch (InvalidCastException)
                {
                    throw new ServletException("Classifier class casting problem.");
                }
                catch (TypeLoadException)
                {
                    throw new ServletException("Classifier class not found problem.");
                }
                finally
                {
                    // Close the stream whether or not loading succeeded.
                    IOUtils.CloseIgnoringExceptions(@is);
                }
                ners[classifier_2] = model;
            }
        }
Exemplo n.º 10
0
        /// <summary>Demonstrates several ways of running a serialized CRF NER classifier.</summary>
        /// <remarks>
        /// <c>args[0]</c> (optional) names the serialized classifier; when absent a
        /// bundled English 3-class model path is used. When <c>args[1]</c> is given it
        /// names a file to annotate; otherwise two hard-coded sentences are annotated
        /// in a variety of output formats.
        /// </remarks>
        /// <param name="args">Optional classifier path and optional input file path.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            string serializedClassifier = args.Length > 0
                ? args[0]
                : "classifiers/english.all.3class.distsim.crf.ser.gz";
            AbstractSequenceClassifier <CoreLabel> classifier = CRFClassifier.GetClassifier(serializedClassifier);

            if (args.Length > 1)
            {
                // File mode: (1) run NER on the file contents as a string, (2) run NER on the
                // file directly, (3) list entities with character offsets, then print the
                // k-best labelings and per-token probabilities.
                string inputPath = args[1];
                string fileContents = IOUtils.SlurpFile(inputPath);

                PrintSlashTagged(classifier.Classify(fileContents));
                System.Console.Out.WriteLine("---");

                PrintSlashTagged(classifier.ClassifyFile(inputPath));
                System.Console.Out.WriteLine("---");

                IList <Triple <string, int, int> > entitySpans = classifier.ClassifyToCharacterOffsets(fileContents);
                foreach (Triple <string, int, int> span in entitySpans)
                {
                    System.Console.Out.WriteLine(span.First() + ": " + Sharpen.Runtime.Substring(fileContents, span.Second(), span.Third()));
                }
                System.Console.Out.WriteLine("---");

                System.Console.Out.WriteLine("Ten best entity labelings");
                IDocumentReaderAndWriter <CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
                classifier.ClassifyAndWriteAnswersKBest(inputPath, 10, readerAndWriter);
                System.Console.Out.WriteLine("---");

                System.Console.Out.WriteLine("Per-token marginalized probabilities");
                classifier.PrintProbs(inputPath, readerAndWriter);
                // First-order (token pair) clique probabilities are deliberately not printed:
                // that output is a bit overwhelming.
            }
            else
            {
                // Hard-coded mode: run on single sentences and show several output formats,
                // the character offsets of the entities, and the full contents of each token.
                string[] example = new string[] { "Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." };

                foreach (string sentence in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(sentence));
                }
                System.Console.Out.WriteLine("---");

                foreach (string sentence in example)
                {
                    // This format already emits spaces and newlines between tokens, so Write rather than WriteLine.
                    System.Console.Out.Write(classifier.ClassifyToString(sentence, "slashTags", false));
                }
                System.Console.Out.WriteLine("---");

                foreach (string sentence in example)
                {
                    // Tab-separated columns: entity, its class, then the remaining text.
                    System.Console.Out.Write(classifier.ClassifyToString(sentence, "tabbedEntities", false));
                }
                System.Console.Out.WriteLine("---");

                foreach (string sentence in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(sentence));
                }
                System.Console.Out.WriteLine("---");

                foreach (string sentence in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(sentence, "xml", true));
                }
                System.Console.Out.WriteLine("---");

                foreach (string sentence in example)
                {
                    System.Console.Out.Write(classifier.ClassifyToString(sentence, "tsv", false));
                }
                System.Console.Out.WriteLine("---");

                // Entities with character offsets; sentences are numbered from 1.
                int sentenceNumber = 0;
                foreach (string sentence in example)
                {
                    sentenceNumber++;
                    IList <Triple <string, int, int> > triples = classifier.ClassifyToCharacterOffsets(sentence);
                    foreach (Triple <string, int, int> trip in triples)
                    {
                        System.Console.Out.Printf("%s over character offsets [%d, %d) in sentence %d.%n", trip.First(), trip.Second(), trip.third, sentenceNumber);
                    }
                }
                System.Console.Out.WriteLine("---");

                // Everything stored for each token, numbered consecutively across all sentences.
                int tokenIndex = 0;
                foreach (string sentence in example)
                {
                    foreach (IList <CoreLabel> labeledSentence in classifier.Classify(sentence))
                    {
                        foreach (CoreLabel token in labeledSentence)
                        {
                            System.Console.Out.Write(tokenIndex++ + ": ");
                            System.Console.Out.WriteLine(token.ToShorterString());
                        }
                    }
                }
                System.Console.Out.WriteLine("---");
            }
        }

        /// <summary>
        /// Writes each sentence on its own line as space-terminated <c>word/label</c> pairs,
        /// where the label is the token's <c>AnswerAnnotation</c>.
        /// </summary>
        private static void PrintSlashTagged(IList <IList <CoreLabel> > sentences)
        {
            foreach (IList <CoreLabel> tokens in sentences)
            {
                foreach (CoreLabel token in tokens)
                {
                    System.Console.Out.Write(token.Word() + '/' + token.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
                }
                System.Console.Out.WriteLine();
            }
        }