/// <summary>
/// Loads a CRF or CMM classifier from <paramref name="file"/> and, on success,
/// rebuilds the tag panel and the extract button.
/// </summary>
/// <remarks>
/// Any exception raised while loading is reported through <c>DisplayError</c>
/// and the method returns without touching the existing UI state.
/// </remarks>
/// <param name="file">Serialized classifier to load.</param>
/// <param name="crf">True to load with <c>CRFClassifier</c>, false to load with <c>CMMClassifier</c>.</param>
public virtual void LoadClassifier(File file, bool crf)
{
    try
    {
        if (!crf)
        {
            classifier = CMMClassifier.GetClassifier(file);
        }
        else
        {
            classifier = CRFClassifier.GetClassifier(file);
        }
    }
    catch (Exception loadFailure)
    {
        // The classifier family appears in both the dialog body and its title.
        string kind = crf ? "CRF" : "CMM";
        string message = "Error loading " + kind + ": " + file.GetAbsolutePath();
        string title = kind + " Load Error";
        message = message + "\nMessage: " + loadFailure.Message;
        DisplayError(title, message);
        return;
    }

    // Only reached when the classifier was loaded successfully.
    RemoveTags();
    BuildTagPanel();
    BuildExtractButton();
}
/// <summary>Loads the model from disk.</summary>
/// <remarks>
/// The extra settings are read from <c>path + ".extra"</c>, looked up first as a class-loader
/// resource and then on the file system. The CRF classifier itself is loaded from
/// <paramref name="path"/>.
/// </remarks>
/// <param name="path">The location of model that was saved to disk</param>
/// <param name="entityClassifier">Extractor type handed to <c>MachineReading.MakeEntityExtractor</c>.</param>
/// <param name="preferDefaultGazetteer">
/// If true, the serialized gazetteer location is replaced by <c>DefaultPaths.DefaultNflGazetteer</c>.
/// </param>
/// <returns>The extractor with its classifier and the serialized extra settings restored.</returns>
/// <exception cref="System.InvalidCastException">if model is the wrong format</exception>
/// <exception cref="System.IO.IOException">
/// if the model file doesn't exist or is otherwise
/// unavailable/incomplete
/// </exception>
/// <exception cref="System.TypeLoadException">this would probably indicate a serious classpath problem</exception>
public static Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor Load(string path, Type entityClassifier, bool preferDefaultGazetteer)
{
    string extraPath = path + ".extra";

    // load the additional arguments
    // try to load the extra file from the CLASSPATH first
    InputStream @is = typeof(Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor).GetClassLoader().GetResourceAsStream(extraPath);
    // if not found in the CLASSPATH, load from the file system
    if (@is == null)
    {
        @is = new FileInputStream(extraPath);
    }

    string gazetteerLocation;
    ICollection<string> annotationsToSkip;
    bool useSubTypes;
    bool useBIO;

    // Close both streams even when deserialization fails part-way through;
    // previously a failed read left them open.
    try
    {
        ObjectInputStream @in = new ObjectInputStream(@is);
        try
        {
            // The gazetteer entry is always read so the stream stays aligned,
            // even when it is about to be overridden.
            gazetteerLocation = ErasureUtils.UncheckedCast<string>(@in.ReadObject());
            if (preferDefaultGazetteer)
            {
                gazetteerLocation = DefaultPaths.DefaultNflGazetteer;
            }
            annotationsToSkip = ErasureUtils.UncheckedCast<ICollection<string>>(@in.ReadObject());
            useSubTypes = ErasureUtils.UncheckedCast<bool>(@in.ReadObject());
            useBIO = ErasureUtils.UncheckedCast<bool>(@in.ReadObject());
        }
        finally
        {
            @in.Close();
        }
    }
    finally
    {
        @is.Close();
    }

    Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor extractor =
        (Edu.Stanford.Nlp.IE.Machinereading.BasicEntityExtractor)MachineReading.MakeEntityExtractor(entityClassifier, gazetteerLocation);

    // load the CRF classifier (this works from any resource, e.g., classpath or file system)
    extractor.classifier = CRFClassifier.GetClassifier(path);

    // copy the extra arguments
    extractor.annotationsToSkip = annotationsToSkip;
    extractor.useSubTypes = useSubTypes;
    extractor.useBIO = useBIO;
    return extractor;
}
/// <summary>
/// Loads the segmenter model from <paramref name="filename"/> into <c>classifier</c>.
/// </summary>
/// <param name="filename">Location of the serialized CRF model.</param>
/// <param name="p">Properties forwarded to <c>CRFClassifier.GetClassifier</c>.</param>
/// <exception cref="RuntimeIOException">Wraps any exception raised while loading.</exception>
public virtual void LoadSegmenter(string filename, Properties p)
{
    try
    {
        classifier = CRFClassifier.GetClassifier(filename, p);
    }
    catch (Exception cause)
    {
        string description = string.Concat("Failed to load segmenter ", filename);
        throw new RuntimeIOException(description, cause);
    }
}
/// <summary>
/// Builds the annotator from the properties whose keys start with <paramref name="name"/> + ".".
/// </summary>
/// <remarks>
/// The <c>model</c> key under that prefix names the segmenter model to load. Every other key
/// under the prefix is copied, with the prefix removed, into the properties handed to
/// <c>CRFClassifier.GetClassifier</c>. The <c>verbose</c> and <c>normalizeSpace</c> keys are
/// also read as booleans (default false) for this annotator.
/// </remarks>
/// <param name="name">Annotator name used as the property prefix.</param>
/// <param name="props">Properties to read the configuration from.</param>
/// <exception cref="System.Exception">
/// If no <c>name.model</c> property is present, or if loading the model fails.
/// </exception>
public ChineseSegmenterAnnotator(string name, Properties props)
{
    string model = null;

    // Keep only the properties that apply to this annotator
    Properties modelProps = new Properties();
    string desiredKey = name + '.';
    foreach (string key in props.StringPropertyNames())
    {
        // Property keys are identifiers, so compare ordinally rather than by current culture.
        if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
        {
            continue;
        }

        // skip past name and the subsequent "."
        string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
        if (modelKey.Equals("model"))
        {
            model = props.GetProperty(key);
        }
        else
        {
            modelProps.SetProperty(modelKey, props.GetProperty(key));
        }
    }

    this.Verbose = PropertiesUtils.GetBool(props, name + ".verbose", false);
    this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);

    if (model == null)
    {
        throw new Exception("Expected a property " + name + ".model");
    }

    // don't write very much, because the CRFClassifier already reports loading
    if (Verbose)
    {
        log.Info("Loading Segmentation Model ... ");
    }

    // Load failures propagate unchanged. The previous try/catch only rethrew, and its
    // second catch clause for the same exception type was unreachable.
    segmenter = CRFClassifier.GetClassifier(model, modelProps);

    // If newlines are treated as sentence split, we need to retain them in tokenization for ssplit to make use of them
    tokenizeNewline = (!props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never").Equals("never"))
        || bool.ValueOf(props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"));

    // record whether or not sentence splitting on two newlines ; if so, need to remove single newlines
    sentenceSplitOnTwoNewlines = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never").Equals("two");
}
/// <summary>Loads a CRF classifier from a named resource, or the default one.</summary>
/// <remarks>
/// Passing <see langword="null"/> selects the default classifier. Any exception raised while
/// loading is logged and shown through <c>DisplayError</c>; the UI is left unchanged in that
/// case. On success the tags are rebuilt and the extract controls are enabled.
/// </remarks>
/// <param name="resource">Classifier resource to load, or <see langword="null"/> for the default.</param>
public virtual void LoadClassifier(string resource)
{
    bool useDefault = resource == null;
    try
    {
        if (useDefault)
        {
            // default classifier in jar
            classifier = CRFClassifier.GetDefaultClassifier();
        }
        else
        {
            classifier = CRFClassifier.GetClassifier(resource);
        }
    }
    catch (Exception failure)
    {
        string message = useDefault
            ? "Error loading default CRF"
            : "Error loading classpath CRF: " + resource;
        log.Info(message);

        // Append the exception text to the dialog body when there is any.
        string detail = failure.ToString();
        if (detail != null)
        {
            message = message + "\n" + detail;
        }
        DisplayError("CRF Load Error", message);
        return;
    }

    RemoveTags();
    BuildTagPanel();
    // The extract controls already exist here; they are enabled rather than rebuilt.
    extractButton.SetEnabled(true);
    extract.SetEnabled(true);
}
/// <summary>
/// Loads a sequence classifier from <paramref name="path"/>, trying the CRF format first
/// and falling back to the CMM format.
/// </summary>
/// <typeparam name="Inn">Token type handled by the returned classifier.</typeparam>
/// <param name="props">Properties forwarded to the CRF loader (the CMM loader receives only the path).</param>
/// <param name="path">Location of the serialized classifier.</param>
/// <returns>The classifier that loaded successfully.</returns>
/// <exception cref="System.IO.IOException">If the CMM fallback also fails; wraps the CMM failure.</exception>
public static AbstractSequenceClassifier<Inn> LoadClassifierFromPath<Inn>(Properties props, string path)
    where Inn : ICoreMap
{
    // try loading as a CRFClassifier
    try
    {
        return ErasureUtils.UncheckedCast<AbstractSequenceClassifier<Inn>>(CRFClassifier.GetClassifier(path, props));
    }
    catch (Exception e)
    {
        // Not fatal: report the CRF failure and fall through to the CMM attempt.
        Sharpen.Runtime.PrintStackTrace(e);
    }

    // try loading as a CMMClassifier
    try
    {
        return ErasureUtils.UncheckedCast<AbstractSequenceClassifier<Inn>>(CMMClassifier.GetClassifier(path));
    }
    catch (Exception e)
    {
        // Both formats failed.
        throw new IOException("Couldn't load classifier from " + path, e);
    }
}
/// <summary>Builds a ClassifierCombiner from an ObjectInputStream.</summary>
/// <remarks>
/// The stream is consumed in this order: the initial properties (for the base class), a second
/// copy of the initial properties, the list of load paths, the combination mode name, the
/// number of base classifiers, and then each base classifier. Values in
/// <paramref name="props"/> overwrite the serialized properties.
/// </remarks>
/// <param name="ois">Stream holding the serialized combiner.</param>
/// <param name="props">Overriding properties, e.g. from the command line.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.InvalidCastException"/>
public ClassifierCombiner(ObjectInputStream ois, Properties props)
    : base(PropertiesUtils.OverWriteProperties((Properties)ois.ReadObject(), props))
{
    // The serialized form carries a second copy of the initial properties; read it and apply
    // the same overrides that were passed to the base class.
    // TODO: probably set initProps in AbstractSequenceClassifier to avoid this writing twice thing, its hacky
    this.initProps = PropertiesUtils.OverWriteProperties((Properties)ois.ReadObject(), props);

    this.initLoadPaths = (List<string>)ois.ReadObject();

    // Combination mode: a valid "ner.combinationMode" override wins, otherwise the
    // serialized value is used.
    string serializedMode = (string)ois.ReadObject();
    string requestedMode = props.GetProperty("ner.combinationMode");
    ClassifierCombiner.CombinationMode resolvedMode;
    if (requestedMode == null)
    {
        resolvedMode = ClassifierCombiner.CombinationMode.ValueOf(serializedMode);
    }
    else
    {
        try
        {
            resolvedMode = ClassifierCombiner.CombinationMode.ValueOf(requestedMode);
        }
        catch (ArgumentException)
        {
            // The override is not a valid mode name; fall back to the serialized one.
            resolvedMode = ClassifierCombiner.CombinationMode.ValueOf(serializedMode);
        }
    }
    this.combinationMode = resolvedMode;

    int numClassifiers = ois.ReadInt();
    this.baseClassifiers = new List<AbstractSequenceClassifier<IN>>();

    // Each entry is tried as a CRF first and as a CMM second. A failure of both aborts
    // construction, so the counter only advances past successfully loaded entries.
    for (int loaded = 0; loaded < numClassifiers; loaded++)
    {
        try
        {
            log.Info("loading CRF...");
            CRFClassifier<IN> crfModel = ErasureUtils.UncheckedCast(CRFClassifier.GetClassifier(ois, props));
            baseClassifiers.Add(crfModel);
        }
        catch (Exception)
        {
            try
            {
                log.Info("loading CMM...");
                CMMClassifier cmmModel = ErasureUtils.UncheckedCast(CMMClassifier.GetClassifier(ois, props));
                baseClassifiers.Add(cmmModel);
            }
            catch (Exception ex)
            {
                throw new IOException("Couldn't load classifier!", ex);
            }
        }
    }
}
// end static class NERClient
/// <summary>Starts this server on the specified port.</summary>
/// <remarks>
/// Starts this server on the specified port. The classifier used can be
/// either a default one stored in the jar file from which this code is
/// invoked or you can specify it as a filename or as another classifier
/// resource name, which must correspond to the name of a resource in the
/// /classifiers/ directory of the jar file.
/// Default port is 4465.
/// When run in server mode, additional properties can be specified
/// on the command line and will be passed to the model loaded.
/// Usage:
/// <c>java edu.stanford.nlp.ie.NERServer [-loadClassifier fileOrResource|-client] -port portNumber</c>
/// </remarks>
/// <param name="args">Command-line arguments (described above)</param>
/// <exception cref="System.Exception">If file or Java class problems with serialized classifier</exception>
public static void Main(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    string loadFile = props.GetProperty("loadClassifier");
    string loadJarFile = props.GetProperty("loadJarClassifier");
    string client = props.GetProperty("client");
    string portStr = props.GetProperty("port", "4465");
    props.Remove("port");
    // so later code doesn't complain
    if (string.IsNullOrEmpty(portStr))
    {
        log.Info(Usage);
        return;
    }

    string charset = "utf-8";
    string encoding = props.GetProperty("encoding");
    if (!string.IsNullOrEmpty(encoding))
    {
        charset = encoding;
    }

    // Convert.ToInt32 reports bad input with FormatException/OverflowException, which the
    // previous NumberFormatException handler never caught. TryParse makes a bad port print
    // the usage text as intended instead of crashing.
    int port;
    if (!int.TryParse(portStr, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out port))
    {
        log.Info("Non-numerical port");
        log.Info(Usage);
        return;
    }

    // default output format for if no output format is specified
    if (props.GetProperty("outputFormat") == null)
    {
        props.SetProperty("outputFormat", "slashTags");
    }

    if (!string.IsNullOrEmpty(client))
    {
        // run a test client for illustration/testing
        string host = props.GetProperty("host");
        NERServer.NERClient.CommunicateWithNERServer(host, port, charset);
        return;
    }

    // Server mode: pick the classifier from a file, a jar resource, or the default.
    AbstractSequenceClassifier asc;
    if (!StringUtils.IsNullOrEmpty(loadFile))
    {
        asc = CRFClassifier.GetClassifier(loadFile, props);
    }
    else if (!StringUtils.IsNullOrEmpty(loadJarFile))
    {
        asc = CRFClassifier.GetClassifier(loadJarFile, props);
    }
    else
    {
        asc = CRFClassifier.GetDefaultClassifier(props);
    }
    new NERServer(port, asc, charset).Run();
}
/// <summary>
/// Reads the servlet configuration and loads every CRF model found under
/// <c>/WEB-INF/data/models</c>.
/// </summary>
/// <remarks>
/// Requires the <c>outputFormat</c> and <c>preserveSpacing</c> init parameters. The first
/// listed model becomes the default classifier, and each model is stored in <c>ners</c>
/// under its file name.
/// </remarks>
/// <exception cref="Javax.Servlet.ServletException">
/// If a required init parameter is missing or blank, the model directory cannot be listed or
/// is empty, a model resource cannot be opened, or a model fails to load.
/// </exception>
public override void Init()
{
    format = GetServletConfig().GetInitParameter("outputFormat");
    if (format == null || format.Trim().IsEmpty())
    {
        throw new ServletException("Invalid outputFormat setting.");
    }

    string spacingStr = GetServletConfig().GetInitParameter("preserveSpacing");
    if (spacingStr == null || spacingStr.Trim().IsEmpty())
    {
        throw new ServletException("Invalid preserveSpacing setting.");
    }
    // Case-insensitive match on "true" without culture-sensitive lowercasing.
    spacing = string.Equals("true", spacingStr.Trim(), System.StringComparison.OrdinalIgnoreCase);

    string path = GetServletContext().GetRealPath("/WEB-INF/data/models");
    // Fail with the declared exception type instead of a null dereference when the
    // directory cannot be resolved or listed.
    if (path == null)
    {
        throw new ServletException("Cannot resolve model directory /WEB-INF/data/models.");
    }
    string[] modelNames = new File(path).List();
    if (modelNames == null)
    {
        throw new ServletException("Cannot list model directory. Path = " + path);
    }
    foreach (string modelName in modelNames)
    {
        classifiers.Add(modelName);
    }
    if (classifiers.Count == 0)
    {
        throw new ServletException("No classifiers found. Path = " + path);
    }

    // TODO: get this from somewhere more interesting?
    defaultClassifier = classifiers[0];
    foreach (string modelName in classifiers)
    {
        Log(modelName);
    }

    ners = Generics.NewHashMap();
    foreach (string modelName in classifiers)
    {
        CRFClassifier model = null;
        string filename = "/WEB-INF/data/models/" + modelName;
        InputStream @is = GetServletConfig().GetServletContext().GetResourceAsStream(filename);
        if (@is == null)
        {
            throw new ServletException("File not found. Filename = " + filename);
        }
        try
        {
            // Models whose name ends in ".gz" are decompressed while being read.
            if (filename.EndsWith(".gz"))
            {
                @is = new BufferedInputStream(new GZIPInputStream(@is));
            }
            else
            {
                @is = new BufferedInputStream(@is);
            }
            model = CRFClassifier.GetClassifier(@is);
        }
        catch (IOException)
        {
            throw new ServletException("IO problem reading classifier.");
        }
        catch (InvalidCastException)
        {
            throw new ServletException("Classifier class casting problem.");
        }
        catch (TypeLoadException)
        {
            throw new ServletException("Classifier class not found problem.");
        }
        finally
        {
            IOUtils.CloseIgnoringExceptions(@is);
        }
        ners[modelName] = model;
    }
}
/// <summary>
/// Demonstrates several ways of running a serialized CRF classifier over text.
/// </summary>
/// <remarks>
/// <c>args[0]</c>, when given, names the serialized classifier; otherwise
/// <c>classifiers/english.all.3class.distsim.crf.ser.gz</c> is used. <c>args[1]</c>, when
/// given, names a file to annotate; otherwise two hard-coded example sentences are annotated.
/// </remarks>
/// <param name="args">Optional classifier path and optional file to annotate.</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    string serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
    if (args.Length > 0)
    {
        serializedClassifier = args[0];
    }
    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.GetClassifier(serializedClassifier);

    /* For either a file to annotate or for the hardcoded text example, this
     * demo file shows several ways to process the input, for teaching purposes.
     */
    if (args.Length > 1)
    {
        /* For the file, it shows (1) how to run NER on a String, (2) how
         * to get the entities in the String with character offsets, and
         * (3) how to run NER on a whole file (without loading it into a String).
         */
        string fileContents = IOUtils.SlurpFile(args[1]);
        IList<IList<CoreLabel>> @out = classifier.Classify(fileContents);
        foreach (IList<CoreLabel> sentence in @out)
        {
            foreach (CoreLabel word in sentence)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }

        System.Console.Out.WriteLine("---");
        @out = classifier.ClassifyFile(args[1]);
        foreach (IList<CoreLabel> sentence_1 in @out)
        {
            foreach (CoreLabel word in sentence_1)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }

        System.Console.Out.WriteLine("---");
        IList<Triple<string, int, int>> list = classifier.ClassifyToCharacterOffsets(fileContents);
        foreach (Triple<string, int, int> item in list)
        {
            System.Console.Out.WriteLine(item.First() + ": " + Sharpen.Runtime.Substring(fileContents, item.Second(), item.Third()));
        }

        System.Console.Out.WriteLine("---");
        System.Console.Out.WriteLine("Ten best entity labelings");
        IDocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
        classifier.ClassifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);

        System.Console.Out.WriteLine("---");
        System.Console.Out.WriteLine("Per-token marginalized probabilities");
        classifier.PrintProbs(args[1], readerAndWriter);

        // -- This code prints out the first order (token pair) clique probabilities.
        // -- But that output is a bit overwhelming, so we leave it commented out by default.
        // System.out.println("---");
        // System.out.println("First Order Clique Probabilities");
        // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter);
    }
    else
    {
        /* For the hard-coded String, it shows how to run it on a single
         * sentence, and how to do this and produce several formats, including
         * slash tags and an inline XML output format. It also shows the full
         * contents of the {@code CoreLabel}s that are constructed by the
         * classifier. And it shows getting out the probabilities of different
         * assignments and an n-best list of classifications with probabilities.
         */
        string[] example = new string[]
        {
            "Good afternoon Rajat Raina, how are you today?",
            "I go to school at Stanford University, which is located in California."
        };

        foreach (string str in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(str));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str_1 in example)
        {
            // This one puts in spaces and newlines between tokens, so just print not println.
            System.Console.Out.Write(classifier.ClassifyToString(str_1, "slashTags", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str_2 in example)
        {
            // This one is best for dealing with the output as a TSV (tab-separated column) file.
            // The first column gives entities, the second their classes, and the third the remaining text in a document
            System.Console.Out.Write(classifier.ClassifyToString(str_2, "tabbedEntities", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str_3 in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(str_3));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str_4 in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(str_4, "xml", true));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str_5 in example)
        {
            System.Console.Out.Write(classifier.ClassifyToString(str_5, "tsv", false));
        }
        System.Console.Out.WriteLine("---");

        // This gets out entities with character offsets
        int j = 0;
        foreach (string str_6 in example)
        {
            j++;
            IList<Triple<string, int, int>> triples = classifier.ClassifyToCharacterOffsets(str_6);
            foreach (Triple<string, int, int> trip in triples)
            {
                // TextWriter has no Printf and does not understand %s/%d/%n; use a
                // composite format string and let WriteLine supply the line break.
                System.Console.Out.WriteLine("{0} over character offsets [{1}, {2}) in sentence {3}.", trip.First(), trip.Second(), trip.Third(), j);
            }
        }
        System.Console.Out.WriteLine("---");

        // This prints out all the details of what is stored for each token
        int i = 0;
        foreach (string str_7 in example)
        {
            foreach (IList<CoreLabel> lcl in classifier.Classify(str_7))
            {
                foreach (CoreLabel cl in lcl)
                {
                    System.Console.Out.Write(i++ + ": ");
                    System.Console.Out.WriteLine(cl.ToShorterString());
                }
            }
        }
        System.Console.Out.WriteLine("---");
    }
}