/// <summary>
/// Loads the default classifier of the requested family and, on success,
/// rebuilds the tag panel and the extract button.
/// </summary>
/// <param name="crf">
/// <see langword="true"/> to load the default CRF classifier,
/// <see langword="false"/> to load the default CMM classifier.
/// </param>
public virtual void LoadDefaultClassifier(bool crf)
{
    try
    {
        if (!crf)
        {
            classifier = CMMClassifier.GetDefaultClassifier();
        }
        else
        {
            classifier = CRFClassifier.GetDefaultClassifier();
        }
    }
    catch (Exception loadFailure)
    {
        // Report the failure to the user and leave the current UI untouched.
        string family = crf ? "CRF" : "CMM";
        string dialogTitle = family + " Load Error";
        string dialogText = "Error loading default " + family + "\nMessage: " + loadFailure.Message;
        DisplayError(dialogTitle, dialogText);
        return;
    }

    RemoveTags();
    BuildTagPanel();
    BuildExtractButton();
}
/// <summary>
/// Builds a combiner from classifier load paths, choosing the number sequence
/// classifier according to <paramref name="nerLanguage"/>.
/// </summary>
/// <remarks>
/// <paramref name="nscProps"/> is passed to the base constructor together with the
/// combination mode extracted from it, and is also handed to the number sequence
/// classifier. It may contain SUTime properties that the sequence classifier flags
/// do not recognize.
/// </remarks>
/// <param name="applyNumericClassifiers">Stored as given in the corresponding field.</param>
/// <param name="nerLanguage">Selects the Chinese or the general number sequence classifier.</param>
/// <param name="useSUTime">Stored as given and forwarded to the number sequence classifier.</param>
/// <param name="augmentRegexNER">When true, the default regexner gazette mapping is read; otherwise an empty map is used.</param>
/// <param name="nscProps">Properties for the base combiner and the number sequence classifier.</param>
/// <param name="loadPaths">Paths forwarded to the base constructor.</param>
/// <exception cref="System.IO.IOException"/>
public NERClassifierCombiner(
    bool applyNumericClassifiers,
    NERClassifierCombiner.Language nerLanguage,
    bool useSUTime,
    bool augmentRegexNER,
    Properties nscProps,
    params string[] loadPaths)
    : base(nscProps, ClassifierCombiner.ExtractCombinationModeSafe(nscProps), loadPaths)
{
    this.applyNumericClassifiers = applyNumericClassifiers;
    this.nerLanguage = nerLanguage;
    this.useSUTime = useSUTime;

    // The number sequence classifier depends on the language.
    if (nerLanguage == NERClassifierCombiner.Language.Chinese)
    {
        this.nsc = new ChineseNumberSequenceClassifier(new Properties(), useSUTime, nscProps);
    }
    else
    {
        this.nsc = new NumberSequenceClassifier(new Properties(), useSUTime, nscProps);
    }

    if (!augmentRegexNER)
    {
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Creates a named entity recognizer server that listens on the given port.
/// </summary>
/// <param name="port">The port this NERServer listens on.</param>
/// <param name="asc">The classifier that will do the tagging.</param>
/// <param name="charset">
/// The character set for encoding strings over the socket stream, e.g. "utf-8".
/// </param>
/// <exception cref="System.IO.IOException">
/// If there is a problem creating a ServerSocket.
/// </exception>
public NERServer(int port, AbstractSequenceClassifier asc, string charset)
{
    ner = asc;

    // Opening the server socket is the only step here that can fail.
    listener = new ServerSocket(port);

    this.charset = charset;
}
/// <summary>
/// Creates a number annotator configured from <paramref name="props"/>.
/// </summary>
/// <param name="name">
/// Annotator name; the background symbol is looked up under
/// <c>name + "." + BackgroundSymbolProperty</c>.
/// </param>
/// <param name="props">
/// Properties supplying the background symbol and the SUTime switch; defaults are
/// used for entries that are absent.
/// </param>
public NumberAnnotator(string name, Properties props)
{
    string backgroundKey = name + "." + BackgroundSymbolProperty;
    BackgroundSymbol = props.GetProperty(backgroundKey, DefaultBackgroundSymbol);

    bool sutimeEnabled = PropertiesUtils.GetBool(
        props,
        NumberSequenceClassifier.UseSutimeProperty,
        NumberSequenceClassifier.UseSutimeDefault);

    // This constructor always disables verbose output.
    Verbose = false;
    nsc = new NumberSequenceClassifier(sutimeEnabled);
}
/// <summary>
/// Creates a Chinese segmenter annotator from the properties whose keys start with
/// <paramref name="name"/> followed by a dot.
/// </summary>
/// <remarks>
/// The <c>name.model</c> property selects the segmentation model. Every other
/// <c>name.*</c> property is passed to the classifier with the prefix removed.
/// Any exception raised while loading the model propagates to the caller unchanged.
/// </remarks>
/// <param name="name">Annotator name used as the property prefix.</param>
/// <param name="props">Pipeline properties.</param>
/// <exception cref="System.Exception">
/// Thrown when the <c>name.model</c> property is missing.
/// </exception>
public ChineseSegmenterAnnotator(string name, Properties props)
{
    string model = null;

    // Keep only the properties that apply to this annotator.
    Properties modelProps = new Properties();
    string desiredKey = name + '.';
    foreach (string key in props.StringPropertyNames())
    {
        // Property keys are identifiers, so the prefix match must be ordinal,
        // not culture-sensitive.
        if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
        {
            continue;
        }

        // Skip past the name and the subsequent ".".
        string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
        if (modelKey.Equals("model"))
        {
            model = props.GetProperty(key);
        }
        else
        {
            modelProps.SetProperty(modelKey, props.GetProperty(key));
        }
    }

    this.Verbose = PropertiesUtils.GetBool(props, name + ".verbose", false);
    this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);

    if (model == null)
    {
        throw new Exception("Expected a property " + name + ".model");
    }

    // Don't write very much, because the CRFClassifier already reports loading.
    if (Verbose)
    {
        log.Info("Loading Segmentation Model ... ");
    }

    // No try/catch here: the previous handler only rethrew, and its second
    // (duplicate) catch clause was unreachable and did not compile.
    segmenter = CRFClassifier.GetClassifier(model, modelProps);

    string newlineBreak = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never");

    // If newlines are treated as sentence splits, they must be retained during
    // tokenization so that ssplit can make use of them. The splitter flag is
    // true only for the (case-insensitive) text "true".
    tokenizeNewline = !newlineBreak.Equals("never")
        || string.Equals(
            props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"),
            "true",
            System.StringComparison.OrdinalIgnoreCase);

    // Record whether sentences are split on two newlines; if so, single
    // newlines need to be removed.
    sentenceSplitOnTwoNewlines = newlineBreak.Equals("two");
}
/// <summary>
/// Builds a combiner over already constructed classifiers, using the default NER
/// language and the general number sequence classifier.
/// </summary>
/// <param name="applyNumericClassifiers">Stored as given in the corresponding field.</param>
/// <param name="useSUTime">Stored as given and forwarded to the number sequence classifier.</param>
/// <param name="augmentRegexNER">When true, the default regexner gazette mapping is read; otherwise an empty map is used.</param>
/// <param name="classifiers">Classifiers forwarded to the base constructor.</param>
public NERClassifierCombiner(
    bool applyNumericClassifiers,
    bool useSUTime,
    bool augmentRegexNER,
    params AbstractSequenceClassifier<CoreLabel>[] classifiers)
    : base(classifiers)
{
    this.applyNumericClassifiers = applyNumericClassifiers;
    this.nerLanguage = NerLanguageDefault;
    this.useSUTime = useSUTime;
    this.nsc = new NumberSequenceClassifier(useSUTime);

    if (!augmentRegexNER)
    {
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Builds a combiner whose settings are all read from <paramref name="props"/>.
/// </summary>
/// <param name="props">
/// Properties forwarded to the base constructor and consulted for the numeric
/// classifier switch, the NER language, the SUTime switch and the gazette switch.
/// </param>
/// <exception cref="System.IO.IOException"/>
public NERClassifierCombiner(Properties props)
    : base(props)
{
    applyNumericClassifiers = PropertiesUtils.GetBool(
        props,
        ApplyNumericClassifiersProperty,
        ApplyNumericClassifiersDefault);

    string languageName = PropertiesUtils.GetString(props, NerLanguageProperty, null);
    nerLanguage = NERClassifierCombiner.Language.FromString(languageName, NerLanguageDefault);

    useSUTime = PropertiesUtils.GetBool(
        props,
        NumberSequenceClassifier.UseSutimeProperty,
        NumberSequenceClassifier.UseSutimeDefault);

    // todo [cdm 2015]: Could avoid constructing this if applyNumericClassifiers is false
    nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

    bool applyGazette = PropertiesUtils.GetBool(
        props,
        NERClassifierCombiner.ApplyGazetteProperty,
        NERClassifierCombiner.ApplyGazetteDefault);
    if (!applyGazette)
    {
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Rebuilds <c>baseClassifiers</c> from the given paths, optionally preceded by a
/// preset-tag classifier.
/// </summary>
/// <param name="props">
/// Properties passed to every classifier that is created; the boolean property
/// <c>ner.usePresetNERTags</c> (default false) controls whether a
/// <c>PresetSequenceClassifier</c> is added first.
/// </param>
/// <param name="paths">Paths of the classifiers to load, in order.</param>
/// <exception cref="System.IO.IOException"/>
private void LoadClassifiers(Properties props, IList<string> paths)
{
    baseClassifiers = new List<AbstractSequenceClassifier<IN>>();

    bool usePresetTags = PropertiesUtils.GetBool(props, "ner.usePresetNERTags", false);
    if (usePresetTags)
    {
        baseClassifiers.Add(new PresetSequenceClassifier(props));
    }

    foreach (string classifierPath in paths)
    {
        baseClassifiers.Add(LoadClassifierFromPath(props, classifierPath));
    }

    if (baseClassifiers.Count == 0)
    {
        return;
    }

    // The combiner takes its background symbol from the first base classifier.
    flags.backgroundSymbol = baseClassifiers[0].flags.backgroundSymbol;
}
/// <summary>Loads a CRF classifier from a resource, or the default one.</summary>
/// <remarks>
/// Passing <see langword="null"/> selects the default classifier. If loading
/// fails, the error is logged and shown to the user, and the UI is left as it was.
/// </remarks>
/// <param name="resource">
/// The classifier resource to load, or <see langword="null"/> for the default.
/// </param>
public virtual void LoadClassifier(string resource)
{
    try
    {
        if (resource == null)
        {
            // Default classifier in jar.
            classifier = CRFClassifier.GetDefaultClassifier();
        }
        else
        {
            classifier = CRFClassifier.GetClassifier(resource);
        }
    }
    catch (Exception loadFailure)
    {
        // Catch as broadly as possible so that every load failure is reported.
        string message = resource != null
            ? "Error loading classpath CRF: " + resource
            : "Error loading default CRF";
        log.Info(message);

        string details = loadFailure.ToString();
        if (details != null)
        {
            message += '\n' + details;
        }

        DisplayError("CRF Load Error", message);
        return;
    }

    RemoveTags();
    BuildTagPanel();

    // The extract button is not rebuilt here; the existing controls are enabled.
    extractButton.SetEnabled(true);
    extract.SetEnabled(true);
}
/// <summary>
/// Builds an NERClassifierCombiner from an ObjectInputStream.
/// </summary>
/// <remarks>
/// Two booleans are read from the stream, in this order: the SUTime switch and the
/// numeric classifier switch. Each is overridden by the matching property
/// (<c>ner.useSUTime</c>, <c>ner.applyNumericClassifiers</c>) when that property is
/// present in <paramref name="props"/>.
/// </remarks>
/// <param name="ois">Stream forwarded to the base constructor and then read for the two switches.</param>
/// <param name="props">Properties forwarded to the base constructor and consulted for overrides.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.InvalidCastException"/>
/// <exception cref="System.TypeLoadException"/>
public NERClassifierCombiner(ObjectInputStream ois, Properties props)
    : base(ois, props)
{
    // Read the useSUTime value from disk; a property, when set, wins.
    bool diskUseSUTime = ois.ReadBoolean();
    string sutimeOverride = props.GetProperty("ner.useSUTime");
    this.useSUTime = sutimeOverride != null ? bool.Parse(sutimeOverride) : diskUseSUTime;

    // Read the applyNumericClassifiers value from disk; a property, when set, wins.
    bool diskApplyNumericClassifiers = ois.ReadBoolean();
    string numericOverride = props.GetProperty("ner.applyNumericClassifiers");
    this.applyNumericClassifiers = numericOverride != null
        ? bool.Parse(numericOverride)
        : diskApplyNumericClassifiers;

    this.nerLanguage = NerLanguageDefault;

    // Build the nsc; note that initProps should be set by ClassifierCombiner.
    this.nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

    bool applyGazette = PropertiesUtils.GetBool(
        props,
        NERClassifierCombiner.ApplyGazetteProperty,
        NERClassifierCombiner.ApplyGazetteDefault);
    if (!applyGazette)
    {
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Creates a number annotator from explicit settings.
/// </summary>
/// <param name="backgroundSymbol">Value stored as the background symbol.</param>
/// <param name="verbose">Value stored as the verbose flag.</param>
/// <param name="useSUTime">Passed to the number sequence classifier that is created.</param>
public NumberAnnotator(string backgroundSymbol, bool verbose, bool useSUTime)
{
    BackgroundSymbol = backgroundSymbol;
    Verbose = verbose;

    nsc = new NumberSequenceClassifier(useSUTime);
}
/// <summary>
/// Demo entry point showing several ways to run a serialized CRF classifier.
/// </summary>
/// <remarks>
/// With a second argument, the named file is annotated in several ways. Without
/// it, two hard-coded example sentences are classified and printed in several
/// output formats.
/// </remarks>
/// <param name="args">
/// <c>args[0]</c> (optional) is the serialized classifier to load; when absent,
/// <c>classifiers/english.all.3class.distsim.crf.ser.gz</c> is used.
/// <c>args[1]</c> (optional) is the file to annotate.
/// </param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    string serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
    if (args.Length > 0)
    {
        serializedClassifier = args[0];
    }

    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.GetClassifier(serializedClassifier);

    // For either a file to annotate or the hard-coded text example, this demo
    // shows several ways to process the input, for teaching purposes.
    if (args.Length > 1)
    {
        // For the file, it shows (1) how to run NER on a string, (2) how to get
        // the entities in the string with character offsets, and (3) how to run
        // NER on a whole file (without loading it into a string).
        string fileContents = IOUtils.SlurpFile(args[1]);

        IList<IList<CoreLabel>> sentences = classifier.Classify(fileContents);
        foreach (IList<CoreLabel> sentence in sentences)
        {
            foreach (CoreLabel word in sentence)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }
        System.Console.Out.WriteLine("---");

        sentences = classifier.ClassifyFile(args[1]);
        foreach (IList<CoreLabel> sentence in sentences)
        {
            foreach (CoreLabel word in sentence)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }
        System.Console.Out.WriteLine("---");

        IList<Triple<string, int, int>> entities = classifier.ClassifyToCharacterOffsets(fileContents);
        foreach (Triple<string, int, int> entity in entities)
        {
            System.Console.Out.WriteLine(entity.First() + ": " + Sharpen.Runtime.Substring(fileContents, entity.Second(), entity.Third()));
        }
        System.Console.Out.WriteLine("---");

        System.Console.Out.WriteLine("Ten best entity labelings");
        IDocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
        classifier.ClassifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);
        System.Console.Out.WriteLine("---");

        System.Console.Out.WriteLine("Per-token marginalized probabilities");
        classifier.PrintProbs(args[1], readerAndWriter);

        // The first order (token pair) clique probabilities could be printed here
        // as well, but that output is a bit overwhelming, so it is left out by default.
    }
    else
    {
        // For the hard-coded strings, it shows how to classify a single sentence
        // and produce several formats, including slash tags and an inline XML
        // output format. It also shows the full contents of the CoreLabels that
        // the classifier constructs.
        string[] example = new string[]
        {
            "Good afternoon Rajat Raina, how are you today?",
            "I go to school at Stanford University, which is located in California."
        };

        foreach (string text in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(text));
        }
        System.Console.Out.WriteLine("---");

        foreach (string text in example)
        {
            // This one puts in spaces and newlines between tokens, so use Write, not WriteLine.
            System.Console.Out.Write(classifier.ClassifyToString(text, "slashTags", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string text in example)
        {
            // This one is best for dealing with the output as a TSV (tab-separated column) file.
            // The first column gives entities, the second their classes, and the third the
            // remaining text in a document.
            System.Console.Out.Write(classifier.ClassifyToString(text, "tabbedEntities", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string text in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(text));
        }
        System.Console.Out.WriteLine("---");

        foreach (string text in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(text, "xml", true));
        }
        System.Console.Out.WriteLine("---");

        foreach (string text in example)
        {
            System.Console.Out.Write(classifier.ClassifyToString(text, "tsv", false));
        }
        System.Console.Out.WriteLine("---");

        // This gets out entities with character offsets.
        int sentenceNumber = 0;
        foreach (string text in example)
        {
            sentenceNumber++;
            IList<Triple<string, int, int>> triples = classifier.ClassifyToCharacterOffsets(text);
            foreach (Triple<string, int, int> trip in triples)
            {
                // .NET composite formatting replaces the Java printf-style format
                // string; WriteLine supplies the trailing newline that "%n" stood for.
                System.Console.Out.WriteLine(
                    "{0} over character offsets [{1}, {2}) in sentence {3}.",
                    trip.First(),
                    trip.Second(),
                    trip.Third(),
                    sentenceNumber);
            }
        }
        System.Console.Out.WriteLine("---");

        // This prints out all the details of what is stored for each token.
        int tokenIndex = 0;
        foreach (string text in example)
        {
            foreach (IList<CoreLabel> labels in classifier.Classify(text))
            {
                foreach (CoreLabel label in labels)
                {
                    System.Console.Out.Write(tokenIndex++ + ": ");
                    System.Console.Out.WriteLine(label.ToShorterString());
                }
            }
        }
        System.Console.Out.WriteLine("---");
    }
}
/// <summary>
/// Train a Stanford NER model from a configuration file.
/// </summary>
/// <remarks>
/// The configuration file is read as UTF-8 properties. After training, the model
/// is serialized when the classifier flags name a <c>serializeTo</c> target.
/// </remarks>
/// <param name="prop">Configuration file</param>
/// <returns>
/// <see langword="true"/> when training (and serialization, if requested)
/// completed; <see langword="false"/> when any step threw, in which case the
/// error is written to the console.
/// </returns>
public bool Train(string prop)
{
    try
    {
        java.util.Properties props = new java.util.Properties();

        // Close the file as soon as the properties are loaded, even when loading
        // fails, so that the handle is not leaked.
        InputStream st = new BufferedInputStream(new FileInputStream(prop));
        try
        {
            props.load(new InputStreamReader(st, "utf-8"));
        }
        finally
        {
            st.close();
        }

        _crfModel = new CRFClassifier(props);
        _crfModel.train();

        string serializeTo = _crfModel.flags.serializeTo;
        if (serializeTo != null)
        {
            _crfModel.serializeClassifier(serializeTo);
        }

        return true;
    }
    catch (Exception e)
    {
        System.Console.WriteLine("Unable to train the Stanford CRF model: " + e.ToString());
        return false;
    }
}
/// <summary>
/// Create CRF model from the model file.
/// </summary>
/// <param name="crfSerializedClassifier">The model file</param>
/// <returns>If loaded successfully, returns true else false.</returns>
public bool LoadModel(string crfSerializedClassifier)
{
    try
    {
        _crfModel = CRFClassifier.getClassifierNoExceptions(crfSerializedClassifier);
        _isCRFModelLoaded = true;
        return true;
    }
    catch (Exception e)
    {
        // Include the cause in the output; without it a failed load cannot be diagnosed.
        System.Console.WriteLine("Unable to load the Stanford CRF Model... " + e.ToString());
        _isCRFModelLoaded = false;
        return false;
    }
}