/// <summary>
/// Creates a combiner over the classifiers at <paramref name="loadPaths"/> and attaches a
/// number sequence classifier chosen by language, plus an optional regexner gazette mapping.
/// </summary>
/// <param name="applyNumericClassifiers">Value stored in the instance's applyNumericClassifiers field.</param>
/// <param name="nerLanguage">
/// Language stored on the instance; Chinese selects ChineseNumberSequenceClassifier,
/// any other value selects NumberSequenceClassifier.
/// </param>
/// <param name="useSUTime">Stored on the instance and forwarded to the number sequence classifier.</param>
/// <param name="augmentRegexNER">
/// When true the gazette mapping is read from DefaultPaths.DefaultNerGazetteMapping;
/// when false an empty map is used.
/// </param>
/// <param name="nscProps">
/// Properties passed to the base constructor (also used to extract the combination mode)
/// and to the number sequence classifier.
/// </param>
/// <param name="loadPaths">Paths forwarded unchanged to the base constructor.</param>
/// <exception cref="System.IO.IOException"/>
public NERClassifierCombiner(bool applyNumericClassifiers, NERClassifierCombiner.Language nerLanguage, bool useSUTime, bool augmentRegexNER, Properties nscProps, params string[] loadPaths)
    : base(nscProps, ClassifierCombiner.ExtractCombinationModeSafe(nscProps), loadPaths)
{
    // NOTE: nscProps may contain sutime props which will not be recognized by the SeqClassifierFlags
    this.useSUTime = useSUTime;
    this.nerLanguage = nerLanguage;
    this.applyNumericClassifiers = applyNumericClassifiers;

    // Pick the number sequence classifier that matches the requested language.
    bool isChinese = nerLanguage == NERClassifierCombiner.Language.Chinese;
    if (!isChinese)
    {
        this.nsc = new NumberSequenceClassifier(new Properties(), useSUTime, nscProps);
    }
    else
    {
        this.nsc = new ChineseNumberSequenceClassifier(new Properties(), useSUTime, nscProps);
    }

    // The gazette is only loaded on request; otherwise the mapping stays empty.
    if (!augmentRegexNER)
    {
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Logs information about an NERClassifierCombiner: the ClassifierCombiner details
/// followed by its useSUTime and applyNumericClassifiers settings, framed by empty log lines.
/// </summary>
/// <param name="ncc">The combiner to describe.</param>
public static void ShowNCCInfo(NERClassifierCombiner ncc)
{
    string blankLine = string.Empty;

    log.Info(blankLine);
    log.Info("info for this NERClassifierCombiner: ");

    // Delegate the combiner-level details before the NER-specific flags.
    ClassifierCombiner.ShowCCInfo(ncc);

    string suTimeLine = "useSUTime: " + ncc.useSUTime;
    log.Info(suTimeLine);

    string numericLine = "applyNumericClassifier: " + ncc.applyNumericClassifiers;
    log.Info(numericLine);

    log.Info(blankLine);
}
/// <summary>
/// Converts the three label arrays to CoreLabel lists, merges the second document into the
/// first with ClassifierCombiner.MergeTwoDocuments, and asserts that the first document now
/// equals the expected one.
/// </summary>
/// <param name="firstInput">Labels for the document that receives the merge.</param>
/// <param name="secondInput">Labels for the document merged in.</param>
/// <param name="expectedOutput">Labels the first document is expected to carry afterwards.</param>
/// <param name="labels">Labels collected into the auxiliary label set passed to the merge.</param>
public virtual void RunTest(string[] firstInput, string[] secondInput, string[] expectedOutput, params string[] labels)
{
    IList<CoreLabel> primary = CoreUtilities.ToCoreLabelList(words, tags, firstInput);
    IList<CoreLabel> secondary = CoreUtilities.ToCoreLabelList(words, tags, secondInput);
    IList<CoreLabel> expected = CoreUtilities.ToCoreLabelList(words, tags, expectedOutput);

    ICollection<string> auxLabels = new HashSet<string>();
    for (int i = 0; i < labels.Length; i++)
    {
        auxLabels.Add(labels[i]);
    }

    // The merge is checked on the first list, so it is expected to be updated in place.
    ClassifierCombiner.MergeTwoDocuments(primary, secondary, auxLabels, "O");

    NUnit.Framework.Assert.AreEqual(expected, primary);
}
/// <summary>
/// Performs the same merge as a test run but, instead of asserting, prints each token of the
/// merged first document to standard output as "word tag answer".
/// </summary>
/// <param name="firstInput">Labels for the document that receives the merge and is printed.</param>
/// <param name="secondInput">Labels for the document merged in.</param>
/// <param name="expectedOutput">Converted to a CoreLabel list but not otherwise used here.</param>
/// <param name="labels">Labels collected into the auxiliary label set passed to the merge.</param>
public virtual void OutputResults(string[] firstInput, string[] secondInput, string[] expectedOutput, params string[] labels)
{
    IList<CoreLabel> primary = CoreUtilities.ToCoreLabelList(words, tags, firstInput);
    IList<CoreLabel> secondary = CoreUtilities.ToCoreLabelList(words, tags, secondInput);
    // Kept for parity with the original: the conversion still runs although the list is unused.
    IList<CoreLabel> expected = CoreUtilities.ToCoreLabelList(words, tags, expectedOutput);

    ICollection<string> auxLabels = new HashSet<string>();
    for (int i = 0; i < labels.Length; i++)
    {
        auxLabels.Add(labels[i]);
    }

    ClassifierCombiner.MergeTwoDocuments(primary, secondary, auxLabels, "O");

    foreach (CoreLabel token in primary)
    {
        string line = token.Word() + " " + token.Tag() + " " + token.Get(typeof(CoreAnnotations.AnswerAnnotation));
        System.Console.Out.WriteLine(line);
    }
}
/// <summary>
/// The main method. Builds or loads an NERClassifierCombiner from command-line properties and
/// then runs whichever of the following options were supplied: <c>serializeTo</c>,
/// <c>textFile</c>, <c>textFiles</c>, <c>testFile</c>/<c>testFiles</c> (optionally with
/// <c>crfToExamine</c>), <c>showNCCInfo</c>, and reading from stdin.
/// </summary>
/// <param name="args">Command-line arguments; converted to properties via StringUtils.ArgsToProperties.</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    StringUtils.LogInvocationString(log, args);
    Properties props = StringUtils.ArgsToProperties(args);
    // false for print probs as printed in next code block
    SeqClassifierFlags flags = new SeqClassifierFlags(props, false);

    string loadPath = props.GetProperty("loadClassifier");
    NERClassifierCombiner ncc;
    if (loadPath != null)
    {
        // note that when loading a serialized classifier, the philosophy is override
        // any settings in props with those given in the commandline
        // so if you dumped it with useSUTime = false, and you say -useSUTime at
        // the commandline, the commandline takes precedence
        ncc = ((NERClassifierCombiner)GetClassifier(loadPath, props));
    }
    else
    {
        // pass null for passDownProperties to let all props go through
        ncc = CreateNERClassifierCombiner("ner", null, props);
    }

    // write the NERClassifierCombiner to the given path on disk
    string serializeTo = props.GetProperty("serializeTo");
    if (serializeTo != null)
    {
        ncc.SerializeClassifier(serializeTo);
    }

    string textFile = props.GetProperty("textFile");
    if (textFile != null)
    {
        ncc.ClassifyAndWriteAnswers(textFile);
    }

    // run on multiple textFiles, based off CRFClassifier code
    string textFiles = props.GetProperty("textFiles");
    if (textFiles != null)
    {
        ncc.ClassifyFilesAndWriteAnswers(FilesFromCommaSeparated(textFiles));
    }

    // options for running the NERClassifierCombiner on a testFile or testFiles
    string testFile = props.GetProperty("testFile");
    string testFiles = props.GetProperty("testFiles");
    string crfToExamine = props.GetProperty("crfToExamine");
    IDocumentReaderAndWriter<CoreLabel> readerAndWriter = ncc.DefaultReaderAndWriter();
    if (testFile != null || testFiles != null)
    {
        // check if there is not a crf specific request
        if (crfToExamine == null)
        {
            // in this case there is no crfToExamine
            if (testFile != null)
            {
                ncc.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
            }
            else
            {
                // The translated code mapped the names with a null function (a lost
                // File::new method reference); build the list explicitly instead,
                // exactly as the textFiles option does.
                IList<File> files = FilesFromCommaSeparated(testFiles);
                ncc.ClassifyFilesAndWriteAnswers(files, ncc.DefaultReaderAndWriter(), true);
            }
        }
        else
        {
            ClassifierCombiner.ExamineCRF(ncc, crfToExamine, flags, testFile, testFiles, readerAndWriter);
        }
    }

    // option for showing info about the NERClassifierCombiner
    string showNCCInfo = props.GetProperty("showNCCInfo");
    if (showNCCInfo != null)
    {
        ShowNCCInfo(ncc);
    }

    // option for reading in from stdin
    if (flags.readStdin)
    {
        ncc.ClassifyStdin();
    }
}

/// <summary>Turns a comma-separated list of file names into one File per entry, in order.</summary>
/// <param name="commaSeparatedNames">File names separated by commas; must not be null.</param>
/// <returns>A new list holding a File for each name.</returns>
private static IList<File> FilesFromCommaSeparated(string commaSeparatedNames)
{
    IList<File> files = new List<File>();
    foreach (string filename in commaSeparatedNames.Split(","))
    {
        files.Add(new File(filename));
    }
    return files;
}