/// <summary>
/// Command-line entry point. Fills this class's options from <paramref name="args"/>, loads the
/// statistical model named by <c>StatisticalModel</c>, combines it with the tokensregex and semgrex
/// extractors into an ensemble, and computes accuracy over the dataset read from <c>TestFile</c>.
/// </summary>
/// <param name="args">Command-line arguments handed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.InvalidCastException">
/// The loaded model is neither a <c>LinearClassifier</c> nor a <c>KBPStatisticalExtractor</c>.
/// </exception>
public static void Main(string[] args)
{
    // Standard Redwood configuration; per the original note this disables SLF4J output.
    RedwoodConfiguration.Standard().Apply();
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPEnsembleExtractor), args);

    // Load the serialized statistical model and reject unsupported payloads up front.
    object loaded = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(StatisticalModel);
    bool isRawClassifier = loaded is LinearClassifier;
    if (!isRawClassifier && !(loaded is KBPStatisticalExtractor))
    {
        throw new InvalidCastException(loaded.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
    }

    // A bare classifier gets wrapped; an already-built extractor is used as is.
    //noinspection unchecked
    IKBPRelationExtractor statistical = isRawClassifier
        ? new KBPStatisticalExtractor((IClassifier<string, string>)loaded)
        : (KBPStatisticalExtractor)loaded;
    logger.Info("Read statistical model from " + StatisticalModel);

    // Assemble the ensemble: tokensregex rules, semgrex rules, then the statistical model.
    var tokensregexRules = new KBPTokensregexExtractor(TokensregexDir);
    var semgrexRules = new KBPSemgrexExtractor(SemgrexDir);
    IKBPRelationExtractor ensemble = new Edu.Stanford.Nlp.IE.KBPEnsembleExtractor(tokensregexRules, semgrexRules, statistical);

    // Evaluate on the test set.
    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    ensemble.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}
/// <summary>
/// Command-line entry point. Fills this class's options from <paramref name="args"/>, builds a
/// semgrex extractor over <c>Dir</c>, and computes its accuracy over the dataset read from
/// <c>TestFile</c>.
/// </summary>
/// <param name="args">Command-line arguments handed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Standard Redwood configuration; per the original note this disables SLF4J output.
    RedwoodConfiguration.Standard().Apply();
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPSemgrexExtractor), args);

    // Build the rule-based extractor from the configured directory.
    var semgrex = new Edu.Stanford.Nlp.IE.KBPSemgrexExtractor(Dir);

    // Read the evaluation data and score the extractor against it.
    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    semgrex.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}
/// <summary>
/// Command-line entry point. Reads the test data from <c>TestFile</c>, trains and saves a model to
/// <c>ModelFile</c> when none exists there yet, then loads the model and computes its accuracy on
/// the test data.
/// </summary>
/// <param name="args">Command-line arguments handed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public static void Main(string[] args)
{
    // Standard Redwood configuration; per the original note this disables SLF4J output.
    RedwoodConfiguration.Standard().Apply();
    // Fill command-line options
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPStatisticalExtractor), args);

    // Load the test (or dev) data
    Redwood.Util.ForceTrack("Test data");
    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    log.Info("Read " + evaluationSet.Count + " examples");
    Redwood.Util.EndTrack("Test data");

    // If we can't find an existing model, train one
    if (!IOUtils.ExistsInClasspathOrFileSystem(ModelFile))
    {
        TrainAndSaveModel();
    }

    // Read either a newly-trained or pre-trained model; a bare classifier is wrapped,
    // anything else is cast directly to the extractor type.
    object loaded = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(ModelFile);
    //noinspection unchecked
    Edu.Stanford.Nlp.IE.KBPStatisticalExtractor extractor = loaded is IClassifier
        ? new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor((IClassifier<string, string>)loaded)
        : (Edu.Stanford.Nlp.IE.KBPStatisticalExtractor)loaded;

    // Evaluate the model
    extractor.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}

/// <summary>
/// Reads the training data from <c>TrainFile</c>, builds the dataset, trains a multinomial
/// classifier with <c>FeatureThreshold</c> and <c>Sigma</c>, and writes the resulting extractor to
/// <c>ModelFile</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private static void TrainAndSaveModel()
{
    Redwood.Util.ForceTrack("Training data");
    IList<Pair<KBPRelationExtractor.KBPInput, string>> trainingSet = IKBPRelationExtractor.ReadDataset(TrainFile);
    log.Info("Read " + trainingSet.Count + " examples");
    log.Info(string.Empty + trainingSet.Stream().Map(null).Filter(null).Count() + " are " + KBPRelationExtractorConstants.NoRelation);
    Redwood.Util.EndTrack("Training data");

    // Featurize + create the dataset
    Redwood.Util.ForceTrack("Creating dataset");
    RVFDataset<string, string> features = new RVFDataset<string, string>();
    // NOTE(review): the counter and start time are not referenced in the visible code; presumably
    // they were meant for the per-example callback passed to ForEach (null here) - TODO confirm.
    AtomicInteger processed = new AtomicInteger(0);
    long startMillis = Runtime.CurrentTimeMillis();
    // This takes a while per example
    trainingSet.Stream().Parallel().ForEach(null);
    // Free up some memory
    trainingSet.Clear();
    Redwood.Util.EndTrack("Creating dataset");

    // Train the classifier
    log.Info("Training classifier:");
    IClassifier<string, string> trained = TrainMultinomialClassifier(features, FeatureThreshold, Sigma);
    // Free up some memory
    features.Clear();

    // Save the classifier
    IOUtils.WriteObjectToFile(new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor(trained), ModelFile);
}
/// <summary>Create a new KBP annotator from the given properties.</summary>
/// <param name="name">
/// Name handed to <c>ArgumentParser.FillOptions</c> together with <paramref name="props"/>.
/// </param>
/// <param name="props">The properties to use when creating this extractor.</param>
/// <exception cref="RuntimeIOException">
/// Wraps any exception raised while building the extractors or reading <c>kbp.maxlen</c>.
/// </exception>
public KBPAnnotator(string name, Properties props)
{
    // Parse standard properties
    ArgumentParser.FillOptions(this, name, props);
    kbpProperties = props;

    try
    {
        List<IKBPRelationExtractor> components = new List<IKBPRelationExtractor>();

        // Rule-based extractors are only added when their directory option was supplied.
        if (!tokensregexdir.Equals(NotProvided))
        {
            components.Add(new KBPTokensregexExtractor(tokensregexdir, Verbose));
        }
        if (!semgrexdir.Equals(NotProvided))
        {
            components.Add(new KBPSemgrexExtractor(semgrexdir, Verbose));
        }

        // Statistical model: accept either a bare linear classifier or a ready-made extractor.
        if (!model.Equals(NotProvided))
        {
            log.Info("Loading KBP classifier from: " + model);
            object loaded = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(model);
            IKBPRelationExtractor statistical;
            if (loaded is LinearClassifier)
            {
                //noinspection unchecked
                statistical = new KBPStatisticalExtractor((IClassifier<string, string>)loaded);
            }
            else if (loaded is KBPStatisticalExtractor)
            {
                statistical = (KBPStatisticalExtractor)loaded;
            }
            else
            {
                throw new InvalidCastException(loaded.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
            }
            components.Add(statistical);
        }

        // Combine whatever was configured into a single ensemble extractor.
        this.extractor = new KBPEnsembleExtractor(Sharpen.Collections.ToArray(components, new IKBPRelationExtractor[components.Count]));

        // Maximum sentence length to operate on; the property defaults to "-1" when absent.
        maxLength = System.Convert.ToInt32(props.GetProperty("kbp.maxlen", "-1"));
    }
    catch (Exception e)
    {
        throw new RuntimeIOException(e);
    }

    // Map for converting between older and new KBP relation names.
    relationNameConversionMap = new Dictionary<string, string>
    {
        { "org:dissolved", "org:date_dissolved" },
        { "org:founded", "org:date_founded" },
        { "org:number_of_employees/members", "org:number_of_employees_members" },
        { "org:political/religious_affiliation", "org:political_religious_affiliation" },
        { "org:top_members/employees", "org:top_members_employees" },
        { "per:member_of", "per:employee_or_member_of" },
        { "per:employee_of", "per:employee_or_member_of" },
        { "per:stateorprovinces_of_residence", "per:statesorprovinces_of_residence" }
    };

    // KBP language comes from the "kbp.language" property, defaulting to "en".
    kbpLanguage = LanguageInfo.GetLanguageFromString(props.GetProperty("kbp.language", "en"));

    // Build the Spanish coref system if necessary
    if (LanguageInfo.HumanLanguage.Spanish.Equals(kbpLanguage))
    {
        spanishCorefSystem = new KBPBasicSpanishCorefSystem();
    }
}