/// <summary>
/// Builds a parser annotator whose settings are read from <paramref name="props"/>
/// using keys prefixed with <paramref name="annotatorName"/>.
/// </summary>
/// <param name="annotatorName">Prefix of every property key read here (e.g. <c>name.model</c>).</param>
/// <param name="props">Property set holding the annotator configuration.</param>
/// <exception cref="System.ArgumentException">If no model location can be resolved.</exception>
public ParserAnnotator(string annotatorName, Properties props)
{
    string modelLocation = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);
    if (modelLocation == null)
    {
        throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
    }

    this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
    string[] parserFlags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
    this.parser = LoadModel(modelLocation, Verbose, parserFlags);
    this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);

    // Optional tree transformation, instantiated reflectively from its class name.
    string treeMapClassName = props.GetProperty(annotatorName + ".treemap");
    if (treeMapClassName != null)
    {
        this.treeMap = ReflectionLoading.LoadByReflection(treeMapClassName, props);
    }
    else
    {
        this.treeMap = null;
    }

    this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
    this.kBest = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
    this.keepPunct = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);

    // Dependency graphs are only built when the loaded parser's language parameters support them.
    string buildGraphsKey = annotatorName + ".buildgraphs";
    bool dependenciesSupported = this.parser.GetTLPParams().SupportsBasicDependencies();
    if (dependenciesSupported)
    {
        this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsKey, true);
    }
    else
    {
        if (PropertiesUtils.GetBool(props, buildGraphsKey))
        {
            log.Info("WARNING: " + buildGraphsKey + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
        }
        this.BuildGraphs = false;
    }

    if (!this.BuildGraphs)
    {
        this.gsf = null;
    }
    else
    {
        bool useOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
        parser.GetTLPParams().SetGenerateOriginalDependencies(useOriginalDependencies);
        ITreebankLanguagePack languagePack = parser.GetTLPParams().TreebankLanguagePack();
        IPredicate<string> punctuationFilter;
        if (this.keepPunct)
        {
            punctuationFilter = Filters.AcceptFilter();
        }
        else
        {
            punctuationFilter = languagePack.PunctuationWordRejectFilter();
        }
        this.gsf = languagePack.GrammaticalStructureFactory(punctuationFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
    }

    // The annotator-specific thread count falls back to the global "nthreads" setting.
    int globalThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", globalThreads);

    bool binaryTreesDefault = StanfordCoreNLP.UsesBinaryTrees(props);
    this.saveBinaryTrees = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", binaryTreesDefault);
    this.noSquash = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);

    string extrasName = props.GetProperty(annotatorName + ".extradependencies", "NONE");
    this.extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
}
/// <summary>
/// Initializes the options from <paramref name="props"/>, reading keys prefixed with
/// <paramref name="name"/>. Options not present in the properties keep their current values.
/// </summary>
/// <param name="name">Prefix of every property key read here.</param>
/// <param name="props">Property set holding the configuration.</param>
public Options(string name, Properties props)
{
    includeRange = PropertiesUtils.GetBool(props, name + ".includeRange", includeRange);
    markTimeRanges = PropertiesUtils.GetBool(props, name + ".markTimeRanges", markTimeRanges);
    includeNested = PropertiesUtils.GetBool(props, name + ".includeNested", includeNested);
    restrictToTimex3 = PropertiesUtils.GetBool(props, name + ".restrictToTimex3", restrictToTimex3);
    string heuristicLevelName = props.GetProperty(name + ".teRelHeurLevel", teRelHeurLevel.ToString());
    teRelHeurLevel = Options.RelativeHeuristicLevel.ValueOf(heuristicLevelName);
    verbose = PropertiesUtils.GetBool(props, name + ".verbose", verbose);

    // Set default rules by SUTime language; unknown languages fall back to "english".
    language = props.GetProperty(name + ".language", language);
    bool languageKnown = languageToRulesFiles.Keys.Contains(language);
    if (!languageKnown)
    {
        language = "english";
    }
    grammarFilename = languageToRulesFiles[language];
    // Override if rules are set by properties.
    grammarFilename = props.GetProperty(name + ".rules", grammarFilename);
    searchForDocDate = PropertiesUtils.GetBool(props, name + ".searchForDocDate", searchForDocDate);

    // Resolve the binder class names: either the defaults, or name.binder.1 .. name.binder.N
    // where N is the integer value of name.binders.
    int binderCount;
    string[] binderClassNames;
    if (props.GetProperty(name + ".binders") != null)
    {
        binderCount = PropertiesUtils.GetInt(props, name + ".binders", 0);
        binderClassNames = new string[binderCount];
        for (int idx = 0; idx < binderCount; ++idx)
        {
            binderClassNames[idx] = props.GetProperty(name + ".binder." + (idx + 1));
        }
    }
    else
    {
        binderCount = DefaultBinders.Length;
        binderClassNames = DefaultBinders;
    }

    // Binders are not instantiated when there are none or when the "STS" runtime property is set.
    if (binderCount <= 0 || Runtime.GetProperty("STS") != null)
    {
        return;
    }

    binders = new Env.IBinder[binderCount];
    for (int idx = 0; idx < binderCount; idx++)
    {
        int binderNumber = idx + 1;
        string binderKey = name + ".binder." + binderNumber;
        try
        {
            Type binderType = Sharpen.Runtime.GetType(binderClassNames[idx]);
            binderKey = binderKey + ".";
            binders[idx] = (Env.IBinder)System.Activator.CreateInstance(binderType);
            binders[idx].Init(binderKey, props);
        }
        catch (Exception ex)
        {
            throw new Exception("Error initializing binder " + binderNumber, ex);
        }
    }
}
/// <summary>
/// Creates a dependency parse annotator, loading the parser model and reading the
/// thread count, sentence timeout and extra-dependency setting from <paramref name="properties"/>.
/// </summary>
/// <param name="properties">Configuration; unprefixed keys such as <c>model</c> and <c>testThreads</c> are read.</param>
public DependencyParseAnnotator(Properties properties)
{
    string modelLocation = PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel);
    parser = DependencyParser.LoadFromModelFile(modelLocation, properties);

    nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
    maxTime = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

    string extrasName = properties.GetProperty("extradependencies", "NONE");
    extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
}
/// <summary>
/// Creates a POS tagger annotator configured from <paramref name="props"/>, reading keys
/// prefixed with <paramref name="annotatorName"/>.
/// </summary>
/// <param name="annotatorName">Prefix of every annotator-specific property key.</param>
/// <param name="props">Property set holding the configuration.</param>
public POSTaggerAnnotator(string annotatorName, Properties props)
{
    // Fall back to the default POS model when no model location is configured.
    string modelLocation = props.GetProperty(annotatorName + ".model") ?? DefaultPaths.DefaultPosModel;
    bool beVerbose = PropertiesUtils.GetBool(props, annotatorName + ".verbose", false);
    this.pos = LoadModel(modelLocation, beVerbose);

    this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", int.MaxValue);

    // The annotator-specific thread count falls back to the global "nthreads" setting.
    int globalThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", globalThreads);

    this.reuseTags = PropertiesUtils.GetBool(props, annotatorName + ".reuseTags", false);
}
/// <summary>
/// Overrides the current configuration fields with any values present in
/// <paramref name="props"/>; fields whose key is absent keep their current value.
/// </summary>
/// <param name="props">Property set holding training, runtime and language options.</param>
private void SetProperties(Properties props)
{
    trainingThreads = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
    wordCutOff = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
    initRange = PropertiesUtils.GetDouble(props, "initRange", initRange);
    maxIter = PropertiesUtils.GetInt(props, "maxIter", maxIter);
    batchSize = PropertiesUtils.GetInt(props, "batchSize", batchSize);
    adaEps = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
    adaAlpha = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
    regParameter = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
    dropProb = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
    hiddenSize = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
    embeddingSize = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
    numPreComputed = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
    evalPerIter = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
    clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
    saveIntermediate = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
    unlabeled = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
    cPOS = PropertiesUtils.GetBool(props, "cPOS", cPOS);
    noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
    doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);

    // Runtime parsing options
    sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
    tagger = PropertiesUtils.GetString(props, "tagger.model", tagger);
    string escaperClass = props.GetProperty("escaper");
    escaper = escaperClass != null ? ReflectionLoading.LoadByReflection(escaperClass) : null;

    // Language options
    language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;
    // Default language pack comes from the selected language's parameters
    // ("@params" is a verbatim identifier because "params" is a C# keyword).
    tlp = language.@params.TreebankLanguagePack();

    // if a tlp was specified go with that
    string tlpCanonicalName = props.GetProperty("tlp");
    if (tlpCanonicalName != null)
    {
        try
        {
            tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
            System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
        }
        catch (Exception)
        {
            // Best effort: on failure the language's default pack assigned above stays in effect.
            System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
        }
    }
}
/// <summary>
/// Command-line entry point: builds a segmenter from the parsed options and either
/// evaluates it on a test file or decodes raw text from a file or standard input.
/// </summary>
/// <param name="args">Command-line arguments; prints usage and exits when empty or when <c>help</c> is given.</param>
public static void Main(string[] args)
{
    // Strips off hyphens
    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    if (options.Contains("help") || args.Length == 0)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    int nThreads = PropertiesUtils.GetInt(options, "nthreads", 1);
    Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter segmenter = GetSegmenter(options);

    // Decode either an evaluation file or raw text
    try
    {
        // Output uses the explicit output encoding, else the input encoding, else the default.
        PrintWriter pwOut;
        if (segmenter.flags.outputEncoding != null)
        {
            OutputStreamWriter @out = new OutputStreamWriter(System.Console.Out, segmenter.flags.outputEncoding);
            pwOut = new PrintWriter(@out, true);
        }
        else if (segmenter.flags.inputEncoding != null)
        {
            OutputStreamWriter @out = new OutputStreamWriter(System.Console.Out, segmenter.flags.inputEncoding);
            pwOut = new PrintWriter(@out, true);
        }
        else
        {
            pwOut = new PrintWriter(System.Console.Out, true);
        }

        if (segmenter.flags.testFile != null)
        {
            if (segmenter.flags.answerFile == null)
            {
                segmenter.Evaluate(pwOut);
            }
            else
            {
                Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter.EvaluateRawText(pwOut);
            }
        }
        else
        {
            BufferedReader br = (segmenter.flags.textFile == null) ? IOUtils.ReaderFromStdin() : IOUtils.ReaderFromString(segmenter.flags.textFile, segmenter.flags.inputEncoding);
            double charsPerSec;
            try
            {
                charsPerSec = Decode(segmenter, br, pwOut, nThreads);
            }
            finally
            {
                // Close the reader even when decoding throws, so the input is never leaked.
                IOUtils.CloseIgnoringExceptions(br);
            }
            System.Console.Error.Printf("Done! Processed input text at %.2f input characters/second%n", charsPerSec);
        }
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (IOException)
    {
        System.Console.Error.Printf("%s: Could not open %s%n", typeof(Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter).FullName, segmenter.flags.textFile);
    }
}
/// <summary>Run the Evalb scoring metric on guess/gold input.</summary>
/// <remarks>
/// Run the Evalb scoring metric on guess/gold input. The default language is English.
/// Gold and guess trees are read pairwise; pairs whose gold yield exceeds the <c>y</c> limit
/// or whose yields differ in length are counted as skipped and excluded from scoring.
/// </remarks>
/// <param name="args">Command-line options followed by the gold file and the guess file.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    int maxGoldYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    bool verbose = PropertiesUtils.GetBool(options, "v", false);
    bool sortByF1 = PropertiesUtils.HasProperty(options, "s");
    int worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0);
    PriorityQueue<Triple<double, Tree, Tree>> queue = sortByF1 ? new PriorityQueue<Triple<double, Tree, Tree>>(2000, new Evalb.F1Comparator()) : null;
    bool doCatLevel = PropertiesUtils.GetBool(options, "c", false);
    string labelRegex = options.GetProperty("f", null);
    string encoding = options.GetProperty("e", "UTF-8");
    string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (parsedArgs.Length != minArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    string goldFile = parsedArgs[0];
    string guessFile = parsedArgs[1];

    // Command-line has been parsed. Configure the metric for evaluation.
    tlpp.SetInputEncoding(encoding);
    PrintWriter pwOut = tlpp.Pw();
    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());
    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());
    Evalb metric = new Evalb("Evalb LP/LR", true);
    EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
    ITreeTransformer tc = tlpp.Collinizer();

    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    IEnumerator<Tree> guessItr = guessTreebank.GetEnumerator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;

    // MoveNext() both tests and advances, so each enumerator is advanced exactly once per
    // iteration and the results are remembered. Re-calling MoveNext() after the loop (as a
    // "has more?" probe) would skip the element that the loop condition already consumed and
    // could miss a one-tree length difference.
    bool guessHasNext;
    bool goldHasNext;
    while (true)
    {
        guessHasNext = guessItr.MoveNext();
        goldHasNext = goldItr.MoveNext();
        if (!guessHasNext || !goldHasNext)
        {
            break;
        }
        Tree guessTree = guessItr.Current;
        IList<ILabel> guessYield = guessTree.Yield();
        guessLineId++;
        Tree goldTree = goldItr.Current;
        IList<ILabel> goldYield = goldTree.Yield();
        goldLineId++;

        // Check that we should evaluate this tree
        if (goldYield.Count > maxGoldYield)
        {
            skippedGuessTrees++;
            continue;
        }

        // Only trees with equal yields can be evaluated
        if (goldYield.Count != guessYield.Count)
        {
            pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }

        Tree evalGuess = tc.TransformTree(guessTree);
        Tree evalGold = tc.TransformTree(goldTree);
        metric.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
        if (doCatLevel)
        {
            evalbCat.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
        }
        if (sortByF1)
        {
            StoreTrees(queue, guessTree, goldTree, metric.GetLastF1());
        }
    }

    // Exactly one side still having a tree means the two treebanks differ in length.
    if (guessHasNext || goldHasNext)
    {
        System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.Display(true, pwOut);
    pwOut.Println();
    if (doCatLevel)
    {
        evalbCat.Display(true, pwOut);
        pwOut.Println();
    }
    if (sortByF1)
    {
        EmitSortedTrees(queue, worstKTreesToEmit, guessFile);
    }
    pwOut.Close();
}
/// <summary>
/// Reads the integer property whose key is <c>MaxSentDistProp</c> with the
/// <c>SIEVENAME</c> placeholder replaced by <paramref name="sievename"/>.
/// </summary>
/// <param name="props">Property set to read from.</param>
/// <param name="sievename">Sieve name substituted into the property key.</param>
/// <returns>The configured value, or 1000 when the property is not set.</returns>
public static int GetMaxSentDistForSieve(Properties props, string sievename)
{
    string key = MaxSentDistProp.Replace("SIEVENAME", sievename);
    return PropertiesUtils.GetInt(props, key, 1000);
}
/// <summary>
/// Reads the integer property whose key is <c>ThresFeaturecountProp</c> with the
/// <c>SIEVENAME</c> placeholder replaced by <paramref name="sievename"/>.
/// </summary>
/// <param name="props">Property set to read from.</param>
/// <param name="sievename">Sieve name substituted into the property key.</param>
/// <returns>The configured value, or 20 when the property is not set.</returns>
public static int GetFeatureCountThreshold(Properties props, string sievename)
{
    string key = ThresFeaturecountProp.Replace("SIEVENAME", sievename);
    return PropertiesUtils.GetInt(props, key, 20);
}
/// <summary>
/// Reads the integer property whose key is <c>NumFeaturesProp</c> with the
/// <c>SIEVENAME</c> placeholder replaced by <paramref name="sievename"/>.
/// </summary>
/// <param name="props">Property set to read from.</param>
/// <param name="sievename">Sieve name substituted into the property key.</param>
/// <returns>The configured value, or 30 when the property is not set.</returns>
public static int GetNumFeatures(Properties props, string sievename)
{
    string key = NumFeaturesProp.Replace("SIEVENAME", sievename);
    return PropertiesUtils.GetInt(props, key, 30);
}
/// <summary>
/// Reads the integer property whose key is <c>TreeDepthProp</c> with the
/// <c>SIEVENAME</c> placeholder replaced by <paramref name="sievename"/>.
/// </summary>
/// <param name="props">Property set to read from.</param>
/// <param name="sievename">Sieve name substituted into the property key.</param>
/// <returns>The configured value, or 0 when the property is not set.</returns>
public static int GetTreeDepth(Properties props, string sievename)
{
    string key = TreeDepthProp.Replace("SIEVENAME", sievename);
    return PropertiesUtils.GetInt(props, key, 0);
}
/// <summary>
/// Command-line entry point: loads a treebank from the given path and prints each tree
/// whose yield is within the length limit, in the first enabled output format
/// (trees, flattened yield, pre-terminal yield, or word/tag pairs).
/// </summary>
/// <param name="args">Command-line options plus the treebank path; usage is printed when missing.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    // Process command-line options
    Properties options = StringUtils.ArgsToProperties(args, optionArgDefinitions);
    string treebankPath = options.GetProperty(string.Empty);
    if (string.IsNullOrEmpty(treebankPath))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    int lengthLimit = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    bool emitTrees = PropertiesUtils.GetBool(options, "p", false);
    bool emitFlattened = PropertiesUtils.GetBool(options, "f", false);
    bool emitPreTerminals = PropertiesUtils.GetBool(options, "a", false);
    bool emitTaggedWords = PropertiesUtils.GetBool(options, "t", false);
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    string encoding = options.GetProperty("e", "UTF-8");
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank treebank = tlpp.DiskTreebank();
    treebank.LoadPath(treebankPath);

    // Read the treebank
    PrintWriter writer = tlpp.Pw();
    int treeCount = 0;
    foreach (Tree tree in treebank)
    {
        if (tree.Yield().Count <= lengthLimit)
        {
            ++treeCount;
            // Only the first enabled format is printed; with none enabled the tree is just counted.
            if (emitTrees)
            {
                writer.Println(tree.ToString());
            }
            else if (emitFlattened)
            {
                writer.Println(SentenceUtils.ListToString(tree.Yield()));
            }
            else if (emitPreTerminals)
            {
                writer.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
            }
            else if (emitTaggedWords)
            {
                IList<CoreLabel> taggedYield = tree.TaggedLabeledYield();
                foreach (CoreLabel token in taggedYield)
                {
                    writer.Printf("%s\t%s%n", token.Word(), token.Tag());
                }
                writer.Println();
            }
        }
    }
    System.Console.Error.Printf("Read %d trees.%n", treeCount);
}
/// <summary>Reads the integer property named by <c>SeedProp</c>.</summary>
/// <param name="props">Property set to read from.</param>
/// <returns>The configured value, or 1 when the property is not set.</returns>
public static int GetSeed(Properties props)
{
    int seed = PropertiesUtils.GetInt(props, SeedProp, 1);
    return seed;
}
// ---------- Heuristic Mention Filtering ----------
/// <summary>Reads the integer property <c>coref.maxMentionDistance</c>.</summary>
/// <param name="props">Property set to read from.</param>
/// <returns>
/// The configured value; when unset, <c>int.MaxValue</c> if <c>Conll(props)</c> is true, otherwise 50.
/// </returns>
public static int MaxMentionDistance(Properties props)
{
    int fallback;
    if (Conll(props))
    {
        fallback = int.MaxValue;
    }
    else
    {
        fallback = 50;
    }
    return PropertiesUtils.GetInt(props, "coref.maxMentionDistance", fallback);
}
/// <summary>
/// Command-line entry point for the joint segmentation / parsing model: parses the options,
/// opens the input stream, runs the model on the training (and optional test) treebank and
/// reports start, stop and elapsed time.
/// </summary>
/// <param name="args">Command-line options followed by the training treebank path.</param>
public static void Main(string[] args)
{
    if (args.Length < MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    bool verbose = PropertiesUtils.GetBool(options, "v", false);
    File testTreebank = options.Contains("t") ? new File(options.GetProperty("t")) : null;
    int maxGoldSentLen = PropertiesUtils.GetInt(options, "l", int.MaxValue);
    bool serInput = PropertiesUtils.GetBool(options, "o", false);
    string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (parsedArgs.Length != MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    File trainTreebank = new File(parsedArgs[0]);

    // DateTime.Now, not "new DateTime()": the parameterless constructor yields
    // DateTime.MinValue, which would make both timestamps and the elapsed time meaningless.
    DateTime startTime = DateTime.Now;
    log.Info("###################################");
    log.Info("### Joint Segmentation / Parser ###");
    log.Info("###################################");
    System.Console.Error.Printf("Start time: %s\n", startTime);

    JointParsingModel parsingModel = new JointParsingModel();
    parsingModel.SetVerbose(verbose);
    parsingModel.SetMaxEvalSentLen(maxGoldSentLen);
    parsingModel.SetSerInput(serInput);

    //WSGDEBUG -- Some stuff for eclipse debugging
    InputStream inputStream = null;
    try
    {
        try
        {
            if (Runtime.GetProperty("eclipse") == null)
            {
                inputStream = (serInput) ? new ObjectInputStream(new GZIPInputStream(Runtime.@in)) : Runtime.@in;
            }
            else
            {
                FileInputStream fileStream = new FileInputStream(new File("debug.2.xml"));
                inputStream = (serInput) ? new ObjectInputStream(new GZIPInputStream(fileStream)) : fileStream;
            }
        }
        catch (IOException e)
        {
            Sharpen.Runtime.PrintStackTrace(e);
            System.Environment.Exit(-1);
        }

        if (!trainTreebank.Exists())
        {
            log.Info("Training treebank does not exist!\n  " + trainTreebank.GetPath());
        }
        else if (testTreebank != null && !testTreebank.Exists())
        {
            log.Info("Test treebank does not exist!\n  " + testTreebank.GetPath());
        }
        else if (parsingModel.Run(trainTreebank, testTreebank, inputStream))
        {
            log.Info("Successful shutdown!");
        }
        else
        {
            log.Error("Parsing model failure.");
        }
    }
    finally
    {
        // Close only after the model has run; closing right after opening would hand
        // an already-closed stream to parsingModel.Run.
        if (inputStream != null)
        {
            try
            {
                inputStream.Close();
            }
            catch (IOException)
            {
                // Best effort: nothing useful can be done if closing fails at shutdown.
            }
        }
    }

    DateTime stopTime = DateTime.Now;
    long elapsedTime = stopTime.GetTime() - startTime.GetTime();
    log.Info();
    log.Info();
    System.Console.Error.Printf("Completed processing at %s\n", stopTime);
    System.Console.Error.Printf("Elapsed time: %d seconds\n", (int)(elapsedTime / 1000F));
}
/// <summary>
/// Creates an NER annotator from <paramref name="properties"/>: resolves the model list,
/// builds the classifier combiner, reads threading/length/time limits and sets up the
/// fine-grained, additional-rules and entity-mention components.
/// </summary>
/// <param name="properties">Configuration; keys under the <c>ner.</c> prefix are read.</param>
/// <exception cref="System.IO.IOException"/>
public NERCombinerAnnotator(Properties properties)
{
    // Resolve the comma-separated model list; an unset property selects the three default models.
    IList<string> modelPaths = new List<string>();
    string modelSpec = properties.GetProperty("ner.model");
    if (modelSpec == null)
    {
        modelSpec = DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel;
    }
    if (!modelSpec.IsEmpty())
    {
        Sharpen.Collections.AddAll(modelPaths, Arrays.AsList(modelSpec.Split(",")));
    }
    if (modelPaths.IsEmpty())
    {
        // Allow for no real NER model - can just use numeric classifiers or SUTime.
        // Have to unset ner.model, so unlikely that people got here by accident.
        log.Info("WARNING: no NER models specified");
    }

    bool numericClassifiers = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
    bool gazetteRegexner = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
    bool sutimeEnabled = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

    // option for setting doc date to be the present during each annotation
    usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);

    // option for setting doc date from a provided string; discarded unless it matches the date pattern
    providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
    Pattern datePattern = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}");
    Matcher dateMatcher = datePattern.Matcher(providedDocDate);
    bool dateWellFormed = dateMatcher.Matches();
    if (!dateWellFormed)
    {
        providedDocDate = string.Empty;
    }

    string nerLanguageName = PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null);
    NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(nerLanguageName, NERClassifierCombiner.NerLanguageDefault);
    bool verboseRequested = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

    string[] modelPathArray = Sharpen.Collections.ToArray(modelPaths, new string[modelPaths.Count]);
    Properties combinerProps = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);
    if (sutimeEnabled)
    {
        // Make sure SUTime parameters are included
        Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
        PropertiesUtils.OverWriteProperties(combinerProps, sutimeProps);
    }
    NERClassifierCombiner combiner = new NERClassifierCombiner(numericClassifiers, nerLanguage, sutimeEnabled, gazetteRegexner, combinerProps, modelPathArray);

    // The NER-specific thread count falls back to the global "nthreads" setting.
    int globalThreads = PropertiesUtils.GetInt(properties, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(properties, "ner.nthreads", globalThreads);
    this.maxTime = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
    this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
    string languageName = PropertiesUtils.GetString(properties, "ner.language", "en");
    this.language = LanguageInfo.GetLanguageFromString(languageName);

    // in case of Spanish, use the Spanish number regexner annotator
    if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
    {
        Properties spanishProps = new Properties();
        spanishProps["spanish.number.regexner.mapping"] = spanishNumberRegexRules;
        spanishProps["spanish.number.regexner.validpospattern"] = "^(NUM).*";
        spanishProps["spanish.number.regexner.ignorecase"] = "true";
        spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishProps);
    }

    // set up fine grained ner
    SetUpFineGrainedNER(properties);
    // set up additional rules ner
    SetUpAdditionalRulesNER(properties);
    // set up entity mentions
    SetUpEntityMentionBuilding(properties);

    Verbose = verboseRequested;
    this.ner = combiner;
}
/// <summary>
/// Command-line entry point: scores a guess treebank against a gold treebank with the
/// Collins dependency metric and prints the result.
/// </summary>
/// <param name="args">Command-line options followed by the gold file and the guess file.</param>
public static void Main(string[] args)
{
    if (args.Length < MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    bool verbose = PropertiesUtils.GetBool(options, "v", false);
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    int maxGoldYield = PropertiesUtils.GetInt(options, "g", int.MaxValue);
    int maxGuessYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (parsedArgs.Length != MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    File goldFile = new File(parsedArgs[0]);
    File guessFile = new File(parsedArgs[1]);
    ITreebankLangParserParams tlpp = language.@params;
    PrintWriter pwOut = tlpp.Pw();
    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());
    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());
    Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval depEval = new Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval("CollinsDep", true, tlpp.HeadFinder(), tlpp.TreebankLanguagePack().StartSymbol());
    ITreeTransformer tc = tlpp.Collinizer();

    //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    //don't match, we need to keep looking for the next gold tree that matches.
    //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    //status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;
    foreach (Tree guess in guessTreebank)
    {
        Tree evalGuess = tc.TransformTree(guess);
        if (guess.Yield().Count > maxGuessYield)
        {
            skippedGuessTrees++;
            continue;
        }
        bool doneEval = false;
        // Test doneEval BEFORE MoveNext(): MoveNext() advances the enumerator, so checking it
        // first would consume (and silently drop) one gold tree after every evaluated pair.
        while (!doneEval && goldItr.MoveNext())
        {
            Tree gold = goldItr.Current;
            Tree evalGold = tc.TransformTree(gold);
            goldLineId++;
            if (gold.Yield().Count > maxGoldYield)
            {
                // Over-long gold tree: try the next gold tree for this guess.
                continue;
            }
            if (evalGold.Yield().Count != evalGuess.Yield().Count)
            {
                pwOut.Println("Yield mismatch at gold line " + goldLineId);
                skippedGuessTrees++;
                //Default evalb behavior -- skip this guess tree
                break;
            }
            depEval.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
            //Move to the next guess parse
            doneEval = true;
        }
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", ((maxGuessYield < int.MaxValue) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
    }
    depEval.Display(true, pwOut);
    pwOut.Close();
}
/// <summary>Reads the integer property <c>coref.statistical.maxTrainExamplesPerDocument</c>.</summary>
/// <param name="props">Property set to read from.</param>
/// <returns>The configured value, or <c>int.MaxValue</c> when the property is not set.</returns>
public static int MaxTrainExamplesPerDocument(Properties props)
{
    int limit = PropertiesUtils.GetInt(props, "coref.statistical.maxTrainExamplesPerDocument", int.MaxValue);
    return limit;
}
/// <summary>Reads the integer property <c>coref.maxMentionDistanceWithStringMatch</c>.</summary>
/// <param name="props">Property set to read from.</param>
/// <returns>The configured value, or 500 when the property is not set.</returns>
public static int MaxMentionDistanceWithStringMatch(Properties props)
{
    int distance = PropertiesUtils.GetInt(props, "coref.maxMentionDistanceWithStringMatch", 500);
    return distance;
}
/// <summary>Reads the integer property named by <c>ThreadsProp</c>.</summary>
/// <param name="props">Property set to read from.</param>
/// <returns>
/// The configured value, or the number of available processors reported by the runtime
/// when the property is not set.
/// </returns>
public static int GetThreadCounts(Properties props)
{
    int processorCount = Runtime.GetRuntime().AvailableProcessors();
    return PropertiesUtils.GetInt(props, ThreadsProp, processorCount);
}