示例#1
0
        /// <summary>
        /// Builds a parser annotator whose options are read from <paramref name="props"/>
        /// under keys of the form <c>annotatorName + ".option"</c>.
        /// </summary>
        /// <param name="annotatorName">Prefix of every annotator-specific property key read here.</param>
        /// <param name="props">Configuration the options are looked up in.</param>
        /// <exception cref="ArgumentException">Thrown when the model lookup yields null.</exception>
        public ParserAnnotator(string annotatorName, Properties props)
        {
            string keyPrefix     = annotatorName + ".";
            string modelLocation = props.GetProperty(keyPrefix + "model", LexicalizedParser.DefaultParserLoc);

            if (modelLocation == null)
            {
                throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
            }

            // The verbosity flag is set first because it is handed to the model loader.
            this.Verbose = PropertiesUtils.GetBool(props, keyPrefix + "debug", false);
            string[] parserFlags = ConvertFlagsToArray(props.GetProperty(keyPrefix + "flags"));
            this.parser            = LoadModel(modelLocation, Verbose, parserFlags);
            this.maxSentenceLength = PropertiesUtils.GetInt(props, keyPrefix + "maxlen", -1);

            // Optional tree map, created reflectively from the configured class name.
            string treeMapClassName = props.GetProperty(keyPrefix + "treemap");
            if (treeMapClassName != null)
            {
                this.treeMap = ReflectionLoading.LoadByReflection(treeMapClassName, props);
            }
            else
            {
                this.treeMap = null;
            }

            this.maxParseTime = PropertiesUtils.GetLong(props, keyPrefix + "maxtime", -1);
            this.kBest        = PropertiesUtils.GetInt(props, keyPrefix + "kbest", 1);
            this.keepPunct    = PropertiesUtils.GetBool(props, keyPrefix + "keepPunct", true);

            // Dependency graphs are only built when the parser parameters support them.
            string buildGraphsKey = keyPrefix + "buildgraphs";
            if (this.parser.GetTLPParams().SupportsBasicDependencies())
            {
                this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsKey, true);
            }
            else
            {
                if (PropertiesUtils.GetBool(props, buildGraphsKey))
                {
                    log.Info("WARNING: " + buildGraphsKey + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
                }
                this.BuildGraphs = false;
            }

            if (!this.BuildGraphs)
            {
                this.gsf = null;
            }
            else
            {
                bool useOriginalDependencies = PropertiesUtils.GetBool(props, keyPrefix + "originalDependencies", false);
                parser.GetTLPParams().SetGenerateOriginalDependencies(useOriginalDependencies);
                ITreebankLanguagePack languagePack = parser.GetTLPParams().TreebankLanguagePack();
                // Keep punctuation by accepting every word, otherwise use the language pack's reject filter.
                IPredicate <string> punctFilter = this.keepPunct ? Filters.AcceptFilter() : languagePack.PunctuationWordRejectFilter();
                this.gsf = languagePack.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
            }

            // The annotator-specific thread count falls back to the global "nthreads" value.
            int globalThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
            this.nThreads = PropertiesUtils.GetInt(props, keyPrefix + "nthreads", globalThreads);

            bool binaryTreesDefault = StanfordCoreNLP.UsesBinaryTrees(props);
            this.saveBinaryTrees = PropertiesUtils.GetBool(props, keyPrefix + "binaryTrees", binaryTreesDefault);
            this.noSquash        = PropertiesUtils.GetBool(props, keyPrefix + "nosquash", false);

            string extrasName = props.GetProperty(keyPrefix + "extradependencies", "NONE");
            this.extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
        }
示例#2
0
        /// <summary>
        /// Reads the options from <paramref name="props"/>, using <paramref name="name"/> as the key prefix.
        /// Each lookup passes the field's current value as the default.
        /// </summary>
        /// <param name="name">Prefix of every property key read here.</param>
        /// <param name="props">Configuration the options are looked up in.</param>
        /// <exception cref="Exception">Thrown when a binder cannot be created or initialised.</exception>
        public Options(string name, Properties props)
        {
            string prefix = name + ".";

            includeRange     = PropertiesUtils.GetBool(props, prefix + "includeRange", includeRange);
            markTimeRanges   = PropertiesUtils.GetBool(props, prefix + "markTimeRanges", markTimeRanges);
            includeNested    = PropertiesUtils.GetBool(props, prefix + "includeNested", includeNested);
            restrictToTimex3 = PropertiesUtils.GetBool(props, prefix + "restrictToTimex3", restrictToTimex3);
            string heuristicLevelName = props.GetProperty(prefix + "teRelHeurLevel", teRelHeurLevel.ToString());
            teRelHeurLevel   = Options.RelativeHeuristicLevel.ValueOf(heuristicLevelName);
            verbose          = PropertiesUtils.GetBool(props, prefix + "verbose", verbose);

            // Pick the rules file for the configured language; unknown languages fall back to English.
            language = props.GetProperty(prefix + "language", language);
            if (!languageToRulesFiles.Keys.Contains(language))
            {
                language = "english";
            }
            string languageRules = languageToRulesFiles[language];
            // An explicit rules property overrides the per-language default.
            grammarFilename  = props.GetProperty(prefix + "rules", languageRules);
            searchForDocDate = PropertiesUtils.GetBool(props, prefix + "searchForDocDate", searchForDocDate);

            // Work out which binder classes to load: the configured ones or the defaults.
            string   bindersKey = prefix + "binders";
            string[] binderClassNames;
            int      binderCount;
            if (props.GetProperty(bindersKey) != null)
            {
                binderCount      = PropertiesUtils.GetInt(props, bindersKey, 0);
                binderClassNames = new string[binderCount];
                // Binder keys are numbered from 1.
                for (int number = 1; number <= binderCount; number++)
                {
                    binderClassNames[number - 1] = props.GetProperty(prefix + "binder." + number);
                }
            }
            else
            {
                binderClassNames = DefaultBinders;
                binderCount      = DefaultBinders.Length;
            }

            // No binders are created when there are none, or when the "STS" runtime property is set.
            if (binderCount <= 0 || Runtime.GetProperty("STS") != null)
            {
                return;
            }

            binders = new Env.IBinder[binderCount];
            for (int idx = 0; idx < binderCount; idx++)
            {
                int    binderNumber = idx + 1;
                string initPrefix   = prefix + "binder." + binderNumber + ".";
                try
                {
                    Type binderType = Sharpen.Runtime.GetType(binderClassNames[idx]);
                    binders[idx] = (Env.IBinder)System.Activator.CreateInstance(binderType);
                    binders[idx].Init(initPrefix, props);
                }
                catch (Exception ex)
                {
                    throw new Exception("Error initializing binder " + binderNumber, ex);
                }
            }
        }
示例#3
0
        /// <summary>Creates a dependency parse annotator configured from <paramref name="properties"/>.</summary>
        /// <param name="properties">Source of the "model", "testThreads", "sentenceTimeout" and "extradependencies" settings.</param>
        public DependencyParseAnnotator(Properties properties)
        {
            // The parser is loaded first, from the configured (or default) model file.
            string modelLocation = PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel);
            parser = DependencyParser.LoadFromModelFile(modelLocation, properties);

            nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
            maxTime  = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

            string extrasName = properties.GetProperty("extradependencies", "NONE");
            extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
        }
        /// <summary>
        /// Creates a POS tagger annotator whose options are read from <paramref name="props"/>
        /// under keys prefixed with <paramref name="annotatorName"/>.
        /// </summary>
        /// <param name="annotatorName">Prefix of the annotator-specific property keys.</param>
        /// <param name="props">Configuration the options are looked up in.</param>
        public POSTaggerAnnotator(string annotatorName, Properties props)
        {
            // Use the default model path when none is configured.
            string modelLocation = props.GetProperty(annotatorName + ".model") ?? DefaultPaths.DefaultPosModel;
            bool   loadVerbosely = PropertiesUtils.GetBool(props, annotatorName + ".verbose", false);
            this.pos = LoadModel(modelLocation, loadVerbosely);

            this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", int.MaxValue);

            // The annotator-specific thread count falls back to the global "nthreads" value.
            int globalThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
            this.nThreads  = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", globalThreads);
            this.reuseTags = PropertiesUtils.GetBool(props, annotatorName + ".reuseTags", false);
        }
示例#5
0
        /// <summary>
        /// Overrides the current option values with the settings found in <paramref name="props"/>.
        /// Each lookup passes the field's current value as the default.
        /// </summary>
        /// <param name="props">Properties holding the training, runtime and language options.</param>
        private void SetProperties(Properties props)
        {
            // Training options
            trainingThreads       = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
            wordCutOff            = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
            initRange             = PropertiesUtils.GetDouble(props, "initRange", initRange);
            maxIter               = PropertiesUtils.GetInt(props, "maxIter", maxIter);
            batchSize             = PropertiesUtils.GetInt(props, "batchSize", batchSize);
            adaEps                = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
            adaAlpha              = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
            regParameter          = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
            dropProb              = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
            hiddenSize            = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
            embeddingSize         = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
            numPreComputed        = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
            evalPerIter           = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
            clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
            saveIntermediate      = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
            unlabeled             = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
            cPOS   = PropertiesUtils.GetBool(props, "cPOS", cPOS);
            noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
            doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);
            // Runtime parsing options
            sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
            tagger            = PropertiesUtils.GetString(props, "tagger.model", tagger);
            string escaperClass = props.GetProperty("escaper");

            // The escaper is optional; when configured it is instantiated reflectively.
            escaper = escaperClass != null ? ReflectionLoading.LoadByReflection(escaperClass) : null;

            // Language options
            language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;
            // Start from the language pack belonging to the selected language's parser parameters.
            // (This expression had been replaced by an e-mail obfuscation placeholder and is restored here.)
            tlp      = language.@params.TreebankLanguagePack();
            // if a tlp was specified go with that
            string tlpCanonicalName = props.GetProperty("tlp");

            if (tlpCanonicalName != null)
            {
                try
                {
                    tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
                    System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
                }
                catch (Exception)
                {
                    // Best effort: report the failure and keep the language's default pack.
                    System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
                }
            }
        }
示例#6
0
        /// <summary>
        /// Command-line entry point: either evaluates the segmenter on a test file or
        /// segments raw text read from a file or standard input.
        /// </summary>
        /// <param name="args">Command-line arguments; usage is printed when empty or when "help" is given.</param>
        public static void Main(string[] args)
        {
            // Strips off hyphens
            Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());

            bool wantsUsage = options.Contains("help") || args.Length == 0;
            if (wantsUsage)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            int threadCount = PropertiesUtils.GetInt(options, "nthreads", 1);

            Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter segmenter = GetSegmenter(options);
            // Decode either an evaluation file or raw text
            try
            {
                // Output goes to the console, encoded with the output encoding if set,
                // else the input encoding if set, else the writer's default.
                PrintWriter pwOut;
                if (segmenter.flags.outputEncoding != null)
                {
                    OutputStreamWriter encodedOut = new OutputStreamWriter(System.Console.Out, segmenter.flags.outputEncoding);
                    pwOut = new PrintWriter(encodedOut, true);
                }
                else if (segmenter.flags.inputEncoding != null)
                {
                    OutputStreamWriter encodedOut = new OutputStreamWriter(System.Console.Out, segmenter.flags.inputEncoding);
                    pwOut = new PrintWriter(encodedOut, true);
                }
                else
                {
                    pwOut = new PrintWriter(System.Console.Out, true);
                }

                if (segmenter.flags.testFile == null)
                {
                    // No test file: segment raw text from the text file, or from stdin when none is given.
                    BufferedReader reader;
                    if (segmenter.flags.textFile == null)
                    {
                        reader = IOUtils.ReaderFromStdin();
                    }
                    else
                    {
                        reader = IOUtils.ReaderFromString(segmenter.flags.textFile, segmenter.flags.inputEncoding);
                    }
                    double charsPerSec = Decode(segmenter, reader, pwOut, threadCount);
                    IOUtils.CloseIgnoringExceptions(reader);
                    System.Console.Error.Printf("Done! Processed input text at %.2f input characters/second%n", charsPerSec);
                }
                else if (segmenter.flags.answerFile == null)
                {
                    segmenter.Evaluate(pwOut);
                }
                else
                {
                    Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter.EvaluateRawText(pwOut);
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException)
            {
                System.Console.Error.Printf("%s: Could not open %s%n", typeof(Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter).FullName, segmenter.flags.textFile);
            }
        }
示例#7
0
        /// <summary>Run the Evalb scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// The default language is English. Guess and gold trees are read pairwise; a pair is skipped
        /// (and counted) when the gold yield exceeds the "y" limit or when the two yields differ in length.
        /// </remarks>
        /// <param name="args">Command-line options followed by the gold file and the guess file.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, OptionArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            int  maxGoldYield      = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool Verbose           = PropertiesUtils.GetBool(options, "v", false);
            // "s" both enables sorting by F1 and gives the number of worst trees to emit.
            bool sortByF1          = PropertiesUtils.HasProperty(options, "s");
            int  worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0);
            PriorityQueue <Triple <double, Tree, Tree> > queue = sortByF1 ? new PriorityQueue <Triple <double, Tree, Tree> >(2000, new Evalb.F1Comparator()) : null;
            bool   doCatLevel = PropertiesUtils.GetBool(options, "c", false);
            string labelRegex = options.GetProperty("f", null);
            string encoding   = options.GetProperty("e", "UTF-8");

            // The positional arguments are stored under the empty key.
            // NOTE(review): "\\s+" is a Java regex; confirm the Split used here applies regex semantics.
            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (parsedArgs.Length != minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            string goldFile  = parsedArgs[0];
            string guessFile = parsedArgs[1];

            // Command-line has been parsed. Configure the metric for evaluation.
            tlpp.SetInputEncoding(encoding);
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Evalb            metric   = new Evalb("Evalb LP/LR", true);
            EvalbByCat       evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
            ITreeTransformer tc       = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;

            // MoveNext() consumes an element, so its result is remembered instead of being
            // re-queried after the loop; re-querying would miss a guess treebank that is
            // exactly one tree longer than the gold treebank.
            bool guessHasNext = false;
            bool goldHasNext  = false;

            while (true)
            {
                guessHasNext = guessItr.MoveNext();
                goldHasNext  = goldItr.MoveNext();
                if (!guessHasNext || !goldHasNext)
                {
                    break;
                }
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > maxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                if (doCatLevel)
                {
                    evalbCat.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                }
                if (sortByF1)
                {
                    StoreTrees(queue, guessTree, goldTree, metric.GetLastF1());
                }
            }
            // One side still had a tree when the other ran out.
            if (guessHasNext || goldHasNext)
            {
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Println();
            if (doCatLevel)
            {
                evalbCat.Display(true, pwOut);
                pwOut.Println();
            }
            if (sortByF1)
            {
                EmitSortedTrees(queue, worstKTreesToEmit, guessFile);
            }
            pwOut.Close();
        }
示例#8
0
 /// <summary>
 /// Reads the integer property whose key is <c>MaxSentDistProp</c> with "SIEVENAME"
 /// replaced by <paramref name="sievename"/>; 1000 is passed as the default.
 /// </summary>
 public static int GetMaxSentDistForSieve(Properties props, string sievename)
 {
     string sieveKey = MaxSentDistProp.Replace("SIEVENAME", sievename);
     return PropertiesUtils.GetInt(props, sieveKey, 1000);
 }
示例#9
0
 /// <summary>
 /// Reads the integer property whose key is <c>ThresFeaturecountProp</c> with "SIEVENAME"
 /// replaced by <paramref name="sievename"/>; 20 is passed as the default.
 /// </summary>
 public static int GetFeatureCountThreshold(Properties props, string sievename)
 {
     string sieveKey = ThresFeaturecountProp.Replace("SIEVENAME", sievename);
     return PropertiesUtils.GetInt(props, sieveKey, 20);
 }
示例#10
0
 /// <summary>
 /// Reads the integer property whose key is <c>NumFeaturesProp</c> with "SIEVENAME"
 /// replaced by <paramref name="sievename"/>; 30 is passed as the default.
 /// </summary>
 public static int GetNumFeatures(Properties props, string sievename)
 {
     string sieveKey = NumFeaturesProp.Replace("SIEVENAME", sievename);
     return PropertiesUtils.GetInt(props, sieveKey, 30);
 }
示例#11
0
 /// <summary>
 /// Reads the integer property whose key is <c>TreeDepthProp</c> with "SIEVENAME"
 /// replaced by <paramref name="sievename"/>; 0 is passed as the default.
 /// </summary>
 public static int GetTreeDepth(Properties props, string sievename)
 {
     string sieveKey = TreeDepthProp.Replace("SIEVENAME", sievename);
     return PropertiesUtils.GetInt(props, sieveKey, 0);
 }
        /// <summary>
        /// Command-line entry point: loads a treebank and prints each tree whose yield does not exceed
        /// the "y" limit in the first selected format (trees, flat yield, POS yield, or word/tag pairs).
        /// </summary>
        /// <param name="args">Command-line options followed by the treebank path.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            // Process command-line options
            Properties options      = StringUtils.ArgsToProperties(args, optionArgDefinitions);
            string     treebankPath = options.GetProperty(string.Empty);

            if (string.IsNullOrEmpty(treebankPath))
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            int  maxYieldLength = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool asTrees        = PropertiesUtils.GetBool(options, "p", false);
            bool asFlatYield    = PropertiesUtils.GetBool(options, "f", false);
            bool asPosYield     = PropertiesUtils.GetBool(options, "a", false);
            bool asWordTagPairs = PropertiesUtils.GetBool(options, "t", false);

            Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            string encoding = options.GetProperty("e", "UTF-8");

            // The same encoding is used for reading and writing.
            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            DiskTreebank treebank = tlpp.DiskTreebank();

            treebank.LoadPath(treebankPath);
            // Read the treebank
            PrintWriter writer    = tlpp.Pw();
            int         treeCount = 0;

            foreach (Tree tree in treebank)
            {
                // Over-long trees are neither counted nor printed.
                if (tree.Yield().Count > maxYieldLength)
                {
                    continue;
                }
                ++treeCount;

                // Only the first enabled output format is used.
                if (asTrees)
                {
                    writer.Println(tree.ToString());
                }
                else if (asFlatYield)
                {
                    writer.Println(SentenceUtils.ListToString(tree.Yield()));
                }
                else if (asPosYield)
                {
                    writer.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
                }
                else if (asWordTagPairs)
                {
                    IList <CoreLabel> taggedYield = tree.TaggedLabeledYield();
                    foreach (CoreLabel token in taggedYield)
                    {
                        writer.Printf("%s\t%s%n", token.Word(), token.Tag());
                    }
                    writer.Println();
                }
            }
            System.Console.Error.Printf("Read %d trees.%n", treeCount);
        }
示例#13
0
 /// <summary>Reads the integer property keyed by <c>SeedProp</c>; 1 is passed as the default.</summary>
 public static int GetSeed(Properties props)
 {
     int seed = PropertiesUtils.GetInt(props, SeedProp, 1);
     return seed;
 }
 // ---------- Heuristic Mention Filtering ----------
 /// <summary>
 /// Reads the "coref.maxMentionDistance" integer property. The default passed is
 /// <c>int.MaxValue</c> when <c>Conll(props)</c> is true and 50 otherwise.
 /// </summary>
 public static int MaxMentionDistance(Properties props)
 {
     int fallbackDistance = 50;
     if (Conll(props))
     {
         fallbackDistance = int.MaxValue;
     }
     return PropertiesUtils.GetInt(props, "coref.maxMentionDistance", fallbackDistance);
 }
示例#15
0
        /// <summary>
        /// Command-line entry point for the joint segmentation / parsing model: parses the options,
        /// opens the input stream, runs the model on the training (and optional test) treebank and
        /// reports the elapsed time.
        /// </summary>
        /// <param name="args">Command-line options followed by the training treebank path.</param>
        public static void Main(string[] args)
        {
            if (args.Length < MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options        = StringUtils.ArgsToProperties(args, OptionArgDefs());
            bool       Verbose        = PropertiesUtils.GetBool(options, "v", false);
            File       testTreebank   = options.Contains("t") ? new File(options.GetProperty("t")) : null;
            int        maxGoldSentLen = PropertiesUtils.GetInt(options, "l", int.MaxValue);
            bool       SerInput       = PropertiesUtils.GetBool(options, "o", false);

            // The positional arguments are stored under the empty key.
            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (parsedArgs.Length != MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            File     trainTreebank = new File(parsedArgs[0]);
            // NOTE(review): if DateTime here is System.DateTime, new DateTime() is not the current time - confirm.
            DateTime startTime     = new DateTime();

            log.Info("###################################");
            log.Info("### Joint Segmentation / Parser ###");
            log.Info("###################################");
            System.Console.Error.Printf("Start time: %s\n", startTime);
            JointParsingModel parsingModel = new JointParsingModel();

            parsingModel.SetVerbose(Verbose);
            parsingModel.SetMaxEvalSentLen(maxGoldSentLen);
            parsingModel.SetSerInput(SerInput);
            //WSGDEBUG -- Some stuff for eclipse debugging
            InputStream inputStream = null;

            try
            {
                try
                {
                    // Read from standard input unless the "eclipse" runtime property selects the debug file.
                    if (Runtime.GetProperty("eclipse") == null)
                    {
                        inputStream = (SerInput) ? new ObjectInputStream(new GZIPInputStream(Runtime.@in)) : Runtime.@in;
                    }
                    else
                    {
                        FileInputStream fileStream = new FileInputStream(new File("debug.2.xml"));
                        inputStream = (SerInput) ? new ObjectInputStream(new GZIPInputStream(fileStream)) : fileStream;
                    }
                }
                catch (IOException e)
                {
                    Sharpen.Runtime.PrintStackTrace(e);
                    System.Environment.Exit(-1);
                }

                if (!trainTreebank.Exists())
                {
                    log.Info("Training treebank does not exist!\n  " + trainTreebank.GetPath());
                }
                else if (testTreebank != null && !testTreebank.Exists())
                {
                    log.Info("Test treebank does not exist!\n  " + testTreebank.GetPath());
                }
                else if (parsingModel.Run(trainTreebank, testTreebank, inputStream))
                {
                    log.Info("Successful shutdown!");
                }
                else
                {
                    log.Error("Parsing model failure.");
                }
            }
            finally
            {
                // Close the stream only after the parsing model has finished with it;
                // closing it right after opening would hand Run() an already-closed stream.
                if (inputStream != null)
                {
                    try
                    {
                        inputStream.Close();
                    }
                    catch (IOException)
                    {
                        // Best-effort close: nothing useful can be done if it fails at shutdown.
                    }
                }
            }
            DateTime stopTime    = new DateTime();
            long     elapsedTime = stopTime.GetTime() - startTime.GetTime();

            log.Info();
            log.Info();
            System.Console.Error.Printf("Completed processing at %s\n", stopTime);
            System.Console.Error.Printf("Elapsed time: %d seconds\n", (int)(elapsedTime / 1000F));
        }
        /// <summary>
        /// Builds an NER annotator from <paramref name="properties"/>: resolves the model paths,
        /// reads the "ner.*" options, creates the classifier combiner and runs the
        /// fine-grained, additional-rules and entity-mention setup steps.
        /// </summary>
        /// <param name="properties">Configuration the NER options are read from.</param>
        /// <exception cref="System.IO.IOException"/>
        public NERCombinerAnnotator(Properties properties)
        {
            IList <string> models     = new List <string>();
            string         modelNames = properties.GetProperty("ner.model");

            // When "ner.model" is absent, use the three default models as a comma-separated list.
            if (modelNames == null)
            {
                modelNames = DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel;
            }
            // An explicitly empty "ner.model" leaves the model list empty.
            if (!modelNames.IsEmpty())
            {
                Sharpen.Collections.AddAll(models, Arrays.AsList(modelNames.Split(",")));
            }
            if (models.IsEmpty())
            {
                // Allow for no real NER model - can just use numeric classifiers or SUTime.
                // Have to unset ner.model, so unlikely that people got here by accident.
                log.Info("WARNING: no NER models specified");
            }
            bool applyNumericClassifiers = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
            bool applyRegexner           = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
            bool useSUTime = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            // option for setting doc date to be the present during each annotation
            usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);
            // option for setting doc date from a provided string
            providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
            // The provided date is only kept when it matches the 4-2-2 digit pattern below.
            Pattern p = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}");
            Matcher m = p.Matcher(providedDocDate);

            if (!m.Matches())
            {
                providedDocDate = string.Empty;
            }
            NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null), NERClassifierCombiner.NerLanguageDefault);
            bool verbose = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

            string[]   loadPaths          = Sharpen.Collections.ToArray(models, new string[models.Count]);
            // Only the selected pass-down properties are forwarded to the combiner.
            Properties combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);

            if (useSUTime)
            {
                // Make sure SUTime parameters are included
                Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
                PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
            }
            NERClassifierCombiner nerCombiner = new NERClassifierCombiner(applyNumericClassifiers, nerLanguage, useSUTime, applyRegexner, combinerProperties, loadPaths);

            // "ner.nthreads" falls back to the global "nthreads" value.
            this.nThreads          = PropertiesUtils.GetInt(properties, "ner.nthreads", PropertiesUtils.GetInt(properties, "nthreads", 1));
            this.maxTime           = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
            this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
            this.language          = LanguageInfo.GetLanguageFromString(PropertiesUtils.GetString(properties, "ner.language", "en"));
            // in case of Spanish, use the Spanish number regexner annotator
            if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                Properties spanishNumberRegexNerProperties = new Properties();
                spanishNumberRegexNerProperties["spanish.number.regexner.mapping"]         = spanishNumberRegexRules;
                spanishNumberRegexNerProperties["spanish.number.regexner.validpospattern"] = "^(NUM).*";
                spanishNumberRegexNerProperties["spanish.number.regexner.ignorecase"]      = "true";
                spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishNumberRegexNerProperties);
            }
            // set up fine grained ner
            SetUpFineGrainedNER(properties);
            // set up additional rules ner
            SetUpAdditionalRulesNER(properties);
            // set up entity mentions
            SetUpEntityMentionBuilding(properties);
            // The verbosity flag and the combiner are stored last, after the setup steps above.
            Verbose  = verbose;
            this.ner = nerCombiner;
        }
        /// <summary>
        /// Command-line entry point: scores a treebank of guess parses against a gold treebank
        /// with <c>CollinsDepEval</c> and prints the summaries and results to the writer
        /// supplied by the selected language's parser parameters.
        /// </summary>
        /// <remarks>
        /// Options read from <paramref name="args"/>: <c>v</c> (verbose, default false),
        /// <c>l</c> (language, default English), <c>g</c> (maximum gold yield) and
        /// <c>y</c> (maximum guess yield). The remaining positional arguments are the gold
        /// file followed by the guess file. When the argument count is wrong, the usage
        /// text is logged and the process exits with status -1.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        public static void Main(string[] args)
        {
            if (args.Length < MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options       = StringUtils.ArgsToProperties(args, OptionArgDefs());
            bool       verbose       = PropertiesUtils.GetBool(options, "v", false);
            Language   language      = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            int        maxGoldYield  = PropertiesUtils.GetInt(options, "g", int.MaxValue);
            int        maxGuessYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);

            // string.Split treats a string separator literally (it is not a regex as in Java),
            // so split on whitespace explicitly: a null separator array means "any white-space".
            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty)
                                         .Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
            if (parsedArgs.Length != MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            File goldFile  = new File(parsedArgs[0]);
            File guessFile = new File(parsedArgs[1]);
            ITreebankLangParserParams tlpp = language.@params;
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval depEval = new Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval("CollinsDep", true, tlpp.HeadFinder(), tlpp.TreebankLanguagePack().StartSymbol());
            ITreeTransformer tc = tlpp.Collinizer();
            //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
            //don't match, we need to keep looking for the next gold tree that matches.
            //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
            //status as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr = goldTreebank.GetEnumerator();
            int goldLineId             = 0;
            int skippedGuessTrees      = 0;

            foreach (Tree guess in guessTreebank)
            {
                Tree evalGuess = tc.TransformTree(guess);
                if (guess.Yield().Count > maxGuessYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                bool doneEval = false;
                // Check doneEval BEFORE MoveNext(): MoveNext() advances the enumerator, so
                // calling it once evaluation is finished would silently discard the gold tree
                // that belongs to the next guess tree.
                while (!doneEval && goldItr.MoveNext())
                {
                    Tree gold     = goldItr.Current;
                    Tree evalGold = tc.TransformTree(gold);
                    goldLineId++;
                    if (gold.Yield().Count > maxGoldYield)
                    {
                        // Gold tree is over the length limit; try the next gold tree.
                        continue;
                    }
                    if (evalGold.Yield().Count != evalGuess.Yield().Count)
                    {
                        pwOut.Println("Yield mismatch at gold line " + goldLineId);
                        skippedGuessTrees++;
                        //Default evalb behavior -- skip this guess tree
                        break;
                    }
                    depEval.Evaluate(evalGuess, evalGold, verbose ? pwOut : null);
                    //Move to the next guess parse
                    doneEval = true;
                }
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", ((maxGuessYield < int.MaxValue) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
            }
            depEval.Display(true, pwOut);
            pwOut.Close();
        }
示例#18
0
 /// <summary>
 /// Looks up the <c>coref.statistical.maxTrainExamplesPerDocument</c> setting.
 /// </summary>
 /// <param name="props">Properties to read the setting from.</param>
 /// <returns>
 /// The result of <c>PropertiesUtils.GetInt</c> for that key, with
 /// <c>int.MaxValue</c> passed as the default.
 /// </returns>
 public static int MaxTrainExamplesPerDocument(Properties props)
 {
     const string key = "coref.statistical.maxTrainExamplesPerDocument";
     int limit = PropertiesUtils.GetInt(props, key, int.MaxValue);
     return limit;
 }
 /// <summary>
 /// Looks up the <c>coref.maxMentionDistanceWithStringMatch</c> setting.
 /// </summary>
 /// <param name="props">Properties to read the setting from.</param>
 /// <returns>
 /// The result of <c>PropertiesUtils.GetInt</c> for that key, with 500 passed
 /// as the default.
 /// </returns>
 public static int MaxMentionDistanceWithStringMatch(Properties props)
 {
     const string key = "coref.maxMentionDistanceWithStringMatch";
     const int fallback = 500;
     int distance = PropertiesUtils.GetInt(props, key, fallback);
     return distance;
 }
示例#20
0
 /// <summary>
 /// Looks up the thread-count setting stored under <c>ThreadsProp</c>.
 /// </summary>
 /// <param name="props">Properties to read the setting from.</param>
 /// <returns>
 /// The result of <c>PropertiesUtils.GetInt</c> for <c>ThreadsProp</c>, with
 /// <c>Runtime.GetRuntime().AvailableProcessors()</c> passed as the default.
 /// </returns>
 public static int GetThreadCounts(Properties props)
 {
     var fallback = Runtime.GetRuntime().AvailableProcessors();
     return PropertiesUtils.GetInt(props, ThreadsProp, fallback);
 }