Beispiel #1
0
 /// <summary>
 /// Creates the coreference annotator: builds the underlying <c>CorefSystem</c> and,
 /// unless <c>coref.useCustomMentionDetection</c> is set, the default mention annotator.
 /// </summary>
 /// <param name="props">
 /// Pipeline properties. The key <c>coref.printConLLLoadingMessage</c> is set on this object
 /// while the coref system is constructed and removed again afterwards.
 /// </param>
 /// <exception cref="Exception">
 /// Thrown when the hybrid algorithm is requested for English, or when constructing the
 /// coref system fails; the original failure is attached as the inner exception.
 /// </exception>
 public CorefAnnotator(Properties props)
 {
     this.props = props;
     try
     {
         // coref.language = ENGLISH combined with coref.algorithm = hybrid is not supported at this time
         bool hybridRequested  = CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid);
         if (hybridRequested && CorefProperties.GetLanguage(props).Equals(Locale.English))
         {
             string unsupported = "Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language";
             log.Error(unsupported);
             // Carry the reason in the exception itself instead of throwing an empty one.
             throw new Exception(unsupported);
         }
         // Suppress the loading message while the coref system is built.
         props.SetProperty("coref.printConLLLoadingMessage", "false");
         try
         {
             corefSystem = new CorefSystem(props);
         }
         finally
         {
             // Always undo the temporary property, even when construction fails,
             // so the caller's Properties object is not left modified.
             props.Remove("coref.printConLLLoadingMessage");
         }
     }
     catch (Exception e)
     {
         log.Error("Error creating CorefAnnotator...terminating pipeline construction!");
         log.Error(e);
         // Preserve the root cause as the inner exception.
         throw new Exception("Error creating CorefAnnotator...terminating pipeline construction!", e);
     }
     // unless custom mention detection is set, just use the default coref mention detector
     performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
     if (performMentionDetection)
     {
         mentionAnnotator = new CorefMentionAnnotator(props);
     }
 }
Beispiel #2
0
 /// <summary>
 /// Loads the typed properties from the supplied configuration, creates the Camunda client
 /// from them and registers that client as a scoped service.
 /// </summary>
 /// <param name="configuration">Factory that yields the configuration to read the properties from.</param>
 /// <returns>This builder, to allow call chaining.</returns>
 public CamundaWorkersBuilder SetProperties(Func <IConfiguration> configuration)
 {
     // Fail immediately when no properties could be created from the configuration.
     Properties = PropertiesUtils.Create <TProperties>(configuration())
                  ?? throw CamundaWorkersBuilderException.PropertiesAreEmpty();

     CamundaClient = new SyrinxCamundaClientService(Properties.SyrinxProperties);

     // The registration reads the CamundaClient member each time it is resolved.
     ServiceCollection.AddScoped(_ => CamundaClient);

     return this;
 }
Beispiel #3
0
        /// <summary>
        /// Applies dataset options: optional split set, the CC tagset flag, the treebank,
        /// and the lexical / part-of-speech mappers.
        /// </summary>
        /// <param name="opts">Options to read the configuration parameters from.</param>
        /// <returns>The value returned by the base implementation.</returns>
        public override bool SetOptions(Properties opts)
        {
            bool baseResult = base.SetOptions(opts);

            // Optional split file.
            if (opts.Contains(ConfigParser.paramSplit))
            {
                splitSet = MakeSplitSet(opts.GetProperty(ConfigParser.paramSplit));
            }

            // The treebank reader factory depends on the CC tagset flag, so read the flag first.
            CcTagset = PropertiesUtils.GetBool(opts, ConfigParser.paramCCTagset, false);
            treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CcTagset), FrenchTreebankLanguagePack.FtbEncoding);

            // Install a default lexical mapper only when none has been provided.
            if (lexMapper == null)
            {
                lexMapper = new DefaultMapper();
                lexMapper.Setup(null, lexMapOptions.Split(","));
            }

            // Nothing more to do when no mapping files are configured.
            if (pathsToMappings.Count == 0)
            {
                return baseResult;
            }

            if (posMapper == null)
            {
                posMapper = new DefaultMapper();
            }
            foreach (File mappingFile in pathsToMappings)
            {
                posMapper.Setup(mappingFile);
            }
            return baseResult;
        }
Beispiel #4
0
        /// <summary>
        /// Creates a true-case annotator backed by a biased CRF classifier.
        /// </summary>
        /// <param name="modelLoc">Location of the classifier model; must not be null.</param>
        /// <param name="classBias">
        /// Optional comma-separated list of <c>class:weight</c> pairs; each weight is applied as a
        /// bias for the named class. Weights are parsed with the invariant culture.
        /// </param>
        /// <param name="mixedCaseFileName">File from which the mixed-case word map is loaded.</param>
        /// <param name="overwriteText">Stored on the annotator for later use.</param>
        /// <param name="verbose">When true, each bias weight that is set is logged.</param>
        /// <exception cref="Exception">Thrown when <paramref name="modelLoc"/> is null.</exception>
        public TrueCaseAnnotator(string modelLoc, string classBias, string mixedCaseFileName, bool overwriteText, bool verbose)
        {
            // Validate before modelLoc is used to build the properties and the classifier;
            // previously the check ran only after both had already been created from a null value.
            if (modelLoc == null)
            {
                throw new Exception("Model location not specified for true-case classifier!");
            }
            this.overwriteText = overwriteText;
            this.verbose       = verbose;
            Properties props = PropertiesUtils.AsProperties("loadClassifier", modelLoc, "mixedCaseMapFile", mixedCaseFileName, "classBias", classBias);

            trueCaser = new CRFBiasedClassifier <CoreLabel>(props);
            trueCaser.LoadClassifierNoExceptions(modelLoc, props);
            if (classBias != null)
            {
                StringTokenizer biases = new StringTokenizer(classBias, ",");
                while (biases.HasMoreTokens())
                {
                    StringTokenizer bias  = new StringTokenizer(biases.NextToken(), ":");
                    string          cname = bias.NextToken();
                    // Bias weights are machine-formatted ("0.5"), so parse them independently of
                    // the current UI culture (which may use ',' as the decimal separator).
                    double          w     = double.Parse(bias.NextToken(), System.Globalization.CultureInfo.InvariantCulture);
                    trueCaser.SetBiasWeight(cname, w);
                    if (this.verbose)
                    {
                        log.Info("Setting bias for class " + cname + " to " + w);
                    }
                }
            }
            // Load map containing mixed-case words:
            mixedCaseMap = LoadMixedCaseMap(mixedCaseFileName);
        }
        /// <summary>
        /// Creates the segmenter annotator from the properties prefixed with <c>name + "."</c>.
        /// </summary>
        /// <param name="name">Annotator name used as the property prefix.</param>
        /// <param name="props">
        /// Source properties. <c>name.model</c> selects the model; every other <c>name.*</c> entry
        /// is passed to the model loader with the prefix stripped. <c>name.verbose</c> sets verbosity.
        /// </param>
        /// <exception cref="Exception">Thrown when no <c>name.model</c> property is present.</exception>
        public ArabicSegmenterAnnotator(string name, Properties props)
        {
            string model = null;
            // Keep only the properties that apply to this annotator
            Properties modelProps = new Properties();
            string     desiredKey = name + '.';

            foreach (string key in props.StringPropertyNames())
            {
                // Property keys are identifiers, not linguistic text: compare ordinally so the
                // match does not depend on the current culture.
                if (key.StartsWith(desiredKey, System.StringComparison.Ordinal))
                {
                    // skip past name and the subsequent "."
                    string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
                    if (modelKey.Equals("model"))
                    {
                        model = props.GetProperty(key);
                    }
                    else
                    {
                        modelProps.SetProperty(modelKey, props.GetProperty(key));
                    }
                }
            }
            this.Verbose = PropertiesUtils.GetBool(props, name + ".verbose", false);
            if (model == null)
            {
                throw new Exception("Expected a property " + name + ".model");
            }
            LoadModel(model, modelProps);
        }
Beispiel #6
0
        /// <summary>
        /// Command-line entry point of the rule-based tokenizer for Modern Standard French.
        /// </summary>
        /// <remarks>
        /// Reads text from standard input, tokenizes it and writes the tokens to standard
        /// output separated by single spaces. Newline tokens in the input end the current
        /// output line, so the output is delimited the same way as the input; no further
        /// sentence splitting is performed. A summary is printed to standard error at the end.
        /// </remarks>
        /// <param name="args">Command-line arguments (options such as help, ftb, options, encoding, lowerCase).</param>
        public static void Main(string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args, ArgOptionDefs());

            if (options.Contains("help"))
            {
                log.Info(Usage());
                return;
            }

            // Lexer options: choose the tokenizer factory variant.
            ITokenizerFactory <CoreLabel> tf;
            if (options.Contains("ftb"))
            {
                tf = FrenchTokenizer.FtbFactory();
            }
            else
            {
                tf = FrenchTokenizer.Factory();
            }

            // When run from this entry point newlines are always tokenized; there are no
            // options for more granular sentence splitting.
            string orthoOptions = options.GetProperty("options", string.Empty);
            if (orthoOptions.IsEmpty())
            {
                orthoOptions = "tokenizeNLs";
            }
            else
            {
                orthoOptions = orthoOptions + ",tokenizeNLs";
            }
            tf.SetOptions(orthoOptions);

            // Other options
            string encoding  = options.GetProperty("encoding", "UTF-8");
            bool   lowerCase = PropertiesUtils.GetBool(options, "lowerCase", false);

            // Counters for the final summary.
            int  lineCount  = 0;
            int  tokenCount = 0;
            long startedAt  = Runtime.NanoTime();

            try
            {
                ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
                bool needSeparator = false;
                while (tokenizer.MoveNext())
                {
                    ++tokenCount;
                    string word = tokenizer.Current.Word();
                    if (!word.Equals(FrenchLexer.NewlineToken))
                    {
                        // Ordinary token: separate it from the previous token on the same line.
                        if (needSeparator)
                        {
                            System.Console.Out.Write(" ");
                        }
                        System.Console.Out.Write(lowerCase ? word.ToLower(Locale.French) : word);
                        needSeparator = true;
                    }
                    else
                    {
                        // Newline token: finish the current output line.
                        ++lineCount;
                        needSeparator = false;
                        System.Console.Out.WriteLine();
                    }
                }
            }
            catch (UnsupportedEncodingException e)
            {
                log.Error(e);
            }

            long   elapsedNanos = Runtime.NanoTime() - startedAt;
            double linesPerSec  = (double)lineCount / (elapsedNanos / 1e9);

            System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", lineCount, tokenCount, linesPerSec);
        }
Beispiel #7
0
        /// <summary>
        /// Configures this reader from properties, optionally prefixed with <c>name + "."</c>.
        /// </summary>
        /// <remarks>
        /// Reads the keys <c>delimiter</c>, <c>replaceWhitespace</c>, <c>columns</c>, <c>tokens</c>
        /// and <c>tokenFactory</c>, then delegates to the overload taking the column map,
        /// token factory and tokens annotation class name.
        /// </remarks>
        /// <param name="name">Property prefix; when null no prefix is used.</param>
        /// <param name="props">Properties to read the configuration from.</param>
        public virtual void Init(string name, Properties props)
        {
            string prefix;
            if (name == null)
            {
                prefix = string.Empty;
            }
            else
            {
                prefix = name + ".";
            }

            // The delimiter pattern is only replaced when one is configured.
            string delimiterRegex = props.GetProperty(prefix + "delimiter");
            if (delimiterRegex != null)
            {
                delimiterPattern = Pattern.Compile(delimiterRegex);
            }

            // The current field value serves as the default.
            replaceWhitespace = PropertiesUtils.GetBool(props, prefix + "replaceWhitespace", replaceWhitespace);

            string columnMap = props.GetProperty(prefix + "columns");

            tokensAnnotationClassName = props.GetProperty(prefix + "tokens", "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation");

            string factoryTypeName = props.GetProperty(prefix + "tokenFactory");
            if (factoryTypeName == null)
            {
                // No custom factory configured: use the CoreLabel token factory.
                this.tokenFactory = (ICoreTokenFactory <IN>) new CoreLabelTokenFactory();
            }
            else
            {
                try
                {
                    // Instantiate the configured factory type by name.
                    this.tokenFactory = (ICoreTokenFactory <IN>)System.Activator.CreateInstance(Sharpen.Runtime.GetType(factoryTypeName));
                }
                catch (Exception e)
                {
                    throw new Exception(e);
                }
            }

            Init(columnMap, this.tokenFactory, this.tokensAnnotationClassName);
        }
Beispiel #8
0
        /// <summary>Register an Annotator that can be created by the pool.</summary>
        /// <remarks>
        /// Factories are used here so that many possible annotators can
        /// be defined within the AnnotatorPool, but an Annotator is only created
        /// when one is actually needed. The entry stored under <paramref name="name"/> is
        /// replaced when no entry exists yet or when its signature differs from the
        /// signature computed from <paramref name="name"/> and <paramref name="props"/>.
        /// The lookup and replacement run while holding a lock on the annotator cache.
        /// </remarks>
        /// <param name="name">The name to be associated with the Annotator.</param>
        /// <param name="props">The properties we are using to create the annotator</param>
        /// <param name="annotator">
        /// A factory that creates an instance of the desired Annotator.
        /// This should be an instance of
        /// <see cref="Edu.Stanford.Nlp.Util.Lazy{E}.Cache{E}(Java.Util.Function.ISupplier{T})"/>
        /// , if we want
        /// the annotator pool to behave as a cache (i.e., evict old annotators
        /// when the GC requires it).
        /// </param>
        /// <returns>true if a new annotator was created; false if we reuse an existing one</returns>
        public virtual bool Register(string name, Properties props, Lazy <IAnnotator> annotator)
        {
            bool   newAnnotator = false;
            // Signature derived from the name and properties; used to detect configuration changes.
            string newSig       = PropertiesUtils.GetSignature(name, props);

            lock (this.cachedAnnotators)
            {
                // NOTE(review): this assumes the indexer yields null for an unknown name
                // (the null check below relies on it) — confirm for the cache's map type.
                AnnotatorPool.CachedAnnotator oldAnnotator = this.cachedAnnotators[name];
                if (oldAnnotator == null || !Objects.Equals(oldAnnotator.signature, newSig))
                {
                    // the new annotator uses different properties so we need to update!
                    if (oldAnnotator != null)
                    {
                        // Only log that an existing entry with a different signature is being replaced.
                        log.Debug("Replacing old annotator \"" + name + "\" with signature [" + oldAnnotator.signature + "] with new annotator with signature [" + newSig + "]");
                    }
                    // Add the new annotator
                    this.cachedAnnotators[name] = new AnnotatorPool.CachedAnnotator(newSig, annotator);
                    // Unmount the old annotator
                    // NOTE(review): both callbacks are passed as null here, so as written this line
                    // cannot unmount anything; the callbacks look like they were lost — verify and restore.
                    Optional.OfNullable(oldAnnotator).FlatMap(null).IfPresent(null);
                    // Register that we added an annotator
                    newAnnotator = true;
                }
            }
            // nothing to do if an annotator with same name and signature already exists
            return(newAnnotator);
        }
        /// <summary>
        /// Repairs tree structure, phrasal categories and part-of-speech labels in
        /// multi-word tokens that have just been expanded.
        /// </summary>
        /// <param name="trees">Trees to process.</param>
        /// <returns>A new list containing the results collected from the multicore wrapper.</returns>
        /// <exception cref="System.Exception"/>
        /// <exception cref="Java.Util.Concurrent.ExecutionException"/>
        private IList <Tree> FixMultiWordTokens(IList <Tree> trees)
        {
            bool useNer = PropertiesUtils.GetBool(options, "ner", false);

            // Resources shared by the processor.
            IFactory <TreeNormalizer> normalizerFactory = new _IFactory_389();
            ITreeFactory treeFactory = new LabeledScoredTreeFactory();
            IThreadsafeProcessor <ICollection <Tree>, ICollection <Tree> > worker = new AnCoraProcessor.MultiWordProcessor(this, normalizerFactory, treeFactory, useNer);

            int cpuCount = Runtime.GetRuntime().AvailableProcessors();
            MulticoreWrapper <ICollection <Tree>, ICollection <Tree> > pool = new MulticoreWrapper <ICollection <Tree>, ICollection <Tree> >(cpuCount, worker, false);

            // Hand the work over in chunks (20 per processor) so that parallelization pays off.
            IList <IList <Tree> > batches   = CollectionUtils.PartitionIntoFolds(trees, cpuCount * 20);
            IList <Tree>          processed = new List <Tree>();

            foreach (ICollection <Tree> batch in batches)
            {
                pool.Put(batch);
                // Collect whatever results are already available.
                while (pool.Peek())
                {
                    Sharpen.Collections.AddAll(processed, pool.Poll());
                }
            }

            // Wait for the wrapper to finish, then collect the remaining results.
            pool.Join();
            while (pool.Peek())
            {
                Sharpen.Collections.AddAll(processed, pool.Poll());
            }
            return processed;
        }
Beispiel #10
0
            /// <summary>Creates a whitespace tokenizer over the given reader.</summary>
            /// <param name="r">Reader supplying the text to tokenize.</param>
            /// <param name="extraOptions">
            /// Option string; its <c>tokenizeNLs</c> entry, when present, overrides this factory's setting.
            /// </param>
            /// <returns>A new <c>WhitespaceTokenizer</c>.</returns>
            public virtual ITokenizer <T> GetTokenizer(Reader r, string extraOptions)
            {
                Properties parsedOptions = StringUtils.StringToProperties(extraOptions);

                // The factory-level setting is the default when the option string does not specify one.
                bool emitNewlineTokens = PropertiesUtils.GetBool(parsedOptions, "tokenizeNLs", this.tokenizeNLs);
                return new WhitespaceTokenizer <T>(factory, r, emitNewlineTokens);
            }
Beispiel #11
0
            /// <summary>Creates a tokenizer factory configured from an option string.</summary>
            /// <param name="factory">Token factory handed to the tokenizers this factory creates.</param>
            /// <param name="options">Option string; <c>tokenizeNLs</c> defaults to false when absent.</param>
            public WhitespaceTokenizerFactory(ILexedTokenFactory <T> factory, string options)
            {
                Properties parsedOptions = StringUtils.StringToProperties(options);

                this.factory     = factory;
                this.tokenizeNLs = PropertiesUtils.GetBool(parsedOptions, "tokenizeNLs", false);
            }
Beispiel #12
0
        /// <summary>
        /// Looks up a fixed CEP (postal code) through the ViaCEP web service and records the
        /// returned address fields in a freshly created log file under <c>TestResults</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised along the way is caught and its message is written to the console.
        /// </remarks>
        public void ValidaCep()
        {
            try
            {
                string cep = "91450080";

                // Build the log directory and file path once instead of re-deriving them three times.
                string logName = "logConsulta" + DateTime.Now.ToString("dd_MM_yyyy_HH_mm_ss_FFF");
                string logDir  = ProjConfig.GetPath("\\TestResults\\") + logName + "\\";
                string logFile = logDir + logName + ".txt";
                Directory.CreateDirectory(logDir);
                // Create the file and release the handle right away so the logger can open it.
                File.Create(logFile).Close();
                log = new PropertiesUtils(logFile);
                log.Save();
                int i = 1;

                if (cep.Length == 8)
                {
                    // The URL no longer carries the stray trailing space after "json/".
                    RestClient    restClient   = new RestClient(string.Format("https://viacep.com.br/ws/{0}/json/", cep));
                    RestRequest   restRequest  = new RestRequest(Method.GET);
                    IRestResponse restResponse = restClient.Execute(restRequest);

                    if (restResponse.StatusCode == System.Net.HttpStatusCode.BadRequest)
                    {
                        Console.WriteLine("Erro na requisição da API" + restResponse.Content);
                    }
                    else
                    {
                        DadosRetorno dadosRetorno = new JsonDeserializer().Deserialize <DadosRetorno>(restResponse);

                        // Treat a failed deserialization (null object) the same as a missing CEP
                        // instead of failing with a NullReferenceException.
                        if (dadosRetorno?.cep is null)
                        {
                            Console.WriteLine("Cep não encontado na base de dados");
                            // Record whether the whole response was missing; the previous
                            // dadosRetorno.Equals(null) could never report that.
                            log.Set("Cep não encontado na base de dados: " + ToString(), dadosRetorno is null);
                            log.Save();
                            return;
                        }
                        string index = i.ToString();
                        log.Set("CEP: " + index, dadosRetorno.cep);
                        log.Set("Logradouro: " + index, dadosRetorno.logradouro);
                        log.Set("Complemento: " + index, dadosRetorno.complemento);
                        log.Set("Bairro: " + index, dadosRetorno.bairro);
                        log.Set("Localidade: " + index, dadosRetorno.localidade);
                        log.Set("UF: " + index, dadosRetorno.uf);
                        log.Set("Unidade: " + index, dadosRetorno.unidade);
                        log.Set("IBGE: " + index, dadosRetorno.ibge);
                        log.Set("GIA: " + index, dadosRetorno.gia);
                        log.Save();
                    }
                }
                else
                {
                    Console.WriteLine("Cep inválido verifique o formato informado!");
                    log.Set("Cep inválido verifique o formato informado! " + ToString(), cep.GetType());
                    log.Save();
                    return;
                }
            }
            catch (Exception erro)
            {
                Console.WriteLine("Erro" + erro.Message);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Entry point: loads the project properties for the environment named by
        /// <c>ASPNETCORE_ENVIRONMENT</c> and launches the database migration.
        /// </summary>
        /// <param name="args">Command-line arguments forwarded to the migration.</param>
        static void Main(string[] args)
        {
            var environmentName   = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT");
            var envConfiguration  = ConfigurationUtils.GetEnvironmentConfiguration(environmentName);
            var projectProperties = PropertiesUtils.Create <ProjectProperties>(envConfiguration);

            // The migration runs against the connection described by the loaded properties,
            // using this program's own assembly.
            ConsoleApplicationUtils.Launch(
                () => DatabaseMigrationUtils.Perform(args, projectProperties.DatabaseConnectionProperties, typeof(Program).Assembly));
        }
            /// <summary>Creates a tokenizer factory configured from an option string.</summary>
            /// <param name="factory">Token factory handed to the tokenizers this factory creates.</param>
            /// <param name="options">Option string; <c>tokenizeNLs</c> defaults to false when absent.</param>
            public WhitespaceTokenizerFactory(LexedTokenFactory <T> factory, String options)
            {
                Dictionary <String, String> parsedOptions = StringUtils.stringToProperties(options);

                this.factory     = factory;
                this.tokenizeNLs = PropertiesUtils.getBool(parsedOptions, "tokenizeNLs", false);
            }
            /// <summary>Creates a whitespace tokenizer over the given reader.</summary>
            /// <param name="r">Reader supplying the text to tokenize.</param>
            /// <param name="extraOptions">
            /// Option string; its <c>tokenizeNLs</c> entry, when present, overrides this factory's setting.
            /// </param>
            /// <returns>A new <c>WhitespaceTokenizer</c>.</returns>
            public Tokenizer <T> getTokenizer(TextReader r, String extraOptions)
            {
                Dictionary <String, String> parsedOptions = StringUtils.stringToProperties(extraOptions);

                // The factory-level setting is the default when the option string does not specify one.
                bool emitNewlineTokens = PropertiesUtils.getBool(parsedOptions, "tokenizeNLs", this.tokenizeNLs);
                return new WhitespaceTokenizer <T>(factory, r, emitNewlineTokens);
            }
        /// <summary>
        /// Command-line tool that loads treebank files and prints every tree, either stripping
        /// the start-symbol bracket (option <c>b</c>) or adding it when it is missing.
        /// </summary>
        /// <param name="args">Command-line arguments: options followed by treebank file names.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, ArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            // NOTE(review): the encoding is read from key "l", the same key as the language
            // above — this looks like a wrong option key; confirm against ArgDefs()/Usage().
            string       encoding      = options.GetProperty("l", "UTF-8");
            bool         removeBracket = PropertiesUtils.GetBool(options, "b", false);

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            DiskTreebank tb = tlpp.DiskTreebank();
            // Split the remaining arguments on runs of whitespace. string.Split(string) treats
            // "\\s+" as a literal separator, and an empty input would yield one empty entry,
            // which made the usage branch below unreachable.
            string[] files = options.GetProperty(string.Empty, string.Empty).Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
            if (files.Length != 0)
            {
                foreach (string filename in files)
                {
                    tb.LoadPath(filename);
                }
            }
            else
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            PrintWriter  pwo         = tlpp.Pw();
            string       startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
            ITreeFactory tf          = new LabeledScoredTreeFactory();
            int          nTrees      = 0;

            foreach (Tree t in tb)
            {
                // A foreach iteration variable cannot be reassigned in C#, so the tree that is
                // actually printed is tracked in a separate local.
                Tree printed = t;
                if (removeBracket)
                {
                    if (t.Value().Equals(startSymbol))
                    {
                        printed = t.FirstChild();
                    }
                }
                else
                {
                    if (!t.Value().Equals(startSymbol))
                    {
                        //Add a bracket if it isn't already there
                        printed = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
                    }
                }
                pwo.Println(printed.ToString());
                nTrees++;
            }
            pwo.Close();
            System.Console.Error.Printf("Processed %d trees.%n", nTrees);
        }
Beispiel #17
0
        // static demo class
        /// <summary>
        /// Demo entry point: annotates a text file, compiles each line of a rules file into a
        /// token sequence pattern and prints, per sentence, the tokens and all non-overlapping matches.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: rules file; <c>args[1]</c>: text file to annotate;
        /// <c>args[2]</c> (optional): output file, otherwise standard output is used.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
                return;
            }
            string      rules = args[0];
            PrintWriter @out;
            // Only a writer opened on a file is owned (and therefore closed) by this method.
            bool        ownsWriter = args.Length > 2;

            if (ownsWriter)
            {
                @out = new PrintWriter(args[2]);
            }
            else
            {
                @out = new PrintWriter(System.Console.Out);
            }
            try
            {
                StanfordCoreNLP pipeline   = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
                Annotation      annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));

                pipeline.Annotate(annotation);
                // Load lines of file as TokenSequencePatterns
                IList <TokenSequencePattern> tokenSequencePatterns = new List <TokenSequencePattern>();

                foreach (string line in ObjectBank.GetLineIterator(rules))
                {
                    TokenSequencePattern pattern = TokenSequencePattern.Compile(line);
                    tokenSequencePatterns.Add(pattern);
                }
                // The matcher depends only on the pattern list, so build it once instead of per sentence.
                MultiPatternMatcher <ICoreMap> multiMatcher = TokenSequencePattern.GetMultiPatternMatcher(tokenSequencePatterns);
                IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
                int i = 0;

                foreach (ICoreMap sentence in sentences)
                {
                    IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                    @out.Println("Sentence #" + ++i);
                    @out.Print("  Tokens:");
                    foreach (CoreLabel token in tokens)
                    {
                        @out.Print(' ');
                        @out.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                    }
                    @out.Println();
                    IList <ISequenceMatchResult <ICoreMap> > answers = multiMatcher.FindNonOverlapping(tokens);
                    int j = 0;
                    foreach (ISequenceMatchResult <ICoreMap> matched in answers)
                    {
                        @out.Println("  Match #" + ++j);
                        // Group 0 up to and including GroupCount() are printed.
                        for (int k = 0; k <= matched.GroupCount(); k++)
                        {
                            @out.Println("    group " + k + " = " + matched.Group(k));
                        }
                    }
                }
            }
            finally
            {
                // Flush even when processing fails, and release the output file if one was opened.
                // The console-backed writer is flushed but deliberately left open.
                @out.Flush();
                if (ownsWriter)
                {
                    @out.Close();
                }
            }
        }
Beispiel #18
0
        /// <summary>
        /// Creates a parser annotator configured from the properties prefixed with
        /// <c>annotatorName + "."</c>.
        /// </summary>
        /// <remarks>
        /// Keys read below (all prefixed with the annotator name): <c>model</c>, <c>debug</c>,
        /// <c>flags</c>, <c>maxlen</c>, <c>treemap</c>, <c>maxtime</c>, <c>kbest</c>, <c>keepPunct</c>,
        /// <c>buildgraphs</c>, <c>originalDependencies</c>, <c>nthreads</c>, <c>binaryTrees</c>,
        /// <c>nosquash</c> and <c>extradependencies</c>. The unprefixed <c>nthreads</c> key is the
        /// fallback for the thread count.
        /// </remarks>
        /// <param name="annotatorName">Name of the annotator; used as the property prefix.</param>
        /// <param name="props">Properties to read the configuration from.</param>
        /// <exception cref="ArgumentException">Thrown when no model location is available.</exception>
        public ParserAnnotator(string annotatorName, Properties props)
        {
            // Falls back to the default parser location when no model is configured.
            string model = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);

            if (model == null)
            {
                throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
            }
            // Verbosity is controlled by the ".debug" key.
            this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
            string[] flags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
            // The parser must be loaded before the language-pack queries further down.
            this.parser            = LoadModel(model, Verbose, flags);
            this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);
            string treeMapClass = props.GetProperty(annotatorName + ".treemap");

            if (treeMapClass == null)
            {
                this.treeMap = null;
            }
            else
            {
                // The tree map is instantiated by class name, with the properties passed along.
                this.treeMap = ReflectionLoading.LoadByReflection(treeMapClass, props);
            }
            this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
            this.kBest        = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
            this.keepPunct    = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);
            string buildGraphsProperty = annotatorName + ".buildgraphs";

            if (!this.parser.GetTLPParams().SupportsBasicDependencies())
            {
                // Graph building is forced off when the parser's language parameters do not support
                // basic dependencies; a warning is logged if the user explicitly asked for graphs.
                if (PropertiesUtils.GetBool(props, buildGraphsProperty))
                {
                    log.Info("WARNING: " + buildGraphsProperty + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
                }
                this.BuildGraphs = false;
            }
            else
            {
                // Supported: graphs are built unless the property disables them.
                this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsProperty, true);
            }
            if (this.BuildGraphs)
            {
                bool generateOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
                parser.GetTLPParams().SetGenerateOriginalDependencies(generateOriginalDependencies);
                ITreebankLanguagePack tlp         = parser.GetTLPParams().TreebankLanguagePack();
                // With keepPunct everything is accepted; otherwise the language pack's
                // punctuation-word reject filter is used.
                IPredicate <string>   punctFilter = this.keepPunct ? Filters.AcceptFilter() : tlp.PunctuationWordRejectFilter();
                this.gsf = tlp.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
            }
            else
            {
                this.gsf = null;
            }
            // Annotator-specific thread count, falling back to the global "nthreads" key, then to 1.
            this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", PropertiesUtils.GetInt(props, "nthreads", 1));
            bool usesBinary = StanfordCoreNLP.UsesBinaryTrees(props);

            // The pipeline-level answer is the default for saving binary trees.
            this.saveBinaryTrees   = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", usesBinary);
            this.noSquash          = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);
            this.extraDependencies = MetaClass.Cast(props.GetProperty(annotatorName + ".extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
        }
Beispiel #19
0
        /// <summary>
        /// Reads the options from the properties prefixed with <c>name + "."</c>; the current
        /// field values act as defaults for keys that are absent.
        /// </summary>
        /// <param name="name">Property prefix.</param>
        /// <param name="props">Properties to read from.</param>
        /// <exception cref="Exception">Thrown when a configured binder cannot be created or initialized.</exception>
        public Options(string name, Properties props)
        {
            includeRange     = PropertiesUtils.GetBool(props, name + ".includeRange", includeRange);
            markTimeRanges   = PropertiesUtils.GetBool(props, name + ".markTimeRanges", markTimeRanges);
            includeNested    = PropertiesUtils.GetBool(props, name + ".includeNested", includeNested);
            restrictToTimex3 = PropertiesUtils.GetBool(props, name + ".restrictToTimex3", restrictToTimex3);
            teRelHeurLevel   = Options.RelativeHeuristicLevel.ValueOf(props.GetProperty(name + ".teRelHeurLevel", teRelHeurLevel.ToString()));
            verbose          = PropertiesUtils.GetBool(props, name + ".verbose", verbose);

            // Pick the default rules by language; unknown languages fall back to english.
            language = props.GetProperty(name + ".language", language);
            if (!languageToRulesFiles.Keys.Contains(language))
            {
                language = "english";
            }
            // An explicit ".rules" property overrides the language default.
            grammarFilename = props.GetProperty(name + ".rules", languageToRulesFiles[language]);

            searchForDocDate = PropertiesUtils.GetBool(props, name + ".searchForDocDate", searchForDocDate);

            // Determine which binder classes to use.
            string   bindersSpec = props.GetProperty(name + ".binders");
            int      binderCount;
            string[] binderTypeNames;
            if (bindersSpec != null)
            {
                // ".binders" holds the count; the class names are under ".binder.1", ".binder.2", ...
                binderCount     = PropertiesUtils.GetInt(props, name + ".binders", 0);
                binderTypeNames = new string[binderCount];
                for (int ordinal = 1; ordinal <= binderCount; ordinal++)
                {
                    binderTypeNames[ordinal - 1] = props.GetProperty(name + ".binder." + ordinal);
                }
            }
            else
            {
                binderCount     = DefaultBinders.Length;
                binderTypeNames = DefaultBinders;
            }

            // No binders are created when there are none, or when the "STS" runtime property is set.
            if (binderCount <= 0 || Runtime.GetProperty("STS") != null)
            {
                return;
            }

            binders = new Env.IBinder[binderCount];
            for (int slot = 0; slot < binderCount; slot++)
            {
                int ordinal = slot + 1;
                try
                {
                    Type        binderType = Sharpen.Runtime.GetType(binderTypeNames[slot]);
                    Env.IBinder binder     = (Env.IBinder)System.Activator.CreateInstance(binderType);
                    binders[slot] = binder;
                    // Each binder is initialized with its own property prefix.
                    binder.Init(name + ".binder." + ordinal + ".", props);
                }
                catch (Exception ex)
                {
                    throw new Exception("Error initializing binder " + ordinal, ex);
                }
            }
        }
Beispiel #20
0
        /// <summary>Creates a dependency parse annotator configured from the given properties.</summary>
        /// <param name="properties">
        /// Properties providing <c>model</c>, <c>testThreads</c>, <c>sentenceTimeout</c> and
        /// <c>extradependencies</c>; defaults are used for missing keys.
        /// </param>
        public DependencyParseAnnotator(Properties properties)
        {
            // Load the parser from the configured model, or from the default model.
            string modelLocation = PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel);
            parser = DependencyParser.LoadFromModelFile(modelLocation, properties);

            nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
            maxTime  = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

            string extrasName = properties.GetProperty("extradependencies", "NONE");
            extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
        }
        // ---------- Mention Detection ----------
        /// <summary>Resolves the mention detection type from <c>coref.md.type</c>.</summary>
        /// <remarks>
        /// When the property is absent, the default is "RULE" if constituency parses are used and
        /// "dep" otherwise. The value "dep" (any casing) is an alias for "DEPENDENCY".
        /// </remarks>
        /// <param name="props">Properties to read <c>coref.md.type</c> from.</param>
        /// <returns>The mention detection type named by the upper-cased property value.</returns>
        public static CorefProperties.MentionDetectionType MdType(Properties props)
        {
            string type = PropertiesUtils.GetString(props, "coref.md.type", UseConstituencyParse(props) ? "RULE" : "dep");

            if (Sharpen.Runtime.EqualsIgnoreCase(type, "dep"))
            {
                type = "DEPENDENCY";
            }
            // Enum names are identifiers: upper-case them culture-invariantly so that, for example,
            // "hybrid" does not become "HYBRİD" under a Turkish culture and fail the lookup.
            return(CorefProperties.MentionDetectionType.ValueOf(type.ToUpperInvariant()));
        }
Beispiel #22
0
        /// <summary>Creates the annotator for coreference (statistical or hybrid).</summary>
        /// <param name="properties">
        /// Pipeline properties; only the entries prefixed for the coref and coref-mention
        /// annotators are passed on.
        /// </param>
        /// <returns>A new <c>CorefAnnotator</c> built from the combined properties.</returns>
        public virtual IAnnotator Coref(Properties properties)
        {
            Properties combined = new Properties();

            // Coref entries go in first, mention entries second, so mention entries win on clashes.
            combined.PutAll(PropertiesUtils.ExtractPrefixedProperties(properties, AnnotatorConstants.StanfordCoref + ".", true));
            combined.PutAll(PropertiesUtils.ExtractPrefixedProperties(properties, AnnotatorConstants.StanfordCorefMention + ".", true));

            return new CorefAnnotator(combined);
        }
        /// <summary>
        /// Creates a number annotator, reading the background symbol and the SUTime switch from
        /// <paramref name="props"/>.
        /// </summary>
        /// <param name="name">Prefix used to build the background-symbol property key (name + "." + <c>BackgroundSymbolProperty</c>).</param>
        /// <param name="props">Properties to read the settings from.</param>
        public NumberAnnotator(string name, Properties props)
        {
            // The background symbol is looked up under this annotator's own prefix.
            BackgroundSymbol = props.GetProperty(name + "." + BackgroundSymbolProperty, DefaultBackgroundSymbol);

            // The SUTime switch is read from an un-prefixed key.
            bool sutimeEnabled = PropertiesUtils.GetBool(props, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            Verbose = false;
            nsc     = new NumberSequenceClassifier(sutimeEnabled);
        }
Beispiel #24
0
        /// <summary>
        /// Factory for the NERClassifierCombiner used in NERCombinerAnnotator
        /// (and, thence, in StanfordCoreNLP).
        /// </summary>
        /// <param name="name">
        /// The "x" part of an "x.y" property name. Commonly null, in which case "ner" is used.
        /// An empty string means no property prefix at all.
        /// </param>
        /// <param name="passDownProperties">
        /// Names of properties that should be passed down to the NERClassifierCombiner
        /// (useful for things like charset encoding). When null, all of
        /// <paramref name="properties"/> is passed through unchanged.
        /// </param>
        /// <param name="properties">
        /// Various properties, including a comma-separated model list under prefix + "model".
        /// </param>
        /// <returns>An NERClassifierCombiner with the given properties</returns>
        public static NERClassifierCombiner CreateNERClassifierCombiner(string name, ICollection <string> passDownProperties, Properties properties)
        {
            // Work out the property prefix: null -> "ner.", empty -> none, otherwise name + ".".
            string prefix;
            if (name == null)
            {
                prefix = "ner.";
            }
            else if (name.IsEmpty())
            {
                prefix = string.Empty;
            }
            else
            {
                prefix = name + '.';
            }

            // An absent model property selects the three default models.
            string modelNames = properties.GetProperty(prefix + "model")
                                ?? (DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel);

            // The property can still be explicitly set to the empty string, which means
            // "no real NER model" - only numeric classifiers or SUTime may then apply.
            string[] models;
            if (modelNames.IsEmpty())
            {
                log.Info("WARNING: no NER models specified");
                models = StringUtils.EmptyStringArray;
            }
            else
            {
                models = modelNames.Split(",");
            }

            try
            {
                bool applyNumericClassifiers = PropertiesUtils.GetBool(properties, prefix + ApplyNumericClassifiersPropertyBase, ApplyNumericClassifiersDefault);
                bool useSUTime               = PropertiesUtils.GetBool(properties, prefix + NumberSequenceClassifier.UseSutimePropertyBase, NumberSequenceClassifier.UseSutimeDefault);
                bool applyRegexner           = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);

                // Default: with no pass-down list, hand every property to the combiner.
                Properties combinerProperties = properties;
                if (passDownProperties != null)
                {
                    combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, passDownProperties);
                    if (useSUTime)
                    {
                        // Make sure SUTime parameters are included alongside the selected ones.
                        Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + ".", true);
                        PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
                    }
                }

                NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(properties.GetProperty(prefix + "language"), NERClassifierCombiner.Language.English);
                return new NERClassifierCombiner(applyNumericClassifiers, nerLanguage, useSUTime, applyRegexner, combinerProperties, models);
            }
            catch (IOException e)
            {
                // Surface I/O failures as the unchecked wrapper type.
                throw new RuntimeIOException(e);
            }
        }
Beispiel #25
0
            /// <summary>
            /// Creates a word-segmenting tokenizer over <paramref name="r"/>, wrapping a whitespace tokenizer.
            /// </summary>
            /// <param name="r">Reader supplying the text to tokenize.</param>
            /// <param name="extraOptions">
            /// Optional option string parsed via <c>StringUtils.StringToProperties</c>; its "tokenizeNLs"
            /// entry, when present, overrides this factory's own setting. May be null.
            /// </param>
            /// <returns>A <c>WordSegmentingTokenizer</c> using this factory's segmenter.</returns>
            public virtual ITokenizer <IHasWord> GetTokenizer(Reader r, string extraOptions)
            {
                // Without extra options the factory-level newline setting applies as-is.
                bool keepNewlines = (extraOptions == null)
                    ? this.tokenizeNLs
                    : PropertiesUtils.GetBool(StringUtils.StringToProperties(extraOptions), "tokenizeNLs", this.tokenizeNLs);

                return new WordSegmentingTokenizer(segmenter, WhitespaceTokenizer.NewCoreLabelWhitespaceTokenizer(r, keepNewlines));
            }
 /// <summary>
 /// Enables the additional-rules NER step when "ner.additional.regexner.mapping" is set to a
 /// non-empty value, and builds the corresponding TokensRegexNERAnnotator.
 /// </summary>
 /// <param name="properties">Properties holding the "ner.additional.regexner.*" settings.</param>
 public virtual void SetUpAdditionalRulesNER(Properties properties)
 {
     string mappingSetting = properties.GetProperty("ner.additional.regexner.mapping", string.Empty);

     this.applyAdditionalRules = !mappingSetting.Equals(string.Empty);
     if (!this.applyAdditionalRules)
     {
         // No mapping configured: leave the annotator untouched.
         return;
     }

     // build the additional rules ner TokensRegexNERAnnotator from the prefixed properties only
     string     rulesPrefix = "ner.additional.regexner";
     Properties rulesProps  = PropertiesUtils.ExtractPrefixedProperties(properties, rulesPrefix + ".", true);
     additionalRulesNERAnnotator = new TokensRegexNERAnnotator(rulesPrefix, rulesProps);
 }
        /// <summary>
        /// Runs tokenize + ssplit over a two-sentence text and checks the CoNLL output when the
        /// outputter is restricted to the "word,pos" columns: one token per line with two
        /// tab-separated columns (the second is "_"), and a blank line after each sentence.
        /// </summary>
        public virtual void TestCustomSimpleSentence()
        {
            string          columns  = "word,pos";
            Annotation      document = new Annotation("CoNLL is neat. Better than XML.");
            StanfordCoreNLP nlp      = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "outputFormatOptions", columns));

            nlp.Annotate(document);
            string actual = new CoNLLOutputter(columns).Print(document);

            // Each sentence block ends with an empty line.
            string firstSentence  = "CoNLL\t_\n" + "is\t_\n" + "neat\t_\n" + ".\t_\n" + '\n';
            string secondSentence = "Better\t_\n" + "than\t_\n" + "XML\t_\n" + ".\t_\n" + '\n';
            string expected       = firstSentence + secondSentence;

            NUnit.Framework.Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs tokenize + ssplit over a two-sentence text and checks the default CoNLL output:
        /// seven tab-separated columns per token (index, word, then "_" placeholders), with the
        /// index restarting at 1 for each sentence and a blank line after each sentence.
        /// </summary>
        public virtual void TestSimpleSentence()
        {
            Annotation      document = new Annotation("CoNLL is neat. Better than XML.");
            StanfordCoreNLP nlp      = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit"));

            nlp.Annotate(document);
            string actual = new CoNLLOutputter().Print(document);

            // Each sentence block ends with an empty line.
            string firstSentence  = "1\tCoNLL\t_\t_\t_\t_\t_\n" + "2\tis\t_\t_\t_\t_\t_\n" + "3\tneat\t_\t_\t_\t_\t_\n" + "4\t.\t_\t_\t_\t_\t_\n" + '\n';
            string secondSentence = "1\tBetter\t_\t_\t_\t_\t_\n" + "2\tthan\t_\t_\t_\t_\t_\n" + "3\tXML\t_\t_\t_\t_\t_\n" + "4\t.\t_\t_\t_\t_\t_\n" + '\n';
            string expected       = firstSentence + secondSentence;

            NUnit.Framework.Assert.AreEqual(expected, actual);
        }
 /// <summary>
 /// Reads "ner.buildEntityMentions" (default true) and, when enabled, creates the entity
 /// mentions annotator from the "ner.entitymentions." properties.
 /// </summary>
 /// <param name="properties">Properties holding the NER and entity-mention settings.</param>
 public virtual void SetUpEntityMentionBuilding(Properties properties)
 {
     this.buildEntityMentions = PropertiesUtils.GetBool(properties, "ner.buildEntityMentions", true);
     if (!this.buildEntityMentions)
     {
         // Entity mention building is switched off; nothing to construct.
         return;
     }

     string     mentionsPrefix = "ner.entitymentions";
     Properties mentionsProps  = PropertiesUtils.ExtractPrefixedProperties(properties, mentionsPrefix + ".", true);

     // pass this annotator's language on to the entity mention annotator
     mentionsProps.SetProperty("ner.entitymentions.language", language.ToString());
     entityMentionsAnnotator = new EntityMentionsAnnotator(mentionsPrefix, mentionsProps);
 }
        /// <summary>
        /// Creates the segmenter annotator from the properties that start with
        /// <paramref name="name"/> + ".".
        /// </summary>
        /// <remarks>
        /// The "model" entry under that prefix names the classifier to load. Every other entry under
        /// the prefix is passed to the classifier with the prefix stripped. Failures raised by
        /// <c>CRFClassifier.GetClassifier</c> propagate to the caller unchanged.
        /// </remarks>
        /// <param name="name">Annotator name used as the property prefix.</param>
        /// <param name="props">Pipeline properties; also consulted for the newline-handling settings.</param>
        /// <exception cref="Exception">Thrown when no name + ".model" property is present.</exception>
        public ChineseSegmenterAnnotator(string name, Properties props)
        {
            string model = null;
            // Keep only the properties that apply to this annotator
            Properties modelProps = new Properties();
            string     desiredKey = name + '.';

            foreach (string key in props.StringPropertyNames())
            {
                // Property keys are identifiers, so match the prefix ordinally instead of using
                // the current culture's linguistic comparison.
                if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
                {
                    continue;
                }
                // skip past name and the subsequent "."
                string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
                if (modelKey.Equals("model"))
                {
                    model = props.GetProperty(key);
                }
                else
                {
                    modelProps.SetProperty(modelKey, props.GetProperty(key));
                }
            }
            this.Verbose        = PropertiesUtils.GetBool(props, name + ".verbose", false);
            this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);
            if (model == null)
            {
                throw new Exception("Expected a property " + name + ".model");
            }
            // don't write very much, because the CRFClassifier already reports loading
            if (Verbose)
            {
                log.Info("Loading Segmentation Model ... ");
            }
            // The previous try block had two identical catch (Exception) clauses: the first only
            // rethrew and the second was unreachable (and rejected by the compiler). Calling the
            // loader directly keeps the same behavior: any failure propagates as-is.
            segmenter = CRFClassifier.GetClassifier(model, modelProps);

            string newlineBreak = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never");
            // If newlines are treated as sentence split, we need to retain them in tokenization for ssplit to make use of them
            // NOTE(review): bool.ValueOf is carried over from the Java port - confirm it resolves in this codebase.
            tokenizeNewline = (!newlineBreak.Equals("never")) || bool.ValueOf(props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"));
            // record whether or not sentence splitting on two newlines ; if so, need to remove single newlines
            sentenceSplitOnTwoNewlines = newlineBreak.Equals("two");
        }