/// <summary>
/// Creates the coreference annotator: builds the underlying <c>CorefSystem</c> and,
/// unless <c>coref.useCustomMentionDetection</c> is set, the default mention annotator.
/// </summary>
/// <param name="props">
/// Coref properties. The key <c>coref.printConLLLoadingMessage</c> is set temporarily
/// while the coref system is constructed and is removed again afterwards.
/// </param>
/// <exception cref="System.Exception">
/// Thrown when the hybrid algorithm is requested for English, or when the coref system
/// cannot be constructed; the original failure is attached as the inner exception.
/// </exception>
public CorefAnnotator(Properties props)
{
    this.props = props;
    try
    {
        // coref.algorithm=hybrid together with coref.language=ENGLISH is not supported at this time.
        if (CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid) && CorefProperties.GetLanguage(props).Equals(Locale.English))
        {
            log.Error("Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language");
            throw new Exception("coref.algorithm=hybrid is not supported for English");
        }
        // Suppress the loading message only for the duration of the construction.
        props.SetProperty("coref.printConLLLoadingMessage", "false");
        try
        {
            corefSystem = new CorefSystem(props);
        }
        finally
        {
            // Always undo the temporary setting so a failed construction does not
            // leave the caller's properties modified.
            props.Remove("coref.printConLLLoadingMessage");
        }
    }
    catch (Exception e)
    {
        log.Error("Error creating CorefAnnotator...terminating pipeline construction!");
        log.Error(e);
        // Keep the original failure reachable through InnerException.
        throw new Exception(e.Message, e);
    }
    // Unless custom mention detection is set, just use the default coref mention detector.
    performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
    if (performMentionDetection)
    {
        mentionAnnotator = new CorefMentionAnnotator(props);
    }
}
/// <summary>
/// Builds the typed properties from the configuration returned by
/// <paramref name="configuration"/>, creates the Camunda client from them and
/// registers that client as a scoped service.
/// </summary>
/// <param name="configuration">Callback that supplies the configuration to read.</param>
/// <returns>This builder, to allow call chaining.</returns>
public CamundaWorkersBuilder SetProperties(Func<IConfiguration> configuration)
{
    IConfiguration source = configuration();

    // An empty result is reported through the builder's own exception factory.
    Properties = PropertiesUtils.Create<TProperties>(source)
                 ?? throw CamundaWorkersBuilderException.PropertiesAreEmpty();

    CamundaClient = new SyrinxCamundaClientService(Properties.SyrinxProperties);
    ServiceCollection.AddScoped(provider => CamundaClient);

    return this;
}
/// <summary>
/// Applies dataset options: the optional split file, the CC tagset flag, the treebank
/// instance, and the lexical and part-of-speech mappers.
/// </summary>
/// <param name="opts">Options to read.</param>
/// <returns>The value returned by the base implementation.</returns>
public override bool SetOptions(Properties opts)
{
    bool baseResult = base.SetOptions(opts);

    if (opts.Contains(ConfigParser.paramSplit))
    {
        splitSet = MakeSplitSet(opts.GetProperty(ConfigParser.paramSplit));
    }

    CcTagset = PropertiesUtils.GetBool(opts, ConfigParser.paramCCTagset, false);
    treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CcTagset), FrenchTreebankLanguagePack.FtbEncoding);

    // The lexical mapper is created lazily and configured from the comma-separated option list.
    if (lexMapper == null)
    {
        lexMapper = new DefaultMapper();
        lexMapper.Setup(null, lexMapOptions.Split(","));
    }

    // No mapping files: nothing further to configure.
    if (pathsToMappings.Count == 0)
    {
        return baseResult;
    }

    if (posMapper == null)
    {
        posMapper = new DefaultMapper();
    }
    foreach (File mappingFile in pathsToMappings)
    {
        posMapper.Setup(mappingFile);
    }

    return baseResult;
}
/// <summary>
/// Creates a true-case annotator backed by a biased CRF classifier.
/// </summary>
/// <param name="modelLoc">Location of the classifier model; must not be null.</param>
/// <param name="classBias">
/// Optional comma-separated list of <c>class:weight</c> pairs; null means no bias is set.
/// Weights are parsed with the invariant culture (decimal point).
/// </param>
/// <param name="mixedCaseFileName">File name handed to <c>LoadMixedCaseMap</c>.</param>
/// <param name="overwriteText">Stored in the corresponding field.</param>
/// <param name="verbose">When true, each bias weight that is set is logged.</param>
/// <exception cref="System.Exception">Thrown when <paramref name="modelLoc"/> is null.</exception>
public TrueCaseAnnotator(string modelLoc, string classBias, string mixedCaseFileName, bool overwriteText, bool verbose)
{
    // Validate first so the descriptive error is raised before any classifier is built
    // from a null model location.
    if (modelLoc == null)
    {
        throw new Exception("Model location not specified for true-case classifier!");
    }
    this.overwriteText = overwriteText;
    this.verbose = verbose;
    Properties props = PropertiesUtils.AsProperties("loadClassifier", modelLoc, "mixedCaseMapFile", mixedCaseFileName, "classBias", classBias);
    trueCaser = new CRFBiasedClassifier<CoreLabel>(props);
    trueCaser.LoadClassifierNoExceptions(modelLoc, props);
    if (classBias != null)
    {
        StringTokenizer biases = new StringTokenizer(classBias, ",");
        while (biases.HasMoreTokens())
        {
            StringTokenizer bias = new StringTokenizer(biases.NextToken(), ":");
            string cname = bias.NextToken();
            // Configuration values are machine-readable: parse independently of the current culture.
            double w = double.Parse(bias.NextToken(), System.Globalization.CultureInfo.InvariantCulture);
            trueCaser.SetBiasWeight(cname, w);
            if (this.verbose)
            {
                log.Info("Setting bias for class " + cname + " to " + w);
            }
        }
    }
    // Load map containing mixed-case words:
    mixedCaseMap = LoadMixedCaseMap(mixedCaseFileName);
}
/// <summary>
/// Creates the Arabic segmenter annotator from the properties prefixed with
/// <paramref name="name"/> followed by a dot.
/// </summary>
/// <param name="name">Annotator name used as the property prefix.</param>
/// <param name="props">
/// All properties. <c>name.model</c> selects the model; every other <c>name.*</c> entry is
/// passed to <c>LoadModel</c> with the prefix stripped; <c>name.verbose</c> sets <c>Verbose</c>.
/// </param>
/// <exception cref="System.Exception">Thrown when no <c>name.model</c> property is present.</exception>
public ArabicSegmenterAnnotator(string name, Properties props)
{
    string model = null;
    // Keep only the properties that apply to this annotator
    Properties modelProps = new Properties();
    string desiredKey = name + '.';
    foreach (string key in props.StringPropertyNames())
    {
        // Property keys are identifiers: compare ordinally, not with culture rules.
        if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
        {
            continue;
        }
        // skip past name and the subsequent "."
        string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
        if (modelKey.Equals("model"))
        {
            model = props.GetProperty(key);
        }
        else
        {
            modelProps.SetProperty(modelKey, props.GetProperty(key));
        }
    }
    this.Verbose = PropertiesUtils.GetBool(props, name + ".verbose", false);
    if (model == null)
    {
        throw new Exception("Expected a property " + name + ".model");
    }
    LoadModel(model, modelProps);
}
/// <summary>Command-line entry point of the rule-based French tokenizer.</summary>
/// <remarks>
/// Reads text from standard input, tokenizes it and writes the tokens to standard output
/// separated by single spaces. Newline tokens are always requested from the lexer
/// ("tokenizeNLs") and each one ends the current output line; no finer sentence splitting
/// is done. A summary with line and token counts is printed to standard error at the end.
/// </remarks>
/// <param name="args">
/// Command-line arguments; the options read here are "help", "ftb", "options",
/// "encoding" and "lowerCase".
/// </param>
public static void Main(string[] args)
{
    Properties options = StringUtils.ArgsToProperties(args, ArgOptionDefs());
    if (options.Contains("help"))
    {
        log.Info(Usage());
        return;
    }

    // Lexer options
    ITokenizerFactory<CoreLabel> tokenizerFactory;
    if (options.Contains("ftb"))
    {
        tokenizerFactory = FrenchTokenizer.FtbFactory();
    }
    else
    {
        tokenizerFactory = FrenchTokenizer.Factory();
    }

    // When called from this main method, always split on newline.
    string lexerOptions = options.GetProperty("options", string.Empty);
    if (lexerOptions.IsEmpty())
    {
        lexerOptions = "tokenizeNLs";
    }
    else
    {
        lexerOptions = lexerOptions + ",tokenizeNLs";
    }
    tokenizerFactory.SetOptions(lexerOptions);

    // Other options
    string encoding = options.GetProperty("encoding", "UTF-8");
    bool toLower = PropertiesUtils.GetBool(options, "lowerCase", false);

    // Read the file from stdin
    int lineCount = 0;
    int tokenCount = 0;
    long startTime = Runtime.NanoTime();
    try
    {
        ITokenizer<CoreLabel> tokenizer = tokenizerFactory.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
        bool needsSeparator = false;
        while (tokenizer.MoveNext())
        {
            ++tokenCount;
            string word = tokenizer.Current.Word();
            if (word.Equals(FrenchLexer.NewlineToken))
            {
                // A newline token closes the current output line.
                ++lineCount;
                needsSeparator = false;
                System.Console.Out.WriteLine();
                continue;
            }
            if (needsSeparator)
            {
                System.Console.Out.Write(" ");
            }
            System.Console.Out.Write(toLower ? word.ToLower(Locale.French) : word);
            needsSeparator = true;
        }
    }
    catch (UnsupportedEncodingException e)
    {
        log.Error(e);
    }

    long elapsedTime = Runtime.NanoTime() - startTime;
    double elapsedSeconds = elapsedTime / 1e9;
    double linesPerSec = (double)lineCount / elapsedSeconds;
    System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", lineCount, tokenCount, linesPerSec);
}
/// <summary>
/// Configures this reader from properties and then delegates to the
/// three-argument <c>Init</c> overload.
/// </summary>
/// <param name="name">
/// Optional property prefix; when non-null, keys are looked up as <c>name.key</c>.
/// </param>
/// <param name="props">
/// Properties to read. Keys used: "delimiter", "replaceWhitespace", "columns", "tokens"
/// and "tokenFactory".
/// </param>
/// <exception cref="System.Exception">
/// Thrown when the configured token factory class cannot be instantiated; the cause is
/// attached as the inner exception.
/// </exception>
public virtual void Init(string name, Properties props)
{
    string prefix = (name == null) ? string.Empty : name + ".";
    string delimiterRegex = props.GetProperty(prefix + "delimiter");
    if (delimiterRegex != null)
    {
        delimiterPattern = Pattern.Compile(delimiterRegex);
    }
    replaceWhitespace = PropertiesUtils.GetBool(props, prefix + "replaceWhitespace", replaceWhitespace);
    string mapString = props.GetProperty(prefix + "columns");
    tokensAnnotationClassName = props.GetProperty(prefix + "tokens", "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation");
    string tokenFactoryClassName = props.GetProperty(prefix + "tokenFactory");
    if (tokenFactoryClassName != null)
    {
        try
        {
            this.tokenFactory = (ICoreTokenFactory<IN>)System.Activator.CreateInstance(Sharpen.Runtime.GetType(tokenFactoryClassName));
        }
        catch (Exception e)
        {
            // System.Exception has no (Exception) constructor; chain the cause explicitly.
            throw new Exception("Error creating token factory " + tokenFactoryClassName, e);
        }
    }
    else
    {
        // No factory configured: fall back to the CoreLabel token factory.
        this.tokenFactory = (ICoreTokenFactory<IN>) new CoreLabelTokenFactory();
    }
    Init(mapString, this.tokenFactory, this.tokensAnnotationClassName);
}
/// <summary>Registers a lazily created annotator under a name.</summary>
/// <remarks>
/// The annotator is passed as a <c>Lazy</c> so that registering it does not by itself
/// require creating it. An entry is stored only when no annotator is registered under
/// <paramref name="name"/> yet, or when the registered one has a different signature.
/// </remarks>
/// <param name="name">The name to be associated with the annotator.</param>
/// <param name="props">The properties used to compute the annotator's signature.</param>
/// <param name="annotator">
/// The lazy annotator to store. Per the original documentation this should come from
/// <see cref="Edu.Stanford.Nlp.Util.Lazy{E}.Cache{E}(Java.Util.Function.ISupplier{T})"/>
/// if the pool is meant to behave as a cache.
/// </param>
/// <returns>true if a new entry was stored; false if the existing one is reused</returns>
public virtual bool Register(string name, Properties props, Lazy<IAnnotator> annotator)
{
    string signature = PropertiesUtils.GetSignature(name, props);
    lock (this.cachedAnnotators)
    {
        AnnotatorPool.CachedAnnotator existing = this.cachedAnnotators[name];

        // Same name and same signature: nothing to do.
        bool upToDate = existing != null && Objects.Equals(existing.signature, signature);
        if (upToDate)
        {
            return false;
        }

        if (existing != null)
        {
            // The new annotator uses different properties, so the entry must be replaced.
            log.Debug("Replacing old annotator \"" + name + "\" with signature [" + existing.signature + "] with new annotator with signature [" + signature + "]");
        }

        // Add the new annotator
        this.cachedAnnotators[name] = new AnnotatorPool.CachedAnnotator(signature, annotator);

        // Unmount the old annotator
        // NOTE(review): both callbacks are null here, which looks like lambdas lost in
        // translation; confirm what FlatMap/IfPresent do with null arguments.
        Optional.OfNullable(existing).FlatMap(null).IfPresent(null);

        return true;
    }
}
/// <summary>
/// Runs the multi-word token processor over the given trees, in chunks and through a
/// multicore wrapper, and collects the processed trees.
/// </summary>
/// <param name="trees">Trees to process.</param>
/// <returns>All trees returned by the wrapper, in the order they were polled.</returns>
/// <exception cref="System.Exception"/>
/// <exception cref="Java.Util.Concurrent.ExecutionException"/>
private IList<Tree> FixMultiWordTokens(IList<Tree> trees)
{
    bool useNer = PropertiesUtils.GetBool(options, "ner", false);

    // Resources shared by all worker invocations
    IFactory<TreeNormalizer> normalizerFactory = new _IFactory_389();
    ITreeFactory treeFactory = new LabeledScoredTreeFactory();
    IThreadsafeProcessor<ICollection<Tree>, ICollection<Tree>> processor = new AnCoraProcessor.MultiWordProcessor(this, normalizerFactory, treeFactory, useNer);

    int processorCount = Runtime.GetRuntime().AvailableProcessors();
    MulticoreWrapper<ICollection<Tree>, ICollection<Tree>> wrapper = new MulticoreWrapper<ICollection<Tree>, ICollection<Tree>>(processorCount, processor, false);

    // Chunk the work so that parallelization is actually worth it
    IList<IList<Tree>> chunks = CollectionUtils.PartitionIntoFolds(trees, processorCount * 20);

    IList<Tree> fixedTrees = new List<Tree>();
    foreach (ICollection<Tree> chunk in chunks)
    {
        wrapper.Put(chunk);
        // Collect whatever is already available while submitting.
        while (wrapper.Peek())
        {
            Sharpen.Collections.AddAll(fixedTrees, wrapper.Poll());
        }
    }

    // After joining, collect the remaining results.
    wrapper.Join();
    while (wrapper.Peek())
    {
        Sharpen.Collections.AddAll(fixedTrees, wrapper.Poll());
    }

    return fixedTrees;
}
/// <summary>
/// Creates a whitespace tokenizer over <paramref name="r"/>, letting
/// <paramref name="extraOptions"/> override this factory's newline setting.
/// </summary>
/// <param name="r">Reader supplying the text to tokenize.</param>
/// <param name="extraOptions">
/// Option string in which "tokenizeNLs" may be set; when null, the factory's own
/// setting is used unchanged.
/// </param>
/// <returns>A new whitespace tokenizer.</returns>
public virtual ITokenizer<T> GetTokenizer(Reader r, string extraOptions)
{
    bool tokenizeNewlines = this.tokenizeNLs;
    // Do not hand a null option string to the parser; fall back to the factory default.
    if (extraOptions != null)
    {
        Properties prop = StringUtils.StringToProperties(extraOptions);
        tokenizeNewlines = PropertiesUtils.GetBool(prop, "tokenizeNLs", this.tokenizeNLs);
    }
    return new WhitespaceTokenizer<T>(factory, r, tokenizeNewlines);
}
/// <summary>
/// Creates a factory that builds whitespace tokenizers with the given token factory.
/// </summary>
/// <param name="factory">Token factory handed to every tokenizer created.</param>
/// <param name="options">
/// Option string; its "tokenizeNLs" entry (default false) becomes this factory's
/// newline setting.
/// </param>
public WhitespaceTokenizerFactory(ILexedTokenFactory<T> factory, string options)
{
    this.factory = factory;
    this.tokenizeNLs = PropertiesUtils.GetBool(StringUtils.StringToProperties(options), "tokenizeNLs", false);
}
/// <summary>
/// Looks up a fixed CEP through the viacep.com.br web service and records the
/// returned address fields in a log file created for this run.
/// </summary>
/// <remarks>
/// Any exception is caught and reported on the console; the method never throws.
/// </remarks>
public void ValidaCep()
{
    try
    {
        string cep = "91450080";
        string logName = "logConsulta" + DateTime.Now.ToString("dd_MM_yyyy_HH_mm_ss_FFF");

        // Compute the log location once instead of rebuilding the same path three times.
        string logDirectory = ProjConfig.GetPath("\\TestResults\\") + logName + "\\";
        string logFile = logDirectory + logName + ".txt";
        Directory.CreateDirectory(logDirectory);
        File.Create(logFile).Close();
        log = new PropertiesUtils(logFile);
        log.Save();

        int i = 1;
        if (cep.Length != 8)
        {
            Console.WriteLine("Cep inválido verifique o formato informado!");
            log.Set("Cep inválido verifique o formato informado! " + ToString(), cep.GetType());
            log.Save();
            return;
        }

        // No trailing whitespace in the request URL.
        RestClient restClient = new RestClient(string.Format("https://viacep.com.br/ws/{0}/json/", cep));
        RestRequest restRequest = new RestRequest(Method.GET);
        IRestResponse restResponse = restClient.Execute(restRequest);
        if (restResponse.StatusCode == System.Net.HttpStatusCode.BadRequest)
        {
            Console.WriteLine("Erro na requisição da API" + restResponse.Content);
            return;
        }

        DadosRetorno dadosRetorno = new JsonDeserializer().Deserialize<DadosRetorno>(restResponse);
        // Treat a missing payload the same as a missing CEP instead of dereferencing null.
        if (dadosRetorno is null || dadosRetorno.cep is null)
        {
            Console.WriteLine("Cep não encontado na base de dados");
            log.Set("Cep não encontado na base de dados: " + ToString(), dadosRetorno is null);
            log.Save();
            return;
        }

        string index = i.ToString();
        log.Set("CEP: " + index, dadosRetorno.cep);
        log.Set("Logradouro: " + index, dadosRetorno.logradouro);
        log.Set("Complemento: " + index, dadosRetorno.complemento);
        log.Set("Bairro: " + index, dadosRetorno.bairro);
        log.Set("Localidade: " + index, dadosRetorno.localidade);
        log.Set("UF: " + index, dadosRetorno.uf);
        log.Set("Unidade: " + index, dadosRetorno.unidade);
        log.Set("IBGE: " + index, dadosRetorno.ibge);
        log.Set("GIA: " + index, dadosRetorno.gia);
        log.Save();
    }
    catch (Exception erro)
    {
        Console.WriteLine("Erro" + erro.Message);
    }
}
/// <summary>
/// Entry point: loads the configuration for the environment named by
/// <c>ASPNETCORE_ENVIRONMENT</c>, builds the project properties from it and launches
/// the database migration for this assembly.
/// </summary>
/// <param name="args">Command-line arguments, forwarded to the migration.</param>
static void Main(string[] args)
{
    string environmentName = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT");
    var properties = PropertiesUtils.Create<ProjectProperties>(
        ConfigurationUtils.GetEnvironmentConfiguration(environmentName));

    ConsoleApplicationUtils.Launch(
        () => DatabaseMigrationUtils.Perform(
            args,
            properties.DatabaseConnectionProperties,
            typeof(Program).Assembly));
}
/// <summary>
/// Creates a factory that builds whitespace tokenizers with the given token factory.
/// </summary>
/// <param name="factory">Token factory handed to every tokenizer created.</param>
/// <param name="options">
/// Option string; its "tokenizeNLs" entry (default false) becomes this factory's
/// newline setting.
/// </param>
public WhitespaceTokenizerFactory(LexedTokenFactory<T> factory, String options)
{
    Dictionary<String, String> parsedOptions = StringUtils.stringToProperties(options);
    bool splitOnNewlines = PropertiesUtils.getBool(parsedOptions, "tokenizeNLs", false);

    this.factory = factory;
    this.tokenizeNLs = splitOnNewlines;
}
/// <summary>
/// Creates a whitespace tokenizer over <paramref name="r"/>, letting
/// <paramref name="extraOptions"/> override this factory's newline setting.
/// </summary>
/// <param name="r">Reader supplying the text to tokenize.</param>
/// <param name="extraOptions">
/// Option string in which "tokenizeNLs" may be set; when null, the factory's own
/// setting is used unchanged.
/// </param>
/// <returns>A new whitespace tokenizer.</returns>
public Tokenizer<T> getTokenizer(TextReader r, String extraOptions)
{
    bool tokenizeNewlines = this.tokenizeNLs;
    // Do not hand a null option string to the parser; fall back to the factory default.
    if (extraOptions != null)
    {
        Dictionary<String, String> prop = StringUtils.stringToProperties(extraOptions);
        tokenizeNewlines = PropertiesUtils.getBool(prop, "tokenizeNLs", this.tokenizeNLs);
    }
    return new WhitespaceTokenizer<T>(factory, r, tokenizeNewlines);
}
/// <summary>
/// Command-line entry point: loads the given treebank files and prints every tree,
/// either removing the top bracket (option "b") or adding the language's start symbol
/// as a top bracket when it is missing.
/// </summary>
/// <param name="args">
/// Command-line arguments. Options read: "l" (language), "e" (encoding, default UTF-8),
/// "b" (remove top bracket); the remaining arguments are whitespace-separated file names.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(Usage());
        System.Environment.Exit(-1);
    }
    Properties options = StringUtils.ArgsToProperties(args, ArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;

    // The encoding must not be read from the language key "l".
    // NOTE(review): "e" is assumed to be the encoding option declared by ArgDefs() — confirm.
    string encoding = options.GetProperty("e", "UTF-8");
    bool removeBracket = PropertiesUtils.GetBool(options, "b", false);
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank tb = tlpp.DiskTreebank();

    // Split on runs of whitespace and drop empty entries, so that "no files given"
    // really yields an empty array.
    string[] files = options.GetProperty(string.Empty, string.Empty).Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
    if (files.Length == 0)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    foreach (string filename in files)
    {
        tb.LoadPath(filename);
    }

    PrintWriter pwo = tlpp.Pw();
    string startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
    ITreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    foreach (Tree t in tb)
    {
        // A foreach iteration variable cannot be reassigned, so the result gets its own local.
        Tree output = t;
        bool hasTopBracket = t.Value().Equals(startSymbol);
        if (removeBracket)
        {
            if (hasTopBracket)
            {
                output = t.FirstChild();
            }
        }
        else if (!hasTopBracket)
        {
            // Add a bracket if it isn't already there
            output = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
        }
        pwo.Println(output.ToString());
        nTrees++;
    }
    pwo.Close();
    System.Console.Error.Printf("Processed %d trees.%n", nTrees);
}
// static demo class
/// <summary>
/// Demo entry point: annotates a text file, compiles each line of a rules file into a
/// token sequence pattern, and prints the non-overlapping matches found in each sentence.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the rules file and <c>args[1]</c> the text file; when a third
/// argument is present the output goes to that file, otherwise to standard output.
/// </param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    if (args.Length < 2)
    {
        System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
        return;
    }

    string rulesPath = args[0];
    PrintWriter writer = (args.Length > 2) ? new PrintWriter(args[2]) : new PrintWriter(System.Console.Out);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    Annotation document = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));
    pipeline.Annotate(document);

    // Load lines of file as TokenSequencePatterns
    IList<TokenSequencePattern> patterns = new List<TokenSequencePattern>();
    foreach (string ruleLine in ObjectBank.GetLineIterator(rulesPath))
    {
        patterns.Add(TokenSequencePattern.Compile(ruleLine));
    }

    IList<ICoreMap> sentences = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
    int sentenceNumber = 0;
    foreach (ICoreMap sentence in sentences)
    {
        IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

        sentenceNumber++;
        writer.Println("Sentence #" + sentenceNumber);
        writer.Print(" Tokens:");
        foreach (CoreLabel token in tokens)
        {
            writer.Print(' ');
            writer.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
        }
        writer.Println();

        MultiPatternMatcher<ICoreMap> matcher = TokenSequencePattern.GetMultiPatternMatcher(patterns);
        IList<ISequenceMatchResult<ICoreMap>> matches = matcher.FindNonOverlapping(tokens);
        int matchNumber = 0;
        foreach (ISequenceMatchResult<ICoreMap> match in matches)
        {
            matchNumber++;
            writer.Println(" Match #" + matchNumber);
            // The loop is inclusive of GroupCount().
            int group = 0;
            while (group <= match.GroupCount())
            {
                writer.Println(" group " + group + " = " + match.Group(group));
                group++;
            }
        }
    }
    writer.Flush();
}
/// <summary>
/// Creates a parser annotator configured from the properties prefixed with
/// <paramref name="annotatorName"/> followed by a dot.
/// </summary>
/// <param name="annotatorName">Annotator name used as the property prefix.</param>
/// <param name="props">
/// Properties to read. Keys used under the prefix: model, debug, flags, maxlen, treemap,
/// maxtime, kbest, keepPunct, buildgraphs, originalDependencies, nthreads, binaryTrees,
/// nosquash and extradependencies. The unprefixed "nthreads" is the fallback thread count.
/// </param>
/// <exception cref="System.ArgumentException">Thrown when no model is specified.</exception>
public ParserAnnotator(string annotatorName, Properties props)
{
    string prefix = annotatorName + ".";

    string model = props.GetProperty(prefix + "model", LexicalizedParser.DefaultParserLoc);
    if (model == null)
    {
        throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
    }

    this.Verbose = PropertiesUtils.GetBool(props, prefix + "debug", false);
    string[] flags = ConvertFlagsToArray(props.GetProperty(prefix + "flags"));
    this.parser = LoadModel(model, Verbose, flags);
    this.maxSentenceLength = PropertiesUtils.GetInt(props, prefix + "maxlen", -1);

    // Optional tree map, loaded by class name.
    string treeMapClass = props.GetProperty(prefix + "treemap");
    if (treeMapClass != null)
    {
        this.treeMap = ReflectionLoading.LoadByReflection(treeMapClass, props);
    }
    else
    {
        this.treeMap = null;
    }

    this.maxParseTime = PropertiesUtils.GetLong(props, prefix + "maxtime", -1);
    this.kBest = PropertiesUtils.GetInt(props, prefix + "kbest", 1);
    this.keepPunct = PropertiesUtils.GetBool(props, prefix + "keepPunct", true);

    // Dependency graphs are built only when the parser's language parameters support them.
    string buildGraphsProperty = prefix + "buildgraphs";
    if (this.parser.GetTLPParams().SupportsBasicDependencies())
    {
        this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsProperty, true);
    }
    else
    {
        if (PropertiesUtils.GetBool(props, buildGraphsProperty))
        {
            log.Info("WARNING: " + buildGraphsProperty + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
        }
        this.BuildGraphs = false;
    }

    if (!this.BuildGraphs)
    {
        this.gsf = null;
    }
    else
    {
        bool generateOriginalDependencies = PropertiesUtils.GetBool(props, prefix + "originalDependencies", false);
        parser.GetTLPParams().SetGenerateOriginalDependencies(generateOriginalDependencies);
        ITreebankLanguagePack tlp = parser.GetTLPParams().TreebankLanguagePack();
        // Keep punctuation when requested; otherwise use the language pack's reject filter.
        IPredicate<string> punctFilter = this.keepPunct ? Filters.AcceptFilter() : tlp.PunctuationWordRejectFilter();
        this.gsf = tlp.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
    }

    // The annotator-specific thread count falls back to the global "nthreads", then to 1.
    int defaultThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(props, prefix + "nthreads", defaultThreads);

    bool usesBinary = StanfordCoreNLP.UsesBinaryTrees(props);
    this.saveBinaryTrees = PropertiesUtils.GetBool(props, prefix + "binaryTrees", usesBinary);
    this.noSquash = PropertiesUtils.GetBool(props, prefix + "nosquash", false);
    this.extraDependencies = MetaClass.Cast(props.GetProperty(prefix + "extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
}
/// <summary>
/// Reads the options prefixed with <paramref name="name"/> followed by a dot, keeping the
/// current field values as defaults, and instantiates the configured binders.
/// </summary>
/// <param name="name">Property prefix.</param>
/// <param name="props">Properties to read.</param>
/// <exception cref="System.Exception">Thrown when a binder cannot be created or initialized.</exception>
public Options(string name, Properties props)
{
    string prefix = name + ".";

    includeRange = PropertiesUtils.GetBool(props, prefix + "includeRange", includeRange);
    markTimeRanges = PropertiesUtils.GetBool(props, prefix + "markTimeRanges", markTimeRanges);
    includeNested = PropertiesUtils.GetBool(props, prefix + "includeNested", includeNested);
    restrictToTimex3 = PropertiesUtils.GetBool(props, prefix + "restrictToTimex3", restrictToTimex3);
    teRelHeurLevel = Options.RelativeHeuristicLevel.ValueOf(props.GetProperty(prefix + "teRelHeurLevel", teRelHeurLevel.ToString()));
    verbose = PropertiesUtils.GetBool(props, prefix + "verbose", verbose);

    // Default rules come from the language; unknown languages fall back to "english".
    language = props.GetProperty(prefix + "language", language);
    if (!languageToRulesFiles.Keys.Contains(language))
    {
        language = "english";
    }
    // An explicit "rules" property overrides the language default.
    grammarFilename = props.GetProperty(prefix + "rules", languageToRulesFiles[language]);
    searchForDocDate = PropertiesUtils.GetBool(props, prefix + "searchForDocDate", searchForDocDate);

    // Binder class names: the defaults, or the "binder.1" .. "binder.N" properties
    // when "binders" gives a count.
    int nBinders;
    string[] binderClasses;
    if (props.GetProperty(prefix + "binders") != null)
    {
        nBinders = PropertiesUtils.GetInt(props, prefix + "binders", 0);
        binderClasses = new string[nBinders];
        for (int slot = 0; slot < nBinders; ++slot)
        {
            binderClasses[slot] = props.GetProperty(name + ".binder." + (slot + 1));
        }
    }
    else
    {
        nBinders = DefaultBinders.Length;
        binderClasses = DefaultBinders;
    }

    // Binders are skipped when there are none or when the "STS" runtime property is set.
    if (nBinders <= 0 || Runtime.GetProperty("STS") != null)
    {
        return;
    }

    binders = new Env.IBinder[nBinders];
    for (int slot = 0; slot < nBinders; slot++)
    {
        int binderNumber = slot + 1;
        try
        {
            Type binderClass = Sharpen.Runtime.GetType(binderClasses[slot]);
            string binderPrefix = name + ".binder." + binderNumber + ".";
            binders[slot] = (Env.IBinder)System.Activator.CreateInstance(binderClass);
            binders[slot].Init(binderPrefix, props);
        }
        catch (Exception ex)
        {
            throw new Exception("Error initializing binder " + binderNumber, ex);
        }
    }
}
/// <summary>
/// Creates a dependency parse annotator: loads the parser model and reads the thread
/// count, the per-sentence timeout and the extra-dependencies setting.
/// </summary>
/// <param name="properties">
/// Properties to read. Keys used: "model", "testThreads", "sentenceTimeout" and
/// "extradependencies" (default "NONE").
/// </param>
public DependencyParseAnnotator(Properties properties)
{
    parser = DependencyParser.LoadFromModelFile(
        PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel),
        properties);

    nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
    maxTime = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

    string extras = properties.GetProperty("extradependencies", "NONE");
    extraDependencies = MetaClass.Cast(extras, typeof(GrammaticalStructure.Extras));
}
// ---------- Mention Detection ----------
/// <summary>
/// Resolves the mention detection type from <c>coref.md.type</c>.
/// </summary>
/// <remarks>
/// When the property is absent the default is "RULE" if <c>UseConstituencyParse</c>
/// returns true and "dep" otherwise. The value "dep" (any casing) is an alias for
/// "DEPENDENCY". The name is upper-cased with the invariant culture before the lookup,
/// so the result does not depend on the current culture's casing rules.
/// </remarks>
/// <param name="props">Properties to read.</param>
/// <returns>The mention detection type named by the property.</returns>
public static CorefProperties.MentionDetectionType MdType(Properties props)
{
    string type = PropertiesUtils.GetString(props, "coref.md.type", UseConstituencyParse(props) ? "RULE" : "dep");
    if (Sharpen.Runtime.EqualsIgnoreCase(type, "dep"))
    {
        type = "DEPENDENCY";
    }
    // Enum names are identifiers: culture-specific casing (e.g. Turkish dotted I) must not apply.
    return CorefProperties.MentionDetectionType.ValueOf(type.ToUpperInvariant());
}
/// <summary>Creates the annotator for coreference (statistical or hybrid).</summary>
/// <param name="properties">
/// Pipeline properties; the entries under the coref prefix and the coref-mention prefix
/// are extracted and merged, in that order, into the properties handed to the annotator.
/// </param>
/// <returns>A new <c>CorefAnnotator</c>.</returns>
public virtual IAnnotator Coref(Properties properties)
{
    string corefPrefix = AnnotatorConstants.StanfordCoref + ".";
    string mentionPrefix = AnnotatorConstants.StanfordCorefMention + ".";

    Properties merged = new Properties();
    merged.PutAll(PropertiesUtils.ExtractPrefixedProperties(properties, corefPrefix, true));
    // Mention properties are added second.
    merged.PutAll(PropertiesUtils.ExtractPrefixedProperties(properties, mentionPrefix, true));

    return new CorefAnnotator(merged);
}
/// <summary>
/// Creates a number annotator with the background symbol configured under
/// <paramref name="name"/> and a number sequence classifier.
/// </summary>
/// <param name="name">Prefix of the background symbol property.</param>
/// <param name="props">
/// Properties to read: the prefixed background symbol and the (unprefixed) SUTime flag
/// from <c>NumberSequenceClassifier.UseSutimeProperty</c>.
/// </param>
public NumberAnnotator(string name, Properties props)
{
    BackgroundSymbol = props.GetProperty(name + "." + BackgroundSymbolProperty, DefaultBackgroundSymbol);
    Verbose = false;

    bool withSUTime = PropertiesUtils.GetBool(
        props,
        NumberSequenceClassifier.UseSutimeProperty,
        NumberSequenceClassifier.UseSutimeDefault);
    nsc = new NumberSequenceClassifier(withSUTime);
}
/// <summary>
/// Factory method that builds an <c>NERClassifierCombiner</c> from properties.
/// </summary>
/// <param name="name">
/// The "x" part of "x.y" property names. When null, "ner" is used; when it is the empty
/// string, no prefix is used.
/// </param>
/// <param name="passDownProperties">
/// Names of the properties to pass down to the combiner. When null, all of
/// <paramref name="properties"/> is passed through unchanged.
/// </param>
/// <param name="properties">
/// Properties to read, including the comma-separated model list under prefix + "model".
/// </param>
/// <returns>An NERClassifierCombiner with the given properties</returns>
public static NERClassifierCombiner CreateNERClassifierCombiner(string name, ICollection<string> passDownProperties, Properties properties)
{
    string prefix;
    if (name == null)
    {
        prefix = "ner.";
    }
    else if (name.IsEmpty())
    {
        prefix = string.Empty;
    }
    else
    {
        prefix = name + '.';
    }

    // No model property at all: use the three default models.
    string modelNames = properties.GetProperty(prefix + "model");
    if (modelNames == null)
    {
        modelNames = DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel;
    }

    // The model list can still be empty if it was explicitly set to the empty string.
    string[] models;
    if (modelNames.IsEmpty())
    {
        // Allow for no real NER model - can just use numeric classifiers or SUTime
        log.Info("WARNING: no NER models specified");
        models = StringUtils.EmptyStringArray;
    }
    else
    {
        models = modelNames.Split(",");
    }

    try
    {
        bool applyNumericClassifiers = PropertiesUtils.GetBool(properties, prefix + ApplyNumericClassifiersPropertyBase, ApplyNumericClassifiersDefault);
        bool useSUTime = PropertiesUtils.GetBool(properties, prefix + NumberSequenceClassifier.UseSutimePropertyBase, NumberSequenceClassifier.UseSutimeDefault);
        bool applyRegexner = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);

        Properties combinerProperties;
        if (passDownProperties == null)
        {
            // Nothing selected explicitly: pass everything through.
            combinerProperties = properties;
        }
        else
        {
            combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, passDownProperties);
            if (useSUTime)
            {
                // Make sure SUTime parameters are included
                Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + ".", true);
                PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
            }
        }

        NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(properties.GetProperty(prefix + "language"), NERClassifierCombiner.Language.English);
        return new NERClassifierCombiner(applyNumericClassifiers, nerLanguage, useSUTime, applyRegexner, combinerProperties, models);
    }
    catch (IOException e)
    {
        throw new RuntimeIOException(e);
    }
}
/// <summary>
/// Creates a word-segmenting tokenizer that wraps a whitespace tokenizer over
/// <paramref name="r"/>.
/// </summary>
/// <param name="r">Reader supplying the text to tokenize.</param>
/// <param name="extraOptions">
/// Option string in which "tokenizeNLs" may be set; when null, the factory's own
/// newline setting is used.
/// </param>
/// <returns>A new word-segmenting tokenizer.</returns>
public virtual ITokenizer<IHasWord> GetTokenizer(Reader r, string extraOptions)
{
    bool splitOnNewlines = (extraOptions == null)
        ? this.tokenizeNLs
        : PropertiesUtils.GetBool(StringUtils.StringToProperties(extraOptions), "tokenizeNLs", this.tokenizeNLs);

    return new WordSegmentingTokenizer(
        segmenter,
        WhitespaceTokenizer.NewCoreLabelWhitespaceTokenizer(r, splitOnNewlines));
}
/// <summary>
/// Enables the additional-rules NER step when <c>ner.additional.regexner.mapping</c>
/// is set to a non-empty value, and builds a <c>TokensRegexNERAnnotator</c> from the
/// <c>ner.additional.regexner.*</c> properties.
/// </summary>
/// <param name="properties">Properties to read.</param>
public virtual void SetUpAdditionalRulesNER(Properties properties)
{
    string mapping = properties.GetProperty("ner.additional.regexner.mapping", string.Empty);
    this.applyAdditionalRules = !mapping.Equals(string.Empty);
    if (!this.applyAdditionalRules)
    {
        return;
    }

    string additionalRulesPrefix = "ner.additional.regexner";
    Properties additionalRulesProps = PropertiesUtils.ExtractPrefixedProperties(properties, additionalRulesPrefix + ".", true);

    // build the additional rules ner
    // NOTE(review): the annotator is only held in this local and is not stored anywhere
    // visible from here — confirm whether it was meant to be kept in a member.
    TokensRegexNERAnnotator additionalRulesNERAnnotator = new TokensRegexNERAnnotator(additionalRulesPrefix, additionalRulesProps);
}
/// <summary>
/// Checks the CoNLL output for two short sentences when the outputter is restricted
/// to the "word,pos" columns: one tab-separated row per token and a blank line after
/// each sentence.
/// </summary>
public virtual void TestCustomSimpleSentence()
{
    string outputKeys = "word,pos";
    Annotation ann = new Annotation("CoNLL is neat. Better than XML.");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "outputFormatOptions", outputKeys));
    pipeline.Annotate(ann);

    // Only tokenize and ssplit run, so the pos column is the placeholder "_".
    string firstSentence = "CoNLL\t_\n" + "is\t_\n" + "neat\t_\n" + ".\t_\n";
    string secondSentence = "Better\t_\n" + "than\t_\n" + "XML\t_\n" + ".\t_\n";
    string expected = firstSentence + '\n' + secondSentence + '\n';

    string actual = new CoNLLOutputter(outputKeys).Print(ann);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks the default CoNLL output for two short sentences: seven tab-separated
/// columns per token, token numbering restarting in each sentence, and a blank line
/// after each sentence.
/// </summary>
public virtual void TestSimpleSentence()
{
    Annotation ann = new Annotation("CoNLL is neat. Better than XML.");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit"));
    pipeline.Annotate(ann);

    // Only tokenize and ssplit run, so every column after the word is the placeholder "_".
    string firstSentence = "1\tCoNLL\t_\t_\t_\t_\t_\n" + "2\tis\t_\t_\t_\t_\t_\n" + "3\tneat\t_\t_\t_\t_\t_\n" + "4\t.\t_\t_\t_\t_\t_\n";
    string secondSentence = "1\tBetter\t_\t_\t_\t_\t_\n" + "2\tthan\t_\t_\t_\t_\t_\n" + "3\tXML\t_\t_\t_\t_\t_\n" + "4\t.\t_\t_\t_\t_\t_\n";
    string expected = firstSentence + '\n' + secondSentence + '\n';

    string actual = new CoNLLOutputter().Print(ann);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Reads <c>ner.buildEntityMentions</c> (default true) and, when enabled, creates the
/// entity mentions annotator from the <c>ner.entitymentions.*</c> properties.
/// </summary>
/// <param name="properties">Properties to read.</param>
public virtual void SetUpEntityMentionBuilding(Properties properties)
{
    this.buildEntityMentions = PropertiesUtils.GetBool(properties, "ner.buildEntityMentions", true);
    if (!this.buildEntityMentions)
    {
        return;
    }

    string entityMentionsPrefix = "ner.entitymentions";
    Properties entityMentionsProps = PropertiesUtils.ExtractPrefixedProperties(properties, entityMentionsPrefix + ".", true);

    // Pass this annotator's language on to the entity mention annotator.
    entityMentionsProps.SetProperty("ner.entitymentions.language", language.ToString());

    entityMentionsAnnotator = new EntityMentionsAnnotator(entityMentionsPrefix, entityMentionsProps);
}
/// <summary>
/// Creates the Chinese segmenter annotator from the properties prefixed with
/// <paramref name="name"/> followed by a dot.
/// </summary>
/// <param name="name">Annotator name used as the property prefix.</param>
/// <param name="props">
/// All properties. <c>name.model</c> selects the model; every other <c>name.*</c> entry is
/// passed to the classifier loader with the prefix stripped. <c>name.verbose</c> and
/// <c>name.normalizeSpace</c> are read as booleans. The pipeline-level newline properties
/// decide whether newlines are kept during tokenization.
/// </param>
/// <exception cref="System.Exception">
/// Thrown when no <c>name.model</c> property is present. Failures while loading the
/// classifier propagate unchanged.
/// </exception>
public ChineseSegmenterAnnotator(string name, Properties props)
{
    string model = null;
    // Keep only the properties that apply to this annotator
    Properties modelProps = new Properties();
    string desiredKey = name + '.';
    foreach (string key in props.StringPropertyNames())
    {
        // Property keys are identifiers: compare ordinally, not with culture rules.
        if (!key.StartsWith(desiredKey, System.StringComparison.Ordinal))
        {
            continue;
        }
        // skip past name and the subsequent "."
        string modelKey = Sharpen.Runtime.Substring(key, desiredKey.Length);
        if (modelKey.Equals("model"))
        {
            model = props.GetProperty(key);
        }
        else
        {
            modelProps.SetProperty(modelKey, props.GetProperty(key));
        }
    }
    this.Verbose = PropertiesUtils.GetBool(props, name + ".verbose", false);
    this.normalizeSpace = PropertiesUtils.GetBool(props, name + ".normalizeSpace", false);
    if (model == null)
    {
        throw new Exception("Expected a property " + name + ".model");
    }
    // don't write very much, because the CRFClassifier already reports loading
    if (Verbose)
    {
        log.Info("Loading Segmentation Model ... ");
    }
    // The former try block had two identical catch (Exception) clauses; the first one
    // rethrew everything, so a plain call has the same effect.
    segmenter = CRFClassifier.GetClassifier(model, modelProps);

    string newlineBreak = props.GetProperty(StanfordCoreNLP.NewlineIsSentenceBreakProperty, "never");
    // If newlines are treated as sentence split, we need to retain them in tokenization for ssplit to make use of them
    tokenizeNewline = (!newlineBreak.Equals("never")) || bool.ValueOf(props.GetProperty(StanfordCoreNLP.NewlineSplitterProperty, "false"));
    // record whether or not sentence splitting on two newlines ; if so, need to remove single newlines
    sentenceSplitOnTwoNewlines = newlineBreak.Equals("two");
}