/// <summary>A fast, rule-based tokenizer for Modern Standard French.</summary>
/// <remarks>
/// A fast, rule-based tokenizer for Modern Standard French.
/// Performs punctuation splitting and light tokenization by default.
/// <p>
/// Currently, this tokenizer does not do line splitting. It assumes that the input
/// file is delimited by the system line separator. The output will be equivalently
/// delimited.
/// </remarks>
/// <param name="args"/>
public static void Main(string[] args)
{
    Properties options = StringUtils.ArgsToProperties(args, ArgOptionDefs());
    if (options.Contains("help"))
    {
        log.Info(Usage());
        return;
    }
    // Lexer options: -ftb selects the French Treebank tokenization scheme.
    ITokenizerFactory<CoreLabel> tf = options.Contains("ftb") ? FrenchTokenizer.FtbFactory() : FrenchTokenizer.Factory();
    string orthoOptions = options.GetProperty("options", string.Empty);
    // When called from this main method, split on newline. No options for
    // more granular sentence splitting.
    orthoOptions = orthoOptions.IsEmpty() ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs";
    tf.SetOptions(orthoOptions);
    // Other options
    string encoding = options.GetProperty("encoding", "UTF-8");
    bool toLower = PropertiesUtils.GetBool(options, "lowerCase", false);
    // Read from stdin, echoing tokens separated by single spaces; the special
    // newline token emits a line break so output lines mirror input lines.
    int nLines = 0;
    int nTokens = 0;
    long startTime = Runtime.NanoTime();
    try
    {
        ITokenizer<CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
        // printSpace suppresses the leading space at the start of each output line.
        bool printSpace = false;
        while (tokenizer.MoveNext())
        {
            ++nTokens;
            string word = tokenizer.Current.Word();
            if (word.Equals(FrenchLexer.NewlineToken))
            {
                ++nLines;
                printSpace = false;
                System.Console.Out.WriteLine();
            }
            else
            {
                if (printSpace)
                {
                    System.Console.Out.Write(" ");
                }
                // Lowercasing is locale-sensitive; use the French locale for accented forms.
                string outputToken = toLower ? word.ToLower(Locale.French) : word;
                System.Console.Out.Write(outputToken);
                printSpace = true;
            }
        }
    }
    catch (UnsupportedEncodingException e)
    {
        log.Error(e);
    }
    // Report throughput (lines per second) on stderr so it does not mix with token output.
    long elapsedTime = Runtime.NanoTime() - startTime;
    double linesPerSec = (double)nLines / (elapsedTime / 1e9);
    System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", nLines, nTokens, linesPerSec);
}
/// <summary>
/// Counts closed-class tags over the configured training files and, when test
/// files are given, over the test files as well, then prints a report.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // Force UTF-8 on both standard streams before anything is printed.
    Runtime.SetOut(new TextWriter(System.Console.Out, true, "UTF-8"));
    Runtime.SetErr(new TextWriter(System.Console.Error, true, "UTF-8"));
    Properties config = StringUtils.ArgsToProperties(args);
    CheckArgs(config);
    Edu.Stanford.Nlp.Tagger.Util.CountClosedTags cct = new Edu.Stanford.Nlp.Tagger.Util.CountClosedTags(config);
    string trainFiles = config.GetProperty(TrainFileProperty);
    string testFiles = config.GetProperty(TestFileProperty);
    // Tally tag statistics over the training files first.
    IList<TaggedFileRecord> files = TaggedFileRecord.CreateRecords(config, trainFiles);
    foreach (TaggedFileRecord record in files)
    {
        cct.CountTrainingTags(record);
    }
    // Test files are optional; count them only when supplied.
    if (testFiles != null)
    {
        files = TaggedFileRecord.CreateRecords(config, testFiles);
        foreach (TaggedFileRecord record in files)
        {
            cct.CountTestTags(record);
        }
    }
    cct.Report();
}
/// <summary>Some basic testing of the ClassifierCombiner.</summary>
/// <param name="args">Command-line arguments as properties: -loadClassifier1 serializedFile -loadClassifier2 serializedFile</param>
/// <exception cref="System.Exception">If IO or serialization error loading classifiers</exception>
public static void Main(string[] args)
{
    // Build the combiner from the serialized classifiers named on the command line
    // and classify a fixed sample sentence, logging the result.
    Properties combinerProps = StringUtils.ArgsToProperties(args);
    Edu.Stanford.Nlp.IE.ClassifierCombiner combiner = new Edu.Stanford.Nlp.IE.ClassifierCombiner(combinerProps);
    string labeled = combiner.ClassifyToString("Marketing : Sony Hopes to Win Much Bigger Market For Wide Range of Small-Video Products --- By Andrew B. Cohen Staff Reporter of The Wall Street Journal");
    log.Info(labeled);
}
/// <summary>
/// Exports CRF features for a training file: reads documents with the CRF's
/// reader, builds the answer/tag index, and writes the features to a file.
/// Requires -trainFile (input) and -exportFeatures (output) flags.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    StringUtils.LogInvocationString(log, args);
    Properties props = StringUtils.ArgsToProperties(args);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);
    // Guard clauses: both the input and output paths are mandatory.
    string inputFile = crf.flags.trainFile;
    if (inputFile == null)
    {
        log.Info("Please provide input file using -trainFile");
        System.Environment.Exit(-1);
    }
    string outputFile = crf.flags.exportFeatures;
    if (outputFile == null)
    {
        log.Info("Please provide output file using -exportFeatures");
        System.Environment.Exit(-1);
    }
    Edu.Stanford.Nlp.IE.Crf.CRFFeatureExporter<CoreLabel> exporter = new Edu.Stanford.Nlp.IE.Crf.CRFFeatureExporter<CoreLabel>(crf);
    // Read the documents, index their tags, then dump the features.
    ICollection<IList<CoreLabel>> documents = crf.MakeObjectBankFromFile(inputFile, crf.MakeReaderAndWriter());
    crf.MakeAnswerArraysAndTagIndex(documents);
    exporter.PrintFeatures(outputFile, documents);
}
/// <summary>
/// Computes POS statistics over the given AnCora files and serializes the
/// resulting unigram tagger to the path given with -o.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    if (args.Length < 1)
    {
        log.Info(usage);
        System.Environment.Exit(1);
    }
    Properties options = StringUtils.ArgsToProperties(args, argOptionDefs);
    string outputPath = options.GetProperty("o");
    if (outputPath == null)
    {
        throw new ArgumentException("-o argument (output path for built tagger) is required");
    }
    // Bug fix: GetProperty(string.Empty) returns null when there are no
    // positional arguments, which previously crashed on Split with a
    // NullReferenceException. Default to the empty string and skip blanks.
    string[] remainingArgs = options.GetProperty(string.Empty, string.Empty).Split(" ");
    IList<File> fileList = new List<File>();
    foreach (string arg in remainingArgs)
    {
        if (!arg.IsEmpty())
        {
            fileList.Add(new File(arg));
        }
    }
    Edu.Stanford.Nlp.International.Spanish.Pipeline.AnCoraPOSStats stats = new Edu.Stanford.Nlp.International.Spanish.Pipeline.AnCoraPOSStats(fileList, outputPath);
    stats.Process();
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(outputPath));
    TwoDimensionalCounter<string, string> tagger = stats.GetUnigramTagger();
    oos.WriteObject(tagger);
    // Close the stream so the serialized tagger is fully flushed to disk
    // (previously it was never closed).
    oos.Close();
    System.Console.Out.Printf("Wrote tagger to %s%n", outputPath);
}
/// <summary>Builds a coref system from command-line properties and runs it on CoNLL data.</summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Properties properties = StringUtils.ArgsToProperties(args);
    new Edu.Stanford.Nlp.Coref.CorefSystem(properties).RunOnConll(properties);
}
/// <summary>
/// Command-line driver: with -loadFile, deserializes a previously saved
/// annotation and prints its short string form; with -file, annotates the
/// text and saves the serialized annotation beside it with a ".ser" suffix.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    string file = props.GetProperty("file");
    string loadFile = props.GetProperty("loadFile");
    if (loadFile != null && !loadFile.IsEmpty())
    {
        // Read mode: deserialize and dump the annotation.
        Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
        InputStream @is = new FileInputStream(loadFile);
        // Read returns both the annotation and the (possibly wrapped) stream;
        // the wrapped stream is closed first, then the original file stream.
        Pair<Annotation, InputStream> pair = ser.Read(@is);
        pair.second.Close();
        Annotation anno = pair.first;
        System.Console.Out.WriteLine(anno.ToShorterString(StringUtils.EmptyStringArray));
        @is.Close();
    }
    else
    {
        if (file != null && !file.Equals(string.Empty))
        {
            // Write mode: annotate the raw text and serialize the result.
            string text = IOUtils.SlurpFile(file);
            Annotation doc = new Annotation(text);
            pipeline.Annotate(doc);
            Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
            TextWriter os = new TextWriter(new FileOutputStream(file + ".ser"));
            // Write returns the stream it wrote to; closing it flushes the output.
            ser.Write(doc, os).Close();
            log.Info("Serialized annotation saved in " + file + ".ser");
        }
        else
        {
            log.Info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
        }
    }
}
/// <summary>
/// Trains a singleton predictor from a CoNLL-2011 corpus and saves the
/// resulting logistic classifier in serialized form.
/// Requires -dcoref.conll2011 and -singleton.predictor.output properties.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Properties props = args.Length > 0 ? StringUtils.ArgsToProperties(args) : new Properties();
    // Both the input corpus and the output model path are required.
    if (!props.Contains("dcoref.conll2011"))
    {
        log.Info("-dcoref.conll2011 [input_CoNLL_corpus]: was not specified");
        return;
    }
    if (!props.Contains("singleton.predictor.output"))
    {
        log.Info("-singleton.predictor.output [output_model_file]: was not specified");
        return;
    }
    // Featurize, train, and serialize.
    GeneralDataset<string, string> dataset = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.GenerateFeatureVectors(props);
    LogisticClassifier<string, string> model = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.Train(dataset);
    Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.SaveToSerialized(model, GetPathSingletonPredictor(props));
}
/// <summary>
/// Demo driver for Chinese coreference: annotates a fixed pre-tokenized
/// Chinese news passage with the default Chinese coref pipeline and prints
/// the coref chains, the mentions per sentence, and the total running time.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    long startTime = Runtime.CurrentTimeMillis();
    // Pre-tokenized (space-separated) Chinese test text.
    string text = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , " + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , " + "可是 就 在 稍后 就 返回 莫斯科 。 " + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 " + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , " + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 " + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 " + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ," + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , " + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 " + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";
    // NOTE(review): the incoming command-line args are deliberately discarded
    // here and replaced with the bundled Chinese coref properties file.
    args = new string[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };
    Annotation document = new Annotation(text);
    Properties props = StringUtils.ArgsToProperties(args);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.Annotate(document);
    // Print every resolved coref chain.
    System.Console.Out.WriteLine("---");
    System.Console.Out.WriteLine("coref chains");
    foreach (CorefChain cc in document.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)).Values)
    {
        System.Console.Out.WriteLine("\t" + cc);
    }
    // Print the detected mentions, grouped by sentence.
    foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        System.Console.Out.WriteLine("---");
        System.Console.Out.WriteLine("mentions");
        foreach (Mention m in sentence.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)))
        {
            System.Console.Out.WriteLine("\t" + m);
        }
    }
    // Wall-clock time in minutes and seconds.
    long endTime = Runtime.CurrentTimeMillis();
    long time = (endTime - startTime) / 1000;
    System.Console.Out.WriteLine("Running time " + time / 60 + "min " + time % 60 + "s");
}
/// <summary>
/// Demo: runs the relation extractor over two example sentences and prints
/// each sentence together with the relation mentions found in it.
/// </summary>
public static void Main(string[] args)
{
    try
    {
        Properties props = StringUtils.ArgsToProperties(args);
        props.SetProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
        // Bug fix: the pipeline was previously constructed with the no-arg
        // constructor, so the annotator list configured just above (and any
        // command-line properties) was silently ignored.
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        string sentence = "Barack Obama lives in America. Obama works for the Federal Goverment.";
        Annotation doc = new Annotation(sentence);
        pipeline.Annotate(doc);
        Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator r = new Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator(props);
        r.Annotate(doc);
        foreach (ICoreMap s in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            System.Console.Out.WriteLine("For sentence " + s.Get(typeof(CoreAnnotations.TextAnnotation)));
            IList<RelationMention> rls = s.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
            foreach (RelationMention rl in rls)
            {
                System.Console.Out.WriteLine(rl.ToString());
            }
        }
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
/// <summary>This runs a simple train and test regime.</summary>
/// <remarks>
/// This runs a simple train and test regime.
/// The data file format is one item per line, space separated, with first the class label
/// and then a bunch of (categorical) string features.
/// </remarks>
/// <param name="args">The arguments/flags are: -trainFile trainFile -testFile testFile [-l1reg num] [-biased]</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Properties prop = StringUtils.ArgsToProperties(args);
    // L1 regularization strength; 0.0 means no L1 penalty.
    double l1reg = double.ParseDouble(prop.GetProperty("l1reg", "0.0"));
    // Build the training dataset: label is the first whitespace-separated
    // token on each line, the remaining tokens are the features.
    Dataset<string, string> ds = new Dataset<string, string>();
    foreach (string line in ObjectBank.GetLineIterator(new File(prop.GetProperty("trainFile"))))
    {
        string[] bits = line.Split("\\s+");
        ICollection<string> f = new LinkedList<string>(Arrays.AsList(bits).SubList(1, bits.Length));
        string l = bits[0];
        ds.Add(f, l);
    }
    ds.SummaryStatistics();
    bool biased = prop.GetProperty("biased", "false").Equals("true");
    LogisticClassifierFactory<string, string> factory = new LogisticClassifierFactory<string, string>();
    // Train with the given L1 regularization and a fixed convergence tolerance.
    Edu.Stanford.Nlp.Classify.LogisticClassifier<string, string> lc = factory.TrainClassifier(ds, l1reg, 1e-4, biased);
    // Classify each test line and print "probability <tab> guess <tab> line";
    // the gold label (first token of the line) is not used for scoring here.
    foreach (string line_1 in ObjectBank.GetLineIterator(new File(prop.GetProperty("testFile"))))
    {
        string[] bits = line_1.Split("\\s+");
        ICollection<string> f = new LinkedList<string>(Arrays.AsList(bits).SubList(1, bits.Length));
        //String l = bits[0];
        string g = lc.ClassOf(f);
        double prob = lc.ProbabilityOf(f, g);
        System.Console.Out.Printf("%4.3f\t%s\t%s%n", prob, g, line_1);
    }
}
/// <summary>
/// Reads Penn-format trees from the file given with -treeFile, applies the
/// coordination transformer to each, and prints the tree before and after
/// the transformation.
/// </summary>
public static void Main(string[] args)
{
    Edu.Stanford.Nlp.Trees.CoordinationTransformer transformer = new Edu.Stanford.Nlp.Trees.CoordinationTransformer(null);
    Treebank tb = new MemoryTreebank();
    Properties props = StringUtils.ArgsToProperties(args);
    string treeFileName = props.GetProperty("treeFile");
    if (treeFileName != null)
    {
        try
        {
            // Load every tree from the file into the in-memory treebank.
            ITreeReader reader = new PennTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName))), new LabeledScoredTreeFactory());
            Tree parsed;
            while ((parsed = reader.ReadTree()) != null)
            {
                tb.Add(parsed);
            }
        }
        catch (IOException e)
        {
            throw new Exception("File problem: " + e);
        }
    }
    foreach (Tree original in tb)
    {
        System.Console.Out.WriteLine("Original tree");
        original.PennPrint();
        System.Console.Out.WriteLine();
        System.Console.Out.WriteLine("Tree transformed");
        Tree transformed = transformer.TransformTree(original);
        transformed.PennPrint();
        System.Console.Out.WriteLine();
        System.Console.Out.WriteLine("----------------------------");
    }
}
/// <summary>
/// Evaluates mention detection on the training split using the properties
/// file named by the first command-line argument.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // The first argument names a properties file; load it via -props.
    Properties props = StringUtils.ArgsToProperties(new string[] { "-props", args[0] });
    Dictionaries dicts = new Dictionaries(props);
    CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
    new MentionDetectionEvaluator().Run(props, dicts);
}
/// <summary>
/// Reads treebank files and prints each tree, either stripping the top
/// bracket (-b) or adding the language's start symbol when it is missing.
/// </summary>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(Usage());
        System.Environment.Exit(-1);
    }
    Properties options = StringUtils.ArgsToProperties(args, ArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    DiskTreebank tb = null;
    // Bug fix: the encoding was previously read from the "l" flag, which is
    // the *language* flag consumed just above — so e.g. "-l French" would set
    // the character encoding to "French". Read the encoding from "e" instead.
    // NOTE(review): confirm "e" matches the flag declared in ArgDefs().
    string encoding = options.GetProperty("e", "UTF-8");
    bool removeBracket = PropertiesUtils.GetBool(options, "b", false);
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    tb = tlpp.DiskTreebank();
    // Remaining (positional) arguments are the treebank files to load.
    string[] files = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (files.Length != 0)
    {
        foreach (string filename in files)
        {
            tb.LoadPath(filename);
        }
    }
    else
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    PrintWriter pwo = tlpp.Pw();
    string startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
    ITreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    foreach (Tree t in tb)
    {
        if (removeBracket)
        {
            // Drop the top bracket when present.
            if (t.Value().Equals(startSymbol))
            {
                t = t.FirstChild();
            }
        }
        else
        {
            if (!t.Value().Equals(startSymbol))
            {
                //Add a bracket if it isn't already there
                t = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
            }
        }
        pwo.Println(t.ToString());
        nTrees++;
    }
    pwo.Close();
    System.Console.Error.Printf("Processed %d trees.%n", nTrees);
}
/// <summary>A fast, rule-based tokenizer for Modern Standard Arabic (UTF-8 encoding).</summary>
/// <remarks>
/// A fast, rule-based tokenizer for Modern Standard Arabic (UTF-8 encoding).
/// Performs punctuation splitting and light tokenization by default.
/// Orthographic normalization options are available, and can be enabled with
/// command line options.
/// <p>
/// Currently, this tokenizer does not do line splitting. It normalizes non-printing
/// line separators across platforms and prints the system default line splitter
/// to the output.
/// <p>
/// The following normalization options are provided:
/// <ul>
/// <li>
/// <c>useUTF8Ellipsis</c>
/// : Replaces sequences of three or more full stops with \u2026</li>
/// <li>
/// <c>normArDigits</c>
/// : Convert Arabic digits to ASCII equivalents</li>
/// <li>
/// <c>normArPunc</c>
/// : Convert Arabic punctuation to ASCII equivalents</li>
/// <li>
/// <c>normAlif</c>
/// : Change all alif forms to bare alif</li>
/// <li>
/// <c>normYa</c>
/// : Map ya to alif maqsura</li>
/// <li>
/// <c>removeDiacritics</c>
/// : Strip all diacritics</li>
/// <li>
/// <c>removeTatweel</c>
/// : Strip tatweel elongation character</li>
/// <li>
/// <c>removeQuranChars</c>
/// : Remove diacritics that appear in the Quran</li>
/// <li>
/// <c>removeProMarker</c>
/// : Remove the ATB null pronoun marker</li>
/// <li>
/// <c>removeSegMarker</c>
/// : Remove the ATB clitic segmentation marker</li>
/// <li>
/// <c>removeMorphMarker</c>
/// : Remove the ATB morpheme boundary markers</li>
/// <li>
/// <c>removeLengthening</c>
/// : Replace all sequences of three or more identical (non-period) characters with one copy</li>
/// <li>
/// <c>atbEscaping</c>
/// : Replace left/right parentheses with ATB escape characters</li>
/// </ul>
/// </remarks>
/// <param name="args"/>
public static void Main(string[] args)
{
    // Any argument containing "help" prints usage and exits.
    if (args.Length > 0 && args[0].Contains("help"))
    {
        System.Console.Error.Printf("Usage: java %s [OPTIONS] < file%n", typeof(ArabicTokenizer).FullName);
        System.Console.Error.Printf("%nOptions:%n");
        log.Info(" -help : Print this message. See javadocs for all normalization options.");
        log.Info(" -atb : Tokenization for the parsing experiments in Green and Manning (2010)");
        System.Environment.Exit(-1);
    }
    // Process normalization options: every flag name is passed through to the
    // tokenizer factory as an option.
    Properties tokenizerOptions = StringUtils.ArgsToProperties(args);
    ITokenizerFactory<CoreLabel> tf = tokenizerOptions.Contains("atb") ? ArabicTokenizer.AtbFactory() : ArabicTokenizer.Factory();
    foreach (string option in tokenizerOptions.StringPropertyNames())
    {
        tf.SetOptions(option);
    }
    // Replace line separators with a token so that we can
    // count lines
    tf.SetOptions("tokenizeNLs");
    // Read stdin, echoing tokens separated by spaces; newline tokens become
    // line breaks so the output mirrors the input line structure.
    int nLines = 0;
    int nTokens = 0;
    try
    {
        string encoding = "UTF-8";
        ITokenizer<CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
        // printSpace suppresses the leading space at the start of each line.
        bool printSpace = false;
        while (tokenizer.MoveNext())
        {
            ++nTokens;
            string word = tokenizer.Current.Word();
            if (word.Equals(ArabicLexer.NewlineToken))
            {
                ++nLines;
                printSpace = false;
                System.Console.Out.WriteLine();
            }
            else
            {
                if (printSpace)
                {
                    System.Console.Out.Write(" ");
                }
                System.Console.Out.Write(word);
                printSpace = true;
            }
        }
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens)%n", nLines, nTokens);
}
/// <summary>
/// Populate with the given command-line arguments all static
/// <see cref="Option"/>
/// -tagged fields in
/// the given classes.
/// </summary>
/// <param name="classes">
/// The classes to populate static
/// <see cref="Option"/>
/// -tagged fields in.
/// </param>
/// <param name="args">The command-line arguments to use to fill in additional properties.</param>
public static void FillOptions(Type[] classes, params string[] args)
{
    // Parse the raw command line into a Properties table.
    Properties parsed = StringUtils.ArgsToProperties(args);
    // First pass bootstraps the option machinery itself; the second pass
    // fills the static fields of the requested classes from the same table.
    FillOptionsImpl(null, BootstrapClasses, parsed, false, true);
    FillOptionsImpl(null, classes, parsed);
}
/// <summary>
/// Generates the quantifiable-entity rules files (prefix definitions and
/// stage-0 unit rules) from the configured mapping files.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // Generate rules files
    Properties properties = StringUtils.ArgsToProperties(args);
    Options opts = new Options("qe", properties);
    GeneratePrefixDefs(opts.prefixFilename, opts.prefixRulesFilename);
    GenerateUnitsStage0Rules(opts.unitsFilename, opts.text2UnitMapping, opts.unitsRulesFilename);
}
/// <summary>A debugging method to try entity linking sentences from the console.</summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Command-line properties, with the annotator list fixed for entity linking.
    Properties properties = StringUtils.ArgsToProperties(args);
    properties.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
    // Interactive read-annotate loop on stdin.
    IOUtils.Console("sentence> ", null);
}
/// <summary>A debugging method to try relation extraction from the console.</summary>
/// <exception cref="System.IO.IOException">If any IO problem</exception>
public static void Main(string[] args)
{
    // Command-line properties, with the KBP annotator stack and caseless/cased
    // regexner mappings configured explicitly.
    Properties properties = StringUtils.ArgsToProperties(args);
    properties.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    properties.SetProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
    // Interactive read-annotate loop on stdin.
    IOUtils.Console("sentence> ", null);
}
/// <summary>
/// Exports coref data for the train, dev, and test splits to the given
/// output path, configured by a properties file.
/// </summary>
/// <param name="args">args[0] is the properties file, args[1] the output path.</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // Fail fast with a clear message instead of an opaque index-out-of-range
    // exception when the two required positional arguments are missing.
    if (args.Length < 2)
    {
        throw new ArgumentException("Expected two arguments: <propertiesFile> <outputPath>");
    }
    Properties props = StringUtils.ArgsToProperties("-props", args[0]);
    Dictionaries dictionaries = new Dictionaries(props);
    string outputPath = args[1];
    ExportData(outputPath, CorefProperties.Dataset.Train, props, dictionaries);
    ExportData(outputPath, CorefProperties.Dataset.Dev, props, dictionaries);
    ExportData(outputPath, CorefProperties.Dataset.Test, props, dictionaries);
}
/// <summary>
/// Fill all non-static
/// <see cref="Option"/>
/// -tagged fields in the given set of objects with the given
/// command-line arguments.
/// </summary>
/// <param name="instances">
/// The object instances containing
/// <see cref="Option"/>
/// -tagged fields which we should fill.
/// </param>
/// <param name="args">The command-line arguments to use to fill these fields.</param>
public static void FillOptions(object[] instances, string[] args)
{
    Properties options = StringUtils.ArgsToProperties(args);
    //get options
    // Bootstrap the option machinery before filling the instances themselves.
    FillOptionsImpl(null, BootstrapClasses, options, false, true);
    //bootstrap
    // NOTE(review): Map(null)/ToArray(null) looks like a lambda lost in the
    // automated Java->C# conversion (presumably Object::getClass) — confirm
    // against the original Java source before relying on this path.
    Type[] classes = Arrays.Stream(instances).Map(null).ToArray(null);
    FillOptionsImpl(instances, classes, options);
}
// Runs the full CoreNLP pipeline and then the hybrid coref annotator over the
// given text, returning the annotated document. (Name kept as-is — "TestAnnoation"
// is an inherited typo; renaming would break callers.)
private static Annotation TestAnnoation(string text, string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    Annotation document = new Annotation(text);
    new StanfordCoreNLP(props).Annotate(document);
    new Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator(props).Annotate(document);
    return document;
}
/// <summary>
/// Converts Chinese constituency trees (given via -treeFile) to dependency
/// trees; parsing raw sentences (-sentFile) is not supported for Chinese.
/// </summary>
public static void Main(string[] args)
{
    Properties @params = StringUtils.ArgsToProperties(args);
    // Raw-sentence input is unsupported for Chinese; require pre-parsed trees.
    if (@params.GetProperty("sentFile") != null)
    {
        log.Error("Parsing sentences to constituency trees is not supported for Chinese. " + "Please parse your sentences first and then convert them to dependency trees using the -treeFile option.");
        return;
    }
    GrammaticalStructureConversionUtils.ConvertTrees(args, "zh");
}
// simple testing code
/// <summary>Smoke test: parses the ACE 2005 corpus from its fixed path with info-level logging.</summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    // Build the reader over a non-preprocessing pipeline.
    Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.AceReader aceReader = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.AceReader(new StanfordCoreNLP(props, false), false);
    aceReader.SetLoggerLevel(Level.Info);
    aceReader.Parse("/u/scr/nlp/data/ACE2005/");
    log.Info("done");
}
/// <summary>Smoke test: parses the Roth CoNLL-04 corpus and prints the dataset as a string.</summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // just a simple test, to make sure stuff works
    Properties props = StringUtils.ArgsToProperties(args);
    Edu.Stanford.Nlp.IE.Machinereading.Domains.Roth.RothCONLL04Reader corpusReader = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Roth.RothCONLL04Reader();
    corpusReader.SetLoggerLevel(Level.Info);
    corpusReader.SetProcessor(new StanfordCoreNLP(props));
    Annotation parsed = corpusReader.Parse("/u/nlp/data/RothCONLL04/conll04.corp");
    System.Console.Out.WriteLine(AnnotationUtils.DatasetToString(parsed));
}
/// <summary>
/// Scores chunk-level precision/recall over a CoNLL-style column file (or
/// stdin), printing either a CoNLL eval summary or a detailed description.
/// Flags: -b boundary token, -d column delimiter, -t default POS tag,
/// -r raw (string-label) mode, -i input file, -k background label.
/// </summary>
public static void Main(string[] args)
{
    StringUtils.LogInvocationString(log, args);
    Properties props = StringUtils.ArgsToProperties(args);
    string boundary = props.GetProperty("b", "-X-");
    string delimiter = props.GetProperty("d", "\t");
    string defaultPosTag = props.GetProperty("t", "I");
    bool raw = bool.ValueOf(props.GetProperty("r", "false"));
    bool ignoreProvidedTag = bool.ValueOf(props.GetProperty("ignoreProvidedTag", "false"));
    string format = props.GetProperty("format", "conll");
    string filename = props.GetProperty("i");
    string backgroundLabel = props.GetProperty("k", "O");
    try
    {
        // Raw mode scores plain string labels; otherwise use chunk-level
        // evaluation with the configured chunker options.
        MultiClassPrecisionRecallExtendedStats stats;
        if (raw)
        {
            stats = new MultiClassPrecisionRecallExtendedStats.MultiClassStringLabelStats(backgroundLabel);
        }
        else
        {
            Edu.Stanford.Nlp.Stats.MultiClassChunkEvalStats mstats = new Edu.Stanford.Nlp.Stats.MultiClassChunkEvalStats(backgroundLabel);
            mstats.GetChunker().SetDefaultPosTag(defaultPosTag);
            mstats.GetChunker().SetIgnoreProvidedTag(ignoreProvidedTag);
            stats = mstats;
        }
        // Score the named file, or stdin when no -i flag was given.
        if (filename != null)
        {
            stats.Score(filename, delimiter, boundary);
        }
        else
        {
            stats.Score(new BufferedReader(new InputStreamReader(Runtime.@in)), delimiter, boundary);
        }
        if (Sharpen.Runtime.EqualsIgnoreCase("conll", format))
        {
            System.Console.Out.WriteLine(stats.GetConllEvalString());
        }
        else
        {
            System.Console.Out.WriteLine(stats.GetDescription(6));
        }
    }
    catch (IOException ex)
    {
        log.Info("Error processing file: " + ex.ToString());
        Sharpen.Runtime.PrintStackTrace(ex, System.Console.Error);
    }
}
/// <summary>
/// Builds a unigram tagger from a Spanish treebank file (the single
/// positional argument), then resolves DUMMY POS tags in that file and
/// prints statistics on how many missing tags could be fixed.
/// Flags: -ner to retain NER information, -normalize to apply the Spanish
/// tree normalizer (default true).
/// </summary>
/// <param name="args"/>
public static void Main(string[] args)
{
    Properties options = StringUtils.ArgsToProperties(args, argOptionDefs);
    // Require exactly one positional argument (the tree file).
    if (!options.Contains(string.Empty) || options.Contains("help"))
    {
        log.Info(Usage());
        return;
    }
    bool retainNER = PropertiesUtils.GetBool(options, "ner", false);
    bool normalize = PropertiesUtils.GetBool(options, "normalize", true);
    File treeFile = new File(options.GetProperty(string.Empty));
    // NOTE(review): only unigramTagger is populated below; the other four
    // counters appear unused in this method — possibly consumed elsewhere.
    TwoDimensionalCounter<string, string> labelTerm = new TwoDimensionalCounter<string, string>();
    TwoDimensionalCounter<string, string> termLabel = new TwoDimensionalCounter<string, string>();
    TwoDimensionalCounter<string, string> labelPreterm = new TwoDimensionalCounter<string, string>();
    TwoDimensionalCounter<string, string> pretermLabel = new TwoDimensionalCounter<string, string>();
    TwoDimensionalCounter<string, string> unigramTagger = new TwoDimensionalCounter<string, string>();
    try
    {
        // First pass: accumulate word/tag counts for the unigram tagger.
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
        ITreeReaderFactory trf = new SpanishTreeReaderFactory();
        ITreeReader tr = trf.NewTreeReader(br);
        for (Tree t; (t = tr.ReadTree()) != null;)
        {
            UpdateTagger(unigramTagger, t);
        }
        tr.Close();
        //Closes the underlying reader
        // Second pass: rewrite DUMMY tags using the tagger just built.
        System.Console.Out.WriteLine("Resolving DUMMY tags");
        ResolveDummyTags(treeFile, unigramTagger, retainNER, normalize ? new SpanishTreeNormalizer(true, false, false) : null);
        System.Console.Out.WriteLine("#Unknown Word Types: " + MultiWordPreprocessor.ManualUWModel.nUnknownWordTypes);
        System.Console.Out.WriteLine(string.Format("#Missing POS: %d (fixed: %d, %.2f%%)", nMissingPOS, nFixedPOS, (double)nFixedPOS / nMissingPOS * 100));
        System.Console.Out.WriteLine(string.Format("#Missing Phrasal: %d (fixed: %d, %.2f%%)", nMissingPhrasal, nFixedPhrasal, (double)nFixedPhrasal / nMissingPhrasal * 100));
        System.Console.Out.WriteLine("Done!");
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException e)
    {
        // Must precede the IOException handler (it is the more specific type).
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
// end static class NERClient
/// <summary>Starts this server on the specified port.</summary>
/// <remarks>
/// Starts this server on the specified port. The classifier used can be
/// either a default one stored in the jar file from which this code is
/// invoked or you can specify it as a filename or as another classifier
/// resource name, which must correspond to the name of a resource in the
/// /classifiers/ directory of the jar file.
/// <p>
/// Usage: <code>java edu.stanford.nlp.tagger.maxent.MaxentTaggerServer [-model file|-client] -port portNumber [other MaxentTagger options]</code>
/// </remarks>
/// <param name="args">Command-line arguments (described above)</param>
/// <exception cref="System.Exception">If file or Java class problems with serialized classifier</exception>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        log.Info(Usage);
        return;
    }
    // Use both Properties and TaggerConfig. It's okay.
    Properties props = StringUtils.ArgsToProperties(args);
    string client = props.GetProperty("client");
    string portStr = props.GetProperty("port");
    // A port is required in both client and server modes.
    if (portStr == null || portStr.Equals(string.Empty))
    {
        log.Info(Usage);
        return;
    }
    int port = 0;
    try
    {
        port = System.Convert.ToInt32(portStr);
    }
    catch (NumberFormatException)
    {
        log.Info("Non-numerical port");
        log.Info(Usage);
        System.Environment.Exit(1);
    }
    if (client != null && !client.Equals(string.Empty))
    {
        // run a test client for illustration/testing
        string host = props.GetProperty("host");
        string encoding = props.GetProperty("encoding");
        // Default the client encoding to UTF-8 when unspecified.
        if (encoding == null || string.Empty.Equals(encoding))
        {
            encoding = "utf-8";
        }
        MaxentTaggerServer.TaggerClient.CommunicateWithMaxentTaggerServer(host, port, encoding);
    }
    else
    {
        // Server mode: load the tagger model and serve it on the given port.
        TaggerConfig config = new TaggerConfig(args);
        MaxentTagger tagger = new MaxentTagger(config.GetModel(), config);
        // initializes tagger
        MaxentTagger.TaggerWrapper wrapper = new MaxentTagger.TaggerWrapper(tagger);
        new MaxentTaggerServer(port, wrapper, config.GetEncoding()).Run();
    }
}
// Verifies the two parsing modes of StringUtils.ArgsToProperties:
// without an argNums map, "-fred -2 joe" treats -fred as a boolean flag
// ("true") and "-2"'s value "joe" as a keyed entry; with {"fred": 1},
// -fred consumes one value ("-2") and the leftover "joe" lands under the
// empty-string key.
public virtual void TestArgsToProperties()
{
    Properties p1 = new Properties();
    p1.SetProperty("fred", "-2");
    p1.SetProperty(string.Empty, "joe");
    Properties p2 = new Properties();
    p2.SetProperty("fred", "true");
    p2.SetProperty("2", "joe");
    IDictionary<string, int> argNums = new Dictionary<string, int>();
    argNums["fred"] = 1;
    NUnit.Framework.Assert.AreEqual(p2, StringUtils.ArgsToProperties("-fred", "-2", "joe"));
    NUnit.Framework.Assert.AreEqual(StringUtils.ArgsToProperties(new string[] { "-fred", "-2", "joe" }, argNums), p1);
}
/// <summary>
/// Reads the file named by -input line by line as UTF-8, normalizes each
/// line, and prints the normalized text to stdout as UTF-8.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    Properties p = StringUtils.ArgsToProperties(args);
    if (p.Contains("input"))
    {
        FileInputStream fis = new FileInputStream(p.GetProperty("input"));
        InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
        BufferedReader reader = new BufferedReader(isr);
        try
        {
            string thisLine;
            while ((thisLine = reader.ReadLine()) != null)
            {
                EncodingPrintWriter.Out.Println(Normalize(thisLine), "UTF-8");
            }
        }
        finally
        {
            // Fix: the reader (and underlying file handle) was never closed.
            // Closing the outermost reader closes the wrapped streams too.
            reader.Close();
        }
    }
}