Example #1
        /// <summary>A fast, rule-based tokenizer for Modern Standard French.</summary>
        /// <remarks>
        /// A fast, rule-based tokenizer for Modern Standard French.
        /// Performs punctuation splitting and light tokenization by default.
        /// <p>
        /// Currently, this tokenizer does not do line splitting. It assumes that the input
        /// file is delimited by the system line separator. The output will be equivalently
        /// delimited.
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args, ArgOptionDefs());

            if (options.Contains("help"))
            {
                log.Info(Usage());
                return;
            }
            // Lexer options
            ITokenizerFactory <CoreLabel> tf = options.Contains("ftb") ? FrenchTokenizer.FtbFactory() : FrenchTokenizer.Factory();
            string orthoOptions = options.GetProperty("options", string.Empty);

            // When called from this main method, split on newline. No options for
            // more granular sentence splitting.
            orthoOptions = orthoOptions.IsEmpty() ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs";
            tf.SetOptions(orthoOptions);
            // Other options
            string encoding = options.GetProperty("encoding", "UTF-8");
            bool   toLower  = PropertiesUtils.GetBool(options, "lowerCase", false);
            // Read the file from stdin
            int  nLines    = 0;
            int  nTokens   = 0;
            long startTime = Runtime.NanoTime();

            try
            {
                ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
                bool printSpace = false;
                while (tokenizer.MoveNext())
                {
                    ++nTokens;
                    string word = tokenizer.Current.Word();
                    if (word.Equals(FrenchLexer.NewlineToken))
                    {
                        ++nLines;
                        printSpace = false;
                        System.Console.Out.WriteLine();
                    }
                    else
                    {
                        if (printSpace)
                        {
                            System.Console.Out.Write(" ");
                        }
                        string outputToken = toLower ? word.ToLower(Locale.French) : word;
                        System.Console.Out.Write(outputToken);
                        printSpace = true;
                    }
                }
            }
            catch (UnsupportedEncodingException e)
            {
                log.Error(e);
            }
            long   elapsedTime = Runtime.NanoTime() - startTime;
            double linesPerSec = (double)nLines / (elapsedTime / 1e9);

            System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", nLines, nTokens, linesPerSec);
        }
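        // A minimal sketch (not part of the original listing) showing the same factory used
        // programmatically on an in-memory string; the wrapper method name, the StringReader,
        // and the sample sentence are illustrative assumptions.
        public static void TokenizeFrenchString()
        {
            ITokenizerFactory <CoreLabel> tf = FrenchTokenizer.Factory();
            ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new StringReader("Le chat n'est pas là."));
            while (tokenizer.MoveNext())
            {
                System.Console.Out.WriteLine(tokenizer.Current.Word());
            }
        }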
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Runtime.SetOut(new TextWriter(System.Console.Out, true, "UTF-8"));
            Runtime.SetErr(new TextWriter(System.Console.Error, true, "UTF-8"));
            Properties config = StringUtils.ArgsToProperties(args);

            CheckArgs(config);
            Edu.Stanford.Nlp.Tagger.Util.CountClosedTags cct = new Edu.Stanford.Nlp.Tagger.Util.CountClosedTags(config);
            string trainFiles = config.GetProperty(TrainFileProperty);
            string testFiles  = config.GetProperty(TestFileProperty);
            IList <TaggedFileRecord> files = TaggedFileRecord.CreateRecords(config, trainFiles);

            foreach (TaggedFileRecord file in files)
            {
                cct.CountTrainingTags(file);
            }
            if (testFiles != null)
            {
                files = TaggedFileRecord.CreateRecords(config, testFiles);
                foreach (TaggedFileRecord file_1 in files)
                {
                    cct.CountTestTags(file_1);
                }
            }
            cct.Report();
        }
        /// <summary>Some basic testing of the ClassifierCombiner.</summary>
        /// <param name="args">Command-line arguments as properties: -loadClassifier1 serializedFile -loadClassifier2 serializedFile</param>
        /// <exception cref="System.Exception">If IO or serialization error loading classifiers</exception>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);

            Edu.Stanford.Nlp.IE.ClassifierCombiner ec = new Edu.Stanford.Nlp.IE.ClassifierCombiner(props);
            log.Info(ec.ClassifyToString("Marketing : Sony Hopes to Win Much Bigger Market For Wide Range of Small-Video Products --- By Andrew B. Cohen Staff Reporter of The Wall Street Journal"));
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            StringUtils.LogInvocationString(log, args);
            Properties props = StringUtils.ArgsToProperties(args);
            CRFClassifier <CoreLabel> crf = new CRFClassifier <CoreLabel>(props);
            string inputFile = crf.flags.trainFile;

            if (inputFile == null)
            {
                log.Info("Please provide input file using -trainFile");
                System.Environment.Exit(-1);
            }
            string outputFile = crf.flags.exportFeatures;

            if (outputFile == null)
            {
                log.Info("Please provide output file using -exportFeatures");
                System.Environment.Exit(-1);
            }
            Edu.Stanford.Nlp.IE.Crf.CRFFeatureExporter <CoreLabel> featureExporter = new Edu.Stanford.Nlp.IE.Crf.CRFFeatureExporter <CoreLabel>(crf);
            ICollection <IList <CoreLabel> > docs = crf.MakeObjectBankFromFile(inputFile, crf.MakeReaderAndWriter());

            crf.MakeAnswerArraysAndTagIndex(docs);
            featureExporter.PrintFeatures(outputFile, docs);
        }
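        // As the two checks in the CRFFeatureExporter example above indicate, both flags must
        // be supplied, e.g. (file names are placeholders):
        //   -trainFile train.conll -exportFeatures features.tsv [other CRFClassifier properties]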
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                log.Info(usage);
                System.Environment.Exit(1);
            }
            Properties options    = StringUtils.ArgsToProperties(args, argOptionDefs);
            string     outputPath = options.GetProperty("o");

            if (outputPath == null)
            {
                throw new ArgumentException("-o argument (output path for built tagger) is required");
            }
            string[]     remainingArgs = options.GetProperty(string.Empty).Split(" ");
            IList <File> fileList      = new List <File>();

            foreach (string arg in remainingArgs)
            {
                fileList.Add(new File(arg));
            }
            Edu.Stanford.Nlp.International.Spanish.Pipeline.AnCoraPOSStats stats = new Edu.Stanford.Nlp.International.Spanish.Pipeline.AnCoraPOSStats(fileList, outputPath);
            stats.Process();
            ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(outputPath));
            TwoDimensionalCounter <string, string> tagger = stats.GetUnigramTagger();

            oos.WriteObject(tagger);
            System.Console.Out.Printf("Wrote tagger to %s%n", outputPath);
        }
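        // An illustrative invocation of the AnCoraPOSStats example above: -o names the
        // serialized unigram tagger to write, and the remaining arguments are treebank files
        // (all names here are placeholders):
        //   -o ancora-unigram-tagger.ser ancora-file1.xml ancora-file2.xml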
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);

            Edu.Stanford.Nlp.Coref.CorefSystem coref = new Edu.Stanford.Nlp.Coref.CorefSystem(props);
            coref.RunOnConll(props);
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties      props    = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            string          file     = props.GetProperty("file");
            string          loadFile = props.GetProperty("loadFile");

            if (loadFile != null && !loadFile.IsEmpty())
            {
                Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                InputStream @is = new FileInputStream(loadFile);
                Pair <Annotation, InputStream> pair = ser.Read(@is);
                pair.second.Close();
                Annotation anno = pair.first;
                System.Console.Out.WriteLine(anno.ToShorterString(StringUtils.EmptyStringArray));
                @is.Close();
            }
            else
            {
                if (file != null && !file.Equals(string.Empty))
                {
                    string     text = IOUtils.SlurpFile(file);
                    Annotation doc  = new Annotation(text);
                    pipeline.Annotate(doc);
                    Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                    TextWriter os = new TextWriter(new FileOutputStream(file + ".ser"));
                    ser.Write(doc, os).Close();
                    log.Info("Serialized annotation saved in " + file + ".ser");
                }
                else
                {
                    log.Info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
                }
            }
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties props;

            if (args.Length > 0)
            {
                props = StringUtils.ArgsToProperties(args);
            }
            else
            {
                props = new Properties();
            }
            if (!props.Contains("dcoref.conll2011"))
            {
                log.Info("-dcoref.conll2011 [input_CoNLL_corpus]: was not specified");
                return;
            }
            if (!props.Contains("singleton.predictor.output"))
            {
                log.Info("-singleton.predictor.output [output_model_file]: was not specified");
                return;
            }
            GeneralDataset <string, string>     data       = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.GenerateFeatureVectors(props);
            LogisticClassifier <string, string> classifier = Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.Train(data);

            Edu.Stanford.Nlp.Coref.Misc.SingletonPredictor.SaveToSerialized(classifier, GetPathSingletonPredictor(props));
        }
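        // Both properties checked in the SingletonPredictor example above are required, so an
        // invocation would look something like this (paths are placeholders):
        //   -dcoref.conll2011 /path/to/conll2011 -singleton.predictor.output singleton_model.ser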
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            long   startTime = Runtime.CurrentTimeMillis();
            string text      = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , " + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , " + "可是 就 在 稍后 就 返回 莫斯科 。 " + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 " + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , "
                               + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 " + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 " + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ," + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , "
                               + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 " + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";

            args = new string[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };
            Annotation      document = new Annotation(text);
            Properties      props    = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            pipeline.Annotate(document);
            System.Console.Out.WriteLine("---");
            System.Console.Out.WriteLine("coref chains");
            foreach (CorefChain cc in document.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)).Values)
            {
                System.Console.Out.WriteLine("\t" + cc);
            }
            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                System.Console.Out.WriteLine("---");
                System.Console.Out.WriteLine("mentions");
                foreach (Mention m in sentence.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)))
                {
                    System.Console.Out.WriteLine("\t" + m);
                }
            }
            long endTime = Runtime.CurrentTimeMillis();
            long time    = (endTime - startTime) / 1000;

            System.Console.Out.WriteLine("Running time " + time / 60 + "min " + time % 60 + "s");
        }
 public static void Main(string[] args)
 {
     try
     {
         Properties props = StringUtils.ArgsToProperties(args);
         props.SetProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
         StanfordCoreNLP pipeline = new StanfordCoreNLP(props);   // pass the props so the annotators configured above are used
         string          sentence = "Barack Obama lives in America. Obama works for the Federal Government.";
         Annotation      doc      = new Annotation(sentence);
         pipeline.Annotate(doc);
         Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator r = new Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator(props);
         r.Annotate(doc);
         foreach (ICoreMap s in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             System.Console.Out.WriteLine("For sentence " + s.Get(typeof(CoreAnnotations.TextAnnotation)));
             IList <RelationMention> rls = s.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
             foreach (RelationMention rl in rls)
             {
                 System.Console.Out.WriteLine(rl.ToString());
             }
         }
     }
     catch (Exception e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
Example #11
        /// <summary>This runs a simple train and test regime.</summary>
        /// <remarks>
        /// This runs a simple train and test regime.
        /// The data file format is one item per line, space separated, with first the class label
        /// and then a bunch of (categorical) string features.
        /// </remarks>
        /// <param name="args">The arguments/flags are: -trainFile trainFile -testFile testFile [-l1reg num] [-biased]</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties prop             = StringUtils.ArgsToProperties(args);
            double     l1reg            = double.ParseDouble(prop.GetProperty("l1reg", "0.0"));
            Dataset <string, string> ds = new Dataset <string, string>();

            foreach (string line in ObjectBank.GetLineIterator(new File(prop.GetProperty("trainFile"))))
            {
                string[]             bits = line.Split("\\s+");
                ICollection <string> f    = new LinkedList <string>(Arrays.AsList(bits).SubList(1, bits.Length));
                string l = bits[0];
                ds.Add(f, l);
            }
            ds.SummaryStatistics();
            bool biased = prop.GetProperty("biased", "false").Equals("true");
            LogisticClassifierFactory <string, string> factory = new LogisticClassifierFactory <string, string>();

            Edu.Stanford.Nlp.Classify.LogisticClassifier <string, string> lc = factory.TrainClassifier(ds, l1reg, 1e-4, biased);
            foreach (string line_1 in ObjectBank.GetLineIterator(new File(prop.GetProperty("testFile"))))
            {
                string[]             bits = line_1.Split("\\s+");
                ICollection <string> f    = new LinkedList <string>(Arrays.AsList(bits).SubList(1, bits.Length));
                //String l = bits[0];
                string g    = lc.ClassOf(f);
                double prob = lc.ProbabilityOf(f, g);
                System.Console.Out.Printf("%4.3f\t%s\t%s%n", prob, g, line_1);
            }
        }
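        // Given the data file format described in the remarks of the example above (class label
        // first, then space-separated categorical string features), a -trainFile or -testFile
        // might contain lines like these; the labels and feature names are made up for illustration:
        //   SPAM free offer click_now
        //   HAM  meeting agenda tomorrow
        //   SPAM free prize winner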
        public static void Main(string[] args)
        {
            Edu.Stanford.Nlp.Trees.CoordinationTransformer transformer = new Edu.Stanford.Nlp.Trees.CoordinationTransformer(null);
            Treebank   tb           = new MemoryTreebank();
            Properties props        = StringUtils.ArgsToProperties(args);
            string     treeFileName = props.GetProperty("treeFile");

            if (treeFileName != null)
            {
                try
                {
                    ITreeReader tr = new PennTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName))), new LabeledScoredTreeFactory());
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        tb.Add(t);
                    }
                }
                catch (IOException e)
                {
                    throw new Exception("File problem: " + e);
                }
            }
            foreach (Tree t_1 in tb)
            {
                System.Console.Out.WriteLine("Original tree");
                t_1.PennPrint();
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("Tree transformed");
                Tree tree = transformer.TransformTree(t_1);
                tree.PennPrint();
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("----------------------------");
            }
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties   props        = StringUtils.ArgsToProperties(new string[] { "-props", args[0] });
            Dictionaries dictionaries = new Dictionaries(props);

            CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
            new MentionDetectionEvaluator().Run(props, dictionaries);
        }
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, ArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            DiskTreebank tb            = null;
            string       encoding      = options.GetProperty("e", "UTF-8");   // presumably "-e"; "l" is already the language flag above
            bool         removeBracket = PropertiesUtils.GetBool(options, "b", false);

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            tb = tlpp.DiskTreebank();
            string[] files = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (files.Length != 0)
            {
                foreach (string filename in files)
                {
                    tb.LoadPath(filename);
                }
            }
            else
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            PrintWriter  pwo         = tlpp.Pw();
            string       startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
            ITreeFactory tf          = new LabeledScoredTreeFactory();
            int          nTrees      = 0;

            foreach (Tree t in tb)
            {
                if (removeBracket)
                {
                    if (t.Value().Equals(startSymbol))
                    {
                        t = t.FirstChild();
                    }
                }
                else
                {
                    if (!t.Value().Equals(startSymbol))
                    {
                        //Add a bracket if it isn't already there
                        t = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
                    }
                }
                pwo.Println(t.ToString());
                nTrees++;
            }
            pwo.Close();
            System.Console.Error.Printf("Processed %d trees.%n", nTrees);
        }
        /// <summary>A fast, rule-based tokenizer for Modern Standard Arabic (UTF-8 encoding).</summary>
        /// <remarks>
        /// A fast, rule-based tokenizer for Modern Standard Arabic (UTF-8 encoding).
        /// Performs punctuation splitting and light tokenization by default.
        /// Orthographic normalization options are available, and can be enabled with
        /// command line options.
        /// <p>
        /// Currently, this tokenizer does not do line splitting. It normalizes non-printing
        /// line separators across platforms and prints the system default line separator
        /// to the output.
        /// <p>
        /// The following normalization options are provided:
        /// <ul>
        /// <li>
        /// <c>useUTF8Ellipsis</c>
        /// : Replaces sequences of three or more full stops with \u2026</li>
        /// <li>
        /// <c>normArDigits</c>
        /// : Convert Arabic digits to ASCII equivalents</li>
        /// <li>
        /// <c>normArPunc</c>
        /// : Convert Arabic punctuation to ASCII equivalents</li>
        /// <li>
        /// <c>normAlif</c>
        /// : Change all alif forms to bare alif</li>
        /// <li>
        /// <c>normYa</c>
        /// : Map ya to alif maqsura</li>
        /// <li>
        /// <c>removeDiacritics</c>
        /// : Strip all diacritics</li>
        /// <li>
        /// <c>removeTatweel</c>
        /// : Strip tatweel elongation character</li>
        /// <li>
        /// <c>removeQuranChars</c>
        /// : Remove diacritics that appear in the Quran</li>
        /// <li>
        /// <c>removeProMarker</c>
        /// : Remove the ATB null pronoun marker</li>
        /// <li>
        /// <c>removeSegMarker</c>
        /// : Remove the ATB clitic segmentation marker</li>
        /// <li>
        /// <c>removeMorphMarker</c>
        /// : Remove the ATB morpheme boundary markers</li>
        /// <li>
        /// <c>removeLengthening</c>
        /// : Replace all sequences of three or more identical (non-period) characters with one copy</li>
        /// <li>
        /// <c>atbEscaping</c>
        /// : Replace left/right parentheses with ATB escape characters</li>
        /// </ul>
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length > 0 && args[0].Contains("help"))
            {
                System.Console.Error.Printf("Usage: java %s [OPTIONS] < file%n", typeof(ArabicTokenizer).FullName);
                System.Console.Error.Printf("%nOptions:%n");
                log.Info("   -help : Print this message. See javadocs for all normalization options.");
                log.Info("   -atb  : Tokenization for the parsing experiments in Green and Manning (2010)");
                System.Environment.Exit(-1);
            }
            // Process normalization options
            Properties tokenizerOptions      = StringUtils.ArgsToProperties(args);
            ITokenizerFactory <CoreLabel> tf = tokenizerOptions.Contains("atb") ? ArabicTokenizer.AtbFactory() : ArabicTokenizer.Factory();

            foreach (string option in tokenizerOptions.StringPropertyNames())
            {
                tf.SetOptions(option);
            }
            // Replace line separators with a token so that we can
            // count lines
            tf.SetOptions("tokenizeNLs");
            // Read the file
            int nLines  = 0;
            int nTokens = 0;

            try
            {
                string encoding = "UTF-8";
                ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, encoding));
                bool printSpace = false;
                while (tokenizer.MoveNext())
                {
                    ++nTokens;
                    string word = tokenizer.Current.Word();
                    if (word.Equals(ArabicLexer.NewlineToken))
                    {
                        ++nLines;
                        printSpace = false;
                        System.Console.Out.WriteLine();
                    }
                    else
                    {
                        if (printSpace)
                        {
                            System.Console.Out.Write(" ");
                        }
                        System.Console.Out.Write(word);
                        printSpace = true;
                    }
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            System.Console.Error.Printf("Done! Tokenized %d lines (%d tokens)%n", nLines, nTokens);
        }
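        // A minimal sketch (not part of the original listing) of driving the same factory
        // programmatically with two of the normalization options documented above; the wrapper
        // method name is an assumption, and reading stdin with UTF-8 mirrors the Main method.
        public static void TokenizeArabicFromStdin()
        {
            ITokenizerFactory <CoreLabel> tf = ArabicTokenizer.Factory();
            // One documented option per call, as in the options loop of the Main method above.
            tf.SetOptions("normArDigits");
            tf.SetOptions("removeDiacritics");
            ITokenizer <CoreLabel> tokenizer = tf.GetTokenizer(new InputStreamReader(Runtime.@in, "UTF-8"));
            while (tokenizer.MoveNext())
            {
                System.Console.Out.WriteLine(tokenizer.Current.Word());
            }
        }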
Example #16
        /// <summary>
        /// Populate with the given command-line arguments all static
        /// <see cref="Option"/>
        /// -tagged fields in
        /// the given classes.
        /// </summary>
        /// <param name="classes">
        /// The classes to populate static
        /// <see cref="Option"/>
        /// -tagged fields in.
        /// </param>
        /// <param name="args">The command-line arguments to use to fill in additional properties.</param>
        public static void FillOptions(Type[] classes, params string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args);

            //get options
            FillOptionsImpl(null, BootstrapClasses, options, false, true);
            //bootstrap
            FillOptionsImpl(null, classes, options);
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // Generate rules files
            Properties props   = StringUtils.ArgsToProperties(args);
            Options    options = new Options("qe", props);

            GeneratePrefixDefs(options.prefixFilename, options.prefixRulesFilename);
            GenerateUnitsStage0Rules(options.unitsFilename, options.text2UnitMapping, options.unitsRulesFilename);
        }
Example #18
        /// <summary>A debugging method to try entity linking sentences from the console.</summary>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);

            props.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            IOUtils.Console("sentence> ", null);
        }
        /// <summary>A debugging method to try relation extraction from the console.</summary>
        /// <exception cref="System.IO.IOException">If any IO problem</exception>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);

            props.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
            props.SetProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            IOUtils.Console("sentence> ", null);
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties   props        = StringUtils.ArgsToProperties("-props", args[0]);
            Dictionaries dictionaries = new Dictionaries(props);
            string       outputPath   = args[1];

            ExportData(outputPath, CorefProperties.Dataset.Train, props, dictionaries);
            ExportData(outputPath, CorefProperties.Dataset.Dev, props, dictionaries);
            ExportData(outputPath, CorefProperties.Dataset.Test, props, dictionaries);
        }
Example #21
        /// <summary>
        /// Fill all non-static
        /// <see cref="Option"/>
        /// -tagged fields in the given set of objects with the given
        /// command-line arguments.
        /// </summary>
        /// <param name="instances">
        /// The object instances containing
        /// <see cref="Option"/>
        /// -tagged fields which we should fill.
        /// </param>
        /// <param name="args">The command-line arguments to use to fill these fields.</param>
        public static void FillOptions(object[] instances, string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args);

            //get options
            FillOptionsImpl(null, BootstrapClasses, options, false, true);
            //bootstrap
            Type[] classes = Arrays.Stream(instances).Map(null).ToArray(null);
            FillOptionsImpl(instances, classes, options);
        }
Example #22
        private static Annotation TestAnnotation(string text, string[] args)
        {
            Annotation      document = new Annotation(text);
            Properties      props    = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP corenlp  = new StanfordCoreNLP(props);

            corenlp.Annotate(document);
            Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator hcoref = new Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator(props);
            hcoref.Annotate(document);
            return(document);
        }
        public static void Main(string[] args)
        {
            Properties @params = StringUtils.ArgsToProperties(args);

            if (@params.GetProperty("sentFile") != null)
            {
                log.Error("Parsing sentences to constituency trees is not supported for Chinese. " + "Please parse your sentences first and then convert them to dependency trees using the -treeFile option.");
                return;
            }
            GrammaticalStructureConversionUtils.ConvertTrees(args, "zh");
        }
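        // As the error message above indicates, the input must already be parsed; an illustrative
        // invocation would pass the parsed trees explicitly (file name is a placeholder):
        //   -treeFile chinese-parsed.trees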
Example #24
        // simple testing code
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);

            Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.AceReader r = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.AceReader(new StanfordCoreNLP(props, false), false);
            r.SetLoggerLevel(Level.Info);
            r.Parse("/u/scr/nlp/data/ACE2005/");
            // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
            // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
            log.Info("done");
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // just a simple test, to make sure stuff works
            Properties props = StringUtils.ArgsToProperties(args);

            Edu.Stanford.Nlp.IE.Machinereading.Domains.Roth.RothCONLL04Reader reader = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Roth.RothCONLL04Reader();
            reader.SetLoggerLevel(Level.Info);
            reader.SetProcessor(new StanfordCoreNLP(props));
            Annotation doc = reader.Parse("/u/nlp/data/RothCONLL04/conll04.corp");

            System.Console.Out.WriteLine(AnnotationUtils.DatasetToString(doc));
        }
Example #26
        public static void Main(string[] args)
        {
            StringUtils.LogInvocationString(log, args);
            Properties props             = StringUtils.ArgsToProperties(args);
            string     boundary          = props.GetProperty("b", "-X-");
            string     delimiter         = props.GetProperty("d", "\t");
            string     defaultPosTag     = props.GetProperty("t", "I");
            bool       raw               = bool.ValueOf(props.GetProperty("r", "false"));
            bool       ignoreProvidedTag = bool.ValueOf(props.GetProperty("ignoreProvidedTag", "false"));
            string     format            = props.GetProperty("format", "conll");
            string     filename          = props.GetProperty("i");
            string     backgroundLabel   = props.GetProperty("k", "O");

            try
            {
                MultiClassPrecisionRecallExtendedStats stats;
                if (raw)
                {
                    stats = new MultiClassPrecisionRecallExtendedStats.MultiClassStringLabelStats(backgroundLabel);
                }
                else
                {
                    Edu.Stanford.Nlp.Stats.MultiClassChunkEvalStats mstats = new Edu.Stanford.Nlp.Stats.MultiClassChunkEvalStats(backgroundLabel);
                    mstats.GetChunker().SetDefaultPosTag(defaultPosTag);
                    mstats.GetChunker().SetIgnoreProvidedTag(ignoreProvidedTag);
                    stats = mstats;
                }
                if (filename != null)
                {
                    stats.Score(filename, delimiter, boundary);
                }
                else
                {
                    stats.Score(new BufferedReader(new InputStreamReader(Runtime.@in)), delimiter, boundary);
                }
                if (Sharpen.Runtime.EqualsIgnoreCase("conll", format))
                {
                    System.Console.Out.WriteLine(stats.GetConllEvalString());
                }
                else
                {
                    System.Console.Out.WriteLine(stats.GetDescription(6));
                }
            }
            catch (IOException ex)
            {
                log.Info("Error processing file: " + ex.ToString());
                Sharpen.Runtime.PrintStackTrace(ex, System.Console.Error);
            }
        }
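        // An illustrative invocation of the scorer above, using the single-letter flags read from
        // the properties (file name is a placeholder):
        //   -i tagged-output.conll -k O -format conll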
Example #27
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args, argOptionDefs);

            if (!options.Contains(string.Empty) || options.Contains("help"))
            {
                log.Info(Usage());
                return;
            }
            bool retainNER = PropertiesUtils.GetBool(options, "ner", false);
            bool normalize = PropertiesUtils.GetBool(options, "normalize", true);
            File treeFile  = new File(options.GetProperty(string.Empty));
            TwoDimensionalCounter <string, string> labelTerm     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> termLabel     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> labelPreterm  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> pretermLabel  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> unigramTagger = new TwoDimensionalCounter <string, string>();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                for (Tree t; (t = tr.ReadTree()) != null;)
                {
                    UpdateTagger(unigramTagger, t);
                }
                tr.Close();
                //Closes the underlying reader
                System.Console.Out.WriteLine("Resolving DUMMY tags");
                ResolveDummyTags(treeFile, unigramTagger, retainNER, normalize ? new SpanishTreeNormalizer(true, false, false) : null);
                System.Console.Out.WriteLine("#Unknown Word Types: " + MultiWordPreprocessor.ManualUWModel.nUnknownWordTypes);
                System.Console.Out.WriteLine(string.Format("#Missing POS: %d (fixed: %d, %.2f%%)", nMissingPOS, nFixedPOS, (double)nFixedPOS / nMissingPOS * 100));
                System.Console.Out.WriteLine(string.Format("#Missing Phrasal: %d (fixed: %d, %.2f%%)", nMissingPhrasal, nFixedPhrasal, (double)nFixedPhrasal / nMissingPhrasal * 100));
                System.Console.Out.WriteLine("Done!");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Example #28
        // end static class NERClient
        /// <summary>Starts this server on the specified port.</summary>
        /// <remarks>
        /// Starts this server on the specified port.  The classifier used can be
        /// either a default one stored in the jar file from which this code is
        /// invoked or you can specify it as a filename or as another classifier
        /// resource name, which must correspond to the name of a resource in the
        /// /classifiers/ directory of the jar file.
        /// <p>
        /// Usage: <code>java edu.stanford.nlp.tagger.maxent.MaxentTaggerServer [-model file|-client] -port portNumber [other MaxentTagger options]</code>
        /// </remarks>
        /// <param name="args">Command-line arguments (described above)</param>
        /// <exception cref="System.Exception">If file or Java class problems with serialized classifier</exception>
        public static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                log.Info(Usage);
                return;
            }
            // Use both Properties and TaggerConfig.  It's okay.
            Properties props   = StringUtils.ArgsToProperties(args);
            string     client  = props.GetProperty("client");
            string     portStr = props.GetProperty("port");

            if (portStr == null || portStr.Equals(string.Empty))
            {
                log.Info(Usage);
                return;
            }
            int port = 0;

            try
            {
                port = System.Convert.ToInt32(portStr);
            }
            catch (NumberFormatException)
            {
                log.Info("Non-numerical port");
                log.Info(Usage);
                System.Environment.Exit(1);
            }
            if (client != null && !client.Equals(string.Empty))
            {
                // run a test client for illustration/testing
                string host     = props.GetProperty("host");
                string encoding = props.GetProperty("encoding");
                if (encoding == null || string.Empty.Equals(encoding))
                {
                    encoding = "utf-8";
                }
                MaxentTaggerServer.TaggerClient.CommunicateWithMaxentTaggerServer(host, port, encoding);
            }
            else
            {
                TaggerConfig config = new TaggerConfig(args);
                MaxentTagger tagger = new MaxentTagger(config.GetModel(), config);
                // initializes tagger
                MaxentTagger.TaggerWrapper wrapper = new MaxentTagger.TaggerWrapper(tagger);
                new MaxentTaggerServer(port, wrapper, config.GetEncoding()).Run();
            }
        }
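        // Based on the usage string in the remarks above, a server and a matching test client
        // might be started like this (model path, port, and host are placeholders):
        //   java edu.stanford.nlp.tagger.maxent.MaxentTaggerServer -model my-tagger.model -port 2020
        //   java edu.stanford.nlp.tagger.maxent.MaxentTaggerServer -client -port 2020 -host localhost -encoding utf-8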
        public virtual void TestArgsToProperties()
        {
            Properties p1 = new Properties();

            p1.SetProperty("fred", "-2");
            p1.SetProperty(string.Empty, "joe");
            Properties p2 = new Properties();

            p2.SetProperty("fred", "true");
            p2.SetProperty("2", "joe");
            IDictionary <string, int> argNums = new Dictionary <string, int>();

            argNums["fred"] = 1;
            NUnit.Framework.Assert.AreEqual(p2, StringUtils.ArgsToProperties("-fred", "-2", "joe"));
            NUnit.Framework.Assert.AreEqual(StringUtils.ArgsToProperties(new string[] { "-fred", "-2", "joe" }, argNums), p1);
        }
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            Properties p = StringUtils.ArgsToProperties(args);

            if (p.Contains("input"))
            {
                FileInputStream   fis    = new FileInputStream(p.GetProperty("input"));
                InputStreamReader isr    = new InputStreamReader(fis, "UTF-8");
                BufferedReader    reader = new BufferedReader(isr);
                string            thisLine;
                while ((thisLine = reader.ReadLine()) != null)
                {
                    EncodingPrintWriter.Out.Println(Normalize(thisLine), "UTF-8");
                }
            }
        }