/// <summary>
        /// Loads a tagger model, tags every sentence read from a file and prints each tagged
        /// sentence, then prints the words of one hard-coded sentence whose tag starts with "JJ".
        /// </summary>
        /// <param name="args">Exactly two entries: the model file and the file to tag.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                log.Info("usage: java TaggerDemo2 modelFile fileToTag");
                return;
            }
            MaxentTagger tagger = new MaxentTagger(args[0]);
            ITokenizerFactory <CoreLabel> ptbTokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
            BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));

            try
            {
                PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, "utf-8"));
                try
                {
                    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);

                    documentPreprocessor.SetTokenizerFactory(ptbTokenizerFactory);
                    foreach (IList <IHasWord> sentence in documentPreprocessor)
                    {
                        IList <TaggedWord> tSentence = tagger.TagSentence(sentence);
                        pw.Println(SentenceUtils.ListToString(tSentence, false));
                    }
                    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
                    IList <IHasWord>   sent       = SentenceUtils.ToWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
                    IList <TaggedWord> taggedSent = tagger.TagSentence(sent);

                    foreach (TaggedWord tw in taggedSent)
                    {
                        // Tags are identifiers, not linguistic text: compare ordinally rather than by culture.
                        if (tw.Tag().StartsWith("JJ", System.StringComparison.Ordinal))
                        {
                            pw.Println(tw.Word());
                        }
                    }
                }
                finally
                {
                    // Close (and thereby flush) the writer even when tagging throws.
                    pw.Close();
                }
            }
            finally
            {
                // The input reader was never released in the original; close it on every path.
                r.Close();
            }
        }
 /// <summary>Builds the word list for the fixed sample sentence "Ésto es sólo una prueba .".</summary>
 /// <returns>The result of <c>SentenceUtils.ToWordList</c> applied to the sample tokens.</returns>
 public override IList <IHasWord> DefaultTestSentence()
 {
     // The sentence is stored pre-tokenized, one array entry per token.
     string[] tokens = { "Ésto", "es", "sólo", "una", "prueba", "." };

     return SentenceUtils.ToWordList(tokens);
 }
Beispiel #3
0
        /// <summary>
        /// Segments a line with <c>SegmentString</c> and converts the whitespace-separated
        /// pieces of the result into a word list.
        /// </summary>
        /// <param name="line">The text to segment.</param>
        /// <returns>The word list built from the pieces of the segmented string.</returns>
        public virtual IList <IHasWord> Segment(string line)
        {
            string segmentedString = SegmentString(line);

            // "\\s+" is a regular expression. string.Split(string) would treat it as a literal
            // separator and never split on whitespace, so the regex splitter is used instead.
            // Note: Regex.Split keeps empty entries produced by leading/trailing whitespace.
            string[] pieces = System.Text.RegularExpressions.Regex.Split(segmentedString, "\\s+");

            return SentenceUtils.ToWordList(pieces);
        }
 /// <summary>Builds the word list for the fixed sample sentence "Ceci est seulement un test .".</summary>
 /// <returns>The result of <c>SentenceUtils.ToWordList</c> applied to the sample tokens.</returns>
 public override IList <IHasWord> DefaultTestSentence()
 {
     // The sentence is stored pre-tokenized, one array entry per token.
     string[] tokens = { "Ceci", "est", "seulement", "un", "test", "." };

     return SentenceUtils.ToWordList(tokens);
 }
 /// <summary>Builds the word list for a fixed sequence of sample tokens.</summary>
 /// <returns>The result of <c>SentenceUtils.ToWordList</c> applied to the sample tokens.</returns>
 public override IList <IHasWord> DefaultTestSentence()
 {
     // Tokens are kept exactly as given; the final entry is the literal token "yyDOT".
     string[] tokens =
     {
         "H", "MWX", "MTPLC", "LA", "RQ", "M", "H", "TWPEH", "H",
         "MBIFH", "ALA", "GM", "M", "DRKI", "H", "HERMH", "yyDOT"
     };

     return SentenceUtils.ToWordList(tokens);
 }
Beispiel #6
0
 /// <summary>Provides the first sentence of TueBaDZ as a word list.</summary>
 /// <returns>The result of <c>SentenceUtils.ToWordList</c> applied to the sentence's tokens.</returns>
 public override IList <IHasWord> DefaultTestSentence()
 {
     // Collect the tokens in an array first, then hand them over in a single call.
     string[] tokens = { "Veruntreute", "die", "AWO", "Spendengeld", "?" };

     return SentenceUtils.ToWordList(tokens);
 }
 /// <summary>Provides a default sentence for the language, intended for testing.</summary>
 /// <returns>The result of <c>SentenceUtils.ToWordList</c> applied to the sample tokens.</returns>
 public override IList <IHasWord> DefaultTestSentence()
 {
     // The sentence is stored pre-tokenized, one array entry per token.
     string[] tokens =
     {
         "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
         "lange", "nicht", "mehr", "erlebt", "."
     };

     return SentenceUtils.ToWordList(tokens);
 }