Beispiel #1
0
        /// <summary>
        /// Runs <paramref name="text"/> through the normalize, tokenize and stem steps of
        /// <c>TwitterKoreanProcessor</c>, converts the stemmed tokens with
        /// <c>ToPosListFromTokens</c>, and drops every entry whose <c>PosTag</c> is
        /// "Punctuation" or "Space".
        /// </summary>
        /// <param name="text">Raw input text handed to the normalizer.</param>
        /// <param name="count">Not read or written by this method; kept for the existing signature.</param>
        /// <returns>The remaining part-of-speech entries.</returns>
        public static IEnumerable<POS> Extract(string text, ref NLPCount count)
        {
            // Step 1: normalization yields a character sequence; take its string form.
            string cleanedText = TwitterKoreanProcessor.normalize(text).toString();

            // Steps 2 and 3: tokenize the normalized text, then stem the resulting tokens.
            var stemmedTokens = TwitterKoreanProcessor.stem(TwitterKoreanProcessor.tokenize(cleanedText));

            // Step 4: convert to POS entries and keep only those that are neither punctuation nor spaces.
            var posEntries = stemmedTokens.ToPosListFromTokens();

            return posEntries.Where(entry => entry.PosTag != "Punctuation" && entry.PosTag != "Space");
        }
        /// <summary>
        /// Converts the given tokens to their Scala counterparts, passes them to
        /// <c>TwitterKoreanProcessor.tokensToStrings</c>, and converts the resulting Scala
        /// string sequence back for the caller.
        /// </summary>
        /// <param name="tokens">Tokens to turn into strings.</param>
        /// <returns>The strings produced by the processor for the supplied tokens.</returns>
        public static IEnumerable<string> TokensToStrings(this IEnumerable<KoreanToken> tokens)
        {
            // Each C# token is mapped to a Scala token via ToScalaToken().
            var scalaInput = Utils.ScalaCSHelper.ReverseScalaSeqConverter<KoreanTokenizer.KoreanToken, KoreanToken>(
                tokens,
                token => token.ToScalaToken());

            var scalaStrings = TwitterKoreanProcessor.tokensToStrings(scalaInput);

            return Utils.ScalaCSHelper.ScalaSeqStringConverter(scalaStrings);
        }
        /// <summary>
        /// Tokenizes <paramref name="text"/> with <c>TwitterKoreanProcessor.tokenize</c> and
        /// wraps every resulting Scala token in a <c>KoreanToken</c>.
        /// </summary>
        /// <param name="text">Text to tokenize; it is passed to the tokenizer as-is (no normalization here).</param>
        /// <returns>The wrapped tokens.</returns>
        public static IEnumerable<KoreanToken> Tokenize(this string text)
        {
            var scalaTokens = TwitterKoreanProcessor.tokenize(text);

            // Wrap each Scala token in the C#-side KoreanToken type.
            List<KoreanToken> wrappedTokens =
                Utils.ScalaCSHelper.ScalaSeqConverter<KoreanToken, KoreanTokenizer.KoreanToken>(
                    scalaTokens,
                    scalaToken => new KoreanToken(scalaToken));

            return wrappedTokens;
        }
        /// <summary>
        /// Stems the given tokens: converts them to Scala tokens, calls
        /// <c>TwitterKoreanProcessor.stem</c>, and wraps each resulting Scala token in a
        /// new <c>KoreanToken</c>.
        /// </summary>
        /// <param name="tokens">Tokens to stem.</param>
        /// <returns>The stemmed tokens.</returns>
        public static IEnumerable<KoreanToken> Stem(this IEnumerable<KoreanToken> tokens)
        {
            // C# -> Scala: each token is mapped through ToScalaToken().
            var scalaInput = Utils.ScalaCSHelper.ReverseScalaSeqConverter<KoreanTokenizer.KoreanToken, KoreanToken>(
                tokens,
                token => token.ToScalaToken());

            var scalaStemmed = TwitterKoreanProcessor.stem(scalaInput);

            // Scala -> C#: wrap every stemmed Scala token.
            var stemmedTokens = Utils.ScalaCSHelper.ScalaSeqConverter<KoreanToken, KoreanTokenizer.KoreanToken>(
                scalaStemmed,
                scalaToken => new KoreanToken(scalaToken));

            return stemmedTokens;
        }
        /// <summary>
        /// Extracts phrases from the given tokens by converting them to Scala tokens,
        /// calling <c>TwitterKoreanProcessor.extractPhrases</c>, and wrapping each
        /// resulting Scala phrase in a <c>KoreanPhrase</c>.
        /// </summary>
        /// <param name="tokens">Tokens to extract phrases from.</param>
        /// <param name="filterSpam">Forwarded unchanged to <c>extractPhrases</c>; defaults to <c>false</c>.</param>
        /// <param name="enableHashTags">Forwarded unchanged to <c>extractPhrases</c>; defaults to <c>true</c>.</param>
        /// <returns>The extracted phrases.</returns>
        public static List<KoreanPhrase> ExtractPhrases(this IEnumerable<KoreanToken> tokens, bool filterSpam = false, bool enableHashTags = true)
        {
            // C# -> Scala: each token is mapped through ToScalaToken().
            var scalaInput = Utils.ScalaCSHelper.ReverseScalaSeqConverter<KoreanTokenizer.KoreanToken, KoreanToken>(
                tokens,
                token => token.ToScalaToken());

            // The processor returns a Scala Seq[KoreanPhrase].
            var scalaPhrases = TwitterKoreanProcessor.extractPhrases(scalaInput, filterSpam, enableHashTags);

            // Scala -> C#: wrap every Scala phrase.
            var phrases = Utils.ScalaCSHelper.ScalaSeqConverter<KoreanPhrase, ScalaKoreanPhrase>(
                scalaPhrases,
                scalaPhrase => new KoreanPhrase(scalaPhrase));

            return phrases;
        }
        /// <summary>
        /// Normalizes <paramref name="text"/> with <c>TwitterKoreanProcessor.normalize</c>
        /// and returns the string form of the result.
        /// </summary>
        /// <param name="text">Text to normalize.</param>
        /// <returns>The normalized text.</returns>
        public static string Normalize(string text)
        {
            // normalize() does not return a System.String directly, hence the toString() call.
            return TwitterKoreanProcessor.normalize(text).toString();
        }