示例#1
0
        /// <summary>
        /// Tokenizes <paramref name="inputString"/> with a Lucene <c>LetterTokenizer</c>
        /// and collects every term that passes a <c>LengthFilter</c> configured with a
        /// minimum length of 2 and a maximum of <c>int.MaxValue</c>.
        /// </summary>
        /// <param name="inputString">
        /// Text to tokenize. It is handed directly to <see cref="StringReader"/>, which
        /// rejects <c>null</c>.
        /// </param>
        /// <returns>
        /// An <see cref="ArrayList"/> holding the term strings in the order the token
        /// stream produced them; empty when no token passes the filter.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputString"/> is <c>null</c>.
        /// </exception>
        public static ArrayList removePunctuation(String inputString)
        {
            ArrayList tokenizedString = new ArrayList();

            StringReader reader          = new StringReader(inputString);
            Tokenizer    letterTokenizer = new LetterTokenizer(reader);

            // The using block guarantees the stream is disposed even when Reset,
            // IncrementToken or End throws; previously Dispose ran only on the
            // success path. As before, a single Dispose on the outermost filter is
            // relied on to release the wrapped tokenizer and reader.
            using (TokenStream tokenStream = new LengthFilter(letterTokenizer, 2, int.MaxValue))
            {
                var termAttribute = tokenStream.GetAttribute<ITermAttribute>();

                tokenStream.Reset();

                while (tokenStream.IncrementToken())
                {
                    // Term is read on every iteration because the attribute instance
                    // is reused by the stream for each token.
                    tokenizedString.Add(termAttribute.Term);
                }

                tokenStream.End();
            }

            return tokenizedString;
        }
示例#2
0
        /// <summary>
        /// Tokenizes <paramref name="inputString"/> with a Lucene
        /// <c>WhitespaceTokenizer</c> and collects every term that passes a
        /// <c>LengthFilter</c> configured with a minimum length of 2 and a maximum of
        /// <c>int.MaxValue</c>.
        /// </summary>
        /// <param name="inputString">
        /// Text to tokenize. It is handed directly to <see cref="StringReader"/>, which
        /// rejects <c>null</c>.
        /// </param>
        /// <returns>
        /// An <see cref="ArrayList"/> holding the term strings in the order the token
        /// stream produced them; empty when no token passes the filter.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputString"/> is <c>null</c>.
        /// </exception>
        public static ArrayList defaultSplit(String inputString)
        {
            ArrayList tokenizedString = new ArrayList();

            StringReader reader              = new StringReader(inputString);
            Tokenizer    whiteSpaceTokenizer = new WhitespaceTokenizer(reader);

            // The using block guarantees the stream is disposed even when Reset,
            // IncrementToken or End throws; previously Dispose ran only on the
            // success path. As before, a single Dispose on the outermost filter is
            // relied on to release the wrapped tokenizer and reader.
            using (TokenStream tokenStream = new LengthFilter(whiteSpaceTokenizer, 2, int.MaxValue))
            {
                var termAttribute = tokenStream.GetAttribute<ITermAttribute>();

                tokenStream.Reset();

                while (tokenStream.IncrementToken())
                {
                    // Term is read on every iteration because the attribute instance
                    // is reused by the stream for each token.
                    tokenizedString.Add(termAttribute.Term);
                }

                tokenStream.End();
            }

            return tokenizedString;
        }