/// <summary>
        /// Tokenizes the given text, feeds every token through <c>TokenToWords</c>,
        /// and returns the contents of the resulting word relation as strings.
        /// </summary>
        /// <param name="text">The text to process.</param>
        /// <exception cref="IllegalStateException">
        /// Thrown when the utterance built from the text has no token relation.
        /// </exception>
        /// <returns>
        /// The string form of each item in the word relation, in relation order,
        /// skipping items whose string form is null, empty, or contains '#'.
        /// </returns>
        public virtual List <string> Expand(string text)
        {
            string simplifiedText = SimplifyChars(text);

            // Configure the tokenizer with the US-English symbol sets before
            // handing it the (simplified) input text.
            CharTokenizer tokenizer = new CharTokenizer
            {
                WhitespaceSymbols      = UsEnglish.WhitespaceSymbols,
                SingleCharSymbols      = UsEnglish.SingleCharSymbols,
                PrepunctuationSymbols  = UsEnglish.PrePunctuationSymbols,
                PostpunctuationSymbols = UsEnglish.PunctuationSymbols
            };
            tokenizer.SetInputText(simplifiedText);

            Utterance utterance = new Utterance(tokenizer);

            Relation tokenRelation = utterance.GetRelation(Relation.Token);
            if (tokenRelation == null)
            {
                throw new IllegalStateException("token relation does not exist");
            }

            _wordRelation = WordRelation.CreateWordRelation(utterance, this);

            // NOTE(review): the loop cursor is the _tokenItem field rather than a
            // local; presumably TokenToWords reads it - confirm before changing.
            for (_tokenItem = tokenRelation.Head; _tokenItem != null; _tokenItem = _tokenItem.GetNext())
            {
                string tokenVal = _tokenItem.Features.GetString("name");

                // convert the token into a list of words
                TokenToWords(tokenVal);
            }

            List <string> words = new List <string>();

            for (Item item = utterance.GetRelation(Relation.Word).Head; item != null; item = item.GetNext())
            {
                // Evaluate the string form once per item instead of up to three times.
                string word = item.ToString();

                if (string.IsNullOrEmpty(word) || word.Contains("#"))
                {
                    continue;
                }

                words.Add(word);
            }

            return words;
        }
示例#2
0
        /// <summary>
        /// Tokenizes the given text, feeds every token through <c>tokenToWords</c>,
        /// and returns the contents of the resulting word relation as strings.
        /// </summary>
        /// <param name="text">The text to process.</param>
        /// <exception cref="IllegalStateException">
        /// Thrown when the utterance built from the text has no token relation.
        /// </exception>
        /// <returns>
        /// The string form of each item in the word relation, in relation order,
        /// skipping items whose string form is null, empty, or contains '#'.
        /// </returns>
        public virtual List <string> expand(string text)
        {
            string simplifiedText = simplifyChars(text);

            // Configure the tokenizer with the US-English symbol sets before
            // handing it the (simplified) input text.
            CharTokenizer tokenizer = new CharTokenizer();
            tokenizer.setWhitespaceSymbols(UsEnglish.WHITESPACE_SYMBOLS);
            tokenizer.setSingleCharSymbols(UsEnglish.SINGLE_CHAR_SYMBOLS);
            tokenizer.setPrepunctuationSymbols(UsEnglish.PREPUNCTUATION_SYMBOLS);
            tokenizer.setPostpunctuationSymbols(UsEnglish.PUNCTUATION_SYMBOLS);
            tokenizer.setInputText(simplifiedText);

            Utterance utterance = new Utterance(tokenizer);

            Relation tokenRelation = utterance.getRelation(Relation.TOKEN);
            if (tokenRelation == null)
            {
                throw new IllegalStateException("token relation does not exist");
            }

            wordRelation = WordRelation.createWordRelation(utterance, this);

            // NOTE(review): the loop cursor is the tokenItem member rather than a
            // local; presumably tokenToWords reads it - confirm before changing.
            for (tokenItem = tokenRelation.getHead(); tokenItem != null; tokenItem = tokenItem.getNext())
            {
                string tokenVal = tokenItem.getFeatures().getString("name");

                // convert the token into a list of words
                tokenToWords(tokenVal);
            }

            List <string> words = new List <string>();

            for (Item item = utterance.getRelation(Relation.WORD).getHead(); item != null; item = item.getNext())
            {
                // Evaluate the string form once per item instead of up to three times.
                string word = item.ToString();

                if (string.IsNullOrEmpty(word) || word.Contains("#"))
                {
                    continue;
                }

                words.Add(word);
            }

            return words;
        }
示例#3
0
 /// <summary>
 /// Builds an utterance: allocates empty feature sets for its features and
 /// relations, then passes the tokenizer to <c>setTokenList</c>.
 /// </summary>
 /// <param name="tokenizer">The tokenizer handed to <c>setTokenList</c>.</param>
 public Utterance(CharTokenizer tokenizer)
 {
     // Both containers are allocated before setTokenList runs, as in the
     // original; the two allocations are independent of each other.
     relations = new FeatureSet();
     features  = new FeatureSet();

     setTokenList(tokenizer);
 }