/// <summary>
/// Tokenizes the given text, converts every token into words, and returns
/// the resulting words as plain strings.
/// </summary>
/// <param name="text">The raw text to expand.</param>
/// <exception cref="IllegalStateException">
/// Thrown when the utterance built from the tokenizer has no token relation.
/// </exception>
/// <returns>
/// The string form of each item in the word relation, in order, skipping
/// items whose string form is null, empty, or contains '#'.
/// </returns>
public virtual List<string> Expand(string text)
{
    string cleanedText = SimplifyChars(text);

    // Configure the tokenizer with the US English symbol sets before feeding it the text.
    CharTokenizer charTokenizer = new CharTokenizer
    {
        WhitespaceSymbols = UsEnglish.WhitespaceSymbols,
        SingleCharSymbols = UsEnglish.SingleCharSymbols,
        PrepunctuationSymbols = UsEnglish.PrePunctuationSymbols,
        PostpunctuationSymbols = UsEnglish.PunctuationSymbols
    };
    charTokenizer.SetInputText(cleanedText);

    Utterance utt = new Utterance(charTokenizer);

    Relation tokens = utt.GetRelation(Relation.Token);
    if (tokens == null)
    {
        throw new IllegalStateException("token relation does not exist");
    }

    _wordRelation = WordRelation.CreateWordRelation(utt, this);

    // The current token is kept in the _tokenItem field (not a local) while
    // each token is handed to TokenToWords.
    _tokenItem = tokens.Head;
    while (_tokenItem != null)
    {
        FeatureSet tokenFeatures = _tokenItem.Features;
        string tokenName = tokenFeatures.GetString("name");

        // convert the token into a list of words
        TokenToWords(tokenName);

        _tokenItem = _tokenItem.GetNext();
    }

    List<string> result = new List<string>();
    Item wordItem = utt.GetRelation(Relation.Word).Head;
    while (wordItem != null)
    {
        string word = wordItem.ToString();
        if (!string.IsNullOrEmpty(word) && !word.Contains("#"))
        {
            result.Add(word);
        }

        wordItem = wordItem.GetNext();
    }

    return result;
}
/// <summary>
/// Tokenizes the given text, converts every token into words, and returns
/// the resulting words as plain strings.
/// </summary>
/// <param name="text">The raw text to expand.</param>
/// <exception cref="IllegalStateException">
/// Thrown when the utterance built from the tokenizer has no token relation.
/// </exception>
/// <returns>
/// The string form of each item in the word relation, in order, skipping
/// items whose string form is null, empty, or contains '#'.
/// </returns>
public virtual List<string> expand(string text)
{
    string cleanedText = simplifyChars(text);

    // Configure the tokenizer with the US English symbol sets before feeding it the text.
    CharTokenizer charTokenizer = new CharTokenizer();
    charTokenizer.setWhitespaceSymbols(UsEnglish.WHITESPACE_SYMBOLS);
    charTokenizer.setSingleCharSymbols(UsEnglish.SINGLE_CHAR_SYMBOLS);
    charTokenizer.setPrepunctuationSymbols(UsEnglish.PREPUNCTUATION_SYMBOLS);
    charTokenizer.setPostpunctuationSymbols(UsEnglish.PUNCTUATION_SYMBOLS);
    charTokenizer.setInputText(cleanedText);

    Utterance utt = new Utterance(charTokenizer);

    Relation tokens = utt.getRelation(Relation.TOKEN);
    if (tokens == null)
    {
        throw new IllegalStateException("token relation does not exist");
    }

    wordRelation = WordRelation.createWordRelation(utt, this);

    // The current token is kept in the tokenItem member (not a local) while
    // each token is handed to tokenToWords.
    tokenItem = tokens.getHead();
    while (tokenItem != null)
    {
        FeatureSet tokenFeatures = tokenItem.getFeatures();
        string tokenName = tokenFeatures.getString("name");

        // convert the token into a list of words
        tokenToWords(tokenName);

        tokenItem = tokenItem.getNext();
    }

    List<string> result = new List<string>();
    Item wordItem = utt.getRelation(Relation.WORD).getHead();
    while (wordItem != null)
    {
        string word = wordItem.ToString();
        if (!string.IsNullOrEmpty(word) && !word.Contains("#"))
        {
            result.Add(word);
        }

        wordItem = wordItem.getNext();
    }

    return result;
}
/// <summary>
/// Initializes a new utterance with empty feature and relation sets, then
/// passes the supplied tokenizer to <c>setTokenList</c>.
/// </summary>
/// <param name="tokenizer">The tokenizer handed to <c>setTokenList</c>.</param>
public Utterance(CharTokenizer tokenizer)
{
    // Both sets are created before setTokenList runs, matching the original order.
    this.features = new FeatureSet();
    this.relations = new FeatureSet();

    this.setTokenList(tokenizer);
}