/// <summary>
/// Appends word-trigram features for the token at <paramref name="index"/> to
/// <paramref name="features"/>. One pair of features is built from the two preceding
/// tokens and one pair from the two following tokens, whenever those neighbours exist.
/// </summary>
/// <param name="features">The list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not read by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes) {
    var current = tokens[index];
    var currentClass = FeatureGeneratorUtil.TokenFeature(current);

    // Backward trigram: only available when two tokens precede the current one.
    if (index >= 2) {
        var previous = tokens[index - 1];
        var beforePrevious = tokens[index - 2];

        // Surface forms of the three tokens.
        features.Add("ppw,pw,w=" + beforePrevious + "," + previous + "," + current);

        // Same trigram expressed through the values returned by FeatureGeneratorUtil.TokenFeature.
        var previousClass = FeatureGeneratorUtil.TokenFeature(previous);
        var beforePreviousClass = FeatureGeneratorUtil.TokenFeature(beforePrevious);
        features.Add("ppwc,pwc,wc=" + beforePreviousClass + "," + previousClass + "," + currentClass);
    }

    // Forward trigram: only available when two tokens follow the current one.
    if (tokens.Length - index > 2) {
        var next = tokens[index + 1];
        var afterNext = tokens[index + 2];

        features.Add("w,nw,nnw=" + current + "," + next + "," + afterNext);

        var nextClass = FeatureGeneratorUtil.TokenFeature(next);
        var afterNextClass = FeatureGeneratorUtil.TokenFeature(afterNext);
        features.Add("wc,nwc,nnwc=" + currentClass + "," + nextClass + "," + afterNextClass);
    }
}
/// <summary>
/// Splits the token at <paramref name="index"/> with the configured tokenizer and appends
/// features describing the resulting sub-tokens to <paramref name="features"/>.
/// </summary>
/// <remarks>
/// When the tokenizer yields exactly one piece, only a single <c>st=</c> feature carrying the
/// lower-cased original token is added. Otherwise the method adds the piece count
/// (<c>stn=</c>), the <c>pt2=</c> and <c>pt3=</c> features for each run of two and three
/// adjacent pieces, an <c>st=</c> feature for every piece the <c>noLetters</c> pattern does
/// not match, and finally the concatenation of all per-piece values (<c>pta=</c>).
/// </remarks>
/// <param name="features">The list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not read by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes) {
    var parts = tokenizer.Tokenize(tokens[index]);
    var count = parts.Length;

    // A token that does not split further contributes just its lower-cased form.
    if (count == 1) {
        features.Add("st=" + tokens[index].ToLowerInvariant());
        return;
    }

    features.Add("stn=" + count);

    var pattern = new StringBuilder();

    for (var position = 0; position < count; position++) {
        var part = parts[position];

        // Pair feature: this piece together with the one that follows it.
        if (position + 1 < count) {
            features.Add(string.Concat(
                "pt2=",
                FeatureGeneratorUtil.TokenFeature(part),
                FeatureGeneratorUtil.TokenFeature(parts[position + 1])));
        }

        // Triple feature: this piece together with the two that follow it.
        if (position + 2 < count) {
            features.Add(string.Concat(
                "pt3=",
                FeatureGeneratorUtil.TokenFeature(part),
                FeatureGeneratorUtil.TokenFeature(parts[position + 1]),
                FeatureGeneratorUtil.TokenFeature(parts[position + 2])));
        }

        // Accumulate the per-piece value for the whole-token pattern emitted after the loop.
        pattern.Append(FeatureGeneratorUtil.TokenFeature(part));

        // Pieces matched by the noLetters pattern do not get their own "st=" feature.
        if (noLetters.IsMatch(part)) {
            continue;
        }

        features.Add("st=" + part.ToLowerInvariant());
    }

    features.Add("pta=" + pattern.ToString());
}