/// <summary>
/// Appends Brown cluster features for the token at <paramref name="index"/> to
/// <paramref name="features"/>. One feature is produced for every word class that
/// <c>BrownTokenClasses.GetWordClasses</c> returns for the token, each combined with the
/// value <c>FeatureGeneratorUtil.TokenFeature</c> yields for that same token.
/// </summary>
/// <param name="features">The feature list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not used by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
{
    // Every generated feature shares the same "c,browncluster=<shape>," prefix.
    var prefix = "c,browncluster=" + FeatureGeneratorUtil.TokenFeature(tokens[index]) + ",";
    var clusters = BrownTokenClasses.GetWordClasses(tokens[index], brownLexicon);

    features.AddRange(
        from cluster in clusters
        select prefix + cluster);
}
/// <summary>
/// Appends the token class feature for the token at <paramref name="index"/> to
/// <paramref name="features"/>. When <c>generateWordAndClassFeature</c> is set, a second
/// feature combining the lower-cased token with its class is appended as well.
/// </summary>
/// <param name="features">The feature list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not used by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
{
    var token = tokens[index];
    var tokenClass = FeatureGeneratorUtil.TokenFeature(token);

    // The class-only feature is always emitted.
    features.Add(TOKEN_CLASS_PREFIX + "=" + tokenClass);

    if (!generateWordAndClassFeature)
    {
        return;
    }

    // Optional combined feature: "<prefix>=<lower-cased token>,<class>".
    features.Add(TOKEN_AND_CLASS_PREFIX + "=" + token.ToLowerInvariant() + "," + tokenClass);
}
/// <summary>
/// Appends the token class feature for the token at <paramref name="index"/> to
/// <paramref name="features"/>, and, if <c>generateWordAndClassFeature</c> is enabled,
/// an additional feature pairing the lower-cased token with that class.
/// </summary>
/// <param name="features">The feature list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not used by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
{
    var tokenClass = FeatureGeneratorUtil.TokenFeature(tokens[index]);

    var classFeature = string.Concat(TOKEN_CLASS_PREFIX, "=", tokenClass);
    features.Add(classFeature);

    if (generateWordAndClassFeature)
    {
        // Lower-casing happens only on this path, exactly as before.
        var loweredToken = tokens[index].ToLowerInvariant();
        var wordAndClassFeature = string.Concat(TOKEN_AND_CLASS_PREFIX, "=", loweredToken) + "," + tokenClass;
        features.Add(wordAndClassFeature);
    }
}
/// <summary>
/// Appends bigram features for the token at <paramref name="index"/> to
/// <paramref name="features"/>. For the preceding neighbour (when one exists) and the
/// following neighbour (when one exists), two features are added: one pairing the raw
/// tokens and one pairing the values <c>FeatureGeneratorUtil.TokenFeature</c> returns for them.
/// </summary>
/// <param name="features">The feature list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not used by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
{
    var current = tokens[index];
    var currentClass = FeatureGeneratorUtil.TokenFeature(current);

    // Bigram with the preceding token; skipped for the first token.
    var previousIndex = index - 1;
    if (previousIndex >= 0)
    {
        var previous = tokens[previousIndex];
        features.Add(string.Concat("pw,w=", previous, ",", current));
        features.Add(string.Concat("pwc,wc=", FeatureGeneratorUtil.TokenFeature(previous), ",", currentClass));
    }

    // Bigram with the following token; skipped for the last token.
    var nextIndex = index + 1;
    if (nextIndex < tokens.Length)
    {
        var next = tokens[nextIndex];
        features.Add(string.Concat("w,nw=", current, ",", next));
        features.Add(string.Concat("wc,nc=", currentClass, ",", FeatureGeneratorUtil.TokenFeature(next)));
    }
}
/// <summary>
/// Appends token pattern features for the token at <paramref name="index"/> to
/// <paramref name="features"/>. The token is split with <c>tokenizer</c>; if that yields a
/// single piece, only the lower-cased token is added ("st="). Otherwise the piece count
/// ("stn="), the class pairs ("pt2=") and triples ("pt3=") of adjacent pieces, each
/// lower-cased piece that does not match <c>noLetters</c> ("st="), and the concatenation of
/// all piece classes ("pta=") are added.
/// </summary>
/// <param name="features">The feature list that receives the generated features.</param>
/// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
/// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
/// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not used by this generator.</param>
public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
{
    var pieces = tokenizer.Tokenize(tokens[index]);
    var pieceCount = pieces.Length;

    // A token that does not split further only contributes its lower-cased form.
    if (pieceCount == 1)
    {
        features.Add("st=" + tokens[index].ToLowerInvariant());
        return;
    }

    features.Add("stn=" + pieceCount);

    var pattern = new StringBuilder();
    for (var pos = 0; pos < pieceCount; pos++)
    {
        var piece = pieces[pos];

        // Class bigram of this piece and the one after it.
        if (pos < pieceCount - 1)
        {
            features.Add(string.Concat(
                "pt2=",
                FeatureGeneratorUtil.TokenFeature(piece),
                FeatureGeneratorUtil.TokenFeature(pieces[pos + 1])));
        }

        // Class trigram of this piece and the two after it.
        if (pos < pieceCount - 2)
        {
            features.Add(string.Concat(
                "pt3=",
                FeatureGeneratorUtil.TokenFeature(piece),
                FeatureGeneratorUtil.TokenFeature(pieces[pos + 1]),
                FeatureGeneratorUtil.TokenFeature(pieces[pos + 2])));
        }

        pattern.Append(FeatureGeneratorUtil.TokenFeature(piece));

        // Pieces matching the noLetters pattern get no "st=" feature.
        if (noLetters.IsMatch(piece))
        {
            continue;
        }

        features.Add("st=" + piece.ToLowerInvariant());
    }

    features.Add("pta=" + pattern);
}