Beispiel #1
0
        /// <summary>
        /// Generates trigram features for the token at <paramref name="index"/>: one window that
        /// ends at the token and one window that starts at it. Each window contributes a feature
        /// built from the raw tokens and a second feature built from the values that
        /// <c>FeatureGeneratorUtil.TokenFeature</c> returns for those tokens.
        /// </summary>
        /// <param name="features">The list that receives the generated features.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not read by this generator.</param>
        public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var current = tokens[index];
            var currentClass = FeatureGeneratorUtil.TokenFeature(current);

            // Window ending at the current token: only possible when two tokens precede it.
            var hasTwoBefore = index >= 2;
            if (hasTwoBefore)
            {
                var secondPrevious = tokens[index - 2];
                var previous = tokens[index - 1];
                features.Add("ppw,pw,w=" + string.Join(",", secondPrevious, previous, current));

                var previousClass = FeatureGeneratorUtil.TokenFeature(previous);
                var secondPreviousClass = FeatureGeneratorUtil.TokenFeature(secondPrevious);
                features.Add("ppwc,pwc,wc=" + string.Join(",", secondPreviousClass, previousClass, currentClass));
            }

            // Window starting at the current token: only possible when two tokens follow it.
            var hasTwoAfter = index < tokens.Length - 2;
            if (hasTwoAfter)
            {
                var next = tokens[index + 1];
                var secondNext = tokens[index + 2];
                features.Add("w,nw,nnw=" + string.Join(",", current, next, secondNext));

                var nextClass = FeatureGeneratorUtil.TokenFeature(next);
                var secondNextClass = FeatureGeneratorUtil.TokenFeature(secondNext);
                features.Add("wc,nwc,nnwc=" + string.Join(",", currentClass, nextClass, secondNextClass));
            }
        }
Beispiel #2
0
        /// <summary>
        /// Generates sub-token pattern features for the token at <paramref name="index"/>.
        /// The token is split with the generator's tokenizer. When the split yields exactly one
        /// piece, a single <c>st=</c> feature holding the lower-cased token is added. Otherwise the
        /// method adds the piece count (<c>stn=</c>), the <c>FeatureGeneratorUtil.TokenFeature</c>
        /// values of every adjacent pair (<c>pt2=</c>) and triple (<c>pt3=</c>) of pieces, a
        /// lower-cased <c>st=</c> feature for each piece the <c>noLetters</c> pattern does not
        /// match, and finally the concatenated values of all pieces (<c>pta=</c>).
        /// </summary>
        /// <param name="features">The list that receives the generated features.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The position in <paramref name="tokens"/> of the token being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens before <paramref name="index"/>; not read by this generator.</param>
        public override void CreateFeatures(List<string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var pieces = tokenizer.Tokenize(tokens[index]);
            var pieceCount = pieces.Length;

            // A token that does not split any further only yields its lower-cased form.
            if (pieceCount == 1)
            {
                features.Add("st=" + tokens[index].ToLowerInvariant());
                return;
            }

            features.Add("stn=" + pieceCount);

            // Accumulates the per-piece feature values; seeded with the feature prefix so the
            // finished buffer is the complete "pta=" feature.
            var wholePattern = new StringBuilder("pta=");

            for (var position = 0; position < pieceCount; position++)
            {
                var piece = pieces[position];
                var piecesAfter = pieceCount - 1 - position;

                if (piecesAfter >= 1)
                {
                    var first = FeatureGeneratorUtil.TokenFeature(piece);
                    var second = FeatureGeneratorUtil.TokenFeature(pieces[position + 1]);
                    features.Add("pt2=" + first + second);
                }

                if (piecesAfter >= 2)
                {
                    var first = FeatureGeneratorUtil.TokenFeature(piece);
                    var second = FeatureGeneratorUtil.TokenFeature(pieces[position + 1]);
                    var third = FeatureGeneratorUtil.TokenFeature(pieces[position + 2]);
                    features.Add("pt3=" + first + second + third);
                }

                wholePattern.Append(FeatureGeneratorUtil.TokenFeature(piece));

                // Pieces matched by the noLetters pattern get no "st=" feature.
                if (noLetters.IsMatch(piece))
                {
                    continue;
                }

                features.Add("st=" + piece.ToLowerInvariant());
            }

            features.Add(wholePattern.ToString());
        }