/// <summary>
        /// Adds the appropriate features for the token at the specified index with the
        /// specified array of previous outcomes to the specified list of features.
        /// </summary>
        /// <param name="features">The list of features to be added to.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The index of the token which is currently being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens prior to the specified index.</param>
        public override void CreateFeatures(List <string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var token = tokens[index];

            // Every feature produced here starts with the same text: the literal tag followed by
            // the TokenFeature value of the current token and a separating comma.
            var prefix = "c,browncluster=" + FeatureGeneratorUtil.TokenFeature(token) + ",";

            // One feature per class returned by the lookup against brownLexicon for this token.
            var clusterFeatures =
                from brownClass in BrownTokenClasses.GetWordClasses(token, brownLexicon)
                select prefix + brownClass;

            features.AddRange(clusterFeatures);
        }
예제 #2
0
        /// <summary>
        /// Adds the appropriate features for the token at the specified index with the
        /// specified array of previous outcomes to the specified list of features.
        /// </summary>
        /// <param name="features">The list of features to be added to.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The index of the token which is currently being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens prior to the specified index.</param>
        public override void CreateFeatures(List <string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var token      = tokens[index];
            var tokenClass = FeatureGeneratorUtil.TokenFeature(token);

            // The class-only feature is emitted unconditionally.
            features.Add(TOKEN_CLASS_PREFIX + "=" + tokenClass);

            // The combined feature is optional; nothing more to do when it is switched off.
            if (!generateWordAndClassFeature)
            {
                return;
            }

            // Combined feature: the lower-cased token followed by its class, comma separated.
            features.Add(TOKEN_AND_CLASS_PREFIX + "=" + token.ToLowerInvariant() + "," + tokenClass);
        }
예제 #3
0
        /// <summary>
        /// Adds the appropriate features for the token at the specified index with the
        /// specified array of previous outcomes to the specified list of features.
        /// </summary>
        /// <param name="features">The list of features to be added to.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The index of the token which is currently being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens prior to the specified index.</param>
        public override void CreateFeatures(List <string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var current    = tokens[index];
            var tokenClass = FeatureGeneratorUtil.TokenFeature(current);

            // Always emitted: the class prefix and the TokenFeature value joined by '='.
            features.Add($"{TOKEN_CLASS_PREFIX}={tokenClass}");

            // Emitted only when the generator is configured to pair the word with its class.
            if (generateWordAndClassFeature)
            {
                var lowered = current.ToLowerInvariant();

                features.Add($"{TOKEN_AND_CLASS_PREFIX}={lowered},{tokenClass}");
            }
        }
예제 #4
0
        /// <summary>
        /// Adds the appropriate features for the token at the specified index with the
        /// specified array of previous outcomes to the specified list of features.
        /// </summary>
        /// <param name="features">The list of features to be added to.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The index of the token which is currently being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens prior to the specified index.</param>
        public override void CreateFeatures(List <string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var current      = tokens[index];
            var currentClass = FeatureGeneratorUtil.TokenFeature(current);

            // Pair with the preceding token; skipped for the first token, which has none.
            if (index > 0)
            {
                var previous = tokens[index - 1];

                features.Add($"pw,w={previous},{current}");
                features.Add($"pwc,wc={FeatureGeneratorUtil.TokenFeature(previous)},{currentClass}");
            }

            // Pair with the following token; skipped for the last token, which has none.
            if (index + 1 < tokens.Length)
            {
                var next = tokens[index + 1];

                features.Add($"w,nw={current},{next}");
                features.Add($"wc,nc={currentClass},{FeatureGeneratorUtil.TokenFeature(next)}");
            }
        }
예제 #5
0
        /// <summary>
        /// Adds the appropriate features for the token at the specified index with the
        /// specified array of previous outcomes to the specified list of features.
        /// </summary>
        /// <param name="features">The list of features to be added to.</param>
        /// <param name="tokens">The tokens of the sentence or other text unit being processed.</param>
        /// <param name="index">The index of the token which is currently being processed.</param>
        /// <param name="previousOutcomes">The outcomes for the tokens prior to the specified index.</param>
        public override void CreateFeatures(List <string> features, string[] tokens, int index, string[] previousOutcomes)
        {
            var parts = tokenizer.Tokenize(tokens[index]);
            var count = parts.Length;

            // A token that the tokenizer leaves as one piece yields only its lower-cased form.
            if (count == 1)
            {
                features.Add("st=" + tokens[index].ToLowerInvariant());
                return;
            }

            // Record the number of pieces the token was split into.
            features.Add("stn=" + count);

            // Accumulates the TokenFeature value of every piece, in order, for the "pta" feature.
            var pattern = new StringBuilder();

            for (var pos = 0; pos < count; pos++)
            {
                var part      = parts[pos];
                var remaining = count - pos - 1; // number of pieces after the current one

                // Pattern of this piece and the one after it.
                if (remaining >= 1)
                {
                    features.Add($"pt2={FeatureGeneratorUtil.TokenFeature(part)}{FeatureGeneratorUtil.TokenFeature(parts[pos + 1])}");
                }

                // Pattern of this piece and the two after it.
                if (remaining >= 2)
                {
                    features.Add($"pt3={FeatureGeneratorUtil.TokenFeature(part)}{FeatureGeneratorUtil.TokenFeature(parts[pos + 1])}{FeatureGeneratorUtil.TokenFeature(parts[pos + 2])}");
                }

                pattern.Append(FeatureGeneratorUtil.TokenFeature(part));

                // Pieces matched by noLetters do not get their own "st" feature.
                if (noLetters.IsMatch(part))
                {
                    continue;
                }

                features.Add("st=" + part.ToLowerInvariant());
            }

            features.Add("pta=" + pattern.ToString());
        }