Esempio n. 1
0
        /// <summary>
        /// Builds the feature set for <paramref name="token"/> by evaluating the templates in
        /// <paramref name="list"/>, optionally restricted by their <c>IsLocal</c> flag.
        /// </summary>
        /// <param name="list">Templates to evaluate; its count is passed to the <c>Features</c> constructor.</param>
        /// <param name="token">Token handed to each template's <c>GetValue</c>.</param>
        /// <param name="sentence">Sentence handed to each template's <c>GetValue</c>.</param>
        /// <param name="local">
        /// <c>null</c> evaluates every template; otherwise only templates whose <c>IsLocal</c>
        /// equals this value are evaluated.
        /// </param>
        /// <returns>The collected features, one entry per evaluated template, keyed by template name.</returns>
        private Features GetFeatures(List<FeatureTemplate> list, Token token, IndexedSentence sentence, bool? local = null)
        {
            Features result = new Features(list.Count);

            foreach (FeatureTemplate current in list)
            {
                // A filter was requested and this template does not match it.
                if (local != null && local != current.IsLocal)
                {
                    continue;
                }

                var featureName  = current.Name;
                var featureValue = current.GetValue(token, sentence);
                result.Add(featureName, featureValue);
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Predicts a tag for every token of every sentence and stores it in the
        /// <c>PredictedTag</c> of the corresponding original token.
        /// </summary>
        /// <remarks>
        /// Each sentence is processed as one work item of <c>Parallel.ForEach</c>. Per token:
        /// a single candidate tag is taken as-is; otherwise the MSD is chosen by the highest
        /// <c>perceptronMsd</c> score, and then the lemma is chosen by the highest
        /// <c>perceptronLemma</c> score among candidates that share the chosen MSD and carry a lemma.
        /// Only predicted information is used to build features; the gold <c>CorrectTag</c> is never read.
        /// </remarks>
        /// <param name="sentences">Sentences whose tokens are to be tagged in place.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>perceptronMsd</c> is null.</exception>
        public void Tag(IEnumerable<Sentence> sentences)
        {
            if (perceptronMsd == null)
            {
                throw new InvalidOperationException("Model not loaded.");
            }

            Parallel.ForEach(sentences, sentence =>
            {
                // Work on a normalized copy, optionally in reverse token order. The explicit cast
                // selects the LINQ Reverse() extension rather than the Reverse flag / any instance method.
                IEnumerable<Token> orderedTokens = Reverse ? ((IEnumerable<Token>)sentence).Reverse() : sentence;
                IndexedSentence normalizedSentence = new IndexedSentence(orderedTokens.Select(t => Normalize(t)));

                foreach (Token token in normalizedSentence)
                {
                    // Tokens without their own candidate list fall back to every tag the MSD model knows.
                    Tag[] possibleTags = token.PossibleTags;
                    if (possibleTags == null || possibleTags.Length == 0)
                    {
                        possibleTags = perceptronMsd.Tags.ToArray();
                    }

                    if (possibleTags.Length == 1)
                    {
                        // Unambiguous token: nothing to score.
                        token.PredictedTag = possibleTags[0];
                    }
                    else
                    {
                        // ---- Step 1: choose the MSD. ----
                        double? maxScoreMsd = null;
                        Tag bestMsd = null;

                        // Templates flagged local are evaluated once, before any candidate is tried.
                        Features localFeaturesTag = GetFeatures(featureTemplatesTag, token, normalizedSentence, true);

                        foreach (Tag tag in possibleTags)
                        {
                            // The candidate is assigned before the non-local templates are evaluated
                            // (presumably because those templates read PredictedTag).
                            Tag tagMsd = new Tag(tag.Msd);
                            token.PredictedTag = tagMsd;

                            Features featuresTag = GetFeatures(featureTemplatesTag, token, normalizedSentence, false);
                            featuresTag.AddRange(localFeaturesTag);

                            double scoreMsd = perceptronMsd.Score(featuresTag, tagMsd);
                            if (maxScoreMsd == null || scoreMsd > maxScoreMsd.Value)
                            {
                                maxScoreMsd = scoreMsd;
                                bestMsd = tagMsd;
                            }
                        }

                        // ---- Step 2: choose the lemma among candidates with the winning MSD. ----
                        double? maxScoreLemma = null;
                        string bestLemma = null;

                        token.PredictedTag = bestMsd;
                        Features localFeaturesLemma = GetFeatures(featureTemplatesLemma, token, normalizedSentence, true);

                        foreach (Tag tag in possibleTags)
                        {
                            if (tag.Msd != bestMsd.Msd || tag.Lemma == null)
                            {
                                continue;
                            }

                            // Use the predicted MSD here: CorrectTag is gold data that is not
                            // available (and must not be consulted) while tagging.
                            token.PredictedTag = new Tag(bestMsd.Msd, tag.Lemma);
                            Features featuresLemma = GetFeatures(featureTemplatesLemma, token, normalizedSentence, false);
                            featuresLemma.AddRange(localFeaturesLemma);

                            double scoreLemma = perceptronLemma.Score(featuresLemma, tag.Lemma);
                            if (maxScoreLemma == null || scoreLemma > maxScoreLemma.Value)
                            {
                                maxScoreLemma = scoreLemma;
                                bestLemma = tag.Lemma;
                            }
                        }

                        // bestLemma stays null when no candidate with the winning MSD carries a lemma.
                        token.PredictedTag = new Tag(bestMsd.Msd, bestLemma);
                    }

                    // Map the position in the (possibly reversed) normalized sentence back to the original.
                    int pos = normalizedSentence[token];
                    if (Reverse)
                    {
                        pos = normalizedSentence.Count - pos - 1;
                    }

                    Token original = sentence[pos];
                    original.PredictedTag = token.PredictedTag;
                    Unnormalize(original);
                }
            });
        }
Esempio n. 3
0
 /// <summary>
 /// Produces a string value for the given token, with its sentence supplied as context.
 /// </summary>
 /// <param name="token">Token to produce the value for.</param>
 /// <param name="sentence">Sentence passed alongside <paramref name="token"/>.</param>
 /// <returns>The value as a string; how it is derived is left to each concrete implementation.</returns>
 public abstract string GetValue(Token token, IndexedSentence sentence);