/// <summary>
/// Builds a feature set for <paramref name="token"/> by evaluating the given templates.
/// </summary>
/// <param name="list">Templates to evaluate; its count is passed to the <c>Features</c> constructor.</param>
/// <param name="token">Token passed to each template's <c>GetValue</c>.</param>
/// <param name="sentence">Sentence passed to each template's <c>GetValue</c>.</param>
/// <param name="local">
/// When <c>null</c>, every template is evaluated; otherwise only templates whose
/// <c>IsLocal</c> equals this value are evaluated.
/// </param>
/// <returns>The features produced by the selected templates, keyed by template name.</returns>
private Features GetFeatures(List<FeatureTemplate> list, Token token, IndexedSentence sentence, bool? local = null)
{
    var result = new Features(list.Count);

    foreach (var featureTemplate in list)
    {
        // Skip templates excluded by the locality filter (no filter when local is null).
        if (local != null && local != featureTemplate.IsLocal)
        {
            continue;
        }

        var value = featureTemplate.GetValue(token, sentence);
        result.Add(featureTemplate.Name, value);
    }

    return result;
}
/// <summary>
/// Predicts a tag (MSD, then lemma) for every token of every sentence and stores it in the
/// <c>PredictedTag</c> of the corresponding token of the input sentence.
/// Sentences are processed via <c>Parallel.ForEach</c>.
/// </summary>
/// <param name="sentences">Sentences whose tokens receive a predicted tag.</param>
/// <exception cref="InvalidOperationException">Thrown when <c>perceptronMsd</c> is <c>null</c>.</exception>
public void Tag(IEnumerable<Sentence> sentences)
{
    if (perceptronMsd == null)
    {
        throw new InvalidOperationException("Model not loaded.");
    }

    Parallel.ForEach(sentences, sentence =>
    {
        // Normalize every token, walking the sentence backwards when Reverse is set.
        IEnumerable<Token> ordered = Reverse ? ((IEnumerable<Token>)sentence).Reverse() : sentence;
        IndexedSentence normalizedSentence = new IndexedSentence(ordered.Select(t => Normalize(t)));

        foreach (Token token in normalizedSentence)
        {
            // Fall back to the model's full tag set when the token carries no candidates.
            Tag[] possibleTags = token.PossibleTags;
            if (possibleTags == null || possibleTags.Length == 0)
            {
                possibleTags = perceptronMsd.Tags.ToArray();
            }

            if (possibleTags.Length == 1)
            {
                // Unambiguous token: nothing to score.
                token.PredictedTag = possibleTags[0];
            }
            else
            {
                // Step 1: pick the best-scoring MSD among the candidates.
                double? maxScoreMsd = null;
                Tag bestMsd = null;

                // Local features are computed once here and merged into each candidate's feature set.
                Features localFeaturesTag = GetFeatures(featureTemplatesTag, token, normalizedSentence, true);

                foreach (Tag tag in possibleTags)
                {
                    Tag tagMsd = new Tag(tag.Msd);

                    // The candidate is assigned before the non-local templates are evaluated.
                    token.PredictedTag = tagMsd;

                    Features featuresTag = GetFeatures(featureTemplatesTag, token, normalizedSentence, false);
                    featuresTag.AddRange(localFeaturesTag);

                    double scoreMsd = perceptronMsd.Score(featuresTag, tagMsd);
                    if (maxScoreMsd == null || scoreMsd > maxScoreMsd.Value)
                    {
                        maxScoreMsd = scoreMsd;
                        bestMsd = tagMsd;
                    }
                }

                // Step 2: pick the best-scoring lemma among candidates sharing the chosen MSD.
                double? maxScoreLemma = null;
                string bestLemma = null;

                token.PredictedTag = bestMsd;
                Features localFeaturesLemma = GetFeatures(featureTemplatesLemma, token, normalizedSentence, true);

                foreach (Tag tag in possibleTags)
                {
                    if (tag.Msd != bestMsd.Msd || tag.Lemma == null)
                    {
                        continue;
                    }

                    // Use the predicted MSD here, not token.CorrectTag: the gold annotation may be
                    // absent at tagging time and must not influence prediction.
                    token.PredictedTag = new Tag(bestMsd.Msd, tag.Lemma);

                    Features featuresLemma = GetFeatures(featureTemplatesLemma, token, normalizedSentence, false);
                    featuresLemma.AddRange(localFeaturesLemma);

                    double scoreLemma = perceptronLemma.Score(featuresLemma, tag.Lemma);
                    if (maxScoreLemma == null || scoreLemma > maxScoreLemma.Value)
                    {
                        maxScoreLemma = scoreLemma;
                        bestLemma = tag.Lemma;
                    }
                }

                // bestLemma stays null when no candidate with the chosen MSD carries a lemma.
                token.PredictedTag = new Tag(bestMsd.Msd, bestLemma);
            }

            // Map the position in the (possibly reversed) normalized sentence back to the input sentence.
            int pos = normalizedSentence[token];
            if (Reverse)
            {
                pos = normalizedSentence.Count - pos - 1;
            }

            Token original = sentence[pos];
            original.PredictedTag = token.PredictedTag;
            Unnormalize(original);
        }
    });
}
/// <summary>
/// Returns this member's string value for <paramref name="token"/> within <paramref name="sentence"/>.
/// Implemented by derived classes.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the feature-template hook whose result is stored as a feature value;
/// confirm against the enclosing class, which is not visible here.
/// </remarks>
/// <param name="token">The token to evaluate.</param>
/// <param name="sentence">The indexed sentence supplied alongside the token.</param>
/// <returns>The computed string value.</returns>
public abstract string GetValue(Token token, IndexedSentence sentence);