コード例 #1
0
        /// <summary>
        /// Runs cross-validation over <c>Sentences</c> using <c>Folds</c> folds.
        /// Sentences are dealt round-robin into the folds (after an optional shuffle
        /// seeded with <c>RandomSeed</c>); for every fold a fresh tagger of type
        /// <c>T</c> is trained on clones of the other folds and then used to tag
        /// clones of the held-out fold. Folds are processed in parallel.
        /// </summary>
        /// <returns>
        /// One result per fold, added in fold order, plus the total elapsed time in
        /// <c>Duration</c>.
        /// </returns>
        public CrossValidationResults Evaluate()
        {
            Helpers.XorShiftRandom random    = new Helpers.XorShiftRandom(RandomSeed);
            List <Sentence>        sentences = Randomize ? Sentences.OrderBy(s => random.NextUInt()).ToList() : Sentences;

            List <Sentence>[] folds = new List <Sentence> [Folds];
            for (int i = 0; i < Folds; i++)
            {
                folds[i] = new List <Sentence>();
            }

            // Round-robin assignment keeps the fold sizes within one sentence of each other.
            for (int i = 0; i < sentences.Count; i++)
            {
                folds[i % Folds].Add(sentences[i]);
            }

            CrossValidationResults results = new CrossValidationResults();

            // Each parallel worker writes only to its own slot, so no synchronization is
            // needed and the shared results object is never touched concurrently.
            CrossValidationResults.Result[] foldResults = new CrossValidationResults.Result[Folds];

            // Stopwatch is monotonic; wall-clock subtraction can be skewed by clock changes.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            int iterationsCount = 0;

            Parallel.For(0, Folds, (i) =>
            {
                CrossValidationResults.Result result = new CrossValidationResults.Result();
                result.Fold  = i + 1;
                result.Test  = folds[i].Select(sentence => sentence.Clone()).ToList();
                result.Train = new List <Sentence>();

                // Sentences are cloned so that concurrently running folds never share
                // the same Sentence instances.
                for (int j = 0; j < Folds; j++)
                {
                    if (j != i)
                    {
                        result.Train.AddRange(folds[j].Select(sentence => sentence.Clone()));
                    }
                }

                ITrainableTagger tagger = Activator.CreateInstance <T>();

                Tagging.Perceptron.PerceptronTagger ptagger = tagger as Tagging.Perceptron.PerceptronTagger;
                if (ptagger != null)
                {
                    ptagger.IterationFinished += (it) =>
                    {
                        // Atomic increment whose return value is used directly, so the
                        // reported ratio is based on this thread's own count.
                        int finished = System.Threading.Interlocked.Increment(ref iterationsCount);

                        // Copy to a local so the handler cannot change between the null
                        // check and the call.
                        var progressHandler = Progress;
                        if (progressHandler != null)
                        {
                            progressHandler(((double)finished / (ptagger.Iterations * Folds)));
                        }
                    };
                }

                tagger.Train(result.Train);
                tagger.Tag(result.Test);

                foldResults[i] = result;
            });

            // Merge on the calling thread, in fold order.
            foreach (CrossValidationResults.Result foldResult in foldResults)
            {
                results.Add(foldResult);
            }

            results.Duration = stopwatch.Elapsed;

            return(results);
        }
コード例 #2
0
        /// <summary>
        /// Trains the MSD perceptron and the lemma perceptron from scratch on the given
        /// sentences. Both models are re-created on every call. Training runs for
        /// <c>Iterations</c> passes, visiting the sentences in a random order on each
        /// pass, and raises <c>IterationStarted</c> / <c>IterationFinished</c> with the
        /// 1-based pass number. Afterwards the weights are optionally averaged
        /// (<c>Average</c>) and weights are pruned using <c>WeightThreshold</c>.
        /// </summary>
        /// <param name="sentences">
        /// Training sentences; every token is passed through <c>Normalize</c>, and the
        /// token order is reversed first when <c>Reverse</c> is set.
        /// </param>
        public void Train(IEnumerable <Sentence> sentences)
        {
            perceptronMsd   = new Perceptron();
            perceptronLemma = new PerceptronString();

            IndexedSentence[] normalizedSentences =
                sentences.Select(s => new IndexedSentence((Reverse ? (s as IEnumerable <Token>).Reverse() : s).Select(t => Normalize(t)))).ToArray();

            // Every MSD seen in the training data (lemma stripped); used as the candidate
            // set for tokens that carry no candidate tags of their own.
            HashSet <Tag> alltags = new HashSet <Tag>();

            foreach (IndexedSentence sentence in normalizedSentences)
            {
                foreach (Token token in sentence)
                {
                    alltags.Add(new Tag(token.CorrectTag.Msd, null));

                    // PossibleTags is treated as nullable by the training loop below,
                    // so it must not be enumerated unconditionally here.
                    if (token.PossibleTags == null)
                    {
                        continue;
                    }

                    foreach (Tag tag in token.PossibleTags)
                    {
                        alltags.Add(new Tag(tag.Msd, null));
                    }
                }
            }

            for (int iteration = 0; iteration < Iterations; iteration++)
            {
                if (IterationStarted != null)
                {
                    IterationStarted(iteration + 1);
                }

                foreach (IndexedSentence sentence in normalizedSentences.OrderBy(s => random.NextUInt()))
                {
                    foreach (Token token in sentence)
                    {
                        // Unambiguous token: nothing to learn from it.
                        if (token.PossibleTags != null && token.PossibleTags.Length == 1)
                        {
                            token.PredictedTag = token.CorrectTag; // todo: possibleTags[0]?? what if it's not correct tag?
                            continue;
                        }

                        // Candidates sorted by MSD; fall back to every known MSD when the
                        // token has no candidates (null or empty).
                        IEnumerable <Tag> possibleTags = null;
                        if (token.PossibleTags != null)
                        {
                            possibleTags = token.PossibleTags.OrderBy(t => t.Msd).ToArray();
                        }
                        if (possibleTags == null || !possibleTags.Any())
                        {
                            possibleTags = alltags;
                        }

                        double?  bestMsdScore    = null;
                        Tag      bestMsd         = null;
                        Features bestMsdFeatures = null;

                        // Features requested with the last argument set to true are computed
                        // once per token and appended to each candidate's feature set below.
                        Features localFeaturesTag = GetFeatures(featureTemplatesTag, token, sentence, true);

                        foreach (Tag tag in possibleTags)
                        {
                            Tag tagMsd = new Tag(tag.Msd);
                            // GetFeatures reads the token, so the candidate is placed on it first.
                            token.PredictedTag = tagMsd;

                            Features featuresTag = GetFeatures(featureTemplatesTag, token, sentence, false);
                            featuresTag.AddRange(localFeaturesTag);

                            double score = perceptronMsd.Score(featuresTag, tagMsd);
                            if (bestMsdScore == null || score > bestMsdScore.Value)
                            {
                                bestMsdScore    = score;
                                bestMsd         = tagMsd;
                                bestMsdFeatures = featuresTag;
                            }
                        }

                        perceptronMsd.Update(bestMsdFeatures, new Tag(token.CorrectTag.Msd), bestMsd);

                        if (token.CorrectTag.Lemma != null)
                        {
                            double?  bestLemmaScore    = null;
                            string   bestLemma         = null;
                            Features bestLemmaFeatures = null;

                            token.PredictedTag = new Tag(token.CorrectTag.Msd);
                            Features localFeaturesLemma = GetFeatures(featureTemplatesLemma, token, sentence, true);

                            foreach (Tag tag in possibleTags)
                            {
                                // Only lemmas attached to the best-scoring MSD are considered.
                                if (tag.Msd != bestMsd.Msd || tag.Lemma == null)
                                {
                                    continue;
                                }
                                token.PredictedTag = new Tag(token.CorrectTag.Msd, tag.Lemma);
                                Features featuresLemma = GetFeatures(featureTemplatesLemma, token, sentence, false);
                                featuresLemma.AddRange(localFeaturesLemma);
                                double scoreLemma = perceptronLemma.Score(featuresLemma, tag.Lemma);
                                if (bestLemmaScore == null || scoreLemma > bestLemmaScore.Value)
                                {
                                    bestLemmaScore    = scoreLemma;
                                    bestLemma         = tag.Lemma;
                                    bestLemmaFeatures = featuresLemma;
                                }
                            }

                            // No candidate lemma matched: skip the lemma update for this token.
                            if (bestLemmaFeatures != null)
                            {
                                perceptronLemma.Update(bestLemmaFeatures, token.CorrectTag.Lemma, bestLemma);
                            }
                        }

                        // Leave the correct tag on the token before moving to the next one.
                        token.PredictedTag = token.CorrectTag;
                    }
                }

                if (IterationFinished != null)
                {
                    IterationFinished(iteration + 1);
                }
            }

            if (Average)
            {
                perceptronMsd.AverageWeights();
                perceptronLemma.AverageWeights();
            }

            perceptronMsd.RemoveInsignificantWeights(WeightThreshold);
            perceptronLemma.RemoveInsignificantWeights(WeightThreshold);
        }