Пример #1
0
        /// <summary>
        /// Computes word-level detection metrics for one matched pair of linguistic objects.
        /// </summary>
        /// <param name="objectMatch">Pair of target and sample objects; either side may be null.</param>
        /// <param name="targetSentence">Sentence used to resolve the indexes of target words.</param>
        /// <param name="sampleSentence">Sentence used to resolve the indexes of sample words.</param>
        /// <param name="objectType">Object type; only forwarded to the surplus-object calculation.</param>
        /// <returns>
        /// Metrics of type <c>DetectionDefect</c> when both sides are present; otherwise the result of
        /// the missed-object or surplus-object helper.
        /// </returns>
        public static LinguisticObjectDetectionMetrics MetricsFor(LinguisticObjectMatch objectMatch,
            Sentence targetSentence, Sentence sampleSentence, LinguisticObjectType objectType)
        {
            // A match without a target side is handled as a missed object.
            if (objectMatch.TargetObjects == null)
            {
                return _metricsForMissedObject(objectMatch, sampleSentence);
            }

            // A match without a sample side is handled as a surplus target object.
            if (objectMatch.SampleObjects == null)
            {
                return _metricsForSurplusTargetObject(objectMatch, sampleSentence, targetSentence, objectType);
            }

            var target = objectMatch.TargetObjects.Words;
            var sample = objectMatch.SampleObjects.Words;
            var detected = _getDetectedWords(target, sample);

            // Surplus: target words that were not recognised as detected.
            var surplus = target.Where(w => !detected.Contains(w)).ToList();
            // Missed: sample words whose text does not occur among the target words.
            var missed = sample.Where(w => !target.Any(candidate => candidate.Text == w.Text)).ToList();

            // Words flagged IsAuxilary are counted as non-essential, all others as essential.
            var surplusEssential = surplus.Where(w => !w.IsAuxilary).ToList();
            var surplusEssentialTexts = surplusEssential.Select(w => w.Text).ToList();
            var surplusEssentialIndexes = surplusEssential.Select(targetSentence.WordIndex).ToList();

            var surplusAuxiliary = surplus.Where(w => w.IsAuxilary).ToList();
            var surplusAuxiliaryTexts = surplusAuxiliary.Select(w => w.Text).ToList();
            var surplusAuxiliaryIndexes = surplusAuxiliary.Select(targetSentence.WordIndex).ToList();

            var missedEssential = missed.Where(w => !w.IsAuxilary).ToList();
            var missedEssentialTexts = missedEssential.Select(w => w.Text).ToList();
            var missedEssentialIndexes = missedEssential.Select(sampleSentence.WordIndex).ToList();

            var missedAuxiliary = missed.Where(w => w.IsAuxilary).ToList();
            var missedAuxiliaryTexts = missedAuxiliary.Select(w => w.Text).ToList();
            var missedAuxiliaryIndexes = missedAuxiliary.Select(sampleSentence.WordIndex).ToList();

            // Quality stays at zero when nothing was detected at all.
            double quality = 0d;
            if (detected.Any())
            {
                quality = _getQualityMeasure(detected.Count, surplusEssential.Count, sample.Count);
            }

            return new LinguisticObjectDetectionMetrics
            {
                SampleWords = sample.Select(w => w.Text).ToArray(),
                TargetWords = target.Select(w => w.Text).ToArray(),
                SampleWordsCount = sample.Count,
                SampleWordIndexes = sample.Select(sampleSentence.WordIndex).ToArray(),
                TargetWordsCount = target.Count,
                TargetWordIndexes = target.Select(targetSentence.WordIndex).ToArray(),
                // Combined error lists: surplus entries first, missed entries after.
                NonEssentialErrors = surplusAuxiliaryTexts.Concat(missedAuxiliaryTexts).ToArray(),
                NonEssentialErrorIndexes = surplusAuxiliaryIndexes.Concat(missedAuxiliaryIndexes).ToArray(),
                EssentialErrors = surplusEssentialTexts.Concat(missedEssentialTexts).ToArray(),
                EssentialErrorIndexes = surplusEssentialIndexes.Concat(missedEssentialIndexes).ToArray(),
                SurplusNonEssentialWords = surplusAuxiliaryTexts.ToArray(),
                SurplusNonEssentialWordIndexes = surplusAuxiliaryIndexes.ToArray(),
                MissedNonEssentialWords = missedAuxiliaryTexts.ToArray(),
                MissedNonEssentialWordIndexes = missedAuxiliaryIndexes.ToArray(),
                SurplusEssentialWords = surplusEssentialTexts.ToArray(),
                SurplusEssentialWordIndexes = surplusEssentialIndexes.ToArray(),
                MissedEssentialWords = missedEssentialTexts.ToArray(),
                MissedEssentialWordIndexes = missedEssentialIndexes.ToArray(),
                CorrectDetectionsCount = detected.Count,
                Type = ErrorType.DetectionDefect,
                DetectionQuality = quality
            };
        }
Пример #2
0
        /// <summary>
        /// Builds metrics for a match that has sample words but no target side: every sample word is
        /// reported as missed, split into essential and non-essential (IsAuxilary) words.
        /// </summary>
        /// <param name="objectMatch">Match whose <c>SampleObjects</c> supplies the words.</param>
        /// <param name="sampleSentence">Sentence used to resolve the indexes of sample words.</param>
        /// <returns>Metrics of type <c>MissedLinguisticObject</c> with zero detections and zero quality.</returns>
        private static LinguisticObjectDetectionMetrics _metricsForMissedObject(LinguisticObjectMatch objectMatch,
            Sentence sampleSentence)
        {
            var words = objectMatch.SampleObjects.Words;

            var missedEssential = words.Where(w => !w.IsAuxilary).ToList();
            var missedEssentialTexts = missedEssential.Select(w => w.Text).ToList();
            var missedEssentialIndexes = missedEssential.Select(sampleSentence.WordIndex).ToList();

            var missedAuxiliary = words.Where(w => w.IsAuxilary).ToList();
            var missedAuxiliaryTexts = missedAuxiliary.Select(w => w.Text).ToList();
            var missedAuxiliaryIndexes = missedAuxiliary.Select(sampleSentence.WordIndex).ToList();

            return new LinguisticObjectDetectionMetrics
            {
                SampleWords = words.Select(w => w.Text).ToArray(),
                TargetWords = new string[0],
                SampleWordsCount = words.Count,
                SampleWordIndexes = words.Select(sampleSentence.WordIndex).ToArray(),
                TargetWordsCount = 0,
                TargetWordIndexes = new int[0],
                // With no target words, the error lists consist of the missed words only.
                NonEssentialErrors = missedAuxiliaryTexts.ToArray(),
                NonEssentialErrorIndexes = missedAuxiliaryIndexes.ToArray(),
                EssentialErrors = missedEssentialTexts.ToArray(),
                EssentialErrorIndexes = missedEssentialIndexes.ToArray(),
                SurplusNonEssentialWords = new string[0],
                SurplusNonEssentialWordIndexes = new int[0],
                MissedNonEssentialWords = missedAuxiliaryTexts.ToArray(),
                MissedNonEssentialWordIndexes = missedAuxiliaryIndexes.ToArray(),
                SurplusEssentialWords = new string[0],
                SurplusEssentialWordIndexes = new int[0],
                MissedEssentialWords = missedEssentialTexts.ToArray(),
                MissedEssentialWordIndexes = missedEssentialIndexes.ToArray(),
                CorrectDetectionsCount = 0,
                DetectionQuality = 0d,
                Type = ErrorType.MissedLinguisticObject
            };
        }
Пример #3
0
        /// <summary>
        /// Builds metrics for a match that has target words but no sample side: every target word is
        /// reported as surplus, split into essential and non-essential (IsAuxilary) words.
        /// </summary>
        /// <param name="objectMatch">Match whose <c>TargetObjects</c> supplies the words.</param>
        /// <param name="sampleSentence">Sentence passed to the word-count helper together with <paramref name="type"/>.</param>
        /// <param name="targetSentence">Sentence used to resolve the indexes of target words.</param>
        /// <param name="type">Object type passed to the word-count helper.</param>
        /// <returns>Metrics of type <c>SurplusLinguisticObject</c> with zero correct detections.</returns>
        private static LinguisticObjectDetectionMetrics _metricsForSurplusTargetObject(LinguisticObjectMatch objectMatch, Sentence sampleSentence, Sentence targetSentence, LinguisticObjectType type)
        {
            // Presumably the average word count of sample objects of this type (judging by the
            // helper's name) - TODO confirm against the helper.
            var referenceWordsCount = _getArageWordsCountInObjectsOfType(sampleSentence, type);

            var words = objectMatch.TargetObjects.Words;

            var surplusEssential = words.Where(w => !w.IsAuxilary).ToList();
            var surplusEssentialTexts = surplusEssential.Select(w => w.Text).ToList();
            var surplusEssentialIndexes = surplusEssential.Select(targetSentence.WordIndex).ToList();

            var surplusAuxiliary = words.Where(w => w.IsAuxilary).ToList();
            var surplusAuxiliaryTexts = surplusAuxiliary.Select(w => w.Text).ToList();
            var surplusAuxiliaryIndexes = surplusAuxiliary.Select(targetSentence.WordIndex).ToList();

            // Each essential surplus word adds 0.5 to the denominator, lowering the quality.
            // NOTE(review): if the reference count is 0 and there are no essential surplus words,
            // this is a floating-point 0/0 and yields NaN - confirm whether that can happen.
            var penalty = 0.5*surplusEssential.Count;
            var quality = referenceWordsCount/(referenceWordsCount + penalty);

            return new LinguisticObjectDetectionMetrics
            {
                SampleWords = new string[0],
                TargetWords = words.Select(w => w.Text).ToArray(),
                SampleWordsCount = 0,
                SampleWordIndexes = new int[0],
                TargetWordsCount = words.Count,
                TargetWordIndexes = words.Select(targetSentence.WordIndex).ToArray(),
                // With no sample words, the error lists consist of the surplus words only.
                NonEssentialErrors = surplusAuxiliaryTexts.ToArray(),
                NonEssentialErrorIndexes = surplusAuxiliaryIndexes.ToArray(),
                EssentialErrors = surplusEssentialTexts.ToArray(),
                EssentialErrorIndexes = surplusEssentialIndexes.ToArray(),
                SurplusNonEssentialWords = surplusAuxiliaryTexts.ToArray(),
                SurplusNonEssentialWordIndexes = surplusAuxiliaryIndexes.ToArray(),
                MissedNonEssentialWords = new string[0],
                MissedNonEssentialWordIndexes = new int[0],
                SurplusEssentialWords = surplusEssentialTexts.ToArray(),
                SurplusEssentialWordIndexes = surplusEssentialIndexes.ToArray(),
                MissedEssentialWords = new string[0],
                MissedEssentialWordIndexes = new int[0],
                CorrectDetectionsCount = 0,
                DetectionQuality = quality,
                Type = ErrorType.SurplusLinguisticObject
            };
        }
Пример #4
0
 /// <summary>
 /// Processes a subject match: forwards it to <c>_processSimpleObjectMatch</c> with the
 /// <c>Subjects.Objects</c> list as the destination and <c>LinguisticObjectType.Subject</c> as the type.
 /// </summary>
 /// <param name="subjectsMatch">The match to process.</param>
 private void _processSubject(LinguisticObjectMatch subjectsMatch)
 {
     Func<SimpleSentenceStats, List<LinguisticObject>> subjectsGroup =
         sentenceStats => sentenceStats.Subjects.Objects;

     _processSimpleObjectMatch(subjectsMatch, subjectsGroup, LinguisticObjectType.Subject);
 }
Пример #5
0
 /// <summary>
 /// Processes a uniforms match: forwards it to <c>_processSimpleObjectMatch</c> with the
 /// <c>Uniforms.Objects</c> list as the destination and <c>LinguisticObjectType.Uniform</c> as the type.
 /// </summary>
 /// <param name="uniformsMatch">The match to process.</param>
 private void _processUniforms(LinguisticObjectMatch uniformsMatch)
 {
     Func<SimpleSentenceStats, List<LinguisticObject>> uniformsGroup =
         sentenceStats => sentenceStats.Uniforms.Objects;

     _processSimpleObjectMatch(uniformsMatch, uniformsGroup, LinguisticObjectType.Uniform);
 }
Пример #6
0
        /// <summary>
        /// Computes metrics for <paramref name="match"/> and appends the result to the list chosen by
        /// <paramref name="statsGroupSelector"/> from the statistics of the match's sentence.
        /// Does nothing when <c>_getSenteceId</c> returns -1 for the match.
        /// </summary>
        /// <param name="match">The target/sample object pair to record.</param>
        /// <param name="statsGroupSelector">Picks the destination list from the sentence statistics.</param>
        /// <param name="objectType">Object type forwarded to the metrics calculator.</param>
        private void _processSimpleObjectMatch(LinguisticObjectMatch match, Func<SimpleSentenceStats, List<LinguisticObject>> statsGroupSelector, LinguisticObjectType objectType)
        {
            var id = _getSenteceId(match);
            if (id == -1)
            {
                return;
            }

            // Resolve the destination list first, then compute the metrics, then append.
            var destination = statsGroupSelector(_simpleSentencesStats[id]);

            var entry = new LinguisticObject
            {
                MappedObjects = match,
                Metrics = LoMatchMetricsCalculator.MetricsFor(match, _target, _sample, objectType)
            };

            destination.Add(entry);
        }
Пример #7
0
 /// <summary>
 /// Processes a predicate match: forwards it to <c>_processSimpleObjectMatch</c> with the
 /// <c>Predicates.Objects</c> list as the destination and <c>LinguisticObjectType.Predicate</c> as the type.
 /// </summary>
 /// <param name="predicateMatch">The match to process.</param>
 private void _processPredicate(LinguisticObjectMatch predicateMatch)
 {
     Func<SimpleSentenceStats, List<LinguisticObject>> predicatesGroup =
         sentenceStats => sentenceStats.Predicates.Objects;

     _processSimpleObjectMatch(predicateMatch, predicatesGroup, LinguisticObjectType.Predicate);
 }
Пример #8
0
 /// <summary>
 /// Processes a meaning-part match: forwards it to <c>_processSimpleObjectMatch</c> with the
 /// <c>MeaningAuxParts.Objects</c> list as the destination and <c>LinguisticObjectType.MeaningPart</c> as the type.
 /// </summary>
 /// <param name="meaningPartMatch">The match to process.</param>
 private void _processMeaningPart(LinguisticObjectMatch meaningPartMatch)
 {
     Func<SimpleSentenceStats, List<LinguisticObject>> meaningPartsGroup =
         sentenceStats => sentenceStats.MeaningAuxParts.Objects;

     _processSimpleObjectMatch(meaningPartMatch, meaningPartsGroup, LinguisticObjectType.MeaningPart);
 }
Пример #9
0
 /// <summary>
 /// Resolves the sentence id of a match. The sample side is looked up in <c>_sample</c> when it
 /// is present; otherwise the target side is looked up in <c>_target</c>.
 /// </summary>
 /// <param name="match">The match whose sentence id is needed.</param>
 /// <returns>The id returned by the two-argument <c>_getSenteceId</c> overload.</returns>
 private int _getSenteceId(LinguisticObjectMatch match)
 {
     if (match.SampleObjects == null)
     {
         return _getSenteceId(match.TargetObjects, _target);
     }

     return _getSenteceId(match.SampleObjects, _sample);
 }
Пример #10
0
        /// <summary>
        /// Converts the chosen best similarity entry into a match and clears its sample and target
        /// from the remaining entries of <paramref name="similarityTable"/>.
        /// </summary>
        /// <param name="similarityTable">Table of candidate pairings; modified in place.</param>
        /// <param name="bestMatch">The entry selected as the best pairing.</param>
        /// <returns>A match holding the sample and target of <paramref name="bestMatch"/>.</returns>
        private static LinguisticObjectMatch _processBestMatch(List<SimplePartsSimilarity> similarityTable, SimplePartsSimilarity bestMatch)
        {
            var match = new LinguisticObjectMatch
            {
                SampleObjects = bestMatch.Sample,
                TargetObjects = bestMatch.Target
            };

            similarityTable.Remove(bestMatch);

            // Entries that share the matched sample lose their sample side and their score.
            foreach (var entry in similarityTable.Where(candidate => candidate.Sample == bestMatch.Sample))
            {
                entry.Sample = null;
                entry.Value = 0;
            }

            // Entries that share the matched target lose their target side and their score.
            foreach (var entry in similarityTable.Where(candidate => candidate.Target == bestMatch.Target))
            {
                entry.Target = null;
                entry.Value = 0;
            }

            // Entries left with neither side carry no information and are dropped.
            similarityTable.RemoveAll(entry => entry.Sample == null && entry.Target == null);

            return match;
        }