Ejemplo n.º 1
0
        /// <summary>
        /// Builds summary statistics for one linguistic-object (LO) type of a pack and then
        /// reduces the measured share of defects by the emulation effect returned by
        /// <c>_calcEmulationEffect</c> for the given correction class.
        /// </summary>
        /// <param name="pack">Pack whose comparison results supply the simple-sentence statistics.</param>
        /// <param name="statsLoCounter">Returns the number of LOs of the requested type in one simple sentence.</param>
        /// <param name="sentenceLoAverageQuality">
        /// Returns the average LO quality of one simple sentence; it is only invoked for
        /// sentences whose LO count is non-zero.
        /// </param>
        /// <param name="sentecesCount">Divisor used for <c>AverageCount</c>.</param>
        /// <param name="packClasterizedErrors">Clustered errors forwarded to <c>_calcEmulationEffect</c>.</param>
        /// <param name="correctionClass">Error class threshold forwarded to <c>_calcEmulationEffect</c>.</param>
        /// <param name="loType">LO type forwarded to <c>_calcEmulationEffect</c>.</param>
        /// <returns>
        /// Stats with <c>AverageCount</c>, the emulated <c>AverageQuality</c> and <c>WasEmulated</c>
        /// (true when the absolute emulation effect exceeds 0.05). When no LO was counted the
        /// quality is NaN (0/0 in floating point), exactly as before.
        /// </returns>
        private static LoSummaryStats _emulatedStatsForLObjects(SentencesPack pack,
            Func<SimpleSentenceStats, int> statsLoCounter, Func<SimpleSentenceStats, double> sentenceLoAverageQuality,
            int sentecesCount,
            IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
            int correctionClass, LinguisticObjectType loType)
        {
            var simpleSentencesStats =
                pack.ComparisonResults.SelectMany(result => result.SimpleSentenceStats)
                    .Select(stats => new {Stats = stats, LoCount = statsLoCounter(stats)})
                    .ToList();

            var totalLoCount = 0;
            var totalQuality = 0d;
            foreach (var stats in simpleSentencesStats)
            {
                totalLoCount += stats.LoCount;

                // Sentences without LOs contribute nothing and must not reach the quality callback.
                if (stats.LoCount == 0) continue;

                // The callback yields a per-sentence average, so it has to be weighted by the
                // sentence's LO count before dividing by the total LO count (same formula as
                // _statsForLObjects). Summing the bare averages understated the quality
                // whenever a sentence held more than one LO.
                totalQuality += stats.LoCount*sentenceLoAverageQuality(stats.Stats);
            }

            var normalAverageQuality = totalQuality/totalLoCount;

            var defectsShare = 1d - normalAverageQuality;

            var emulationEffect = _calcEmulationEffect(packClasterizedErrors, correctionClass, loType);

            return new LoSummaryStats
            {
                AverageCount = ((double) totalLoCount)/sentecesCount,
                // Only the non-emulated part of the defects keeps lowering the quality.
                AverageQuality = 1d - (defectsShare*(1d - emulationEffect)),
                WasEmulated = Math.Abs(emulationEffect) > 0.05
            };
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes which share of the clustered errors of one linguistic-object type has an
        /// error class not greater than <paramref name="correctionClass"/>.
        /// </summary>
        /// <param name="packClasterizedErrors">Clustered errors to inspect.</param>
        /// <param name="correctionClass">Inclusive upper bound for <c>ErrorClass</c>.</param>
        /// <param name="loType">Only errors whose <c>ObjectType</c> equals this type are considered.</param>
        /// <returns>A ratio of matching errors to all errors of the type, or 0 when there are none.</returns>
        private static double _calcEmulationEffect(
            IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
            int correctionClass, LinguisticObjectType loType)
        {
            var errorsOfType = packClasterizedErrors.Where(error => error.ObjectType == loType).ToList();
            if (errorsOfType.Count == 0)
            {
                // Nothing of this type was reported, so there is nothing to emulate.
                return 0d;
            }

            var coveredCount = errorsOfType.Count(error => error.ErrorClass <= correctionClass);
            return (double) coveredCount/errorsOfType.Count;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Calculates detection metrics for a matched pair of linguistic objects.
        /// A match without target objects is delegated to <c>_metricsForMissedObject</c>, a match
        /// without sample objects to <c>_metricsForSurplusTargetObject</c>; otherwise the words of
        /// both sides are compared and split into detected, surplus and missed words, each
        /// further divided by the <c>IsAuxilary</c> flag into essential and non-essential ones.
        /// </summary>
        /// <param name="objectMatch">Pair of target and sample objects to evaluate.</param>
        /// <param name="targetSentence">Sentence used to resolve indexes of target words.</param>
        /// <param name="sampleSentence">Sentence used to resolve indexes of sample words.</param>
        /// <param name="objectType">Object type, forwarded only to the surplus-object branch.</param>
        /// <returns>Metrics of type <c>ErrorType.DetectionDefect</c> for the regular case.</returns>
        public static LinguisticObjectDetectionMetrics MetricsFor(LinguisticObjectMatch objectMatch,
            Sentence targetSentence, Sentence sampleSentence, LinguisticObjectType objectType)
        {
            if (objectMatch.TargetObjects == null)
            {
                return _metricsForMissedObject(objectMatch, sampleSentence);
            }

            if (objectMatch.SampleObjects == null)
            {
                return _metricsForSurplusTargetObject(objectMatch, sampleSentence, targetSentence, objectType);
            }

            var wordsInTarget = objectMatch.TargetObjects.Words;
            var wordsInSample = objectMatch.SampleObjects.Words;
            var matchedWords = _getDetectedWords(wordsInTarget, wordsInSample);

            // Surplus: target words that were not recognised as detected.
            var extraWords = wordsInTarget.Where(word => !matchedWords.Contains(word)).ToList();

            // Missed: sample words whose text does not occur among the target words.
            var absentWords = wordsInSample
                .Where(word => !wordsInTarget.Any(targetWord => targetWord.Text == word.Text))
                .ToList();

            var extraEssential = extraWords.Where(word => !word.IsAuxilary).ToList();
            var extraEssentialTexts = extraEssential.Select(word => word.Text).ToList();
            var extraEssentialIndexes = extraEssential.Select(targetSentence.WordIndex).ToList();

            var extraAuxiliary = extraWords.Where(word => word.IsAuxilary).ToList();
            var extraAuxiliaryTexts = extraAuxiliary.Select(word => word.Text).ToList();
            var extraAuxiliaryIndexes = extraAuxiliary.Select(targetSentence.WordIndex).ToList();

            var absentEssential = absentWords.Where(word => !word.IsAuxilary).ToList();
            var absentEssentialTexts = absentEssential.Select(word => word.Text).ToList();
            var absentEssentialIndexes = absentEssential.Select(sampleSentence.WordIndex).ToList();

            var absentAuxiliary = absentWords.Where(word => word.IsAuxilary).ToList();
            var absentAuxiliaryTexts = absentAuxiliary.Select(word => word.Text).ToList();
            var absentAuxiliaryIndexes = absentAuxiliary.Select(sampleSentence.WordIndex).ToList();

            return new LinguisticObjectDetectionMetrics
            {
                SampleWords = wordsInSample.Select(word => word.Text).ToArray(),
                TargetWords = wordsInTarget.Select(word => word.Text).ToArray(),
                SampleWordsCount = wordsInSample.Count,
                SampleWordIndexes = wordsInSample.Select(sampleSentence.WordIndex).ToArray(),
                TargetWordsCount = wordsInTarget.Count,
                TargetWordIndexes = wordsInTarget.Select(targetSentence.WordIndex).ToArray(),
                // Combined error lists always place surplus entries before missed ones.
                NonEssentialErrors = extraAuxiliaryTexts.Concat(absentAuxiliaryTexts).ToArray(),
                NonEssentialErrorIndexes = extraAuxiliaryIndexes.Concat(absentAuxiliaryIndexes).ToArray(),
                EssentialErrors = extraEssentialTexts.Concat(absentEssentialTexts).ToArray(),
                EssentialErrorIndexes = extraEssentialIndexes.Concat(absentEssentialIndexes).ToArray(),
                SurplusNonEssentialWords = extraAuxiliaryTexts.ToArray(),
                SurplusNonEssentialWordIndexes = extraAuxiliaryIndexes.ToArray(),
                MissedNonEssentialWords = absentAuxiliaryTexts.ToArray(),
                MissedNonEssentialWordIndexes = absentAuxiliaryIndexes.ToArray(),
                SurplusEssentialWords = extraEssentialTexts.ToArray(),
                SurplusEssentialWordIndexes = extraEssentialIndexes.ToArray(),
                MissedEssentialWords = absentEssentialTexts.ToArray(),
                MissedEssentialWordIndexes = absentEssentialIndexes.ToArray(),
                CorrectDetectionsCount = matchedWords.Count,
                Type = ErrorType.DetectionDefect,
                // Without a single detected word the quality is fixed at zero.
                DetectionQuality =
                    matchedWords.Any()
                        ? _getQualityMeasure(matchedWords.Count, extraEssential.Count, wordsInSample.Count)
                        : 0d
            };
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Picks the header colour for a linguistic-object type. Chain types reuse the colour of
 /// the object they start from; types without a dedicated colour yield <c>null</c>.
 /// </summary>
 /// <param name="type">Linguistic-object type to colour.</param>
 /// <returns>The header colour, or <c>null</c> for any other type.</returns>
 private Color? _getLoColor(LinguisticObjectType type)
 {
     switch (type)
     {
         case LinguisticObjectType.Subject:
         case LinguisticObjectType.ChainFromSubject:
             return Colors.SubjectHeader;

         case LinguisticObjectType.Predicate:
         case LinguisticObjectType.ChainFromPredicate:
             return Colors.PredicateHeader;

         case LinguisticObjectType.Uniform:
             return Colors.UniformsHeader;

         case LinguisticObjectType.MeaningPart:
         case LinguisticObjectType.ChainFromMeaningPart:
             return Colors.MeaningHeader;

         default:
             return null;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Writes the cells of the second header row: an empty cell, the <c>Labels.LoName</c>
        /// caption and then one coloured cell per object, labelled with the type prefix
        /// followed by the zero-based object index.
        /// </summary>
        /// <param name="type">Linguistic-object type that selects prefix and colour.</param>
        /// <param name="objectsCount">Number of object cells to write.</param>
        private void _writeSecontHeaderRow(LinguisticObjectType type, int objectsCount)
        {
            XlHelper.WriteValue("");
            XlHelper.WriteValue(Labels.LoName);

            var labelPrefix = _getLoPrefix(type);
            var headerColor = _getLoColor(type);

            var objectIndex = 0;
            while (objectIndex < objectsCount)
            {
                XlHelper.WriteValue(labelPrefix + objectIndex, headerColor);
                ++objectIndex;
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Writes the four header rows of a sentence section: the sample sentence text, the
        /// two header rows produced by <c>_writeFirstHeaderRow</c> and
        /// <c>_writeSecontHeaderRow</c>, and a summary row with one empty cell per object.
        /// </summary>
        /// <param name="sentence">Sentence whose text is printed in the first row.</param>
        /// <param name="type">Linguistic-object type forwarded to the header-row writers.</param>
        /// <param name="sentenceId">Zero-based sentence id; it is written increased by one.</param>
        /// <param name="objectsCount">Number of object columns.</param>
        private void _writeHeader(Sentence sentence, LinguisticObjectType type, int sentenceId, int objectsCount)
        {
            // Row 1: caption plus the sentence text.
            XlHelper.WriteRow(() =>
            {
                XlHelper.WriteValue(Labels.SampleSentence);
                XlHelper.WriteValue(sentence.Text);
            });

            // Rows 2 and 3: per-object headers.
            XlHelper.WriteRow(() => _writeFirstHeaderRow(type, objectsCount));
            XlHelper.WriteRow(() => _writeSecontHeaderRow(type, objectsCount));

            // Row 4: summary row, fully painted with the summary colour.
            XlHelper.WriteRow(() =>
            {
                XlHelper.WriteValue(sentenceId + 1, Colors.SummaryRow);
                XlHelper.WriteValue(Labels.Summary.Evaluation, Colors.SummaryRow);

                var remainingCells = objectsCount;
                while (remainingCells > 0)
                {
                    XlHelper.WriteValue("", Colors.SummaryRow);
                    --remainingCells;
                }
            });
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes the cells of the first header row: an empty cell, the <c>Labels.LoNum</c>
        /// caption and then the number of the object type repeated once per object.
        /// </summary>
        /// <param name="type">Linguistic-object type whose number is printed.</param>
        /// <param name="objectsCount">How many times the number is written.</param>
        private void _writeFirstHeaderRow(LinguisticObjectType type, int objectsCount)
        {
            XlHelper.WriteValue("");
            XlHelper.WriteValue(Labels.LoNum);

            var typeNumber = _getLoNum(type);

            var written = 0;
            while (written < objectsCount)
            {
                XlHelper.WriteValue(typeNumber);
                ++written;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Writes the section for one sentence: a margin, the header and the body with the
        /// error objects of the requested type.
        /// </summary>
        /// <remarks>
        /// A former check that a sentence has exactly one error entry per type is disabled;
        /// the first entry of the group that contains the type is used. <c>First</c> throws
        /// when no entry of the group contains the type.
        /// </remarks>
        /// <param name="errorsGroup">Errors of one sentence, keyed by the sentence id.</param>
        /// <param name="type">Linguistic-object type to report.</param>
        private void _write(IGrouping<int, SentenceErrors> errorsGroup, LinguisticObjectType type)
        {
            XlHelper.AddMargin();

            var sentenceErrors = errorsGroup.First(errors => errors.ErrorObjects.ContainsKey(type));
            var objectsOfType = sentenceErrors.ErrorObjects[type];
            var objectsCount = objectsOfType.Count();

            _writeHeader(sentenceErrors.Sentence, type, errorsGroup.Key, objectsCount);
            _writeBody(objectsOfType);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Activates the sheet of the given linguistic-object type and writes one section per
        /// sentence that has error objects of that type. A marker is written once, right
        /// after the section whose zero-based position equals one fifth (integer division)
        /// of the number of such sentences.
        /// </summary>
        /// <param name="data">Errors of all sentences; they are grouped by <c>SentenceId</c>.</param>
        /// <param name="type">Linguistic-object type to report.</param>
        private void _write(List<SentenceErrors> data, LinguisticObjectType type)
        {
            XlHelper.SetActiveSheet((int)type, _getLoPrefix(type));

            var sentenceGroups = data.GroupBy(errors => errors.SentenceId).ToList();

            var reportedSentences = sentenceGroups.Count(
                sentenceGroup => sentenceGroup.Any(sentenceErrors => sentenceErrors.ErrorObjects.ContainsKey(type)));
            var markerPosition = reportedSentences / 5;

            var writtenSections = 0;
            foreach (var sentenceGroup in sentenceGroups)
            {
                // Sentences without any object of this type produce no section.
                var hasType = sentenceGroup.Any(sentenceErrors => sentenceErrors.ErrorObjects.ContainsKey(type));
                if (!hasType)
                {
                    continue;
                }

                _write(sentenceGroup, type);

                if (writtenSections == markerPosition)
                {
                    _writeMarker();
                }

                writtenSections++;
            }
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Maps a linguistic-object type to its label taken from <c>Labels</c>.
 /// </summary>
 /// <param name="type">Linguistic-object type to label.</param>
 /// <returns>The label of the type, or <c>"Unknown"</c> for a type without a label.</returns>
 private string _getLoPrefix(LinguisticObjectType type)
 {
     string prefix;

     switch (type)
     {
         case LinguisticObjectType.Subject:
             prefix = Labels.LoLableSubject;
             break;
         case LinguisticObjectType.Predicate:
             prefix = Labels.LoLablePredicate;
             break;
         case LinguisticObjectType.Uniform:
             prefix = Labels.LoLableUniform;
             break;
         case LinguisticObjectType.MeaningPart:
             prefix = Labels.LoLableMeaning;
             break;
         case LinguisticObjectType.ChainFromSubject:
             prefix = Labels.LoLableChainPrefixSubject;
             break;
         case LinguisticObjectType.ChainFromPredicate:
             prefix = Labels.LoLableChainPrefixPredicate;
             break;
         case LinguisticObjectType.ChainFromMeaningPart:
             prefix = Labels.LoLableChainPrefixMeaning;
             break;
         case LinguisticObjectType.SimpleSentence:
             prefix = Labels.LoLableSimpleSentence;
             break;
         default:
             prefix = "Unknown";
             break;
     }

     return prefix;
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Chooses the colour of an average-quality cell: the emulated-quality colour as soon as
 /// any pack summary reports emulated stats for the type, the summary-row colour otherwise.
 /// </summary>
 /// <param name="data">Pack summaries to inspect.</param>
 /// <param name="loType">Linguistic-object type whose stats are checked.</param>
 /// <returns>The colour to use for the cell.</returns>
 private Color? _averageQColor(List<PackSummary> data, LinguisticObjectType loType)
 {
     var anyPackEmulated = data.Any(summary => summary.Stats[loType].WasEmulated);
     if (anyPackEmulated)
     {
         return Colors.EmulatedQuality;
     }

     return Colors.SummaryRow;
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds metrics for a target object that has no counterpart in the sample. Every
        /// target word counts as surplus, split by the <c>IsAuxilary</c> flag into essential
        /// and non-essential words; all sample and missed collections are empty.
        /// </summary>
        /// <param name="objectMatch">Match whose <c>TargetObjects</c> supplies the words.</param>
        /// <param name="sampleSentence">Sample sentence used to obtain the average word count for the type.</param>
        /// <param name="targetSentence">Sentence used to resolve indexes of target words.</param>
        /// <param name="type">Linguistic-object type of the surplus object.</param>
        /// <returns>
        /// Metrics of type <c>ErrorType.SurplusLinguisticObject</c>; the detection quality is
        /// <c>a / (a + 0.5 * essentialSurplusCount)</c>, where <c>a</c> is the value returned by
        /// <c>_getArageWordsCountInObjectsOfType</c>.
        /// </returns>
        private static LinguisticObjectDetectionMetrics _metricsForSurplusTargetObject(LinguisticObjectMatch objectMatch, Sentence sampleSentence, Sentence targetSentence, LinguisticObjectType type)
        {
            var averageWordsInSample = _getArageWordsCountInObjectsOfType(sampleSentence, type);

            var wordsInTarget = objectMatch.TargetObjects.Words;

            var essentialWords = wordsInTarget.Where(word => !word.IsAuxilary).ToList();
            var essentialTexts = essentialWords.Select(word => word.Text).ToList();
            var essentialIndexes = essentialWords.Select(targetSentence.WordIndex).ToList();

            var auxiliaryWords = wordsInTarget.Where(word => word.IsAuxilary).ToList();
            var auxiliaryTexts = auxiliaryWords.Select(word => word.Text).ToList();
            var auxiliaryIndexes = auxiliaryWords.Select(targetSentence.WordIndex).ToList();

            return new LinguisticObjectDetectionMetrics
            {
                SampleWords = new string[0],
                TargetWords = wordsInTarget.Select(word => word.Text).ToArray(),
                SampleWordsCount = 0,
                SampleWordIndexes = new int[0],
                TargetWordsCount = wordsInTarget.Count,
                TargetWordIndexes = wordsInTarget.Select(targetSentence.WordIndex).ToArray(),
                // With nothing missed, the error lists equal the surplus lists.
                NonEssentialErrors = auxiliaryTexts.ToArray(),
                NonEssentialErrorIndexes = auxiliaryIndexes.ToArray(),
                EssentialErrors = essentialTexts.ToArray(),
                EssentialErrorIndexes = essentialIndexes.ToArray(),
                SurplusNonEssentialWords = auxiliaryTexts.ToArray(),
                SurplusNonEssentialWordIndexes = auxiliaryIndexes.ToArray(),
                MissedNonEssentialWords = new string[0],
                MissedNonEssentialWordIndexes = new int[0],
                SurplusEssentialWords = essentialTexts.ToArray(),
                SurplusEssentialWordIndexes = essentialIndexes.ToArray(),
                MissedEssentialWords = new string[0],
                MissedEssentialWordIndexes = new int[0],
                CorrectDetectionsCount = 0,
                DetectionQuality = averageWordsInSample/(averageWordsInSample + 0.5*essentialWords.Count),
                Type = ErrorType.SurplusLinguisticObject
            };
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Selects the collection of the sample sentence that corresponds to the linguistic-object
 /// type and passes it to <c>_countAverageWordsInLo</c>. Chain types pass the linked chains
 /// together with the word type the chain starts from.
 /// </summary>
 /// <param name="sampleSentence">Sentence whose objects are measured.</param>
 /// <param name="type">Linguistic-object type to measure.</param>
 /// <returns>The value produced by <c>_countAverageWordsInLo</c> for the selected objects.</returns>
 /// <exception cref="Exception">The type is not one of the handled values.</exception>
 private static double _getArageWordsCountInObjectsOfType(Sentence sampleSentence, LinguisticObjectType type)
 {
     double averageWords;

     switch (type)
     {
         case LinguisticObjectType.Subject:
             averageWords = _countAverageWordsInLo(sampleSentence.Subjects);
             break;
         case LinguisticObjectType.Predicate:
             averageWords = _countAverageWordsInLo(sampleSentence.Predicates);
             break;
         case LinguisticObjectType.Uniform:
             averageWords = _countAverageWordsInLo(sampleSentence.Uniforms);
             break;
         case LinguisticObjectType.MeaningPart:
             averageWords = _countAverageWordsInLo(sampleSentence.ValuableAuxParts);
             break;
         case LinguisticObjectType.ChainFromSubject:
             averageWords = _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.Subject);
             break;
         case LinguisticObjectType.ChainFromPredicate:
             averageWords = _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.Predicate);
             break;
         case LinguisticObjectType.ChainFromMeaningPart:
             averageWords = _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.MeaningPart);
             break;
         case LinguisticObjectType.SimpleSentence:
         {
             // Simple sentences are handed over as word chains.
             var sentenceChains =
                 sampleSentence.SimpleSentences.Select(sentence => sentence as SentenceWordChain).ToList();
             averageWords = _countAverageWordsInLo(sentenceChains);
             break;
         }
         default:
             // The message reads "Unknown LO type".
             throw new Exception("Неизвестный тип ЛО");
     }

     return averageWords;
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Registers one object match in the statistics of the simple sentence it belongs to,
        /// together with the metrics calculated for it. Matches for which
        /// <c>_getSenteceId</c> returns -1 are ignored.
        /// </summary>
        /// <param name="match">Match to register.</param>
        /// <param name="statsGroupSelector">Selects the list of the sentence stats that receives the new entry.</param>
        /// <param name="objectType">Linguistic-object type passed to the metrics calculation.</param>
        private void _processSimpleObjectMatch(LinguisticObjectMatch match, Func<SimpleSentenceStats, List<LinguisticObject>> statsGroupSelector, LinguisticObjectType objectType)
        {
            var sentenceIndex = _getSenteceId(match);
            if (sentenceIndex == -1)
            {
                return;
            }

            var sentenceStats = _simpleSentencesStats[sentenceIndex];
            var targetGroup = statsGroupSelector(sentenceStats);

            var linguisticObject = new LinguisticObject
            {
                MappedObjects = match,
                Metrics = LoMatchMetricsCalculator.MetricsFor(match, _target, _sample, objectType)
            };

            targetGroup.Add(linguisticObject);
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Computes the mean of the packs' average quality for one linguistic-object type, each
 /// pack weighted by its sentence count.
 /// </summary>
 /// <param name="data">Pack summaries to combine.</param>
 /// <param name="type">Linguistic-object type whose stats are read.</param>
 /// <param name="totalSentencesCount">Divisor applied to the weighted sum.</param>
 /// <returns>The weighted sum of average qualities divided by <paramref name="totalSentencesCount"/>.</returns>
 private double _getMeanAverageQuality(List<PackSummary> data, LinguisticObjectType type, int totalSentencesCount)
 {
     var weightedQualitySum = data.Sum(summary => summary.Stats[type].AverageQuality*summary.SentencesCount);

     return weightedQualitySum/totalSentencesCount;
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Builds summary statistics for one linguistic-object (LO) type of a pack: the
        /// average number of LOs per sentence and the average quality, where each simple
        /// sentence's average quality is weighted by its LO count.
        /// </summary>
        /// <param name="pack">Pack whose comparison results supply the simple-sentence statistics.</param>
        /// <param name="statsLoCounter">Returns the number of LOs of the requested type in one simple sentence.</param>
        /// <param name="sentenceLoAverageQuality">
        /// Returns the average LO quality of one simple sentence; it is only invoked for
        /// sentences whose LO count is non-zero.
        /// </param>
        /// <param name="sentecesCount">Divisor used for <c>AverageCount</c>.</param>
        /// <param name="loType">Not used by this method.</param>
        /// <returns>Stats with <c>AverageCount</c> and <c>AverageQuality</c> filled in.</returns>
        private static LoSummaryStats _statsForLObjects(SentencesPack pack, Func<SimpleSentenceStats, int> statsLoCounter,
            Func<SimpleSentenceStats, double> sentenceLoAverageQuality, int sentecesCount, LinguisticObjectType loType)
        {
            var countedSentences = pack.ComparisonResults
                .SelectMany(result => result.SimpleSentenceStats)
                .Select(stats => new {Stats = stats, LoCount = statsLoCounter(stats)})
                .ToList();

            var loTotal = 0;
            var weightedQualitySum = 0d;
            foreach (var sentence in countedSentences)
            {
                loTotal += sentence.LoCount;

                // Sentences without LOs add nothing and never reach the quality callback.
                if (sentence.LoCount != 0)
                {
                    weightedQualitySum += sentence.LoCount*sentenceLoAverageQuality(sentence.Stats);
                }
            }

            return new LoSummaryStats
            {
                AverageCount = ((double) loTotal)/sentecesCount,
                AverageQuality = weightedQualitySum/loTotal
            };
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Returns the number printed for a linguistic-object type: "1" for subjects, "2" for
 /// predicates, "3" for uniforms, "4" for meaning parts and "5" for every other type.
 /// </summary>
 /// <param name="type">Linguistic-object type to number.</param>
 /// <returns>The number as a string.</returns>
 private string _getLoNum(LinguisticObjectType type)
 {
     if (type == LinguisticObjectType.Subject)
     {
         return "1";
     }

     if (type == LinguisticObjectType.Predicate)
     {
         return "2";
     }

     if (type == LinguisticObjectType.Uniform)
     {
         return "3";
     }

     if (type == LinguisticObjectType.MeaningPart)
     {
         return "4";
     }

     return "5";
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Writes two cells for one linguistic-object type: the average count in the summary-row
 /// colour, then the average quality in percentage format, coloured as emulated quality
 /// when the stats were emulated and as a summary row otherwise.
 /// </summary>
 /// <param name="data">Summary stats keyed by linguistic-object type.</param>
 /// <param name="type">Type whose stats are written; it must be present in <paramref name="data"/>.</param>
 private void _writeStatsForType(Dictionary<LinguisticObjectType, LoSummaryStats> data, LinguisticObjectType type)
 {
     var typeStats = data[type];

     XlHelper.WriteValue(typeStats.AverageCount, Colors.SummaryRow);

     var qualityColor = typeStats.WasEmulated ? Colors.EmulatedQuality : Colors.SummaryRow;
     XlHelper.WriteValue(typeStats.AverageQuality, qualityColor, XlsxHelper.PercentageFormat);
 }