/// <summary>
/// Builds summary stats for one LO type with an emulated correction effect applied:
/// the defect share is scaled down by the fraction of errors the given correction class would fix.
/// </summary>
/// <param name="pack">Pack whose comparison results supply per-sentence stats.</param>
/// <param name="statsLoCounter">Extracts the LO count of this type from a sentence's stats.</param>
/// <param name="sentenceLoAverageQuality">Extracts the sentence's average quality for this LO type.</param>
/// <param name="sentecesCount">Total sentence count used for the AverageCount denominator.</param>
/// <param name="packClasterizedErrors">Clusterized errors used to estimate the emulation effect.</param>
/// <param name="correctionClass">Highest error class considered correctable.</param>
/// <param name="loType">Linguistic object type being summarized.</param>
private static LoSummaryStats _emulatedStatsForLObjects(SentencesPack pack,
    Func<SimpleSentenceStats, int> statsLoCounter,
    Func<SimpleSentenceStats, double> sentenceLoAverageQuality,
    int sentecesCount,
    IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
    int correctionClass,
    LinguisticObjectType loType)
{
    var simpleSentencesStats = pack.ComparisonResults
        .SelectMany(result => result.SimpleSentenceStats)
        .Select(stats => new {Stats = stats, LoCount = statsLoCounter(stats)})
        .ToList();

    var totalLoCount = simpleSentencesStats.Sum(stats => stats.LoCount);

    // Weight each sentence's average quality by its LO count, consistent with _statsForLObjects
    // (which was already migrated to the weighted form); the previous unweighted sum
    // under-counted sentences containing many objects.
    var normalAverageTotalQuality = simpleSentencesStats.Sum(
        stats => stats.LoCount != 0 ? stats.LoCount*sentenceLoAverageQuality(stats.Stats) : 0d);

    // Guard: with no objects of this type the old code divided 0 by 0 and produced NaN.
    var normalAverageQuality = totalLoCount != 0 ? normalAverageTotalQuality/totalLoCount : 0d;
    var defectsShare = 1d - normalAverageQuality;
    var emulationEffect = _calcEmulationEffect(packClasterizedErrors, correctionClass, loType);

    return new LoSummaryStats
    {
        AverageCount = ((double) totalLoCount)/sentecesCount,
        // Emulation removes a proportional slice of the defect share.
        AverageQuality = 1d - (defectsShare*(1d - emulationEffect)),
        // Tiny effects are treated as "not emulated" for reporting purposes.
        WasEmulated = Math.Abs(emulationEffect) > 0.05
    };
}
/// <summary>
/// Fraction of errors of the given LO type whose error class is within the correction class
/// (i.e. the share an emulated correction would fix). Returns 0 when that type has no errors.
/// </summary>
private static double _calcEmulationEffect(
    IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
    int correctionClass,
    LinguisticObjectType loType)
{
    var loErrors = packClasterizedErrors.Where(error => error.ObjectType == loType).ToList();
    if (loErrors.Count == 0)
        return 0d;

    var correctableCount = loErrors.Count(error => error.ErrorClass <= correctionClass);
    return ((double) correctableCount)/loErrors.Count;
}
/// <summary>
/// Builds detection metrics for one matched linguistic object by comparing the words of the
/// detected (target) object against the reference (sample) object. Words are classified as
/// detected / surplus / missed, then split into essential vs non-essential (auxiliary) groups.
/// Degenerate matches (no target or no sample side) are delegated to dedicated helpers.
/// </summary>
/// <param name="objectMatch">Pairing of target and sample objects (either side may be null).</param>
/// <param name="targetSentence">Sentence the target object belongs to; used to resolve word indexes.</param>
/// <param name="sampleSentence">Sentence the sample object belongs to; used to resolve word indexes.</param>
/// <param name="objectType">LO type of the match, forwarded to the surplus-object helper.</param>
public static LinguisticObjectDetectionMetrics MetricsFor(LinguisticObjectMatch objectMatch, Sentence targetSentence, Sentence sampleSentence, LinguisticObjectType objectType) {
    // No target side: the object was missed entirely.
    if (objectMatch.TargetObjects == null)
        return _metricsForMissedObject(objectMatch, sampleSentence);
    // No sample side: the detected object has no reference counterpart (surplus).
    if (objectMatch.SampleObjects == null)
        return _metricsForSurplusTargetObject(objectMatch, sampleSentence, targetSentence, objectType);
    var targetWords = objectMatch.TargetObjects.Words;
    var sampleWords = objectMatch.SampleObjects.Words;
    var detectedWords = _getDetectedWords(targetWords, sampleWords);
    // NOTE(review): surplus is computed by reference membership in detectedWords, while missed
    // is computed by Text comparison against targetWords — confirm this asymmetry is intentional
    // (it matters when distinct word instances share the same text).
    var surplusWords = targetWords.Where(word => !detectedWords.Contains(word)).ToList();
    var missedWords = sampleWords.Where(word => targetWords.All(targetWord => targetWord.Text != word.Text)).ToList();
    // Essential = non-auxiliary words; their errors are weighted more heavily downstream.
    var essentialSurpluses = surplusWords.Where(word => !word.IsAuxilary).ToList();
    var essentialSurplusText = essentialSurpluses.Select(word => word.Text).ToList();
    var essentialSurplusIndexes = essentialSurpluses.Select(targetSentence.WordIndex).ToList();
    var nonEssentialSurpluses = surplusWords.Where(word => word.IsAuxilary).ToList();
    var nonEssentialSurplusTexts = nonEssentialSurpluses.Select(word => word.Text).ToList();
    var nonEssentialSurplusIndexes = nonEssentialSurpluses.Select(targetSentence.WordIndex).ToList();
    // Missed words are indexed against the sample sentence (they exist only there).
    var essentialMises = missedWords.Where(word => !word.IsAuxilary).ToList();
    var essentialMissTexts = essentialMises.Select(word => word.Text).ToList();
    var essentialMissIndexes = essentialMises.Select(sampleSentence.WordIndex).ToList();
    var nonEssentialMises = missedWords.Where(word => word.IsAuxilary).ToList();
    var nonEssentialMissTexts = nonEssentialMises.Select(word => word.Text).ToList();
    var nonEssentialMissIndexes = nonEssentialMises.Select(sampleSentence.WordIndex).ToList();
    return new LinguisticObjectDetectionMetrics {
        SampleWords = sampleWords.Select(word => word.Text).ToArray(),
        TargetWords = targetWords.Select(word => word.Text).ToArray(),
        SampleWordsCount = sampleWords.Count,
        SampleWordIndexes = sampleWords.Select(sampleSentence.WordIndex).ToArray(),
        TargetWordsCount = targetWords.Count,
        TargetWordIndexes = targetWords.Select(targetSentence.WordIndex).ToArray(),
        // Combined error lists: surplus entries first, then missed entries.
        NonEssentialErrors = nonEssentialSurplusTexts.Concat(nonEssentialMissTexts).ToArray(),
        NonEssentialErrorIndexes = nonEssentialSurplusIndexes.Concat(nonEssentialMissIndexes).ToArray(),
        EssentialErrors = essentialSurplusText.Concat(essentialMissTexts).ToArray(),
        EssentialErrorIndexes = essentialSurplusIndexes.Concat(essentialMissIndexes).ToArray(),
        SurplusNonEssentialWords = nonEssentialSurplusTexts.ToArray(),
        SurplusNonEssentialWordIndexes = nonEssentialSurplusIndexes.ToArray(),
        MissedNonEssentialWords = nonEssentialMissTexts.ToArray(),
        MissedNonEssentialWordIndexes = nonEssentialMissIndexes.ToArray(),
        SurplusEssentialWords = essentialSurplusText.ToArray(),
        SurplusEssentialWordIndexes = essentialSurplusIndexes.ToArray(),
        MissedEssentialWords = essentialMissTexts.ToArray(),
        MissedEssentialWordIndexes = essentialMissIndexes.ToArray(),
        CorrectDetectionsCount = detectedWords.Count,
        Type = ErrorType.DetectionDefect,
        // Quality is 0 when nothing was detected; only essential surpluses penalize it.
        DetectionQuality = detectedWords.Any() ? _getQualityMeasure(detectedWords.Count, essentialSurpluses.Count, sampleWords.Count) : 0d
    };
}
/// <summary>
/// Header color for the given LO type; chain types reuse their base type's color.
/// Returns null for types without a dedicated color.
/// </summary>
private Color? _getLoColor(LinguisticObjectType type)
{
    if (type == LinguisticObjectType.Subject || type == LinguisticObjectType.ChainFromSubject)
        return Colors.SubjectHeader;
    if (type == LinguisticObjectType.Predicate || type == LinguisticObjectType.ChainFromPredicate)
        return Colors.PredicateHeader;
    if (type == LinguisticObjectType.MeaningPart || type == LinguisticObjectType.ChainFromMeaningPart)
        return Colors.MeaningHeader;
    if (type == LinguisticObjectType.Uniform)
        return Colors.UniformsHeader;
    return null;
}
/// <summary>
/// Writes the second header row: an empty cell, the LO-name label, then one
/// prefixed, colored cell per object (prefix0, prefix1, …).
/// </summary>
private void _writeSecontHeaderRow(LinguisticObjectType type, int objectsCount)
{
    XlHelper.WriteValue("");
    XlHelper.WriteValue(Labels.LoName);

    var prefix = _getLoPrefix(type);
    var color = _getLoColor(type);
    var objectIndex = 0;
    while (objectIndex < objectsCount)
    {
        XlHelper.WriteValue(prefix + objectIndex, color);
        objectIndex++;
    }
}
/// <summary>
/// Writes the per-sentence header: the sample sentence text, two header rows,
/// and a summary row (1-based sentence number, evaluation label, empty cells per object).
/// </summary>
private void _writeHeader(Sentence sentence, LinguisticObjectType type, int sentenceId, int objectsCount)
{
    XlHelper.WriteRow(() =>
    {
        XlHelper.WriteValue(Labels.SampleSentence);
        XlHelper.WriteValue(sentence.Text);
    });

    XlHelper.WriteRow(() => _writeFirstHeaderRow(type, objectsCount));
    XlHelper.WriteRow(() => _writeSecontHeaderRow(type, objectsCount));

    XlHelper.WriteRow(() =>
    {
        // Sentence numbers are displayed 1-based.
        XlHelper.WriteValue(sentenceId + 1, Colors.SummaryRow);
        XlHelper.WriteValue(Labels.Summary.Evaluation, Colors.SummaryRow);
        for (var cell = 0; cell < objectsCount; cell++)
            XlHelper.WriteValue("", Colors.SummaryRow);
    });
}
/// <summary>
/// Writes the first header row: an empty cell, the LO-number label, then the
/// type's numeric code repeated once per object.
/// </summary>
private void _writeFirstHeaderRow(LinguisticObjectType type, int objectsCount)
{
    XlHelper.WriteValue("");
    XlHelper.WriteValue(Labels.LoNum);

    var loNum = _getLoNum(type);
    var written = 0;
    while (written < objectsCount)
    {
        XlHelper.WriteValue(loNum);
        written++;
    }
}
/// <summary>
/// Writes one sentence's error section for the given LO type: margin, header, body.
/// Uses the first errors entry in the group that actually contains objects of this type.
/// </summary>
private void _write(IGrouping<int, SentenceErrors> errorsGroup, LinguisticObjectType type)
{
    XlHelper.AddMargin();

    var group = errorsGroup.First(errors => errors.ErrorObjects.ContainsKey(type));
    var errorObjects = group.ErrorObjects[type];

    // Group key is the sentence id.
    _writeHeader(group.Sentence, type, errorsGroup.Key, errorObjects.Count());
    _writeBody(errorObjects);
}
/// <summary>
/// Writes all error sections of one LO type onto that type's sheet, inserting a
/// progress marker after the first fifth of the sentence groups containing the type.
/// </summary>
private void _write(List<SentenceErrors> data, LinguisticObjectType type)
{
    XlHelper.SetActiveSheet((int) type, _getLoPrefix(type));

    var errorsBySentences = data.GroupBy(errors => errors.SentenceId).ToList();

    // Marker position: one fifth of the groups that contain this LO type.
    var markerId = errorsBySentences.Count(
        errors => errors.Any(sentenceErrors => sentenceErrors.ErrorObjects.ContainsKey(type)))/5;

    var index = 0;
    foreach (var errors in errorsBySentences)
    {
        // Skip sentence groups with no objects of this type.
        if (!errors.Any(errorsGroup => errorsGroup.ErrorObjects.ContainsKey(type)))
            continue;

        _write(errors, type);

        // With integer index, the original "index <= markerId && index + 1 > markerId"
        // is exactly "index == markerId".
        if (index == markerId)
            _writeMarker();
        index++;
    }
}
/// <summary>
/// Label prefix for the given LO type, used for sheet names and column headers.
/// Falls back to "Unknown" for unrecognized types.
/// </summary>
private string _getLoPrefix(LinguisticObjectType type)
{
    if (type == LinguisticObjectType.Subject)
        return Labels.LoLableSubject;
    if (type == LinguisticObjectType.Predicate)
        return Labels.LoLablePredicate;
    if (type == LinguisticObjectType.Uniform)
        return Labels.LoLableUniform;
    if (type == LinguisticObjectType.MeaningPart)
        return Labels.LoLableMeaning;
    if (type == LinguisticObjectType.ChainFromSubject)
        return Labels.LoLableChainPrefixSubject;
    if (type == LinguisticObjectType.ChainFromPredicate)
        return Labels.LoLableChainPrefixPredicate;
    if (type == LinguisticObjectType.ChainFromMeaningPart)
        return Labels.LoLableChainPrefixMeaning;
    if (type == LinguisticObjectType.SimpleSentence)
        return Labels.LoLableSimpleSentence;
    return "Unknown";
}
/// <summary>
/// Color for the average-quality cell: emulation color if any pack's stats for this
/// LO type were emulated, otherwise the plain summary-row color.
/// </summary>
private Color? _averageQColor(List<PackSummary> data, LinguisticObjectType loType)
{
    var anyEmulated = data.Any(summary => summary.Stats[loType].WasEmulated);
    return anyEmulated ? Colors.EmulatedQuality : Colors.SummaryRow;
}
/// <summary>
/// Builds metrics for a detected (target) object that has no reference (sample) counterpart:
/// every target word is a surplus, nothing is missed or correctly detected. The quality score
/// penalizes the object proportionally to its essential surplus words, normalized by the
/// average word count of sample objects of the same type.
/// </summary>
/// <param name="objectMatch">Match whose SampleObjects side is null.</param>
/// <param name="sampleSentence">Reference sentence, used only to compute the normalization average.</param>
/// <param name="targetSentence">Sentence the surplus object belongs to; resolves word indexes.</param>
/// <param name="type">LO type, used to pick which sample objects to average over.</param>
private static LinguisticObjectDetectionMetrics _metricsForSurplusTargetObject(LinguisticObjectMatch objectMatch, Sentence sampleSentence, Sentence targetSentence, LinguisticObjectType type) {
    // NOTE(review): "magicNumber" is the average word count of same-type objects in the sample
    // sentence; it anchors the quality denominator — confirm the 0.5 weight is intentional.
    var magicNumber = _getArageWordsCountInObjectsOfType(sampleSentence, type);
    var targetWords = objectMatch.TargetObjects.Words;
    // All target words are surplus here; split into essential vs auxiliary.
    var essentialSurpluses = targetWords.Where(word => !word.IsAuxilary).ToList();
    var essentialSurplusText = essentialSurpluses.Select(word => word.Text).ToList();
    var essentialSurplusIndexes = essentialSurpluses.Select(targetSentence.WordIndex).ToList();
    var nonEssentialSurpluses = targetWords.Where(word => word.IsAuxilary).ToList();
    var nonEssentialSurplusTexts = nonEssentialSurpluses.Select(word => word.Text).ToList();
    var nonEssentialSurplusIndexes = nonEssentialSurpluses.Select(targetSentence.WordIndex).ToList();
    return new LinguisticObjectDetectionMetrics {
        // Sample side is empty by construction (no reference object exists).
        SampleWords = new string[]{},
        TargetWords = targetWords.Select(word => word.Text).ToArray(),
        SampleWordsCount = 0,
        SampleWordIndexes = new int[]{},
        TargetWordsCount = targetWords.Count,
        TargetWordIndexes = targetWords.Select(targetSentence.WordIndex).ToArray(),
        // Error lists coincide with the surplus lists — there are no missed words.
        NonEssentialErrors = nonEssentialSurplusTexts.ToArray(),
        NonEssentialErrorIndexes = nonEssentialSurplusIndexes.ToArray(),
        EssentialErrors = essentialSurplusText.ToArray(),
        EssentialErrorIndexes = essentialSurplusIndexes.ToArray(),
        SurplusNonEssentialWords = nonEssentialSurplusTexts.ToArray(),
        SurplusNonEssentialWordIndexes = nonEssentialSurplusIndexes.ToArray(),
        MissedNonEssentialWords = new string[] { },
        MissedNonEssentialWordIndexes = new int[] { },
        SurplusEssentialWords = essentialSurplusText.ToArray(),
        SurplusEssentialWordIndexes = essentialSurplusIndexes.ToArray(),
        MissedEssentialWords = new string[] { },
        MissedEssentialWordIndexes = new int[] { },
        CorrectDetectionsCount = 0,
        // Approaches 1 as essential surpluses approach 0; each essential surplus word
        // counts half an average object's weight in the denominator.
        DetectionQuality = magicNumber/(magicNumber + 0.5*essentialSurpluses.Count),
        Type = ErrorType.SurplusLinguisticObject
    };
}
/// <summary>
/// Average word count across the sample sentence's objects of the given LO type.
/// Chain types average over linked chains filtered by their root word type;
/// throws for unrecognized types.
/// </summary>
private static double _getArageWordsCountInObjectsOfType(Sentence sampleSentence, LinguisticObjectType type)
{
    if (type == LinguisticObjectType.Subject)
        return _countAverageWordsInLo(sampleSentence.Subjects);
    if (type == LinguisticObjectType.Predicate)
        return _countAverageWordsInLo(sampleSentence.Predicates);
    if (type == LinguisticObjectType.Uniform)
        return _countAverageWordsInLo(sampleSentence.Uniforms);
    if (type == LinguisticObjectType.MeaningPart)
        return _countAverageWordsInLo(sampleSentence.ValuableAuxParts);
    if (type == LinguisticObjectType.ChainFromSubject)
        return _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.Subject);
    if (type == LinguisticObjectType.ChainFromPredicate)
        return _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.Predicate);
    if (type == LinguisticObjectType.ChainFromMeaningPart)
        return _countAverageWordsInLo(sampleSentence.LinkedChains, SentenceWordType.MeaningPart);
    if (type == LinguisticObjectType.SimpleSentence)
    {
        // Simple sentences are averaged via their word-chain representation.
        var chains = sampleSentence.SimpleSentences
            .Select(sentence => sentence as SentenceWordChain)
            .ToList();
        return _countAverageWordsInLo(chains);
    }
    throw new Exception("Неизвестный тип ЛО");
}
/// <summary>
/// Computes metrics for one object match and appends the resulting linguistic object to
/// the matching simple sentence's stats group. Matches without a resolvable sentence
/// (id == -1) are ignored.
/// </summary>
private void _processSimpleObjectMatch(LinguisticObjectMatch match, Func<SimpleSentenceStats, List<LinguisticObject>> statsGroupSelector, LinguisticObjectType objectType)
{
    var senteceId = _getSenteceId(match);
    if (senteceId == -1)
        return;

    var linguisticObject = new LinguisticObject
    {
        MappedObjects = match,
        Metrics = LoMatchMetricsCalculator.MetricsFor(match, _target, _sample, objectType)
    };

    var sentenceStats = _simpleSentencesStats[senteceId];
    statsGroupSelector(sentenceStats).Add(linguisticObject);
}
/// <summary>
/// Sentence-count-weighted mean of the packs' average quality for one LO type,
/// normalized by the total sentence count across all packs.
/// </summary>
private double _getMeanAverageQuality(List<PackSummary> data, LinguisticObjectType type, int totalSentencesCount)
{
    var weightedQualitySum = data.Sum(summary => summary.Stats[type].AverageQuality*summary.SentencesCount);
    return weightedQualitySum/totalSentencesCount;
}
/// <summary>
/// Builds summary stats for one LO type across a pack: average object count per sentence
/// and the LO-count-weighted average quality.
/// </summary>
/// <param name="pack">Pack whose comparison results supply per-sentence stats.</param>
/// <param name="statsLoCounter">Extracts the LO count of this type from a sentence's stats.</param>
/// <param name="sentenceLoAverageQuality">Extracts the sentence's average quality for this LO type.</param>
/// <param name="sentecesCount">Total sentence count used for the AverageCount denominator.</param>
/// <param name="loType">Kept for signature compatibility with the emulated variant; unused here.</param>
private static LoSummaryStats _statsForLObjects(SentencesPack pack,
    Func<SimpleSentenceStats, int> statsLoCounter,
    Func<SimpleSentenceStats, double> sentenceLoAverageQuality,
    int sentecesCount,
    LinguisticObjectType loType)
{
    var simpleSentencesStats = pack.ComparisonResults
        .SelectMany(result => result.SimpleSentenceStats)
        .Select(stats => new {Stats = stats, LoCount = statsLoCounter(stats)})
        .ToList();

    var totalLoCount = simpleSentencesStats.Sum(stats => stats.LoCount);

    // Weight each sentence's average quality by its LO count so sentences with
    // more objects contribute proportionally more.
    var totalQuality = simpleSentencesStats.Sum(
        stats => stats.LoCount != 0 ? stats.LoCount*sentenceLoAverageQuality(stats.Stats) : 0d);

    return new LoSummaryStats
    {
        AverageCount = ((double) totalLoCount)/sentecesCount,
        // Guard: previously 0/0 produced NaN when the pack had no objects of this type.
        AverageQuality = totalLoCount != 0 ? totalQuality/totalLoCount : 0d
    };
}
/// <summary>
/// Numeric code (as a string) for the given LO type; all other types map to "5".
/// </summary>
private string _getLoNum(LinguisticObjectType type)
{
    if (type == LinguisticObjectType.Subject)
        return "1";
    if (type == LinguisticObjectType.Predicate)
        return "2";
    if (type == LinguisticObjectType.Uniform)
        return "3";
    if (type == LinguisticObjectType.MeaningPart)
        return "4";
    return "5";
}
/// <summary>
/// Writes one LO type's summary cells: the average count, then the average quality
/// formatted as a percentage and highlighted when the value was emulated.
/// </summary>
private void _writeStatsForType(Dictionary<LinguisticObjectType, LoSummaryStats> data, LinguisticObjectType type)
{
    var stats = data[type];
    XlHelper.WriteValue(stats.AverageCount, Colors.SummaryRow);

    var qualityColor = stats.WasEmulated ? Colors.EmulatedQuality : Colors.SummaryRow;
    XlHelper.WriteValue(stats.AverageQuality, qualityColor, XlsxHelper.PercentageFormat);
}