/// <summary>
/// Computes summary statistics for one kind of linguistic object across the pack and
/// reduces the measured share of defects by the emulation effect reported by
/// <c>_calcEmulationEffect</c> for the given correction class and object type.
/// </summary>
/// <param name="pack">Pack whose comparison results supply the simple sentence stats.</param>
/// <param name="statsLoCounter">Returns the number of objects of the examined kind in one simple sentence.</param>
/// <param name="sentenceLoAverageQuality">Returns the average quality of those objects in one simple sentence.</param>
/// <param name="sentecesCount">Divisor used for <c>AverageCount</c>.</param>
/// <param name="packClasterizedErrors">Clustered errors forwarded to <c>_calcEmulationEffect</c>.</param>
/// <param name="correctionClass">Correction class forwarded to <c>_calcEmulationEffect</c>.</param>
/// <param name="loType">Linguistic object type forwarded to <c>_calcEmulationEffect</c>.</param>
/// <returns>
/// Stats where <c>AverageQuality = 1 - defectsShare * (1 - emulationEffect)</c> and
/// <c>WasEmulated</c> is set when the absolute effect exceeds 0.05.
/// </returns>
private static LoSummaryStats _emulatedStatsForLObjects(SentencesPack pack,
    Func<SimpleSentenceStats, int> statsLoCounter,
    Func<SimpleSentenceStats, double> sentenceLoAverageQuality,
    int sentecesCount,
    IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
    int correctionClass,
    LinguisticObjectType loType)
{
    var simpleSentencesStats = pack.ComparisonResults
        .SelectMany(result => result.SimpleSentenceStats)
        .Select(stats => new {Stats = stats, LoCount = statsLoCounter(stats)})
        .ToList();

    var totalLoCount = simpleSentencesStats.Aggregate(0, (i, stats) => i + stats.LoCount);

    // Each sentence's average is weighted by its object count so that dividing by the
    // total object count yields a per-object average (same formula as _statsForLObjects).
    // Summing the bare averages and dividing by the object count understated the quality.
    var weightedQualitySum = simpleSentencesStats.Aggregate(0d,
        (statsQualitySum, stats) =>
            statsQualitySum + (stats.LoCount != 0 ? stats.LoCount*sentenceLoAverageQuality(stats.Stats) : 0));

    // With no objects at all this is 0.0/0, i.e. NaN, as in _statsForLObjects.
    var normalAverageQuality = weightedQualitySum/totalLoCount;
    var defectsShare = 1d - normalAverageQuality;

    var emulationEffect = _calcEmulationEffect(packClasterizedErrors, correctionClass, loType);

    return new LoSummaryStats
    {
        AverageCount = ((double) totalLoCount)/sentecesCount,
        AverageQuality = 1d - (defectsShare*(1d - emulationEffect)),
        WasEmulated = Math.Abs(emulationEffect) > 0.05
    };
}
/// <summary>
/// Builds a pack summary in which every per-object-type statistic is produced by
/// <c>_emulatedStatsForLObjects</c>, bound to the supplied clustered errors and correction class.
/// </summary>
/// <param name="sentencesPack">Pack to summarize.</param>
/// <param name="packClasterizedErrors">Clustered errors passed through to the emulated stats calculation.</param>
/// <param name="correctionClass">Correction class passed through to the emulated stats calculation.</param>
/// <returns>The summary returned by <c>_summaryFor</c>.</returns>
private static PackSummary _emulatedSummaryFor(SentencesPack sentencesPack,
    IEnumerable<ClasterizedSentenceError> packClasterizedErrors,
    int correctionClass)
{
    // Adapter: fixes the two emulation arguments so the delegate matches the shape _summaryFor expects.
    Func<SentencesPack, Func<SimpleSentenceStats, int>, Func<SimpleSentenceStats, double>, int, LinguisticObjectType, LoSummaryStats>
        emulatedStats = (pack, countObjects, averageQuality, sentencesTotal, objectType) =>
            _emulatedStatsForLObjects(pack, countObjects, averageQuality, sentencesTotal,
                packClasterizedErrors, correctionClass, objectType);

    return _summaryFor(sentencesPack, emulatedStats);
}
/// <summary>
/// Creates a new pack with the same sample and target as <paramref name="originalPack"/>,
/// where each comparison result that has analyzed errors is replaced by the output of
/// <c>_correctErrors</c>; results without errors are carried over unchanged.
/// </summary>
/// <param name="originalPack">Pack to copy from; it is not modified by this method itself.</param>
/// <param name="analyzedErrors">
/// Errors grouped by key. A group is matched to a comparison result when its key equals the
/// result's zero-based position plus one. May be null, in which case nothing is corrected.
/// </param>
/// <param name="correctionClass">Correction class forwarded to <c>_correctErrors</c>.</param>
/// <returns>The newly built pack.</returns>
public static SentencesPack CorrectErrors(SentencesPack originalPack,
    IEnumerable<IGrouping<int, ClasterizedSentenceError>> analyzedErrors,
    int correctionClass)
{
    var errorsBySentenceId = analyzedErrors != null
        ? analyzedErrors.ToDictionary(errors => errors.Key)
        : new Dictionary<int, IGrouping<int, ClasterizedSentenceError>>();

    var resultPack = new SentencesPack
    {
        Sample = originalPack.Sample,
        Target = originalPack.Target,
        ComparisonResults = originalPack.ComparisonResults
            .Select((result, i) =>
            {
                // Single lookup instead of ContainsKey followed by the indexer.
                IGrouping<int, ClasterizedSentenceError> sentenceErrors;
                return errorsBySentenceId.TryGetValue(i + 1, out sentenceErrors)
                    ? _correctErrors(result, sentenceErrors, correctionClass)
                    : result;
            })
            .ToList()
    };

    return resultPack;
}
/// <summary>
/// Runs <c>Core.Compare</c> on the pack's target and sample, stores the comparison
/// results back into the pack and returns the errors reported by the comparison.
/// </summary>
/// <param name="sentencesPack">Pack to compare; its <c>ComparisonResults</c> is overwritten.</param>
/// <returns>The error list produced through the <c>out</c> argument of <c>Core.Compare</c>.</returns>
private static List<string> _runComparison(SentencesPack sentencesPack)
{
    List<string> comparisonErrors;
    var comparisonResults = Core.Compare(sentencesPack.Target, sentencesPack.Sample, out comparisonErrors);
    sentencesPack.ComparisonResults = comparisonResults;
    return comparisonErrors;
}
/// <summary>
/// Hands the pack's comparison results to <c>DataExporter.WriteSSToFile</c>, targeting
/// the file <paramref name="s"/> inside <paramref name="resultsDir"/>.
/// </summary>
/// <param name="dataPack">Pack whose comparison results are exported.</param>
/// <param name="resultsDir">Directory part of the output path.</param>
/// <param name="s">File name part of the output path.</param>
private static void _exportSimpleSentences(SentencesPack dataPack, string resultsDir, string s)
{
    var comparisonResults = dataPack.ComparisonResults;
    var outputPath = Path.Combine(resultsDir, s);
    DataExporter.WriteSSToFile(comparisonResults, outputPath);
}
/// <summary>
/// Hands the pack's comparison results together with their summary to
/// <c>DataExporter.WritePackSimpleSentencesSummary</c>, targeting
/// <paramref name="fileName"/> inside <paramref name="resultsDir"/>.
/// </summary>
/// <param name="dataPack">Pack whose comparison results are exported.</param>
/// <param name="summary">Summary passed to the exporter alongside the results.</param>
/// <param name="resultsDir">Directory part of the output path.</param>
/// <param name="fileName">File name part of the output path.</param>
private static void _exportPackSimpleSentencesSummary(SentencesPack dataPack, PackSummary summary,
    string resultsDir, string fileName)
{
    var comparisonResults = dataPack.ComparisonResults;
    var outputPath = Path.Combine(resultsDir, fileName);
    DataExporter.WritePackSimpleSentencesSummary(comparisonResults, summary, outputPath);
}
/// <summary>
/// Hands the pack's comparison results to <c>DataExporter.WriteToFile</c>, targeting
/// <paramref name="fileName"/> inside <paramref name="resultsDir"/>.
/// </summary>
/// <param name="sentencesPack">Pack whose comparison results are exported.</param>
/// <param name="resultsDir">Directory part of the output path.</param>
/// <param name="fileName">File name part of the output path.</param>
private static void _exportPackData(SentencesPack sentencesPack, string resultsDir, string fileName)
{
    var comparisonResults = sentencesPack.ComparisonResults;
    var outputPath = Path.Combine(resultsDir, fileName);
    DataExporter.WriteToFile(comparisonResults, outputPath);
}
/// <summary>
/// Collects the errors that <c>_getErrors</c> yields for every comparison result of the
/// pack into a single flat list, in comparison-result order.
/// </summary>
/// <param name="sentences">Pack whose comparison results are inspected.</param>
/// <returns>A new list containing all extracted errors.</returns>
public static List<SentenceErrors> Process(SentencesPack sentences)
{
    var allErrors = sentences.ComparisonResults.SelectMany(comparison => _getErrors(comparison));
    return allErrors.ToList();
}
/// <summary>
/// Computes summary statistics for one kind of linguistic object over every simple
/// sentence of the pack.
/// </summary>
/// <param name="pack">Pack whose comparison results supply the simple sentence stats.</param>
/// <param name="statsLoCounter">Returns the number of objects of the examined kind in one simple sentence.</param>
/// <param name="sentenceLoAverageQuality">
/// Returns the average quality of those objects in one simple sentence; only invoked for
/// sentences whose object count is non-zero.
/// </param>
/// <param name="sentecesCount">Divisor used for <c>AverageCount</c>.</param>
/// <param name="loType">Not read by this method; part of the signature it shares with the emulated variant.</param>
/// <returns>
/// <c>AverageCount</c> = total objects / <paramref name="sentecesCount"/>;
/// <c>AverageQuality</c> = count-weighted quality sum / total objects (0.0/0 when there are no objects).
/// </returns>
private static LoSummaryStats _statsForLObjects(SentencesPack pack,
    Func<SimpleSentenceStats, int> statsLoCounter,
    Func<SimpleSentenceStats, double> sentenceLoAverageQuality,
    int sentecesCount,
    LinguisticObjectType loType)
{
    // Count objects for every simple sentence up front, before any quality is queried.
    var countedSentences = pack.ComparisonResults
        .SelectMany(comparison => comparison.SimpleSentenceStats)
        .Select(sentenceStats => new {Stats = sentenceStats, LoCount = statsLoCounter(sentenceStats)})
        .ToList();

    var totalLoCount = 0;
    foreach (var entry in countedSentences)
    {
        totalLoCount += entry.LoCount;
    }

    // Quality is weighted by the number of objects in each sentence, so the final
    // division by the total object count gives a per-object (not per-sentence) average.
    var weightedQualitySum = 0d;
    foreach (var entry in countedSentences)
    {
        if (entry.LoCount != 0)
        {
            weightedQualitySum += entry.LoCount*sentenceLoAverageQuality(entry.Stats);
        }
    }

    return new LoSummaryStats
    {
        AverageCount = ((double) totalLoCount)/sentecesCount,
        AverageQuality = weightedQualitySum/totalLoCount
    };
}
/// <summary>
/// Builds the summary of a pack: one <c>LoSummaryStats</c> entry per linguistic object type,
/// the average number of objects per simple sentence, and an overall quality figure.
/// </summary>
/// <param name="pack">Pack to summarize.</param>
/// <param name="statsForLObjects">
/// Strategy that computes the stats for one object type. It receives the pack, an object
/// counter, a per-sentence average quality accessor, the number of simple sentences and
/// the object type being computed.
/// </param>
/// <returns>The populated summary.</returns>
private static PackSummary _summaryFor(SentencesPack pack,
    Func<SentencesPack, Func<SimpleSentenceStats, int>, Func<SimpleSentenceStats, double>, int, LinguisticObjectType, LoSummaryStats> statsForLObjects)
{
    var summary = new PackSummary
    {
        SentencesCount = pack.Sample.Count,
        Stats = new Dictionary<LinguisticObjectType, LoSummaryStats>()
    };

    var allSimpleSentences = pack.ComparisonResults
        .SelectMany(comparison => comparison.SimpleSentenceStats)
        .ToList();
    var simpleSentencesTotal = allSimpleSentences.Count;

    // Plain object collections: the count comes straight from the Objects list.
    summary.Stats[LinguisticObjectType.Subject] = statsForLObjects(
        pack,
        s => s.Subjects.Objects.Count,
        s => s.Subjects.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.Subject);

    summary.Stats[LinguisticObjectType.Predicate] = statsForLObjects(
        pack,
        s => s.Predicates.Objects.Count,
        s => s.Predicates.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.Predicate);

    summary.Stats[LinguisticObjectType.Uniform] = statsForLObjects(
        pack,
        s => s.Uniforms.Objects.Count,
        s => s.Uniforms.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.Uniform);

    summary.Stats[LinguisticObjectType.MeaningPart] = statsForLObjects(
        pack,
        s => s.MeaningAuxParts.Objects.Count,
        s => s.MeaningAuxParts.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.MeaningPart);

    // Chain collections: the count is the number of items across all of the Values sequences.
    summary.Stats[LinguisticObjectType.ChainFromSubject] = statsForLObjects(
        pack,
        s => s.ChainsFromSubjects.Values.SelectMany(chains => chains).Count(),
        s => s.ChainsFromSubjects.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.ChainFromSubject);

    summary.Stats[LinguisticObjectType.ChainFromPredicate] = statsForLObjects(
        pack,
        s => s.ChainsFromPredicates.Values.SelectMany(chains => chains).Count(),
        s => s.ChainsFromPredicates.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.ChainFromPredicate);

    summary.Stats[LinguisticObjectType.ChainFromMeaningPart] = statsForLObjects(
        pack,
        s => s.ChainsFromMeaningParts.Values.SelectMany(chains => chains).Count(),
        s => s.ChainsFromMeaningParts.AverageDetectionQuality,
        simpleSentencesTotal,
        LinguisticObjectType.ChainFromMeaningPart);

    // Simple sentences always use their own calculation, regardless of the supplied strategy.
    summary.Stats[LinguisticObjectType.SimpleSentence] = _statsForSimpleSentences(pack, simpleSentencesTotal);

    var objectsTotal = 0d;
    foreach (var sentence in allSimpleSentences)
    {
        objectsTotal += sentence.AllObjects.Count;
    }
    summary.AverageLoCount = objectsTotal/simpleSentencesTotal;

    // The overall quality is derived from the per-type stats gathered above, not by
    // averaging the simple sentences' own AverageDetectionQuality.
    summary.AverageSentenceQuality = _calcAverageAverageQuality(summary.Stats);

    return summary;
}
/// <summary>
/// Builds the pack summary using <c>_statsForLObjects</c> as the per-object-type calculation.
/// </summary>
/// <param name="pack">Pack to summarize.</param>
/// <returns>The summary produced by the two-argument <c>_summaryFor</c> overload.</returns>
private static PackSummary _summaryFor(SentencesPack pack)
{
    Func<SentencesPack, Func<SimpleSentenceStats, int>, Func<SimpleSentenceStats, double>, int, LinguisticObjectType, LoSummaryStats>
        plainStats = _statsForLObjects;

    return _summaryFor(pack, plainStats);
}
/// <summary>
/// Computes summary statistics for simple sentences themselves.
/// </summary>
/// <param name="pack">Pack whose simple-sentence comparison objects are inspected.</param>
/// <param name="simpleSentecesCount">Number of simple sentences; divided by the sample count for <c>AverageCount</c>.</param>
/// <returns>
/// <c>AverageCount</c> = <paramref name="simpleSentecesCount"/> / <c>pack.Sample.Count</c>;
/// <c>AverageQuality</c> = mean <c>Metrics.DetectionQuality</c> over the comparison objects whose
/// <c>MappedObjects.SampleObjects</c> is not null (0.0/0 when there are none).
/// </returns>
private static LoSummaryStats _statsForSimpleSentences(SentencesPack pack, int simpleSentecesCount)
{
    // Only comparison objects that carry sample objects take part in the quality average.
    var mappedToSample = pack.ComparisonResults
        .SelectMany(comparison => comparison.SimpleSentecesComparison.Objects)
        .Where(candidate => candidate.MappedObjects.SampleObjects != null)
        .ToList();

    var averageCount = ((double) simpleSentecesCount)/pack.Sample.Count;

    var qualitySum = 0d;
    foreach (var sentence in mappedToSample)
    {
        qualitySum += sentence.Metrics.DetectionQuality;
    }

    return new LoSummaryStats
    {
        AverageCount = averageCount,
        AverageQuality = qualitySum/mappedToSample.Count
    };
}