/// <summary>
/// Computes word-level detection metrics for a single sample/target object match.
/// </summary>
/// <remarks>
/// A match with no target object is delegated to <c>_metricsForMissedObject</c>, and a match
/// with no sample object to <c>_metricsForSurplusTargetObject</c>. When both sides are present:
/// target words that are not among the detected words are treated as surplus, and sample words
/// whose text equals no target word's text are treated as missed. Words flagged
/// <c>IsAuxilary</c> are reported as non-essential errors, all others as essential errors.
/// </remarks>
/// <param name="objectMatch">The pair of sample and target objects being compared.</param>
/// <param name="targetSentence">Sentence used to resolve indexes of target words.</param>
/// <param name="sampleSentence">Sentence used to resolve indexes of sample words.</param>
/// <param name="objectType">Object type; only forwarded to the surplus-object branch.</param>
/// <returns>The metrics describing how well the target object matches the sample object.</returns>
public static LinguisticObjectDetectionMetrics MetricsFor(LinguisticObjectMatch objectMatch, Sentence targetSentence, Sentence sampleSentence, LinguisticObjectType objectType)
{
    if (objectMatch.TargetObjects == null)
    {
        return _metricsForMissedObject(objectMatch, sampleSentence);
    }

    if (objectMatch.SampleObjects == null)
    {
        return _metricsForSurplusTargetObject(objectMatch, sampleSentence, targetSentence, objectType);
    }

    var targetObjectWords = objectMatch.TargetObjects.Words;
    var sampleObjectWords = objectMatch.SampleObjects.Words;
    var matchedWords = _getDetectedWords(targetObjectWords, sampleObjectWords);

    // Surplus: present in the target object but not recognised as detected.
    var extraWords = targetObjectWords
        .Where(candidate => !matchedWords.Contains(candidate))
        .ToList();

    // Missed: sample words whose text does not occur among the target words.
    var absentWords = sampleObjectWords
        .Where(candidate => !targetObjectWords.Any(other => other.Text == candidate.Text))
        .ToList();

    // Surplus words, split by the auxiliary flag.
    var extraEssential = extraWords.Where(w => !w.IsAuxilary).ToList();
    var extraEssentialTexts = extraEssential.Select(w => w.Text).ToList();
    var extraEssentialIndexes = extraEssential.Select(targetSentence.WordIndex).ToList();

    var extraAuxiliary = extraWords.Where(w => w.IsAuxilary).ToList();
    var extraAuxiliaryTexts = extraAuxiliary.Select(w => w.Text).ToList();
    var extraAuxiliaryIndexes = extraAuxiliary.Select(targetSentence.WordIndex).ToList();

    // Missed words, split by the auxiliary flag.
    var absentEssential = absentWords.Where(w => !w.IsAuxilary).ToList();
    var absentEssentialTexts = absentEssential.Select(w => w.Text).ToList();
    var absentEssentialIndexes = absentEssential.Select(sampleSentence.WordIndex).ToList();

    var absentAuxiliary = absentWords.Where(w => w.IsAuxilary).ToList();
    var absentAuxiliaryTexts = absentAuxiliary.Select(w => w.Text).ToList();
    var absentAuxiliaryIndexes = absentAuxiliary.Select(sampleSentence.WordIndex).ToList();

    // Quality stays at zero when nothing at all was detected.
    var quality = 0d;
    if (matchedWords.Any())
    {
        quality = _getQualityMeasure(matchedWords.Count, extraEssential.Count, sampleObjectWords.Count);
    }

    return new LinguisticObjectDetectionMetrics
    {
        SampleWords = sampleObjectWords.Select(w => w.Text).ToArray(),
        TargetWords = targetObjectWords.Select(w => w.Text).ToArray(),
        SampleWordsCount = sampleObjectWords.Count,
        SampleWordIndexes = sampleObjectWords.Select(sampleSentence.WordIndex).ToArray(),
        TargetWordsCount = targetObjectWords.Count,
        TargetWordIndexes = targetObjectWords.Select(targetSentence.WordIndex).ToArray(),

        // Combined error lists: surplus entries first, then missed entries.
        NonEssentialErrors = extraAuxiliaryTexts.Concat(absentAuxiliaryTexts).ToArray(),
        NonEssentialErrorIndexes = extraAuxiliaryIndexes.Concat(absentAuxiliaryIndexes).ToArray(),
        EssentialErrors = extraEssentialTexts.Concat(absentEssentialTexts).ToArray(),
        EssentialErrorIndexes = extraEssentialIndexes.Concat(absentEssentialIndexes).ToArray(),

        SurplusNonEssentialWords = extraAuxiliaryTexts.ToArray(),
        SurplusNonEssentialWordIndexes = extraAuxiliaryIndexes.ToArray(),
        MissedNonEssentialWords = absentAuxiliaryTexts.ToArray(),
        MissedNonEssentialWordIndexes = absentAuxiliaryIndexes.ToArray(),
        SurplusEssentialWords = extraEssentialTexts.ToArray(),
        SurplusEssentialWordIndexes = extraEssentialIndexes.ToArray(),
        MissedEssentialWords = absentEssentialTexts.ToArray(),
        MissedEssentialWordIndexes = absentEssentialIndexes.ToArray(),

        CorrectDetectionsCount = matchedWords.Count,
        Type = ErrorType.DetectionDefect,
        DetectionQuality = quality
    };
}
/// <summary>
/// Builds the metrics for a match that has a sample object but no target object.
/// </summary>
/// <remarks>
/// Every word of the sample object is reported as missed: words flagged <c>IsAuxilary</c> as
/// non-essential, the rest as essential. All target and surplus collections are empty, and the
/// correct-detection count and detection quality are zero.
/// </remarks>
/// <param name="objectMatch">The match; its <c>SampleObjects</c> is dereferenced here.</param>
/// <param name="sampleSentence">Sentence used to resolve indexes of the sample words.</param>
/// <returns>Metrics typed as <c>ErrorType.MissedLinguisticObject</c>.</returns>
private static LinguisticObjectDetectionMetrics _metricsForMissedObject(LinguisticObjectMatch objectMatch, Sentence sampleSentence)
{
    var words = objectMatch.SampleObjects.Words;

    var missedEssential = (from w in words where !w.IsAuxilary select w).ToList();
    var missedEssentialTexts = missedEssential.Select(w => w.Text).ToList();
    var missedEssentialIndexes = missedEssential.Select(sampleSentence.WordIndex).ToList();

    var missedAuxiliary = (from w in words where w.IsAuxilary select w).ToList();
    var missedAuxiliaryTexts = missedAuxiliary.Select(w => w.Text).ToList();
    var missedAuxiliaryIndexes = missedAuxiliary.Select(sampleSentence.WordIndex).ToList();

    return new LinguisticObjectDetectionMetrics
    {
        SampleWords = words.Select(w => w.Text).ToArray(),
        TargetWords = new string[0],
        SampleWordsCount = words.Count,
        SampleWordIndexes = words.Select(sampleSentence.WordIndex).ToArray(),
        TargetWordsCount = 0,
        TargetWordIndexes = new int[0],

        // With no target object the only possible errors are misses.
        NonEssentialErrors = missedAuxiliaryTexts.ToArray(),
        NonEssentialErrorIndexes = missedAuxiliaryIndexes.ToArray(),
        EssentialErrors = missedEssentialTexts.ToArray(),
        EssentialErrorIndexes = missedEssentialIndexes.ToArray(),

        SurplusNonEssentialWords = new string[0],
        SurplusNonEssentialWordIndexes = new int[0],
        MissedNonEssentialWords = missedAuxiliaryTexts.ToArray(),
        MissedNonEssentialWordIndexes = missedAuxiliaryIndexes.ToArray(),
        SurplusEssentialWords = new string[0],
        SurplusEssentialWordIndexes = new int[0],
        MissedEssentialWords = missedEssentialTexts.ToArray(),
        MissedEssentialWordIndexes = missedEssentialIndexes.ToArray(),

        CorrectDetectionsCount = 0,
        DetectionQuality = 0d,
        Type = ErrorType.MissedLinguisticObject
    };
}
/// <summary>
/// Builds the metrics for a match that has a target object but no sample object.
/// </summary>
/// <remarks>
/// Every word of the target object is reported as surplus: words flagged <c>IsAuxilary</c> as
/// non-essential, the rest as essential. All sample and missed collections are empty.
/// Detection quality is <c>a / (a + 0.5 * essentialSurplusCount)</c>, where <c>a</c> is the value
/// returned by <c>_getArageWordsCountInObjectsOfType</c> for the sample sentence and object type
/// (presumably an average word count, judging by the helper's name; confirm against the helper).
/// When that denominator is not positive the quality is reported as 0 rather than letting
/// 0 / 0 produce NaN.
/// </remarks>
/// <param name="objectMatch">The match; its <c>TargetObjects</c> is dereferenced here.</param>
/// <param name="sampleSentence">Sentence passed to the word-count helper.</param>
/// <param name="targetSentence">Sentence used to resolve indexes of the target words.</param>
/// <param name="type">Object type passed to the word-count helper.</param>
/// <returns>Metrics typed as <c>ErrorType.SurplusLinguisticObject</c>.</returns>
private static LinguisticObjectDetectionMetrics _metricsForSurplusTargetObject(LinguisticObjectMatch objectMatch, Sentence sampleSentence, Sentence targetSentence, LinguisticObjectType type)
{
    // Weight applied to each essential surplus word in the quality denominator.
    const double essentialSurplusWeight = 0.5;

    var referenceWordsCount = _getArageWordsCountInObjectsOfType(sampleSentence, type);
    var targetWords = objectMatch.TargetObjects.Words;

    var essentialSurpluses = targetWords.Where(word => !word.IsAuxilary).ToList();
    var essentialSurplusText = essentialSurpluses.Select(word => word.Text).ToList();
    var essentialSurplusIndexes = essentialSurpluses.Select(targetSentence.WordIndex).ToList();

    var nonEssentialSurpluses = targetWords.Where(word => word.IsAuxilary).ToList();
    var nonEssentialSurplusTexts = nonEssentialSurpluses.Select(word => word.Text).ToList();
    var nonEssentialSurplusIndexes = nonEssentialSurpluses.Select(targetSentence.WordIndex).ToList();

    // Guard the division: a zero reference count combined with no essential surplus words
    // would otherwise evaluate 0 / 0 and store NaN in DetectionQuality.
    var qualityDenominator = referenceWordsCount + essentialSurplusWeight * essentialSurpluses.Count;
    var detectionQuality = qualityDenominator > 0
        ? referenceWordsCount / qualityDenominator
        : 0d;

    return new LinguisticObjectDetectionMetrics
    {
        SampleWords = new string[] { },
        TargetWords = targetWords.Select(word => word.Text).ToArray(),
        SampleWordsCount = 0,
        SampleWordIndexes = new int[] { },
        TargetWordsCount = targetWords.Count,
        TargetWordIndexes = targetWords.Select(targetSentence.WordIndex).ToArray(),

        // With no sample object the only possible errors are surpluses.
        NonEssentialErrors = nonEssentialSurplusTexts.ToArray(),
        NonEssentialErrorIndexes = nonEssentialSurplusIndexes.ToArray(),
        EssentialErrors = essentialSurplusText.ToArray(),
        EssentialErrorIndexes = essentialSurplusIndexes.ToArray(),

        SurplusNonEssentialWords = nonEssentialSurplusTexts.ToArray(),
        SurplusNonEssentialWordIndexes = nonEssentialSurplusIndexes.ToArray(),
        MissedNonEssentialWords = new string[] { },
        MissedNonEssentialWordIndexes = new int[] { },
        SurplusEssentialWords = essentialSurplusText.ToArray(),
        SurplusEssentialWordIndexes = essentialSurplusIndexes.ToArray(),
        MissedEssentialWords = new string[] { },
        MissedEssentialWordIndexes = new int[] { },

        CorrectDetectionsCount = 0,
        DetectionQuality = detectionQuality,
        Type = ErrorType.SurplusLinguisticObject
    };
}
/// <summary>
/// Records a subject match in the subjects group of its sentence statistics.
/// </summary>
/// <param name="subjectsMatch">The subject match to record.</param>
private void _processSubject(LinguisticObjectMatch subjectsMatch)
{
    Func<SimpleSentenceStats, List<LinguisticObject>> subjectsOf = sentenceStats => sentenceStats.Subjects.Objects;
    _processSimpleObjectMatch(subjectsMatch, subjectsOf, LinguisticObjectType.Subject);
}
/// <summary>
/// Records a uniform match in the uniforms group of its sentence statistics.
/// </summary>
/// <param name="uniformsMatch">The uniform match to record.</param>
private void _processUniforms(LinguisticObjectMatch uniformsMatch)
{
    Func<SimpleSentenceStats, List<LinguisticObject>> uniformsOf = sentenceStats => sentenceStats.Uniforms.Objects;
    _processSimpleObjectMatch(uniformsMatch, uniformsOf, LinguisticObjectType.Uniform);
}
/// <summary>
/// Computes the metrics for a match and appends the result to the statistics group chosen by
/// <paramref name="statsGroupSelector"/>.
/// </summary>
/// <remarks>
/// Nothing is recorded when the sentence id resolved for the match is -1.
/// </remarks>
/// <param name="match">The sample/target object match to record.</param>
/// <param name="statsGroupSelector">Picks the list inside the sentence statistics that receives the new entry.</param>
/// <param name="objectType">Object type forwarded to the metrics calculator.</param>
private void _processSimpleObjectMatch(LinguisticObjectMatch match, Func<SimpleSentenceStats, List<LinguisticObject>> statsGroupSelector, LinguisticObjectType objectType)
{
    var sentenceId = _getSenteceId(match);
    if (sentenceId != -1)
    {
        var sentenceStats = _simpleSentencesStats[sentenceId];
        var group = statsGroupSelector(sentenceStats);

        group.Add(new LinguisticObject
        {
            MappedObjects = match,
            Metrics = LoMatchMetricsCalculator.MetricsFor(match, _target, _sample, objectType)
        });
    }
}
/// <summary>
/// Records a predicate match in the predicates group of its sentence statistics.
/// </summary>
/// <param name="predicateMatch">The predicate match to record.</param>
private void _processPredicate(LinguisticObjectMatch predicateMatch)
{
    Func<SimpleSentenceStats, List<LinguisticObject>> predicatesOf = sentenceStats => sentenceStats.Predicates.Objects;
    _processSimpleObjectMatch(predicateMatch, predicatesOf, LinguisticObjectType.Predicate);
}
/// <summary>
/// Records a meaning-part match in the <c>MeaningAuxParts</c> group of its sentence statistics.
/// </summary>
/// <param name="meaningPartMatch">The meaning-part match to record.</param>
private void _processMeaningPart(LinguisticObjectMatch meaningPartMatch)
{
    Func<SimpleSentenceStats, List<LinguisticObject>> meaningPartsOf = sentenceStats => sentenceStats.MeaningAuxParts.Objects;
    _processSimpleObjectMatch(meaningPartMatch, meaningPartsOf, LinguisticObjectType.MeaningPart);
}
/// <summary>
/// Resolves the sentence id for a match.
/// </summary>
/// <remarks>
/// The sample object is looked up in the sample sentence when it is present; otherwise the
/// target object is looked up in the target sentence.
/// </remarks>
/// <param name="match">The match whose sentence id is needed.</param>
/// <returns>The id produced by the two-argument <c>_getSenteceId</c> overload.</returns>
private int _getSenteceId(LinguisticObjectMatch match)
{
    if (match.SampleObjects != null)
    {
        return _getSenteceId(match.SampleObjects, _sample);
    }

    return _getSenteceId(match.TargetObjects, _target);
}
/// <summary>
/// Turns the chosen best similarity entry into a match and removes its sample and target from
/// the rest of the similarity table.
/// </summary>
/// <remarks>
/// After <paramref name="bestMatch"/> is removed from the table, every remaining entry with the
/// same sample has its <c>Sample</c> cleared and its <c>Value</c> reset to 0, and every
/// remaining entry with the same target has its <c>Target</c> cleared and its <c>Value</c> reset
/// to 0. Entries left with neither a sample nor a target are then dropped from the table.
/// </remarks>
/// <param name="similarityTable">The table of candidate pairings; modified in place.</param>
/// <param name="bestMatch">The entry selected as the best pairing.</param>
/// <returns>A match holding the sample and target of <paramref name="bestMatch"/>.</returns>
private static LinguisticObjectMatch _processBestMatch(List<SimplePartsSimilarity> similarityTable, SimplePartsSimilarity bestMatch)
{
    var match = new LinguisticObjectMatch
    {
        SampleObjects = bestMatch.Sample,
        TargetObjects = bestMatch.Target
    };

    similarityTable.Remove(bestMatch);

    // Detach the already-used sample from the other candidates.
    var sameSampleEntries = similarityTable.Where(entry => entry.Sample == bestMatch.Sample);
    sameSampleEntries.ForEach(entry =>
    {
        entry.Sample = null;
        entry.Value = 0;
    });

    // Detach the already-used target from the other candidates.
    var sameTargetEntries = similarityTable.Where(entry => entry.Target == bestMatch.Target);
    sameTargetEntries.ForEach(entry =>
    {
        entry.Target = null;
        entry.Value = 0;
    });

    // Entries that lost both sides no longer describe any pairing.
    similarityTable.RemoveAll(entry => entry.Sample == null && entry.Target == null);

    return match;
}