/// <summary>
/// Looks through <c>Basis</c> for the entry whose proximity to <paramref name="line"/> is highest
/// and returns that entry's document type.
/// </summary>
/// <param name="line">Text wrapped in a temporary <c>ApproximationItem</c> for comparison.</param>
/// <returns>
/// The type of the best-scoring entry, or <c>null</c> when no entry scores strictly above 0.5.
/// </returns>
public ArchiveDocumentType? FindApproximation(string line)
        {
            // The type given to the probe is only a placeholder; just its text takes part in scoring.
            var probe = new ApproximationItem(ArchiveDocumentType.Other, line);

            ApproximationItem bestMatch = null;
            var bestScore = 0.5; // minimum score a candidate has to exceed

            foreach (var candidate in Basis)
            {
                var score = probe.FindProximity(candidate);

                // Strict comparison: on equal scores the earlier entry is kept.
                if (score > bestScore)
                {
                    bestMatch = candidate;
                    bestScore = score;
                }
            }

            if (bestMatch == null)
            {
                return default(ArchiveDocumentType?);
            }

            return bestMatch.Type;
        }
Пример #2
0
        /// <summary>
        /// Scores how closely the letter-occurrence tables of this instance match those of
        /// <paramref name="item"/>.
        /// </summary>
        /// <remarks>
        /// For every key of <paramref name="item"/> that this instance also contains, the contribution is
        /// this instance's count when it does not exceed the other count; otherwise it is
        /// <c>2 * other - own</c>, i.e. the surplus is subtracted from the other count, so a single
        /// contribution may be negative. The one-, two- and three-letter sums are weighted 1, 2 and 3
        /// and divided by the equally weighted number of entries in the tables of <paramref name="item"/>.
        /// </remarks>
        /// <param name="item">
        /// The item to compare against; its tables drive the iteration and form the denominator.
        /// </param>
        /// <returns>
        /// The weighted ratio described above, or 0 when all three tables of <paramref name="item"/>
        /// are empty (instead of the NaN a 0/0 division would produce).
        /// </returns>
        public double FindProximity(ApproximationItem item)
        {
            var oneLetterOccurrenceCounter = 0;
            foreach (var occurrence in item.OneLetterOccurrence)
            {
                // One keyed lookup replaces ContainsKey followed by a linear First(...) scan.
                int ownCount;
                if (!OneLetterOccurrence.TryGetValue(occurrence.Key, out ownCount)) continue;

                oneLetterOccurrenceCounter += (ownCount <= occurrence.Value)
                    ? ownCount
                    : 2 * occurrence.Value - ownCount;
            }

            var twoLetterOccurrenceCounter = 0;
            foreach (var occurrence in item.TwoLetterOccurrence)
            {
                int ownCount;
                if (!TwoLetterOccurrence.TryGetValue(occurrence.Key, out ownCount)) continue;

                twoLetterOccurrenceCounter += (ownCount <= occurrence.Value)
                    ? ownCount
                    : 2 * occurrence.Value - ownCount;
            }

            var threeLetterOccurrenceCounter = 0;
            foreach (var occurrence in item.ThreeLetterOccurrence)
            {
                int ownCount;
                if (!ThreeLetterOccurrence.TryGetValue(occurrence.Key, out ownCount)) continue;

                threeLetterOccurrenceCounter += (ownCount <= occurrence.Value)
                    ? ownCount
                    : 2 * occurrence.Value - ownCount;
            }

            // Longer letter sequences weigh more, in both the score and the total.
            var weightedTotal = item.OneLetterOccurrence.Count
                + 2 * item.TwoLetterOccurrence.Count
                + 3 * item.ThreeLetterOccurrence.Count;

            // Nothing to compare against: report "no proximity" rather than dividing 0 by 0.
            if (weightedTotal == 0)
            {
                return 0;
            }

            var weightedScore = oneLetterOccurrenceCounter
                + 2 * twoLetterOccurrenceCounter
                + 3 * threeLetterOccurrenceCounter;

            return (double)weightedScore / weightedTotal;
        }