Пример #1
0
 /// <summary>
 /// Gets the line structure that is stored for the given separation strategy.
 /// </summary>
 /// <param name="separationStrategy">The separation strategy that is used as lookup key.</param>
 /// <returns>The entry of <c>_structureForSeparation</c> that belongs to <paramref name="separationStrategy"/>.</returns>
 public AsciiLineStructure this[AsciiLineAnalysisOption separationStrategy] => _structureForSeparation[separationStrategy];
Пример #2
0
		/// <summary>
		/// Looks up the line structure that was stored for a separation strategy.
		/// </summary>
		/// <param name="separationStrategy">The separation strategy whose line structure is requested.</param>
		/// <returns>The value that <c>_structureForSeparation</c> holds for <paramref name="separationStrategy"/>.</returns>
		public AsciiLineStructure this[AsciiLineAnalysisOption separationStrategy]
		{
			get => _structureForSeparation[separationStrategy];
		}
Пример #3
0
        /// <summary>
        /// Picks the entry of <c>_lineAnalysisOptionsScoring</c> with the highest total score and stores its key in
        /// <see cref="_highestScoredLineAnalysisOption"/> and its line structure in <see cref="_highestScoredLineStructure"/>.
        /// </summary>
        /// <remarks>
        /// The total score of an entry is its number of lines multiplied by the scoring of its line structure.
        /// If two entries have the same total score, the entry with more lines is preferred.
        /// </remarks>
        private void EvaluateHighestScoredLineAnalysisOption()
        {
            // start with the first entry as the provisional winner; the loop below visits it again and gives it its real score
            var    winner       = _lineAnalysisOptionsScoring.First();
            double winningScore = int.MinValue;

            foreach (var candidate in _lineAnalysisOptionsScoring)
            {
                double candidateScore = (double)candidate.Value.NumberOfLines * candidate.Value.LineStructure.LineStructureScoring;

                // a candidate wins with a strictly higher score, or with an equal score but more lines
                if (candidateScore > winningScore ||
                    (candidateScore == winningScore && candidate.Value.NumberOfLines > winner.Value.NumberOfLines))
                {
                    winningScore = candidateScore;
                    winner       = candidate;
                }
            }

            _highestScoredLineStructure      = winner.Value.LineStructure;
            _highestScoredLineAnalysisOption = winner.Key;
        }
Пример #4
0
        /// <summary>
        /// Determines which line structure occurs most frequently among the analyzed lines for the given analysis option,
        /// and selects, from the lines that have this structure, the line structure with the highest scoring.
        /// </summary>
        /// <remarks>
        /// Line structures are identified solely by their hash code; structures with equal hash codes are counted together.
        /// If the selected line structure has a column count of zero, its hash is excluded and the search is repeated,
        /// so that the next most frequent structure is used instead.
        /// </remarks>
        /// <param name="analysisOption">The analysis option whose line structures are evaluated.</param>
        /// <param name="result">The analysis results of the individual lines. Each element is indexed with <paramref name="analysisOption"/>.</param>
        /// <param name="excludeLineStructureHashes">Hash codes of line structures that must not be selected. May be null. If a set is provided, the hash of every structure that is rejected because of a column count of zero is added to it.</param>
        /// <param name="maxNumberOfEqualLines">On return, the number of lines that share the selected line structure, or zero if no line structure is eligible.</param>
        /// <param name="bestLine">On return, the highest scored line structure among the lines with the most frequent structure. Null if no line structure is eligible, or if none of the candidate lines has a scoring of at least zero.</param>
        public static void CalculateScoreOfLineAnalysisOption(AsciiLineAnalysisOption analysisOption, IList <AsciiLineAnalysis> result, HashSet <int> excludeLineStructureHashes, out int maxNumberOfEqualLines, out AsciiLineStructure bestLine)
        {
            // Dictionary, Key is the hash of the line structure, Value is the number of lines that have this hash
            var numberOfLinesForLineStructureHash = new Dictionary <int, int>();

            for (int i = 0; i < result.Count; i++)
            {
                int lineStructureHash = result[i][analysisOption].GetHashCode();

                // TryGetValue leaves the count at zero for an unknown hash, so a single lookup covers both cases
                int numberOfLinesSoFar;
                numberOfLinesForLineStructureHash.TryGetValue(lineStructureHash, out numberOfLinesSoFar);
                numberOfLinesForLineStructureHash[lineStructureHash] = numberOfLinesSoFar + 1;
            }

            // determine, which of the line structures that are not excluded is the most frequent one
            bestLine = null;
            maxNumberOfEqualLines = 0;
            int hashOfMostFrequentStructure = 0;

            foreach (var dictEntry in numberOfLinesForLineStructureHash)
            {
                if (null != excludeLineStructureHashes && excludeLineStructureHashes.Contains(dictEntry.Key))
                {
                    continue;
                }

                if (maxNumberOfEqualLines < dictEntry.Value)
                {
                    maxNumberOfEqualLines       = dictEntry.Value;
                    hashOfMostFrequentStructure = dictEntry.Key;
                }
            }

            // Every counted structure has at least one line, so a maximum of zero means that nothing is eligible
            // (no lines at all, or all structures excluded). Without this guard the default hash value of zero would be
            // compared with the lines below and could select an excluded structure whose hash happens to be zero.
            if (maxNumberOfEqualLines == 0)
            {
                return;
            }

            // search for the maximum priority of those lines with the most frequent structure
            int maxPriorityOfMostFrequentLines = 0;

            for (int i = 0; i < result.Count; i++)
            {
                var lineStructure = result[i][analysisOption];
                if (hashOfMostFrequentStructure != lineStructure.GetHashCode())
                {
                    continue;
                }

                int prty = lineStructure.LineStructureScoring;
                if (prty >= maxPriorityOfMostFrequentLines) // '>=' : of equally scored lines the last one wins
                {
                    maxPriorityOfMostFrequentLines = prty;
                    bestLine = lineStructure;
                }
            }

            // if the bestLine is a line with a column count of zero, we should use the next best line
            // we achieve this by adding the best hash to the set of excluded hashes and call the function again
            if (bestLine != null && bestLine.Count == 0)
            {
                if (null == excludeLineStructureHashes)
                {
                    excludeLineStructureHashes = new HashSet <int>();
                }

                // Add returns false if the hash is already excluded; recursing only on true guarantees termination
                if (excludeLineStructureHashes.Add(hashOfMostFrequentStructure))
                {
                    CalculateScoreOfLineAnalysisOption(analysisOption, result, excludeLineStructureHashes, out maxNumberOfEqualLines, out bestLine);
                }
            }
        }
Пример #5
0
 /// <summary>
 /// Determines the most frequent line structure for the given analysis option without excluding any line structure up front.
 /// Forwards to the overload that accepts a set of excluded line structure hashes, passing null for that set.
 /// </summary>
 /// <param name="analysisOption">The analysis option that is forwarded to the full overload.</param>
 /// <param name="result">The analysis results of the individual lines, forwarded to the full overload.</param>
 /// <param name="maxNumberOfEqualLines">Receives the line count reported by the full overload.</param>
 /// <param name="bestLine">Receives the line structure reported by the full overload.</param>
 public static void CalculateScoreOfLineAnalysisOption(AsciiLineAnalysisOption analysisOption, IList <AsciiLineAnalysis> result, out int maxNumberOfEqualLines, out AsciiLineStructure bestLine)
     => CalculateScoreOfLineAnalysisOption(analysisOption, result, null, out maxNumberOfEqualLines, out bestLine);
Пример #6
0
		/// <summary>
		/// Determines which line structure occurs most frequently among the analyzed lines for the given analysis option,
		/// and selects, from the lines that have this structure, the line structure with the highest scoring.
		/// </summary>
		/// <remarks>
		/// Line structures are identified solely by their hash code; structures with equal hash codes are counted together.
		/// If the selected line structure has a column count of zero, its hash is excluded and the search is repeated,
		/// so that the next most frequent structure is used instead.
		/// </remarks>
		/// <param name="analysisOption">The analysis option whose line structures are evaluated.</param>
		/// <param name="result">The analysis results of the individual lines. Each element is indexed with <paramref name="analysisOption"/>.</param>
		/// <param name="excludeLineStructureHashes">Hash codes of line structures that must not be selected. May be null. If a set is provided, the hash of every structure that is rejected because of a column count of zero is added to it.</param>
		/// <param name="maxNumberOfEqualLines">On return, the number of lines that share the selected line structure, or zero if no line structure is eligible.</param>
		/// <param name="bestLine">On return, the highest scored line structure among the lines with the most frequent structure. Null if no line structure is eligible, or if none of the candidate lines has a scoring of at least zero.</param>
		public static void CalculateScoreOfLineAnalysisOption(AsciiLineAnalysisOption analysisOption, IList<AsciiLineAnalysis> result, HashSet<int> excludeLineStructureHashes, out int maxNumberOfEqualLines, out AsciiLineStructure bestLine)
		{
			// Dictionary, Key is the hash of the line structure, Value is the number of lines that have this hash
			Dictionary<int, int> numberOfLinesForLineStructureHash = new Dictionary<int, int>();

			for (int i = 0; i < result.Count; i++)
			{
				int lineStructureHash = result[i][analysisOption].GetHashCode();

				// TryGetValue leaves the count at zero for an unknown hash, so a single lookup covers both cases
				int numberOfLinesSoFar;
				numberOfLinesForLineStructureHash.TryGetValue(lineStructureHash, out numberOfLinesSoFar);
				numberOfLinesForLineStructureHash[lineStructureHash] = numberOfLinesSoFar + 1;
			}

			// determine, which of the line structures that are not excluded is the most frequent one
			bestLine = null;
			maxNumberOfEqualLines = 0;
			int hashOfMostFrequentStructure = 0;
			foreach (var dictEntry in numberOfLinesForLineStructureHash)
			{
				if (null != excludeLineStructureHashes && excludeLineStructureHashes.Contains(dictEntry.Key))
					continue;

				if (maxNumberOfEqualLines < dictEntry.Value)
				{
					maxNumberOfEqualLines = dictEntry.Value;
					hashOfMostFrequentStructure = dictEntry.Key;
				}
			}

			// Every counted structure has at least one line, so a maximum of zero means that nothing is eligible
			// (no lines at all, or all structures excluded). Without this guard the default hash value of zero would be
			// compared with the lines below and could select an excluded structure whose hash happens to be zero.
			if (maxNumberOfEqualLines == 0)
				return;

			// search for the maximum priority of those lines with the most frequent structure
			int maxPriorityOfMostFrequentLines = 0;
			for (int i = 0; i < result.Count; i++)
			{
				var lineStructure = result[i][analysisOption];
				if (hashOfMostFrequentStructure != lineStructure.GetHashCode())
					continue;

				int prty = lineStructure.LineStructureScoring;
				if (prty >= maxPriorityOfMostFrequentLines) // '>=' : of equally scored lines the last one wins
				{
					maxPriorityOfMostFrequentLines = prty;
					bestLine = lineStructure;
				}
			}

			// if the bestLine is a line with a column count of zero, we should use the next best line
			// we achieve this by adding the best hash to the set of excluded hashes and call the function again
			if (bestLine != null && bestLine.Count == 0)
			{
				if (null == excludeLineStructureHashes)
					excludeLineStructureHashes = new HashSet<int>();

				// Add returns false if the hash is already excluded; recursing only on true guarantees termination
				if (excludeLineStructureHashes.Add(hashOfMostFrequentStructure))
					CalculateScoreOfLineAnalysisOption(analysisOption, result, excludeLineStructureHashes, out maxNumberOfEqualLines, out bestLine);
			}
		}
Пример #7
0
		/// <summary>
		/// Determines the most frequent line structure for the given analysis option, starting without any excluded line structures.
		/// This is a convenience overload that calls the overload with an exclusion set and passes null for that set.
		/// </summary>
		/// <param name="analysisOption">The analysis option that is handed on to the full overload.</param>
		/// <param name="result">The analysis results of the individual lines, handed on to the full overload.</param>
		/// <param name="maxNumberOfEqualLines">Receives the line count reported by the full overload.</param>
		/// <param name="bestLine">Receives the line structure reported by the full overload.</param>
		public static void CalculateScoreOfLineAnalysisOption(AsciiLineAnalysisOption analysisOption, IList<AsciiLineAnalysis> result, out int maxNumberOfEqualLines, out AsciiLineStructure bestLine)
		{
			// no line structure is excluded up front
			HashSet<int> noExcludedHashes = null;

			CalculateScoreOfLineAnalysisOption(
				analysisOption,
				result,
				noExcludedHashes,
				out maxNumberOfEqualLines,
				out bestLine);
		}
Пример #8
0
		/// <summary>
		/// Picks the entry of <c>_lineAnalysisOptionsScoring</c> with the highest total score and stores its key in
		/// <see cref="_highestScoredLineAnalysisOption"/> and its line structure in <see cref="_highestScoredLineStructure"/>.
		/// </summary>
		/// <remarks>
		/// The total score of an entry is its number of lines multiplied by the scoring of its line structure.
		/// If two entries have the same total score, the entry with more lines is preferred.
		/// </remarks>
		private void EvaluateHighestScoredLineAnalysisOption()
		{
			// start with the first entry as the provisional winner; the loop below visits it again and gives it its real score
			var winner = _lineAnalysisOptionsScoring.First();
			double winningScore = int.MinValue;

			foreach (var candidate in _lineAnalysisOptionsScoring)
			{
				double candidateScore = (double)candidate.Value.NumberOfLines * candidate.Value.LineStructure.LineStructureScoring;

				// a candidate wins with a strictly higher score, or with an equal score but more lines
				bool takesTheLead = candidateScore > winningScore
					|| (candidateScore == winningScore && candidate.Value.NumberOfLines > winner.Value.NumberOfLines);

				if (!takesTheLead)
					continue;

				winningScore = candidateScore;
				winner = candidate;
			}

			_highestScoredLineStructure = winner.Value.LineStructure;
			_highestScoredLineAnalysisOption = winner.Key;
		}