C# (CSharp) Altaxo.Serialization.Ascii AsciiLineAnalysis 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: Altaxo.Serialization.Ascii

클래스/타입: AsciiLineAnalysis

hotexamples.com에서의 예제들: 3

C# (CSharp) Altaxo.Serialization.Ascii AsciiLineAnalysis - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 Altaxo.Serialization.Ascii.AsciiLineAnalysis에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

        /// <summary>
        /// Determines which line structure is the most frequent one among the analyzed lines (for the given
        /// analysis option) and selects a representative line structure for it.
        /// </summary>
        /// <param name="analysisOption">The analysis option used to look up the line structure in each element of <paramref name="result"/>.</param>
        /// <param name="result">The analysis results, one element per analyzed line.</param>
        /// <param name="excludeLineStructureHashes">Hash codes of line structures that must not be considered. Can be <c>null</c>.
        /// If a non-null set is provided, the hash of every structure that is rejected because its <c>Count</c> is zero is added to this set.</param>
        /// <param name="maxNumberOfEqualLines">On return, the number of lines that share the most frequent (non-excluded) line structure hash, or 0 if there is no such structure.</param>
        /// <param name="bestLine">On return, the line structure with the highest <c>LineStructureScoring</c> among the lines with the most frequent structure hash
        /// (the last one if several have the same scoring). Only scorings greater than or equal to zero are considered. Is <c>null</c> if no structure was found.</param>
        public static void CalculateScoreOfLineAnalysisOption(AsciiLineAnalysisOption analysisOption, IList <AsciiLineAnalysis> result, HashSet <int> excludeLineStructureHashes, out int maxNumberOfEqualLines, out AsciiLineStructure bestLine)
        {
            maxNumberOfEqualLines = 0;
            bestLine = null;

            // Dictionary: Key is the hash code of the line structure, Value is the number of lines that have this hash code
            var numberOfLinesForLineStructureHash = new Dictionary <int, int>();
            for (int i = 0; i < result.Count; i++)
            {
                int lineStructureHash = result[i][analysisOption].GetHashCode();
                int numberOfLinesSoFar;
                // TryGetValue leaves numberOfLinesSoFar at 0 if the hash was not seen before (single lookup instead of ContainsKey + indexer)
                numberOfLinesForLineStructureHash.TryGetValue(lineStructureHash, out numberOfLinesSoFar);
                numberOfLinesForLineStructureHash[lineStructureHash] = numberOfLinesSoFar + 1;
            }

            // determine which of the (non-excluded) line structures is the most frequent one
            int hashOfMostFrequentStructure = 0;
            foreach (var dictEntry in numberOfLinesForLineStructureHash)
            {
                if (excludeLineStructureHashes != null && excludeLineStructureHashes.Contains(dictEntry.Key))
                {
                    continue;
                }

                if (maxNumberOfEqualLines < dictEntry.Value)
                {
                    maxNumberOfEqualLines       = dictEntry.Value;
                    hashOfMostFrequentStructure = dictEntry.Key;
                }
            }

            // Every dictionary value is at least 1, so a maximum of 0 means that there is no line at all or that
            // all structures are excluded. Without this guard, hashOfMostFrequentStructure would still have its
            // initial value 0 and could accidentally match a (possibly excluded) structure whose hash code is 0.
            if (maxNumberOfEqualLines == 0)
            {
                return;
            }

            // search for the maximum priority of those lines with the most frequent structure
            int maxPriorityOfMostFrequentLines = 0;
            for (int i = 0; i < result.Count; i++)
            {
                AsciiLineStructure lineStructure = result[i][analysisOption];
                if (hashOfMostFrequentStructure != lineStructure.GetHashCode())
                {
                    continue;
                }

                int priority = lineStructure.LineStructureScoring;
                if (priority >= maxPriorityOfMostFrequentLines) // '>=' : on equal priority the later line wins
                {
                    maxPriorityOfMostFrequentLines = priority;
                    bestLine = lineStructure;
                }
            }

            // if the bestLine is a line with a column count of zero, we should use the next best line
            // we achieve this by adding the best hash to the set of excluded hashes and calling the function again.
            // The hash can not already be part of the set here, because excluded hashes were skipped above;
            // thus every recursion excludes one more hash and the recursion terminates.
            if (bestLine != null && bestLine.Count == 0)
            {
                if (excludeLineStructureHashes == null)
                {
                    excludeLineStructureHashes = new HashSet <int>();
                }
                excludeLineStructureHashes.Add(hashOfMostFrequentStructure);
                CalculateScoreOfLineAnalysisOption(analysisOption, result, excludeLineStructureHashes, out maxNumberOfEqualLines, out bestLine);
            }
        }

예제 #2

파일 보기

        /// <summary>
        /// Analyzes the first lines of the ascii stream (the number of lines is given by
        /// <c>analysisOptions.NumberOfLinesToAnalyze</c>) and stores the outcome in <paramref name="importOptions"/>.
        /// </summary>
        /// <param name="importOptions">The import options. This can already contain known values. On return, this instance should be ready to be used to import ascii data, i.e. all fields should contain values unequal to <c>null</c>.
        /// Note: if no body lines could be read, the method returns early without assigning anything to this instance.</param>
        /// <param name="stream">The ascii stream to analyze.</param>
        /// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
        public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
        {
            if (null == stream)
            {
                // the reported parameter name must match the real parameter name ("stream", not "Stream")
                throw new ArgumentNullException("stream");
            }
            if (null == analysisOptions)
            {
                throw new ArgumentNullException("analysisOptions");
            }
            if (null == importOptions)
            {
                throw new ArgumentNullException("importOptions");
            }

            // Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
            ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

            if (_bodyLines.Count == 0)
            {
                return; // there is nothing to analyze
            }

            // Analyze the whitespace structure of the body lines, find out if there is a fixed column width
            _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

            // Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
            SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

            // Analyze each of the first few lines with all possible separation strategies
            _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

            // Do the analysis itself in parallel for each of the lines (every iteration writes only to its own array slot)
            System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

            // for debugging activate the next line and paste the data into notepad:
            // PutRecognizedStructuresToClipboard(result, separationStrategies);

            EvaluateScoringOfAllLineAnalysisOptions();

            // Evaluate the best line analysis option. The result is used below via _highestScoredLineAnalysisOption
            EvaluateHighestScoredLineAnalysisOption();

            // Number of main header lines: evaluate it only if it is not already given in the import options
            if (null == importOptions.NumberOfMainHeaderLines)
            {
                EvaluateNumberOfMainHeaderLines();
            }
            else
            {
                _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
            }

            // Index of the caption line: evaluate it only if it is not already given in the import options
            if (null == importOptions.IndexOfCaptionLine)
            {
                EvaluateIndexOfCaptionLine();
            }
            else
            {
                _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
            }

            // Write the results of the analysis back to the import options
            importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
            importOptions.IndexOfCaptionLine      = _indexOfCaptionLine;

            importOptions.SeparationStrategy    = _highestScoredLineAnalysisOption.SeparationStrategy;
            importOptions.NumberFormatCulture   = _highestScoredLineAnalysisOption.NumberFormat;
            importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

            // the recognized structure is the line structure stored in the scoring of the highest scored analysis option
            importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
        }

예제 #3

파일 보기

파일: AsciiDocumentAnalysis.cs 프로젝트: Altaxo/Altaxo

		/// <summary>
		/// Analyzes the first lines of the ascii stream (the number of lines is given by
		/// <c>analysisOptions.NumberOfLinesToAnalyze</c>) and stores the outcome in <paramref name="importOptions"/>.
		/// </summary>
		/// <param name="importOptions">The import options. This can already contain known values. On return, this instance should be ready to be used to import ascii data, i.e. all fields should contain values unequal to <c>null</c>.
		/// Note: if no body lines could be read, the method returns early without assigning anything to this instance.</param>
		/// <param name="stream">The ascii stream to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <exception cref="ArgumentNullException">If <paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
		public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// the reported parameter name must match the real parameter name ("stream", not "Stream")
			if (null == stream)
				throw new ArgumentNullException("stream");
			if (null == analysisOptions)
				throw new ArgumentNullException("analysisOptions");
			if (null == importOptions)
				throw new ArgumentNullException("importOptions");

			// Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
			ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

			if (_bodyLines.Count == 0)
				return; // there is nothing to analyze

			// Analyze the whitespace structure of the body lines, find out if there is a fixed column width
			_globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

			// Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
			SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

			// Analyze each of the first few lines with all possible separation strategies
			_lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

			// Do the analysis itself in parallel for each of the lines (every iteration writes only to its own array slot)
			System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

			// for debugging activate the next line and paste the data into notepad:
			// PutRecognizedStructuresToClipboard(result, separationStrategies);

			EvaluateScoringOfAllLineAnalysisOptions();

			// Evaluate the best line analysis option. The result is used below via _highestScoredLineAnalysisOption
			EvaluateHighestScoredLineAnalysisOption();

			// Number of main header lines: evaluate it only if it is not already given in the import options
			if (null == importOptions.NumberOfMainHeaderLines)
				EvaluateNumberOfMainHeaderLines();
			else
				_numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;

			// Index of the caption line: evaluate it only if it is not already given in the import options
			if (null == importOptions.IndexOfCaptionLine)
				EvaluateIndexOfCaptionLine();
			else
				_indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;

			// Write the results of the analysis back to the import options
			importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
			importOptions.IndexOfCaptionLine = _indexOfCaptionLine;

			importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
			importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
			importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

			// the recognized structure is the line structure stored in the scoring of the highest scored analysis option
			importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
		}