/// <summary>
/// Analyzes the leading lines of the ascii stream and writes the detected import settings
/// (header line count, caption line index, separation strategy, number and date/time formats,
/// recognized line structure) back into <paramref name="importOptions"/>.
/// The number of lines read is taken from <c>analysisOptions.NumberOfLinesToAnalyze</c>.
/// </summary>
/// <param name="importOptions">The import options. This can already contain known values; a known number of
/// main header lines and a known caption line index are taken over unchanged instead of being evaluated.
/// On a completed analysis the properties assigned at the end of this method are filled in. Note that if no
/// body lines could be read, this method returns early and does not write any results into this instance.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="stream"/>, <paramref name="analysisOptions"/>
/// or <paramref name="importOptions"/> is <c>null</c>.</exception>
public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    if (null == stream)
    {
        // The reported name has to match the parameter name exactly ("stream", not "Stream").
        throw new ArgumentNullException("stream");
    }

    if (null == analysisOptions)
    {
        throw new ArgumentNullException("analysisOptions");
    }

    if (null == importOptions)
    {
        throw new ArgumentNullException("importOptions");
    }

    // Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
    ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

    if (_bodyLines.Count == 0)
    {
        return; // there is nothing to analyze; importOptions is left as it was passed in
    }

    // Analyze the whitespace structure of the body lines, find out if there is a fixed column width
    _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

    // Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
    SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

    // Analyze each of the first few lines with all possible separation strategies
    _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

    // Do the analysis itself in parallel for each of the lines; every iteration writes only to its own array slot i
    System.Threading.Tasks.Parallel.For(
        0,
        _bodyLines.Count,
        (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

    // for debugging activate the next line and paste the data into notepad:
    // PutRecognizedStructuresToClipboard(result, separationStrategies);

    EvaluateScoringOfAllLineAnalysisOptions();

    // Evaluate the best line analysis option. The results are read below from
    // _highestScoredLineAnalysisOption and _lineAnalysisOptionsScoring.
    EvaluateHighestScoredLineAnalysisOption();

    // look how many header lines are in the file (only if the caller did not already specify the number)
    if (null == importOptions.NumberOfMainHeaderLines)
    {
        EvaluateNumberOfMainHeaderLines();
    }
    else
    {
        _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
    }

    // get the index of the caption line (only if the caller did not already specify it)
    if (null == importOptions.IndexOfCaptionLine)
    {
        EvaluateIndexOfCaptionLine();
    }
    else
    {
        _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
    }

    // Publish the results of the analysis to the caller's import options.
    importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
    importOptions.IndexOfCaptionLine = _indexOfCaptionLine;

    importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
    importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
    importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

    importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
}
/// <summary>
/// Reads the first lines of the ascii stream (as many as <c>analysisOptions.NumberOfLinesToAnalyze</c> specifies),
/// analyzes them, and stores the detected settings in <paramref name="importOptions"/>.
/// </summary>
/// <param name="importOptions">The import options. This can already contain known values: an already set
/// number of main header lines or caption line index is kept instead of being evaluated. When the analysis
/// runs to completion, the header line count, caption line index, separation strategy, number format culture,
/// date/time format culture and recognized structure are assigned. If no body lines were read, the method
/// returns before assigning anything to this instance.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <exception cref="ArgumentNullException">Thrown if one of the three arguments is <c>null</c>.</exception>
public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    // Argument validation. The name passed to the exception must equal the parameter name,
    // hence "stream" in lower case.
    if (null == stream)
    {
        throw new ArgumentNullException("stream");
    }
    if (null == analysisOptions)
    {
        throw new ArgumentNullException("analysisOptions");
    }
    if (null == importOptions)
    {
        throw new ArgumentNullException("importOptions");
    }

    // Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
    ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

    // Without body lines there is nothing to analyze; leave importOptions untouched.
    if (_bodyLines.Count == 0)
    {
        return;
    }

    // Analyze the whitespace structure of the body lines, find out if there is a fixed column width
    _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

    // Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
    SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

    // Analyze each of the first few lines with all possible separation strategies.
    // The per-line analysis runs in parallel; each iteration fills exactly one element of the result array.
    _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];
    System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) =>
    {
        _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest);
    });

    // for debugging activate the next line and paste the data into notepad:
    // PutRecognizedStructuresToClipboard(result, separationStrategies);

    EvaluateScoringOfAllLineAnalysisOptions();

    // Evaluate the best line analysis option; it is consumed further down via
    // _highestScoredLineAnalysisOption and _lineAnalysisOptionsScoring.
    EvaluateHighestScoredLineAnalysisOption();

    // Number of main header lines: use the caller-provided value if present, otherwise evaluate it.
    if (null != importOptions.NumberOfMainHeaderLines)
    {
        _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
    }
    else
    {
        EvaluateNumberOfMainHeaderLines();
    }

    // Index of the caption line: use the caller-provided value if present, otherwise evaluate it.
    if (null != importOptions.IndexOfCaptionLine)
    {
        _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
    }
    else
    {
        EvaluateIndexOfCaptionLine();
    }

    // Write the analysis results back to the import options.
    importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
    importOptions.IndexOfCaptionLine = _indexOfCaptionLine;

    importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
    importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
    importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

    importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
}