/// <summary>
/// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance and initializes it with the given cultures.
/// </summary>
/// <param name="cultures">The cultures that are passed on to <c>InitializeWithCultures</c>.</param>
/// <returns>The newly created, initialized options instance.</returns>
public static AsciiDocumentAnalysisOptions GetOptionsForCultures(params System.Globalization.CultureInfo[] cultures)
{
    var result = new AsciiDocumentAnalysisOptions();

    InitializeWithCultures(result, cultures);

    return result;
}
/// <summary>
/// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance and fills it by calling
/// <c>InitializeDefaultSystemValues</c>.
/// </summary>
/// <returns>The newly created, initialized options instance.</returns>
public static AsciiDocumentAnalysisOptions GetDefaultSystemOptions()
{
    var result = new AsciiDocumentAnalysisOptions();

    InitializeDefaultSystemValues(result);

    return result;
}
/// <summary>
/// Analyzes the beginning of an ascii stream by running <c>InternalAnalyze</c> on a fresh
/// <see cref="AsciiDocumentAnalysis"/> instance.
/// </summary>
/// <param name="importOptions">The import options. Fields that are already filled are handed to the analysis as known values.
/// May be <c>null</c>; in this case a new instance is created.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <returns>The instance given in <paramref name="importOptions"/>, or the newly created instance if
/// <paramref name="importOptions"/> was <c>null</c>. It is meant to be used in a following step to read in the ascii stream.
/// If the analysis finds no body lines, the import options are returned as they were passed in.</returns>
public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    // fall back to an empty set of import options when the caller did not provide one
    importOptions = importOptions ?? new AsciiImportOptions();

    new AsciiDocumentAnalysis().InternalAnalyze(importOptions, stream, analysisOptions);

    return importOptions;
}
/// <summary>
/// Initializes an instance of <see cref="AsciiDocumentAnalysisOptions"/> with the default system values,
/// i.e. with the invariant culture, the current culture, the current UI culture and the installed UI culture.
/// </summary>
/// <param name="options">The options instance to initialize.</param>
protected static void InitializeDefaultSystemValues(AsciiDocumentAnalysisOptions options)
{
    var systemCultures = new System.Globalization.CultureInfo[]
    {
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.CultureInfo.CurrentCulture,
        System.Globalization.CultureInfo.CurrentUICulture,
        System.Globalization.CultureInfo.InstalledUICulture
    };

    InitializeWithCultures(options, systemCultures);
}
/// <summary>
/// Initializes an instance of <see cref="AsciiDocumentAnalysisOptions"/> so that 30 lines are analyzed and
/// the number formats and date/time formats of the given cultures are tested. Any formats previously
/// contained in the instance are discarded.
/// </summary>
/// <param name="options">The options instance to initialize.</param>
/// <param name="cultures">The cultures to test. May be <c>null</c> or empty, in which case both format
/// collections are left empty. Entries that are <c>null</c> are skipped.</param>
protected static void InitializeWithCultures(AsciiDocumentAnalysisOptions options, params CultureInfo[] cultures)
{
    // NOTE(review): presumably this literal should be DefaultNumberOfLinesToAnalyze - confirm that both values agree.
    options._numberOfLinesToAnalyze = 30;
    options._numberFormatsToTest.Clear();
    options._dateTimeFormatsToTest.Clear();

    // A params array can be passed as an explicit null; treat that like "no cultures" instead of
    // failing with a NullReferenceException after the options have already been partially reset.
    if (null == cultures)
    {
        return;
    }

    foreach (var culture in cultures)
    {
        // A null culture would be dereferenced later when the line analysis options are built.
        if (null == culture)
        {
            continue;
        }

        options._numberFormatsToTest.Add(culture);
        options._dateTimeFormatsToTest.Add(culture);
    }
}
/// <summary>
/// Checks the member values of the given options and replaces invalid ones: a non-positive number of lines
/// to analyze is replaced by <c>DefaultNumberOfLinesToAnalyze</c>, and an empty number format or date/time format
/// collection receives the invariant culture.
/// </summary>
/// <param name="options">The options to test and adjust (e.g. a freshly deserialized instance).</param>
protected static void TestAndAdjustMembersToValidValues(AsciiDocumentAnalysisOptions options)
{
    bool hasValidNumberOfLines = !(options.NumberOfLinesToAnalyze <= 0);
    if (!hasValidNumberOfLines)
    {
        options.NumberOfLinesToAnalyze = DefaultNumberOfLinesToAnalyze;
    }

    // at least one number format must be tested
    if (0 == options.NumberFormatsToTest.Count)
    {
        options.NumberFormatsToTest.Add(CultureInfo.InvariantCulture);
    }

    // at least one date/time format must be tested
    if (0 == options.DateTimeFormatsToTest.Count)
    {
        options.DateTimeFormatsToTest.Add(CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Analyzes the first lines of the ascii stream and stores the results in <paramref name="importOptions"/>.
/// The number of lines is given by <see cref="AsciiDocumentAnalysisOptions.NumberOfLinesToAnalyze"/>.
/// </summary>
/// <param name="importOptions">The import options. This can already contain known values, which are then used instead of being determined.
/// On return, the number of main header lines, the index of the caption line, the separation strategy, the number format culture,
/// the date/time format culture and the recognized structure are set. If no body lines could be read, the instance is left unchanged.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <exception cref="ArgumentNullException">If <paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    if (null == stream)
    {
        throw new ArgumentNullException("stream"); // must match the parameter name exactly
    }
    if (null == analysisOptions)
    {
        throw new ArgumentNullException("analysisOptions");
    }
    if (null == importOptions)
    {
        throw new ArgumentNullException("importOptions");
    }

    // Read in the lines to analyze. An already known number of header lines is passed on to the reader.
    ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

    if (_bodyLines.Count == 0)
    {
        return; // there is nothing to analyze; importOptions stays as it was passed in
    }

    // Analyze the global structure of the body lines (tabs, commas, semicolons, fixed column boundaries)
    _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

    // Sets all line analysis options to test for. If importOptions already contains a separation strategy, only this separation strategy is used
    SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

    // Analyze each of the body lines with all line analysis options; the lines are independent, so this is done in parallel
    _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];
    System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

    // for debugging activate the next line and paste the data into notepad:
    // PutRecognizedStructuresToClipboard(result, separationStrategies);

    EvaluateScoringOfAllLineAnalysisOptions();

    // Evaluate the best line analysis option; the results are read below from _highestScoredLineAnalysisOption and _lineAnalysisOptionsScoring
    EvaluateHighestScoredLineAnalysisOption();

    // determine the number of main header lines, unless it is already known
    if (null == importOptions.NumberOfMainHeaderLines)
    {
        EvaluateNumberOfMainHeaderLines();
    }
    else
    {
        _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
    }

    // determine the index of the caption line, unless it is already known
    if (null == importOptions.IndexOfCaptionLine)
    {
        EvaluateIndexOfCaptionLine();
    }
    else
    {
        _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
    }

    // publish the results
    importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
    importOptions.IndexOfCaptionLine = _indexOfCaptionLine;
    importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
    importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
    importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;
    importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
}
/// <summary>
/// Builds the list of line analysis options (combinations of separation strategy, number format and date/time format)
/// that should be tested, and stores it in <c>_lineAnalysisOptionsToTest</c>.
/// Values already present in <paramref name="importOptions"/> restrict the corresponding category to that single value.
/// </summary>
/// <param name="importOptions">The import options, which may already contain a number format culture, a date/time format culture and/or a separation strategy.</param>
/// <param name="analysisOptions">The analysis options that provide the number formats and date/time formats to test.</param>
private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
{
    // ---- number formats ----
    var numberCultures = new List<System.Globalization.CultureInfo>();
    if (null == importOptions.NumberFormatCulture)
    {
        numberCultures.AddRange(analysisOptions.NumberFormatsToTest);
        if (numberCultures.Count == 0)
        {
            numberCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
        }
    }
    else
    {
        numberCultures.Add(importOptions.NumberFormatCulture);
    }

    // ---- date/time formats ----
    var dateTimeCultures = new List<System.Globalization.CultureInfo>();
    if (null == importOptions.DateTimeFormatCulture)
    {
        dateTimeCultures.AddRange(analysisOptions.DateTimeFormatsToTest);
        if (dateTimeCultures.Count == 0)
        {
            dateTimeCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
        }
    }
    else
    {
        dateTimeCultures.Add(importOptions.DateTimeFormatCulture);
    }

    // ---- separation strategies ----
    var strategies = new List<IAsciiSeparationStrategy>();
    if (null == importOptions.SeparationStrategy)
    {
        // no separation strategy given - include the strategies that are possible according to the global structure
        if (_globalStructure.ContainsTabs)
        {
            strategies.Add(new SingleCharSeparationStrategy('\t'));
        }
        if (_globalStructure.ContainsCommas)
        {
            strategies.Add(new SingleCharSeparationStrategy(','));
        }
        if (_globalStructure.ContainsSemicolons)
        {
            strategies.Add(new SingleCharSeparationStrategy(';'));
        }

        if (null != _globalStructure.FixedBoundaries)
        {
            if (_globalStructure.RecognizedTabSize != 1)
            {
                strategies.Add(new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize));
            }
            else
            {
                strategies.Add(new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries));
            }
        }

        // whitespace separation is only the fallback if nothing else was found
        if (strategies.Count == 0)
        {
            strategies.Add(new SkipWhiteSpaceSeparationStrategy());
        }

        // this separation strategy must always be considered
        strategies.Add(new SingleLineSeparationStrategy());
    }
    else
    {
        // a separation strategy is given - use only this one
        strategies.Add(importOptions.SeparationStrategy);
    }

    // build every combination (cross product) of the three categories
    var candidates = new HashSet<AsciiLineAnalysisOption>();
    foreach (var strategy in strategies)
    {
        foreach (var numberCulture in numberCultures)
        {
            foreach (var dateTimeCulture in dateTimeCultures)
            {
                candidates.Add(new AsciiLineAnalysisOption(strategy, numberCulture, dateTimeCulture));
            }
        }
    }

    // remove all those combinations where the char of the single char separation strategy is equal to the number format's decimal separator
    candidates.RemoveWhere(candidate =>
        (candidate.SeparationStrategy is SingleCharSeparationStrategy) &&
        (((SingleCharSeparationStrategy)candidate.SeparationStrategy).SeparatorChar.ToString() == candidate.NumberFormat.NumberFormat.NumberDecimalSeparator));

    _lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(candidates);
}
/// <summary>
/// Returns a new <see cref="AsciiDocumentAnalysisOptions"/> instance that was initialized with the given cultures.
/// </summary>
/// <param name="cultures">The cultures handed over to <c>InitializeWithCultures</c>.</param>
/// <returns>A new options instance.</returns>
public static AsciiDocumentAnalysisOptions GetOptionsForCultures(params System.Globalization.CultureInfo[] cultures)
{
    AsciiDocumentAnalysisOptions newOptions = new AsciiDocumentAnalysisOptions();
    InitializeWithCultures(newOptions, cultures);
    return newOptions;
}
/// <summary>
/// Returns a new <see cref="AsciiDocumentAnalysisOptions"/> instance that was initialized by
/// <c>InitializeDefaultSystemValues</c>.
/// </summary>
/// <returns>A new options instance.</returns>
public static AsciiDocumentAnalysisOptions GetDefaultSystemOptions()
{
    AsciiDocumentAnalysisOptions newOptions = new AsciiDocumentAnalysisOptions();
    InitializeDefaultSystemValues(newOptions);
    return newOptions;
}
/// <summary>
/// Analyzes the first lines of the ascii stream (as many as <paramref name="analysisOptions"/> specifies)
/// and writes the outcome of the analysis into <paramref name="importOptions"/>.
/// </summary>
/// <param name="importOptions">The import options. Values that are already known (number of main header lines, index of the caption line,
/// separation strategy, number format culture, date/time format culture) are used instead of being determined.
/// On return these members and the recognized structure are set, except when no body lines could be read; then the instance is not modified.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <exception cref="ArgumentNullException">Thrown if any of the three arguments is <c>null</c>.</exception>
public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    // argument validation; the reported name has to be the exact (lower case) parameter name
    if (null == stream)
    {
        throw new ArgumentNullException("stream");
    }
    if (null == analysisOptions)
    {
        throw new ArgumentNullException("analysisOptions");
    }
    if (null == importOptions)
    {
        throw new ArgumentNullException("importOptions");
    }

    // Step 1: read the lines that should be analyzed
    ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);
    if (_bodyLines.Count == 0)
    {
        return; // there is nothing to analyze
    }

    // Step 2: global structure of the body lines and the resulting set of options to test
    _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);
    SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

    // Step 3: analyze every body line with all options to test (in parallel, one slot per line)
    _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];
    System.Threading.Tasks.Parallel.For(
        0,
        _bodyLines.Count,
        (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

    // for debugging activate the next line and paste the data into notepad:
    // PutRecognizedStructuresToClipboard(result, separationStrategies);

    // Step 4: score all options and select the best one (used below via _highestScoredLineAnalysisOption)
    EvaluateScoringOfAllLineAnalysisOptions();
    EvaluateHighestScoredLineAnalysisOption();

    // Step 5: number of main header lines - take the known value, otherwise evaluate it
    if (null == importOptions.NumberOfMainHeaderLines)
    {
        EvaluateNumberOfMainHeaderLines();
    }
    else
    {
        _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
    }

    // Step 6: index of the caption line - take the known value, otherwise evaluate it
    if (null == importOptions.IndexOfCaptionLine)
    {
        EvaluateIndexOfCaptionLine();
    }
    else
    {
        _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
    }

    // Step 7: transfer the results to the import options
    importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
    importOptions.IndexOfCaptionLine = _indexOfCaptionLine;
    importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
    importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
    importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;
    importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
}
/// <summary>
/// Validates the members of the given options instance and repairs invalid values:
/// the number of lines to analyze must be positive, and both format collections must not be empty.
/// </summary>
/// <param name="options">The options to validate, for instance after deserialization.</param>
protected static void TestAndAdjustMembersToValidValues(AsciiDocumentAnalysisOptions options)
{
    // non-positive line count -> default line count
    if (options.NumberOfLinesToAnalyze <= 0)
    {
        options.NumberOfLinesToAnalyze = DefaultNumberOfLinesToAnalyze;
    }

    // empty collection of number formats -> invariant culture
    if (options.NumberFormatsToTest.Count == 0)
    {
        options.NumberFormatsToTest.Add(CultureInfo.InvariantCulture);
    }

    // empty collection of date/time formats -> invariant culture
    if (options.DateTimeFormatsToTest.Count == 0)
    {
        options.DateTimeFormatsToTest.Add(CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Copy constructor: creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance and
/// takes over the values of another instance by calling <c>CopyFrom</c>.
/// </summary>
/// <param name="from">The instance whose values are copied.</param>
public AsciiDocumentAnalysisOptions(AsciiDocumentAnalysisOptions from)
{
    this.CopyFrom(from);
}
/// <summary>
/// Determines all combinations of separation strategy, number format and date/time format that the line analysis
/// has to test, and stores them in <c>_lineAnalysisOptionsToTest</c>.
/// </summary>
/// <param name="importOptions">Import options. A number format culture, date/time format culture or separation strategy
/// that is already set here is used exclusively for its category.</param>
/// <param name="analysisOptions">Analysis options that supply the number formats and date/time formats to test.</param>
private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var numberFormats = new List<System.Globalization.CultureInfo>();
    var dateTimeFormats = new List<System.Globalization.CultureInfo>();
    var separations = new List<IAsciiSeparationStrategy>();

    // all number formats to test (a given number format wins, the invariant culture is the fallback)
    if (null != importOptions.NumberFormatCulture)
    {
        numberFormats.Add(importOptions.NumberFormatCulture);
    }
    else
    {
        numberFormats.AddRange(analysisOptions.NumberFormatsToTest);
        if (0 == numberFormats.Count)
        {
            numberFormats.Add(invariant);
        }
    }

    // all DateTime formats to test (a given DateTime format wins, the invariant culture is the fallback)
    if (null != importOptions.DateTimeFormatCulture)
    {
        dateTimeFormats.Add(importOptions.DateTimeFormatCulture);
    }
    else
    {
        dateTimeFormats.AddRange(analysisOptions.DateTimeFormatsToTest);
        if (0 == dateTimeFormats.Count)
        {
            dateTimeFormats.Add(invariant);
        }
    }

    // all separation strategies to test
    if (importOptions.SeparationStrategy != null)
    {
        // if a separation strategy is given use only this
        separations.Add(importOptions.SeparationStrategy);
    }
    else
    {
        // no separation strategy given - we include the possible strategies here
        if (_globalStructure.ContainsTabs)
        {
            separations.Add(new SingleCharSeparationStrategy('\t'));
        }

        if (_globalStructure.ContainsCommas)
        {
            separations.Add(new SingleCharSeparationStrategy(','));
        }

        if (_globalStructure.ContainsSemicolons)
        {
            separations.Add(new SingleCharSeparationStrategy(';'));
        }

        if (_globalStructure.FixedBoundaries != null)
        {
            IAsciiSeparationStrategy fixedWidth = _globalStructure.RecognizedTabSize == 1
                ? (IAsciiSeparationStrategy)new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries)
                : (IAsciiSeparationStrategy)new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize);
            separations.Add(fixedWidth);
        }

        if (separations.Count == 0)
        {
            separations.Add(new SkipWhiteSpaceSeparationStrategy());
        }

        separations.Add(new SingleLineSeparationStrategy()); // this separation strategy must always be considered
    }

    // combine every separation strategy with every number format and every DateTime format
    var combinations = new HashSet<AsciiLineAnalysisOption>();
    foreach (var separation in separations)
    {
        foreach (var numberFormat in numberFormats)
        {
            foreach (var dateTimeFormat in dateTimeFormats)
            {
                combinations.Add(new AsciiLineAnalysisOption(separation, numberFormat, dateTimeFormat));
            }
        }
    }

    // remove all those keys where the char of the single char separation strategy is equal to the number format's decimal separator
    // (iterate over a snapshot, because the set is modified inside the loop)
    foreach (AsciiLineAnalysisOption combination in combinations.ToArray())
    {
        if (!(combination.SeparationStrategy is SingleCharSeparationStrategy))
        {
            continue;
        }

        string separator = ((SingleCharSeparationStrategy)combination.SeparationStrategy).SeparatorChar.ToString();
        if (separator == combination.NumberFormat.NumberFormat.NumberDecimalSeparator)
        {
            combinations.Remove(combination);
        }
    }

    _lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(combinations);
}
/// <summary>
/// Analyzes the first lines of the ascii stream. The analysis itself is delegated to
/// <c>InternalAnalyze</c> of a newly created <see cref="AsciiDocumentAnalysis"/> instance.
/// </summary>
/// <param name="importOptions">The import options. Some of the fields can already be filled with known values; these are passed on to the analysis.
/// If <c>null</c>, a new instance is created.</param>
/// <param name="stream">The ascii stream to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <returns>Import options that can be used in a following step to read in the ascii stream.
/// The same instance is returned as given by the parameter <paramref name="importOptions"/>; if that was <c>null</c>, the newly created instance is returned.
/// If the analysis finds no body lines, the import options are not modified and therefore may be not fully specified.</returns>
public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
{
    AsciiImportOptions target = importOptions;
    if (null == target)
    {
        target = new AsciiImportOptions();
    }

    var documentAnalysis = new AsciiDocumentAnalysis();
    documentAnalysis.InternalAnalyze(target, stream, analysisOptions);

    return target;
}
/// <summary>
/// Analyzes the given file and then shows the ASCII import options dialog with the result of the analysis.
/// </summary>
/// <param name="fileName">Name of the file to analyze.</param>
/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
/// <param name="importOptions">On return, contains the model object of the dialog controller if the user has confirmed the dialog;
/// otherwise the import options that resulted from the analysis.</param>
/// <returns><c>True</c> if the user confirms this dialog (clicks OK). <c>False</c> if the user cancels this dialog.</returns>
public static bool ShowAsciiImportOptionsDialog(string fileName, AsciiDocumentAnalysisOptions analysisOptions, out AsciiImportOptions importOptions)
{
    importOptions = new AsciiImportOptions();

    using (FileStream inputStream = AsciiImporter.GetAsciiInputFileStream(fileName))
    {
        // pre-fill the dialog with the outcome of the automatic analysis
        importOptions = AsciiDocumentAnalysis.Analyze(new AsciiImportOptions(), inputStream, analysisOptions);

        // the controller receives both the analyzed options and the still open stream
        var controllerArguments = new object[] { importOptions, inputStream };
        var controller = (Altaxo.Gui.IMVCAController)Current.Gui.GetControllerAndControl(
            controllerArguments,
            typeof(Altaxo.Gui.IMVCAController),
            Gui.UseDocument.Directly);

        bool isConfirmed = Current.Gui.ShowDialog(controller, "Choose Ascii import options");
        if (isConfirmed)
        {
            importOptions = (AsciiImportOptions)controller.ModelObject;
        }

        return isConfirmed;
    }
}