Ejemplo n.º 1
0
        /// <summary>
        /// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance that is initialized with the given cultures.
        /// </summary>
        /// <param name="cultures">The cultures that are handed to <c>InitializeWithCultures</c> together with the new instance.</param>
        /// <returns>The newly created instance after it has been initialized by <c>InitializeWithCultures</c>.</returns>
        public static AsciiDocumentAnalysisOptions GetOptionsForCultures(params System.Globalization.CultureInfo[] cultures)
        {
            var result = new AsciiDocumentAnalysisOptions();
            InitializeWithCultures(result, cultures);

            return result;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance that is initialized with the default system values.
        /// </summary>
        /// <returns>The newly created instance after it has been initialized by <c>InitializeDefaultSystemValues</c>.</returns>
        public static AsciiDocumentAnalysisOptions GetDefaultSystemOptions()
        {
            var result = new AsciiDocumentAnalysisOptions();
            InitializeDefaultSystemValues(result);

            return result;
        }
Ejemplo n.º 3
0
		/// <summary>
		/// Analyzes the leading lines of the ascii stream and stores the findings in the import options.
		/// </summary>
		/// <param name="importOptions">The import options. Some of the fields can already be filled with useful values; since those known fields need not be determined, the analysis then runs faster. May be <c>null</c>, in which case a new instance is created.</param>
		/// <param name="stream">The ascii stream to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <returns>Import options that can be used in a following step to read in the ascii stream. If the stream contains no data, the returned import options will not be fully specified.
		/// The instance given by <paramref name="importOptions"/> is returned, or a new instance if <paramref name="importOptions"/> was <c>null</c>.</returns>
		public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// Use the caller's instance when there is one, otherwise start from a fresh one
			var effectiveOptions = (importOptions == null) ? new AsciiImportOptions() : importOptions;

			new AsciiDocumentAnalysis().InternalAnalyze(effectiveOptions, stream, analysisOptions);
			return effectiveOptions;
		}
Ejemplo n.º 4
0
 /// <summary>
 /// Initializes an instance of <see cref="AsciiDocumentAnalysisOptions"/> with the default system values,
 /// i.e. with the invariant culture, the current culture, the current UI culture and the installed UI culture.
 /// </summary>
 /// <param name="options">The options instance to initialize.</param>
 protected static void InitializeDefaultSystemValues(AsciiDocumentAnalysisOptions options)
 {
     // Same cultures, in the same order, as they are handed to InitializeWithCultures
     var systemCultures = new System.Globalization.CultureInfo[]
     {
         System.Globalization.CultureInfo.InvariantCulture,
         System.Globalization.CultureInfo.CurrentCulture,
         System.Globalization.CultureInfo.CurrentUICulture,
         System.Globalization.CultureInfo.InstalledUICulture
     };

     InitializeWithCultures(options, systemCultures);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Resets an instance of <see cref="AsciiDocumentAnalysisOptions"/> and registers the given cultures
        /// both as number formats to test and as date/time formats to test.
        /// The number of lines to analyze is set to 30.
        /// </summary>
        /// <param name="options">The options to initialize. Must not be <c>null</c>.</param>
        /// <param name="cultures">The cultures to test. If this is <c>null</c> or empty, both format lists are left empty. Entries that are <c>null</c> are skipped.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
        protected static void InitializeWithCultures(AsciiDocumentAnalysisOptions options, params CultureInfo[] cultures)
        {
            if (options == null)
            {
                throw new System.ArgumentNullException("options");
            }

            options._numberOfLinesToAnalyze = 30;
            options._numberFormatsToTest.Clear();
            options._dateTimeFormatsToTest.Clear();

            // A params array can be passed explicitly as null; treat that like "no cultures" instead of failing in the loop
            if (cultures == null)
            {
                return;
            }

            foreach (var culture in cultures)
            {
                // A null entry cannot provide any format information, so it is not registered
                if (culture == null)
                {
                    continue;
                }

                options._numberFormatsToTest.Add(culture);
                options._dateTimeFormatsToTest.Add(culture);
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Checks the member values of the given options (for instance after deserialization) and replaces invalid ones by valid values.
 /// </summary>
 /// <param name="options">The options to check and, where necessary, to correct.</param>
 protected static void TestAndAdjustMembersToValidValues(AsciiDocumentAnalysisOptions options)
 {
     // The number of lines must be positive; otherwise the default is used
     if (0 >= options.NumberOfLinesToAnalyze)
     {
         options.NumberOfLinesToAnalyze = DefaultNumberOfLinesToAnalyze;
     }

     // An empty list of number formats is filled with the invariant culture
     if (0 == options.NumberFormatsToTest.Count)
     {
         options.NumberFormatsToTest.Add(CultureInfo.InvariantCulture);
     }

     // An empty list of date/time formats is filled with the invariant culture
     if (0 == options.DateTimeFormatsToTest.Count)
     {
         options.DateTimeFormatsToTest.Add(CultureInfo.InvariantCulture);
     }
 }
Ejemplo n.º 7
0
		/// <summary>
		/// Analyzes the first lines of the ascii stream and writes the results into <paramref name="importOptions"/>.
		/// </summary>
		/// <param name="importOptions">The import options. This can already contain known values: a known number of main header lines and a known index of the caption line are taken over unchanged. On return, this instance should be ready to be used to import ascii data, except when no body lines could be read; in that case the method returns without changing it.</param>
		/// <param name="stream">The ascii stream to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <exception cref="ArgumentNullException"><paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
		public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// The parameter name reported by the exception must match the declared parameter name exactly
			if (null == stream)
			{
				throw new ArgumentNullException("stream");
			}
			if (null == analysisOptions)
			{
				throw new ArgumentNullException("analysisOptions");
			}
			if (null == importOptions)
			{
				throw new ArgumentNullException("importOptions");
			}

			// Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
			ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

			if (_bodyLines.Count == 0)
			{
				return; // there is nothing to analyze, importOptions is left as it is
			}

			// Analyze the whitespace structure of the body lines, find out if there is a fixed column width
			_globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

			// Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
			SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

			// Analyze each of the first few lines with all possible separation strategies
			_lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

			// Do the analysis itself in parallel for each of the lines; every iteration writes only to its own array slot
			System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

			// for debugging activate the next line and paste the data into notepad:
			// PutRecognizedStructuresToClipboard(result, separationStrategies);

			EvaluateScoringOfAllLineAnalysisOptions();

			// Evaluate the best line analysis option. The results read further below are taken from
			// _highestScoredLineAnalysisOption and _lineAnalysisOptionsScoring (presumably set by the two Evaluate calls - confirm).
			EvaluateHighestScoredLineAnalysisOption();

			// look how many header lines are in the file, unless the caller already knows the number
			if (null == importOptions.NumberOfMainHeaderLines)
			{
				EvaluateNumberOfMainHeaderLines();
			}
			else
			{
				_numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
			}

			// get the index of the caption line, unless the caller already knows it
			if (null == importOptions.IndexOfCaptionLine)
			{
				EvaluateIndexOfCaptionLine();
			}
			else
			{
				_indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
			}

			// Transfer the results of the analysis to the import options
			importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
			importOptions.IndexOfCaptionLine = _indexOfCaptionLine;

			importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
			importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
			importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

			importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
		}
Ejemplo n.º 8
0
		/// <summary>
		/// Builds all combinations of separation strategy, number format and date/time format that are to be tested,
		/// and stores them in <c>_lineAnalysisOptionsToTest</c>.
		/// </summary>
		/// <param name="importOptions">The import options. A number format culture, date/time format culture or separation strategy that is already set here is the only one used for its category.</param>
		/// <param name="analysisOptions">Provides the number formats and date/time formats to test for the categories that are not set in <paramref name="importOptions"/>.</param>
		private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// number formats: take the candidates from the analysis options unless the import options prescribe one
			var numberCultures = new List<System.Globalization.CultureInfo>();
			if (null == importOptions.NumberFormatCulture)
			{
				numberCultures.AddRange(analysisOptions.NumberFormatsToTest);
				if (numberCultures.Count == 0)
				{
					numberCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
				}
			}
			else
			{
				numberCultures.Add(importOptions.NumberFormatCulture);
			}

			// DateTime formats: same scheme as for the number formats
			var dateTimeCultures = new List<System.Globalization.CultureInfo>();
			if (null == importOptions.DateTimeFormatCulture)
			{
				dateTimeCultures.AddRange(analysisOptions.DateTimeFormatsToTest);
				if (dateTimeCultures.Count == 0)
				{
					dateTimeCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
				}
			}
			else
			{
				dateTimeCultures.Add(importOptions.DateTimeFormatCulture);
			}

			// separation strategies
			var strategies = new List<IAsciiSeparationStrategy>();
			if (null == importOptions.SeparationStrategy)
			{
				// no separation strategy given - include the strategies suggested by the global structure
				if (_globalStructure.ContainsTabs)
				{
					strategies.Add(new SingleCharSeparationStrategy('\t'));
				}
				if (_globalStructure.ContainsCommas)
				{
					strategies.Add(new SingleCharSeparationStrategy(','));
				}
				if (_globalStructure.ContainsSemicolons)
				{
					strategies.Add(new SingleCharSeparationStrategy(';'));
				}
				if (_globalStructure.FixedBoundaries != null)
				{
					if (_globalStructure.RecognizedTabSize == 1)
					{
						strategies.Add(new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries));
					}
					else
					{
						strategies.Add(new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize));
					}
				}
				if (strategies.Count == 0)
				{
					strategies.Add(new SkipWhiteSpaceSeparationStrategy());
				}

				// this separation strategy must always be considered
				strategies.Add(new SingleLineSeparationStrategy());
			}
			else
			{
				// a separation strategy is given - use only this one
				strategies.Add(importOptions.SeparationStrategy);
			}

			// combine every strategy with every number format and every date/time format
			var candidates = new HashSet<AsciiLineAnalysisOption>();
			foreach (var strategy in strategies)
			{
				foreach (var numberCulture in numberCultures)
				{
					foreach (var dateTimeCulture in dateTimeCultures)
					{
						candidates.Add(new AsciiLineAnalysisOption(strategy, numberCulture, dateTimeCulture));
					}
				}
			}

			// drop every candidate whose single separator char equals the decimal separator of its number format
			// (iterate over a copy, because the set is modified inside the loop)
			foreach (var candidate in candidates.ToArray())
			{
				if (!(candidate.SeparationStrategy is SingleCharSeparationStrategy))
				{
					continue;
				}

				var separatorText = ((SingleCharSeparationStrategy)candidate.SeparationStrategy).SeparatorChar.ToString();
				if (separatorText == candidate.NumberFormat.NumberFormat.NumberDecimalSeparator)
				{
					candidates.Remove(candidate);
				}
			}

			_lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(candidates);
		}
Ejemplo n.º 9
0
		/// <summary>
		/// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance that is initialized with the given cultures.
		/// </summary>
		/// <param name="cultures">The cultures that are handed to <c>InitializeWithCultures</c> together with the new instance.</param>
		/// <returns>The newly created instance after it has been initialized by <c>InitializeWithCultures</c>.</returns>
		public static AsciiDocumentAnalysisOptions GetOptionsForCultures(params System.Globalization.CultureInfo[] cultures)
		{
			var result = new AsciiDocumentAnalysisOptions();

			InitializeWithCultures(result, cultures);

			return result;
		}
Ejemplo n.º 10
0
		/// <summary>
		/// Creates a new <see cref="AsciiDocumentAnalysisOptions"/> instance that is initialized with the default system values.
		/// </summary>
		/// <returns>The newly created instance after it has been initialized by <c>InitializeDefaultSystemValues</c>.</returns>
		public static AsciiDocumentAnalysisOptions GetDefaultSystemOptions()
		{
			var result = new AsciiDocumentAnalysisOptions();

			InitializeDefaultSystemValues(result);

			return result;
		}
Ejemplo n.º 11
0
        /// <summary>
        /// Analyzes the first lines of the ascii stream and writes the results into <paramref name="importOptions"/>.
        /// </summary>
        /// <param name="importOptions">The import options. This can already contain known values: a known number of main header lines and a known index of the caption line are taken over unchanged. On return, this instance should be ready to be used to import ascii data, except when no body lines could be read; in that case the method returns without changing it.</param>
        /// <param name="stream">The ascii stream to analyze.</param>
        /// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
        public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
        {
            // The parameter name reported by the exception must match the declared parameter name exactly
            if (null == stream)
            {
                throw new ArgumentNullException("stream");
            }
            if (null == analysisOptions)
            {
                throw new ArgumentNullException("analysisOptions");
            }
            if (null == importOptions)
            {
                throw new ArgumentNullException("importOptions");
            }

            // Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
            ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

            if (_bodyLines.Count == 0)
            {
                return; // there is nothing to analyze, importOptions is left as it is
            }

            // Analyze the whitespace structure of the body lines, find out if there is a fixed column width
            _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

            // Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
            SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

            // Analyze each of the first few lines with all possible separation strategies
            _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

            // Do the analysis itself in parallel for each of the lines; every iteration writes only to its own array slot
            System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

            // for debugging activate the next line and paste the data into notepad:
            // PutRecognizedStructuresToClipboard(result, separationStrategies);

            EvaluateScoringOfAllLineAnalysisOptions();

            // Evaluate the best line analysis option. The results read further below are taken from
            // _highestScoredLineAnalysisOption and _lineAnalysisOptionsScoring (presumably set by the two Evaluate calls - confirm).
            EvaluateHighestScoredLineAnalysisOption();

            // look how many header lines are in the file, unless the caller already knows the number
            if (null == importOptions.NumberOfMainHeaderLines)
            {
                EvaluateNumberOfMainHeaderLines();
            }
            else
            {
                _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
            }

            // get the index of the caption line, unless the caller already knows it
            if (null == importOptions.IndexOfCaptionLine)
            {
                EvaluateIndexOfCaptionLine();
            }
            else
            {
                _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
            }

            // Transfer the results of the analysis to the import options
            importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
            importOptions.IndexOfCaptionLine      = _indexOfCaptionLine;

            importOptions.SeparationStrategy    = _highestScoredLineAnalysisOption.SeparationStrategy;
            importOptions.NumberFormatCulture   = _highestScoredLineAnalysisOption.NumberFormat;
            importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

            importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
        }
Ejemplo n.º 12
0
		/// <summary>
		/// Checks the member values of the given options (for instance after deserialization) and replaces invalid ones by valid values.
		/// </summary>
		/// <param name="options">The options to check and, where necessary, to correct.</param>
		protected static void TestAndAdjustMembersToValidValues(AsciiDocumentAnalysisOptions options)
		{
			// The number of lines must be positive; otherwise the default is used
			if (0 >= options.NumberOfLinesToAnalyze)
			{
				options.NumberOfLinesToAnalyze = DefaultNumberOfLinesToAnalyze;
			}

			// An empty list of number formats is filled with the invariant culture
			if (0 == options.NumberFormatsToTest.Count)
			{
				options.NumberFormatsToTest.Add(CultureInfo.InvariantCulture);
			}

			// An empty list of date/time formats is filled with the invariant culture
			if (0 == options.DateTimeFormatsToTest.Count)
			{
				options.DateTimeFormatsToTest.Add(CultureInfo.InvariantCulture);
			}
		}
Ejemplo n.º 13
0
		/// <summary>
		/// Initializes a new instance of the <see cref="AsciiDocumentAnalysisOptions"/> class with values from another instance (copy constructor).
		/// </summary>
		/// <param name="from">Instance to copy the values from. It is passed unchecked to <c>CopyFrom</c>.</param>
		public AsciiDocumentAnalysisOptions(AsciiDocumentAnalysisOptions from)
		{
			// All copying is delegated to CopyFrom, which is not visible in this excerpt.
			// NOTE(review): behavior for a null argument depends on CopyFrom - confirm.
			CopyFrom(from);
		}
Ejemplo n.º 14
0
        /// <summary>
        /// Builds all combinations of separation strategy, number format and date/time format that are to be tested,
        /// and stores them in <c>_lineAnalysisOptionsToTest</c>.
        /// </summary>
        /// <param name="importOptions">The import options. A number format culture, date/time format culture or separation strategy that is already set here is the only one used for its category.</param>
        /// <param name="analysisOptions">Provides the number formats and date/time formats to test for the categories that are not set in <paramref name="importOptions"/>.</param>
        private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
        {
            // number formats: take the candidates from the analysis options unless the import options prescribe one
            var numberCultures = new List<System.Globalization.CultureInfo>();
            if (null == importOptions.NumberFormatCulture)
            {
                numberCultures.AddRange(analysisOptions.NumberFormatsToTest);
                if (numberCultures.Count == 0)
                {
                    numberCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
                }
            }
            else
            {
                numberCultures.Add(importOptions.NumberFormatCulture);
            }

            // DateTime formats: same scheme as for the number formats
            var dateTimeCultures = new List<System.Globalization.CultureInfo>();
            if (null == importOptions.DateTimeFormatCulture)
            {
                dateTimeCultures.AddRange(analysisOptions.DateTimeFormatsToTest);
                if (dateTimeCultures.Count == 0)
                {
                    dateTimeCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
                }
            }
            else
            {
                dateTimeCultures.Add(importOptions.DateTimeFormatCulture);
            }

            // separation strategies
            var strategies = new List<IAsciiSeparationStrategy>();
            if (null == importOptions.SeparationStrategy)
            {
                // no separation strategy given - include the strategies suggested by the global structure
                if (_globalStructure.ContainsTabs)
                {
                    strategies.Add(new SingleCharSeparationStrategy('\t'));
                }
                if (_globalStructure.ContainsCommas)
                {
                    strategies.Add(new SingleCharSeparationStrategy(','));
                }
                if (_globalStructure.ContainsSemicolons)
                {
                    strategies.Add(new SingleCharSeparationStrategy(';'));
                }
                if (_globalStructure.FixedBoundaries != null)
                {
                    if (_globalStructure.RecognizedTabSize == 1)
                    {
                        strategies.Add(new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries));
                    }
                    else
                    {
                        strategies.Add(new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize));
                    }
                }
                if (strategies.Count == 0)
                {
                    strategies.Add(new SkipWhiteSpaceSeparationStrategy());
                }

                // this separation strategy must always be considered
                strategies.Add(new SingleLineSeparationStrategy());
            }
            else
            {
                // a separation strategy is given - use only this one
                strategies.Add(importOptions.SeparationStrategy);
            }

            // combine every strategy with every number format and every date/time format
            var candidates = new HashSet<AsciiLineAnalysisOption>();
            foreach (var strategy in strategies)
            {
                foreach (var numberCulture in numberCultures)
                {
                    foreach (var dateTimeCulture in dateTimeCultures)
                    {
                        candidates.Add(new AsciiLineAnalysisOption(strategy, numberCulture, dateTimeCulture));
                    }
                }
            }

            // drop every candidate whose single separator char equals the decimal separator of its number format
            // (iterate over a copy, because the set is modified inside the loop)
            foreach (var candidate in candidates.ToArray())
            {
                if (!(candidate.SeparationStrategy is SingleCharSeparationStrategy))
                {
                    continue;
                }

                var separatorText = ((SingleCharSeparationStrategy)candidate.SeparationStrategy).SeparatorChar.ToString();
                if (separatorText == candidate.NumberFormat.NumberFormat.NumberDecimalSeparator)
                {
                    candidates.Remove(candidate);
                }
            }

            _lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(candidates);
        }
Ejemplo n.º 15
0
		/// <summary>
		/// Initializes an instance of <see cref="AsciiDocumentAnalysisOptions"/> with the default system values,
		/// i.e. with the invariant culture, the current culture, the current UI culture and the installed UI culture.
		/// </summary>
		/// <param name="options">The options instance to initialize.</param>
		protected static void InitializeDefaultSystemValues(AsciiDocumentAnalysisOptions options)
		{
			// Same cultures, in the same order, as they are handed to InitializeWithCultures
			var systemCultures = new System.Globalization.CultureInfo[]
			{
				System.Globalization.CultureInfo.InvariantCulture,
				System.Globalization.CultureInfo.CurrentCulture,
				System.Globalization.CultureInfo.CurrentUICulture,
				System.Globalization.CultureInfo.InstalledUICulture
			};

			InitializeWithCultures(options, systemCultures);
		}
Ejemplo n.º 16
0
        /// <summary>
        /// Analyzes the leading lines of the ascii stream and stores the findings in the import options.
        /// </summary>
        /// <param name="importOptions">The import options. Some of the fields can already be filled with useful values; since those known fields need not be determined, the analysis then runs faster. May be <c>null</c>, in which case a new instance is created.</param>
        /// <param name="stream">The ascii stream to analyze.</param>
        /// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
        /// <returns>Import options that can be used in a following step to read in the ascii stream. If the stream contains no data, the returned import options will not be fully specified.
        /// The instance given by <paramref name="importOptions"/> is returned, or a new instance if <paramref name="importOptions"/> was <c>null</c>.</returns>
        public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
        {
            // Use the caller's instance when there is one, otherwise start from a fresh one
            var effectiveOptions = (importOptions == null) ? new AsciiImportOptions() : importOptions;

            new AsciiDocumentAnalysis().InternalAnalyze(effectiveOptions, stream, analysisOptions);
            return effectiveOptions;
        }
Ejemplo n.º 17
0
		/// <summary>
		/// Resets an instance of <see cref="AsciiDocumentAnalysisOptions"/> and registers the given cultures
		/// both as number formats to test and as date/time formats to test.
		/// The number of lines to analyze is set to 30.
		/// </summary>
		/// <param name="options">The options to initialize. Must not be <c>null</c>.</param>
		/// <param name="cultures">The cultures to test. If this is <c>null</c> or empty, both format lists are left empty. Entries that are <c>null</c> are skipped.</param>
		/// <exception cref="System.ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
		protected static void InitializeWithCultures(AsciiDocumentAnalysisOptions options, params CultureInfo[] cultures)
		{
			if (options == null)
			{
				throw new System.ArgumentNullException("options");
			}

			options._numberOfLinesToAnalyze = 30;
			options._numberFormatsToTest.Clear();
			options._dateTimeFormatsToTest.Clear();

			// A params array can be passed explicitly as null; treat that like "no cultures" instead of failing in the loop
			if (cultures == null)
			{
				return;
			}

			foreach (var culture in cultures)
			{
				// A null entry cannot provide any format information, so it is not registered
				if (culture == null)
				{
					continue;
				}

				options._numberFormatsToTest.Add(culture);
				options._dateTimeFormatsToTest.Add(culture);
			}
		}
Ejemplo n.º 18
0
		/// <summary>
		/// Analyzes the given file and shows the ASCII import options dialog with the result of the analysis.
		/// </summary>
		/// <param name="fileName">Name of the file to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <param name="importOptions">On return, contains the ASCII import options: the model object of the dialog's controller if the user has confirmed the dialog, otherwise the options found by the analysis.</param>
		/// <returns><c>True</c> if the user confirms this dialog (clicks OK). False if the user cancels this dialog.</returns>
		public static bool ShowAsciiImportOptionsDialog(string fileName, AsciiDocumentAnalysisOptions analysisOptions, out AsciiImportOptions importOptions)
		{
			using (FileStream str = AsciiImporter.GetAsciiInputFileStream(fileName))
			{
				// The out parameter is assigned here, before any return, so no placeholder instance has to be created up front
				importOptions = AsciiDocumentAnalysis.Analyze(new AsciiImportOptions(), str, analysisOptions);

				// The controller receives both the analyzed options and the still open stream
				object[] args = new object[] { importOptions, str };
				var controller = (Altaxo.Gui.IMVCAController)Current.Gui.GetControllerAndControl(args, typeof(Altaxo.Gui.IMVCAController), Gui.UseDocument.Directly);

				if (!Current.Gui.ShowDialog(controller, "Choose Ascii import options"))
				{
					return false; // cancelled: importOptions keeps the result of the analysis
				}

				importOptions = (AsciiImportOptions)controller.ModelObject;
				return true;
			}
		}
Ejemplo n.º 19
0
 /// <summary>
 /// Initializes a new instance of the <see cref="AsciiDocumentAnalysisOptions"/> class with values from another instance (copy constructor).
 /// </summary>
 /// <param name="from">Instance to copy the values from. It is passed unchecked to <c>CopyFrom</c>.</param>
 public AsciiDocumentAnalysisOptions(AsciiDocumentAnalysisOptions from)
 {
     // All copying is delegated to CopyFrom, which is not visible in this excerpt.
     // NOTE(review): behavior for a null argument depends on CopyFrom - confirm.
     CopyFrom(from);
 }