Example #1
0
		/// <summary>
		/// Imports every given ASCII file into a separate, newly created table. The project's root folder is
		/// handed to the importer as target folder.
		/// </summary>
		/// <param name="filenames">Names of the files to import.</param>
		/// <param name="importOptions">Import options passed on to the importer. If <c>null</c>, the importer overload that takes a boolean flag instead of options is called.</param>
		public static void ImportAsciiToMultipleWorksheets(string[] filenames, AsciiImportOptions importOptions)
		{
			var targetFolder = Main.ProjectFolder.RootFolder;

			if (null == importOptions)
			{
				// no options supplied: use the overload with the boolean flag
				AsciiImporter.ImportFilesIntoSeparateNewTables(targetFolder, filenames, true, true);
				return;
			}

			AsciiImporter.ImportFilesIntoSeparateNewTables(targetFolder, filenames, true, importOptions);
		}
Example #2
0
        /// <summary>
        /// Creates a new <see cref="AsciiImportOptions"/> instance and copies the state of this instance
        /// into it by calling <c>CopyFrom</c>.
        /// </summary>
        /// <returns>The newly created copy.</returns>
        object ICloneable.Clone()
        {
            var copy = new AsciiImportOptions();
            copy.CopyFrom(this);
            return copy;
        }
Example #3
0
		/// <summary>
		/// Imports multiple ASCII files into the given table by delegating to
		/// <c>AsciiImporter.ImportFromMultipleAsciiFilesVertically</c>.
		/// </summary>
		/// <param name="dataTable">The table to import into.</param>
		/// <param name="filenames">Names of the files to import.</param>
		/// <param name="importOptions">Import options passed on to the importer. If <c>null</c>, the importer overload that takes a boolean flag instead of options is called.</param>
		public static void ImportAsciiToSingleWorksheetVertically(this DataTable dataTable, string[] filenames, AsciiImportOptions importOptions)
		{
			if (null == importOptions)
			{
				// no options supplied: use the overload with the boolean flag
				AsciiImporter.ImportFromMultipleAsciiFilesVertically(dataTable, filenames, true, true);
			}
			else
			{
				AsciiImporter.ImportFromMultipleAsciiFilesVertically(dataTable, filenames, true, importOptions);
			}
		}
Example #4
0
        /// <summary>
        /// Creates a new <see cref="AsciiImportOptions"/> instance and copies the state of this instance
        /// into it by calling <c>CopyFrom</c>.
        /// </summary>
        /// <returns>The newly created, strongly typed copy.</returns>
        public AsciiImportOptions Clone()
        {
            var copy = new AsciiImportOptions();
            copy.CopyFrom(this);
            return copy;
        }
Example #5
0
		/// <summary>
		/// Analyzes the ascii stream by running a fresh <see cref="AsciiDocumentAnalysis"/> on it.
		/// </summary>
		/// <param name="importOptions">The import options. Fields that already hold useful values need not be determined again, which can speed up the analysis. May be <c>null</c>.</param>
		/// <param name="stream">The ascii stream to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <returns>Import options that can be used in a following step to read in the ascii stream. If the stream contains no data, the returned import options will not be fully specified.
		/// The instance given by <paramref name="importOptions"/> is returned; if it was <c>null</c>, a newly created instance is returned instead.</returns>
		public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// fall back to a fresh options object when the caller supplied none
			var options = importOptions ?? new AsciiImportOptions();

			new AsciiDocumentAnalysis().InternalAnalyze(options, stream, analysisOptions);
			return options;
		}
Example #6
0
        /// <summary>
        /// Enumerates the child document nodes held by this instance together with their names.
        /// A child is yielded only when the corresponding field is not <c>null</c>; each entry also carries
        /// a delegate that sets that field to <c>null</c>.
        /// </summary>
        /// <returns>Up to two entries, named "AsciiImportOptions" and "ImportOptions" (in this order).</returns>
        protected override IEnumerable <Main.DocumentNodeAndName> GetDocumentNodeChildrenWithName()
        {
            if (_asciiImportOptions != null)
                yield return new Main.DocumentNodeAndName(_asciiImportOptions, () => _asciiImportOptions = null, "AsciiImportOptions");

            if (_importOptions != null)
                yield return new Main.DocumentNodeAndName(_importOptions, () => _importOptions = null, "ImportOptions");
        }
Example #7
0
        /// <summary>
        /// Analyzes the ascii stream by running a fresh <see cref="AsciiDocumentAnalysis"/> on it.
        /// </summary>
        /// <param name="importOptions">The import options. Fields that already hold useful values need not be determined again, which can speed up the analysis. May be <c>null</c>.</param>
        /// <param name="stream">The ascii stream to analyze.</param>
        /// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
        /// <returns>Import options that can be used in a following step to read in the ascii stream. If the stream contains no data, the returned import options will not be fully specified.
        /// The instance given by <paramref name="importOptions"/> is returned; if it was <c>null</c>, a newly created instance is returned instead.</returns>
        public static AsciiImportOptions Analyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
        {
            // fall back to a fresh options object when the caller supplied none
            var options = importOptions ?? new AsciiImportOptions();
            var analyzer = new AsciiDocumentAnalysis();

            analyzer.InternalAnalyze(options, stream, analysisOptions);
            return options;
        }
Example #8
0
 /// <summary>
 /// Creates a data source that refers to the given ASCII files.
 /// </summary>
 /// <param name="fileNames">Names of the source files; each one is wrapped in an <c>AbsoluteAndRelativeFileName</c>.</param>
 /// <param name="options">The ASCII import options; passed to <c>ChildCopyToMember</c> together with a reference to the <c>_asciiImportOptions</c> field.</param>
 public AsciiImportDataSource(IEnumerable <string> fileNames, AsciiImportOptions options)
 {
     var files = new List <AbsoluteAndRelativeFileName>();
     foreach (var name in fileNames)
     {
         files.Add(new AbsoluteAndRelativeFileName(name));
     }
     _asciiFiles = files;

     ChildCopyToMember(ref _asciiImportOptions, options);

     // fresh data source import options, parented to this instance before being stored
     var dataSourceOptions = new DataSourceImportOptions();
     dataSourceOptions.ParentObject = this;
     _importOptions = dataSourceOptions;
 }
Example #9
0
        /// <summary>
        /// Reads delimited text from this instance's stream (rewound to its beginning) and stores the parsed
        /// columns in the given table.
        /// </summary>
        /// <param name="impopt">Import options: delimiter character, number of header lines, the optional recognized column structure and the decimal separator statistics.</param>
        /// <param name="table">The table that receives the imported data columns and property columns.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="impopt"/> or <paramref name="table"/> is <c>null</c>.</exception>
        /// <remarks>
        /// The first header line provides the column names, every further header line fills one text property column.
        /// Data cells that are empty or cannot be parsed as the type of their column are skipped.
        /// Columns whose type is not known in advance get their type from the first non-empty cell
        /// (double, then date/time, then text). Columns that remain untyped are added as double columns.
        /// </remarks>
        public void ImportAscii(AsciiImportOptions impopt, Altaxo.Data.DataTable table)
        {
            // fail early, before the whole stream is parsed
            if (null == impopt)
            {
                throw new ArgumentNullException("impopt");
            }
            if (null == table)
            {
                throw new ArgumentNullException("table");
            }

            string sLine;

            stream.Position = 0; // rewind the stream to the beginning

            // NOTE: the reader is left undisposed; disposing it would also close the underlying stream.
            var sr          = new System.IO.StreamReader(stream, System.Text.Encoding.Default, true);
            var newcols     = new Altaxo.Data.DataColumnCollection();
            var newpropcols = new Altaxo.Data.DataColumnCollection();

            // in case a structure is provided, allocate the columns already
            if (null != impopt.recognizedStructure)
            {
                for (int i = 0; i < impopt.recognizedStructure.Count; i++)
                {
                    if (impopt.recognizedStructure[i] == typeof(Double))
                    {
                        newcols.Add(new Altaxo.Data.DoubleColumn());
                    }
                    else if (impopt.recognizedStructure[i] == typeof(DateTime))
                    {
                        newcols.Add(new Altaxo.Data.DateTimeColumn());
                    }
                    else if (impopt.recognizedStructure[i] == typeof(string))
                    {
                        newcols.Add(new Altaxo.Data.TextColumn());
                    }
                    else
                    {
                        newcols.Add(new Altaxo.Data.DBNullColumn());
                    }
                }
            }

            // if there is more than one header line, allocate one property column for each additional header line
            if (impopt.nMainHeaderLines > 1)
            {
                int toAdd = impopt.nMainHeaderLines - 1;
                for (int i = 0; i < toAdd; i++)
                {
                    newpropcols.Add(new Data.TextColumn());
                }
            }

            // if decimal separator statistics is provided by impopt, create a number format info object
            System.Globalization.NumberFormatInfo numberFormatInfo;
            if (impopt.m_DecimalSeparatorCommaCount > 0 || impopt.m_DecimalSeparatorDotCount > 0)
            {
                numberFormatInfo = (System.Globalization.NumberFormatInfo)System.Globalization.NumberFormatInfo.CurrentInfo.Clone();

                // analyse the statistics; on a tie the cloned current format is used unchanged
                string decimalSeparator = null;
                if (impopt.m_DecimalSeparatorCommaCount > impopt.m_DecimalSeparatorDotCount)
                {
                    decimalSeparator = ","; // the comma is the decimal separator
                }
                else if (impopt.m_DecimalSeparatorCommaCount < impopt.m_DecimalSeparatorDotCount)
                {
                    decimalSeparator = "."; // the dot is the decimal separator
                }

                if (null != decimalSeparator)
                {
                    numberFormatInfo.NumberDecimalSeparator = decimalSeparator;
                    if (numberFormatInfo.NumberGroupSeparator == numberFormatInfo.NumberDecimalSeparator)
                    {
                        numberFormatInfo.NumberGroupSeparator = ""; // the group separator would collide with the decimal separator, so remove it
                    }
                }
            }
            else // no decimal separator statistics is provided, so use the number format of the current culture
            {
                numberFormatInfo = System.Globalization.NumberFormatInfo.CurrentInfo;
            }

            // same number styles that System.Convert.ToDouble(string, IFormatProvider) applies
            const System.Globalization.NumberStyles doubleStyles =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

            char[] splitchar = new char[] { impopt.cDelimiter };

            // first of all, read the header if existent
            for (int i = 0; i < impopt.nMainHeaderLines; i++)
            {
                sLine = sr.ReadLine();
                if (null == sLine)
                {
                    break;
                }

                string[] substr = sLine.Split(splitchar);
                int      cnt    = Math.Min(substr.Length, newcols.ColumnCount); // tokens beyond the allocated columns are ignored
                for (int k = 0; k < cnt; k++)
                {
                    if (substr[k].Length == 0)
                    {
                        continue;
                    }

                    if (i == 0) // is it the column name line
                    {
                        newcols.SetColumnName(k, substr[k]);
                    }
                    else // these are treated as additional properties
                    {
                        ((Data.DataColumn)newpropcols[i - 1])[k] = substr[k]; // set the properties
                    }
                }
            }

            // now read the data lines; i is the zero-based row index
            for (int i = 0; null != (sLine = sr.ReadLine()); i++)
            {
                string[] substr = sLine.Split(splitchar);
                int      cnt    = Math.Min(substr.Length, newcols.ColumnCount);
                for (int k = 0; k < cnt; k++)
                {
                    string token = substr[k];
                    if (token.Length == 0)
                    {
                        continue;
                    }

                    double   val;
                    DateTime valDateTime;

                    if (newcols[k] is Altaxo.Data.DoubleColumn)
                    {
                        // unparsable cells are skipped
                        if (double.TryParse(token, doubleStyles, numberFormatInfo, out val))
                        {
                            ((Altaxo.Data.DoubleColumn)newcols[k])[i] = val;
                        }
                    }
                    else if (newcols[k] is Altaxo.Data.DateTimeColumn)
                    {
                        // unparsable cells are skipped
                        if (DateTime.TryParse(token, out valDateTime))
                        {
                            ((Altaxo.Data.DateTimeColumn)newcols[k])[i] = valDateTime;
                        }
                    }
                    else if (newcols[k] is Altaxo.Data.TextColumn)
                    {
                        ((Altaxo.Data.TextColumn)newcols[k])[i] = token;
                    }
                    else if (null == newcols[k] || newcols[k] is Altaxo.Data.DBNullColumn)
                    {
                        // The column type is not known yet: derive it from this first non-empty cell.
                        // The detected number format is used here too, so that this first value is parsed
                        // the same way as all following values of the column.
                        if (double.TryParse(token, doubleStyles, numberFormatInfo, out val))
                        {
                            Altaxo.Data.DoubleColumn newc = new Altaxo.Data.DoubleColumn();
                            newc[i] = val;
                            newcols.Replace(k, newc);
                        }
                        else if (DateTime.TryParse(token, out valDateTime))
                        {
                            Altaxo.Data.DateTimeColumn newc = new Altaxo.Data.DateTimeColumn();
                            newc[i] = valDateTime;
                            newcols.Replace(k, newc);
                        }
                        else
                        {
                            Altaxo.Data.TextColumn newc = new Altaxo.Data.TextColumn();
                            newc[i] = token;
                            newcols.Replace(k, newc);
                        }
                    }
                } // end of for all cols
            }     // end of for all lines

            // insert the new columns or replace the old ones
            table.Suspend();
            try
            {
                bool tableWasEmptyBefore = table.DataColumns.ColumnCount == 0;

                for (int i = 0; i < newcols.ColumnCount; i++)
                {
                    if (newcols[i] is Altaxo.Data.DBNullColumn) // if the type is undefined, use a new DoubleColumn
                    {
                        table.DataColumns.CopyOrReplaceOrAdd(i, new Altaxo.Data.DoubleColumn(), newcols.GetColumnName(i));
                    }
                    else
                    {
                        table.DataColumns.CopyOrReplaceOrAdd(i, newcols[i], newcols.GetColumnName(i));
                    }

                    // set the first column as x-column if the table was empty before, and there are more than one column
                    if (i == 0 && tableWasEmptyBefore && newcols.ColumnCount > 1)
                    {
                        table.DataColumns.SetColumnKind(0, Altaxo.Data.ColumnKind.X);
                    }
                } // end for loop

                // add the property columns
                for (int i = 0; i < newpropcols.ColumnCount; i++)
                {
                    table.PropCols.CopyOrReplaceOrAdd(i, newpropcols[i], newpropcols.GetColumnName(i));
                }
            }
            finally
            {
                table.Resume(); // resume even if copying a column failed
            }
        } // end of function ImportAscii
Example #10
0
		/// <summary>
		/// Analyzes the given file and shows the ASCII import options dialog, preset with the analysis result.
		/// </summary>
		/// <param name="fileName">Name of the file to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <param name="importOptions">On return, contains the model object of the dialog's controller if the user confirmed the dialog; otherwise the options that resulted from the analysis.</param>
		/// <returns><c>True</c> if the user confirms this dialog (clicks OK). <c>False</c> if the user cancels this dialog.</returns>
		public static bool ShowAsciiImportOptionsDialog(string fileName, AsciiDocumentAnalysisOptions analysisOptions, out AsciiImportOptions importOptions)
		{
			importOptions = new AsciiImportOptions();

			using (FileStream inputStream = AsciiImporter.GetAsciiInputFileStream(fileName))
			{
				importOptions = AsciiDocumentAnalysis.Analyze(new AsciiImportOptions(), inputStream, analysisOptions);

				// the controller receives both the analyzed options and the open stream
				var controller = (Altaxo.Gui.IMVCAController)Current.Gui.GetControllerAndControl(
					new object[] { importOptions, inputStream },
					typeof(Altaxo.Gui.IMVCAController),
					Gui.UseDocument.Directly);

				bool confirmed = Current.Gui.ShowDialog(controller, "Choose Ascii import options");
				if (confirmed)
				{
					importOptions = (AsciiImportOptions)controller.ModelObject;
				}

				return confirmed;
			}
		}
Example #11
0
		/// <summary>
		/// Creates a data source for a single ASCII file by forwarding to the constructor that takes
		/// a sequence of file names.
		/// </summary>
		/// <param name="fileName">Name of the source file.</param>
		/// <param name="options">The ASCII import options, passed on unchanged.</param>
		public AsciiImportDataSource(string fileName, AsciiImportOptions options)
			: this(new[] { fileName }, options)
		{
		}
Example #12
0
		/// <summary>
		/// Enumerates the child document nodes held by this instance together with their names.
		/// A child is yielded only when the corresponding field is not <c>null</c>; each entry also carries
		/// a delegate that sets that field to <c>null</c>.
		/// </summary>
		/// <returns>Up to two entries, named "AsciiImportOptions" and "ImportOptions" (in this order).</returns>
		protected override IEnumerable<Main.DocumentNodeAndName> GetDocumentNodeChildrenWithName()
		{
			if (_asciiImportOptions != null)
			{
				yield return new Main.DocumentNodeAndName(_asciiImportOptions, () => _asciiImportOptions = null, "AsciiImportOptions");
			}

			if (_importOptions != null)
			{
				yield return new Main.DocumentNodeAndName(_importOptions, () => _importOptions = null, "ImportOptions");
			}
		}
Example #13
0
		/// <summary>
		/// Imports a couple of ASCII files into one (!) table. The first column of each file is considered to be the x-column, and if it matches another x-column, the newly imported columns will get the same column group.
		/// </summary>
		/// <param name="fileNames">The names of the files to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparer.</param>
		/// <param name="importOptions">Options used to import the Ascii files. This parameter can be <c>null</c>. It is only assigned by this method when
		/// <paramref name="determineImportOptionsSeparatelyForEachFile"/> is <c>false</c> and the provided options are <c>null</c> or not fully specified; in that case it receives the options determined while importing a file.</param>
		/// <param name="determineImportOptionsSeparatelyForEachFile">
		/// If <c>true</c>, each file is imported without passing any import options, and <paramref name="importOptions"/> is neither read nor changed by this method.
		/// If <c>false</c>, the import options are either provided by the parameter <paramref name="importOptions"/> (if not null and fully specified), or determined during import of the first file. The so determined importOptions are then used to import the other files, as long as they are fully specified.
		/// </param>
		/// <param name="errors">Always <c>null</c> on return: this implementation does not record any error description.</param>
		/// <returns>A newly created table (not included in the project) containing the imported data.</returns>
		private static DataTable InternalImportMultipleFilesHorizontallyIntoNewTable(IEnumerable<string> fileNames, bool sortFileNames, ref AsciiImportOptions importOptions, bool determineImportOptionsSeparatelyForEachFile, out string errors)
		{
			var dataTable = new DataTable(); // destination table

			DataColumn xcol = null;  // x-column that the most recently imported y-columns were attached to
			int lastColumnGroup = 0; // column group of that x-column

			// add also a property column named "FilePath" if not existing so far
			TextColumn filePathPropCol = (TextColumn)dataTable.PropCols.EnsureExistence("FilePath", typeof(TextColumn), ColumnKind.Label, 0);
			filePathPropCol.Clear();

			if (sortFileNames)
				fileNames = fileNames.OrderBy(name => name);

			foreach (string fileName in fileNames)
			{
				DataTable srcTable = new DataTable();
				if (determineImportOptionsSeparatelyForEachFile)
					ImportFromAsciiFile(srcTable, fileName);
				else if (null != importOptions && importOptions.IsFullySpecified)
					ImportFromAsciiFile(srcTable, fileName, importOptions);
				else
					ImportFromAsciiFile(srcTable, fileName, out importOptions);

				// nothing was imported from this file
				if (srcTable.DataColumns.ColumnCount == 0)
					continue;

				DataColumn xvalues = srcTable.DataColumns[0];
				bool bMatchsXColumn = false;

				// first look if our default xcolumn matches the xvalues
				if (null != xcol)
					bMatchsXColumn = ValuesMatch(xvalues, xcol);

				// if no match, then consider all xcolumns from right to left, maybe some fits
				if (!bMatchsXColumn)
				{
					for (int ncol = dataTable.DataColumns.ColumnCount - 1; ncol >= 0; ncol--)
					{
						if ((ColumnKind.X == dataTable.DataColumns.GetColumnKind(ncol)) &&
							(ValuesMatch(xvalues, dataTable.DataColumns[ncol]))
							)
						{
							xcol = dataTable.DataColumns[ncol];
							lastColumnGroup = dataTable.DataColumns.GetColumnGroup(xcol);
							bMatchsXColumn = true;
							break;
						}
					}
				}

				// create a new x column if no existing one matches
				if (!bMatchsXColumn)
				{
					xcol = (DataColumn)xvalues.Clone();
					lastColumnGroup = dataTable.DataColumns.GetUnusedColumnGroupNumber();
					dataTable.DataColumns.Add(xcol, srcTable.DataColumns.GetColumnName(0), ColumnKind.X, lastColumnGroup);
				}

				for (int i = 1; i < srcTable.DataColumns.ColumnCount; i++)
				{
					// now add the y-values
					DataColumn ycol = (DataColumn)srcTable.DataColumns[i].Clone();
					dataTable.DataColumns.Add(ycol,
						dataTable.DataColumns.FindUniqueColumnName(srcTable.DataColumns.GetColumnName(i)),
						ColumnKind.V,
						lastColumnGroup);

					// now set the file name property cell
					int destcolnumber = dataTable.DataColumns.GetColumnNumber(ycol);
					filePathPropCol[destcolnumber] = fileName;

					// now set the imported property cells
					for (int s = 0; s < srcTable.PropCols.ColumnCount; s++)
					{
						DataColumn dest = dataTable.PropCols.EnsureExistence(srcTable.PropCols.GetColumnName(s), srcTable.PropCols[s].GetType(), ColumnKind.V, 0);
						dest.SetValueAt(destcolnumber, srcTable.PropCols[s][i]);
					}
				}
			} // foreach file

			// no error description is collected by this implementation
			errors = null;
			return dataTable;
		}
Example #14
0
		/// <summary>
		/// Creates a new <see cref="AsciiImportOptions"/> instance and copies the state of this instance
		/// into it by calling <c>CopyFrom</c>.
		/// </summary>
		/// <returns>The newly created copy.</returns>
		object ICloneable.Clone()
		{
			var copy = new AsciiImportOptions();

			copy.CopyFrom(this);

			return copy;
		}
Example #15
0
		/// <summary>
		/// Imports from an ASCII file into an existing table, and afterwards sets or updates the ASCII import data source of the table.
		/// </summary>
		/// <param name="dataTable">The data table to import into.</param>
		/// <param name="fileName">File name of the file to import.</param>
		/// <param name="importOptions">On return, contains the import options that were used to import the file.</param>
		/// <exception cref="System.ArgumentNullException">
		/// Argument dataTable is null
		/// or
		/// Argument fileName is null or empty
		/// </exception>
		public static void ImportFromAsciiFile(this DataTable dataTable, string fileName, out AsciiImportOptions importOptions)
		{
			// The first constructor argument is the parameter name, the second one the message.
			if (null == dataTable)
				throw new ArgumentNullException("dataTable", "Argument dataTable is null");
			if (string.IsNullOrEmpty(fileName))
				throw new ArgumentNullException("fileName", "Argument fileName is null or empty");

			// the using statement closes the stream, so no explicit Close() call is required
			using (var myStream = GetAsciiInputFileStream(fileName))
			{
				ImportFromAsciiStream(dataTable, myStream, FileUrlStart + fileName, out importOptions);
			}

			// finally set or change the data source of the table
			AddOrUpdateAsciiImportDataSource(dataTable, new string[] { fileName }, importOptions);
		}
Example #16
0
		/// <summary>
		/// Imports from an ASCII text provided as string into an existing table.
		/// The text is written to an in-memory stream, which is then imported like any other Ascii stream.
		/// </summary>
		/// <param name="dataTable">The data table to import into.</param>
		/// <param name="asciiText">The Ascii text that is to be imported.</param>
		/// <param name="importOptions">On return, contains the import options that were used to import the Ascii text.</param>
		/// <exception cref="System.ArgumentNullException">
		/// Argument dataTable is null
		/// or
		/// Argument asciiText is null
		/// </exception>
		public static void ImportFromAsciiText(this DataTable dataTable, string asciiText, out AsciiImportOptions importOptions)
		{
			// The first constructor argument is the parameter name, the second one the message.
			if (null == dataTable)
				throw new ArgumentNullException("dataTable", "Argument dataTable is null");
			if (null == asciiText)
				throw new ArgumentNullException("asciiText", "Argument asciiText is null");

			using (var memstream = new MemoryStream())
			{
				using (var textwriter = new StreamWriter(memstream, System.Text.Encoding.UTF8, 512))
				{
					textwriter.Write(asciiText);
					textwriter.Flush();  // do not close the textwriter here (closing it would also close memstream).  TODO NET45 we can close textwriter here if we provide true as 4th argument in the constructor

					// rewind, so that the import starts reading at the beginning of the text
					memstream.Position = 0;
					ImportFromAsciiStream(dataTable, memstream, "Ascii text", out importOptions);
				}
			}
		}
Example #17
0
		/// <summary>
		/// Imports Ascii data from a stream into the data table. No import options are supplied by the caller;
		/// the call is forwarded to <c>InternalImportFromAsciiStream</c> with the options initially set to <c>null</c>.
		/// </summary>
		/// <param name="dataTable">The table where to import into.</param>
		/// <param name="stream">The stream to import from.</param>
		/// <param name="streamOriginHint">Designates a short hint where the provided stream originates from. Can be <c>Null</c> if the origin is unknown.</param>
		/// <param name="importOptions">On return, contains the recognized import options that were used to import from the provided stream.</param>
		public static void ImportFromAsciiStream(this DataTable dataTable, Stream stream, string streamOriginHint, out AsciiImportOptions importOptions)
		{
			// An out parameter must be assigned before it can be passed by ref.
			importOptions = null;
			InternalImportFromAsciiStream(dataTable, stream, streamOriginHint, ref importOptions);
		}
Example #18
0
		} // end of function ImportAscii

		/// <summary>
		/// Imports an Ascii stream into a table using the provided import options.
		/// </summary>
		/// <param name="dataTable">The table into which to import.</param>
		/// <param name="stream">The stream to read from.</param>
		/// <param name="streamOriginHint">Stream origin hint. If the stream was opened from a file, you should prepend <see cref="FileUrlStart"/> to the file name.</param>
		/// <param name="importOptions">The Ascii import options. This parameter must not be <c>null</c>. If the provided options are not fully specified, it is tried to analyse the stream to get fully specified options.</param>
		/// <exception cref="System.ArgumentNullException">
		/// Argument importOptions is null
		/// or
		/// Argument table is null
		/// </exception>
		/// <exception cref="System.ArgumentException">Argument importOptions: importOptions must be fully specified, i.e. all elements of importOptions must be valid. Please run a document analysis in-before to get appropriate values.</exception>
		/// <exception cref="System.ArgumentOutOfRangeException">
		/// Unconsidered AsciiColumnType:  + impopt.RecognizedStructure[i].ToString()
		/// or
		/// Unknown switch case:  + impopt.HeaderLinesDestination.ToString()
		/// </exception>
		public static void ImportFromAsciiStream(this DataTable dataTable, Stream stream, string streamOriginHint, AsciiImportOptions importOptions)
		{
			// The first constructor argument is the parameter name, the second one the message.
			if (importOptions == null)
				throw new ArgumentNullException("importOptions", "Argument importOptions is null");
			if (null == dataTable)
				throw new ArgumentNullException("dataTable", "Argument table is null");

			// importOptions is a by-value parameter here, so a replacement made by the callee is not visible to the caller
			InternalImportFromAsciiStream(dataTable, stream, streamOriginHint, ref importOptions);
		}
Example #19
0
		/// <summary>
		/// Adds (if not already present) or updates (if present) the ASCII import data source of the provided table.
		/// Nothing is done if <paramref name="fileNames"/> is <c>null</c>, or if the table's parent object is not
		/// the data table collection of the current project.
		/// </summary>
		/// <param name="dataTable">The data table on which to set the import data source.</param>
		/// <param name="fileNames">The file names of the files that were imported.</param>
		/// <param name="importOptions">The Ascii import options that were used to import the files.</param>
		private static void AddOrUpdateAsciiImportDataSource(DataTable dataTable, IEnumerable<string> fileNames, AsciiImportOptions importOptions)
		{
			if (null == fileNames || !object.ReferenceEquals(dataTable.ParentObject, Current.Project.DataTableCollection))
			{
				return;
			}

			var existingSource = dataTable.DataSource as AsciiImportDataSource;
			if (null == existingSource)
			{
				// no ASCII import data source so far: create a new one
				dataTable.DataSource = new AsciiImportDataSource(fileNames, importOptions);
				return;
			}

			// update the data source that is already attached to the table
			existingSource.SourceFileNames = fileNames;
			existingSource.AsciiImportOptions = importOptions;
		}
Example #20
0
		/// <summary>
		/// Imports ascii from a file into a new table, using a single character as column separator.
		/// Returns null (!) if nothing is imported.
		/// </summary>
		/// <param name="filename">The file name of the file from which to import.</param>
		/// <param name="separatorChar">The character used to separate the columns</param>
		/// <returns>The table representation of the imported text, or null if nothing is imported.</returns>
		/// <exception cref="System.ArgumentNullException"><paramref name="filename"/> is null or empty.</exception>
		public static DataTable ImportFileIntoNewTable(string filename, char separatorChar)
		{
			// The first constructor argument is the parameter name, the second one the message.
			if (string.IsNullOrEmpty(filename))
				throw new ArgumentNullException("filename", "filename is null or empty");

			// only the separation strategy is preset; all other options keep their initial values
			var defaultImportOptions = new AsciiImportOptions();
			defaultImportOptions.SeparationStrategy = new SingleCharSeparationStrategy(separatorChar);

			using (var stream = GetAsciiInputFileStream(filename))
			{
				return InternalImportStreamIntoNewTable(stream, FileUrlStart + filename, defaultImportOptions);
			}
		}
Example #21
0
		/// <summary>
		/// Imports an Ascii stream into a table. The import options have to be known already.
		/// </summary>
		/// <param name="dataTable">The table into which to import.</param>
		/// <param name="stream">The stream to read from.</param>
		/// <param name="streamOriginHint">Stream origin hint. If the stream was opened from a file, you should prepend <see cref=" FileUrlStart"/> to the file name.</param>
		/// <param name="importOptions">The Ascii import options. This parameter can be null, or the options can be not fully specified. In this case the method tries to determine the import options by analyzing the stream.</param>
		/// <exception cref="System.ArgumentNullException">
		/// Argument importOptions is null
		/// or
		/// Argument table is null
		/// </exception>
		/// <exception cref="System.ArgumentException">Argument importOptions: importOptions must be fully specified, i.e. all elements of importOptions must be valid. Please run a document analysis in-before to get appropriate values.</exception>
		/// <exception cref="System.ArgumentOutOfRangeException">
		/// Unconsidered AsciiColumnType:  + impopt.RecognizedStructure[i].ToString()
		/// or
		/// Unknown switch case:  + impopt.HeaderLinesDestination.ToString()
		/// </exception>
		private static void InternalImportFromAsciiStream(this DataTable dataTable, Stream stream, string streamOriginHint, ref AsciiImportOptions importOptions)
		{
			if (null == importOptions || !importOptions.IsFullySpecified)
			{
				var analysisOptions = GetDefaultAsciiDocumentAnalysisOptions(dataTable);
				importOptions = AsciiDocumentAnalysis.Analyze(importOptions ?? new AsciiImportOptions(), stream, analysisOptions);
			}

			if (null == importOptions)
				throw new InvalidDataException("Import options could not be determined from the data stream. Possibly, the data stream is empty or it is not an Ascii data stream");
			if (!importOptions.IsFullySpecified)
				throw new InvalidDataException("Import options could not be fully determined from the data stream. Possibly, the data stream is empty or it is not an Ascii data stream");

			string sLine;
			stream.Position = 0; // rewind the stream to the beginning
			StreamReader sr = new StreamReader(stream, System.Text.Encoding.Default, true);
			DataColumnCollection newcols = new DataColumnCollection();

			DataColumnCollection newpropcols = new DataColumnCollection();

			// in case a structure is provided, allocate already the columsn

			if (null != importOptions.RecognizedStructure)
			{
				for (int i = 0; i < importOptions.RecognizedStructure.Count; i++)
				{
					switch (importOptions.RecognizedStructure[i].ColumnType)
					{
						case AsciiColumnType.Double:
							newcols.Add(new DoubleColumn());
							break;

						case AsciiColumnType.Int64:
							newcols.Add(new DoubleColumn());
							break;

						case AsciiColumnType.DateTime:
							newcols.Add(new DateTimeColumn());
							break;

						case AsciiColumnType.Text:
							newcols.Add(new TextColumn());
							break;

						case AsciiColumnType.DBNull:
							newcols.Add(new DBNullColumn());
							break;

						default:
							throw new ArgumentOutOfRangeException("Unconsidered AsciiColumnType: " + importOptions.RecognizedStructure[i].ToString());
					}
				}
			}

			// add also additional property columns if not enough there
			if (importOptions.NumberOfMainHeaderLines.HasValue && importOptions.NumberOfMainHeaderLines.Value > 0) // if there are more than one header line, allocate also property columns
			{
				int toAdd = importOptions.NumberOfMainHeaderLines.Value;
				for (int i = 0; i < toAdd; i++)
					newpropcols.Add(new Data.TextColumn());
			}

			// if decimal separator statistics is provided by impopt, create a number format info object
			System.Globalization.NumberFormatInfo numberFormatInfo = importOptions.NumberFormatCulture.NumberFormat;
			System.Globalization.DateTimeFormatInfo dateTimeFormat = importOptions.DateTimeFormatCulture.DateTimeFormat;

			var notesHeader = new System.Text.StringBuilder();
			notesHeader.Append("Imported");
			if (!string.IsNullOrEmpty(streamOriginHint))
				notesHeader.AppendFormat(" from {0}", streamOriginHint);
			notesHeader.AppendFormat(" at {0}", DateTime.Now);
			notesHeader.AppendLine();

			// first of all, read the header if existent
			for (int i = 0; i < importOptions.NumberOfMainHeaderLines; i++)
			{
				sLine = sr.ReadLine();
				if (null == sLine) break;

				var tokens = new List<string>(importOptions.SeparationStrategy.GetTokens(sLine));
				if (i == importOptions.IndexOfCaptionLine) // is it the column name line
				{
					for (int k = 0; k < tokens.Count; ++k)
					{
						var ttoken = tokens[k].Trim();
						if (!string.IsNullOrEmpty(ttoken))
						{
							string newcolname = newcols.FindUniqueColumnName(ttoken);
							newcols.SetColumnName(k, newcolname);
						}
					}
					continue;
				}

				switch (importOptions.HeaderLinesDestination)
				{
					case AsciiHeaderLinesDestination.Ignore:
						break;

					case AsciiHeaderLinesDestination.ImportToNotes:
						AppendLineToTableNotes(notesHeader, sLine);
						break;

					case AsciiHeaderLinesDestination.ImportToProperties:
						FillPropertyColumnWithTokens(newpropcols[i], tokens);
						break;

					case AsciiHeaderLinesDestination.ImportToPropertiesOrNotes:
						if (tokens.Count == importOptions.RecognizedStructure.Count)
							FillPropertyColumnWithTokens(newpropcols[i], tokens);
						else
							AppendLineToTableNotes(notesHeader, sLine);
						break;

					case AsciiHeaderLinesDestination.ImportToPropertiesAndNotes:
						FillPropertyColumnWithTokens(newpropcols[i], tokens);
						AppendLineToTableNotes(notesHeader, sLine);
						break;

					default:
						throw new ArgumentOutOfRangeException("Unknown switch case: " + importOptions.HeaderLinesDestination.ToString());
				}
			}

			// now the data lines
			for (int i = 0; true; i++)
			{
				sLine = sr.ReadLine();
				if (null == sLine)
					break;

				int maxcolumns = newcols.ColumnCount;

				int k = -1;
				foreach (string token in importOptions.SeparationStrategy.GetTokens(sLine))
				{
					k++;
					if (k >= maxcolumns)
						break;

					if (string.IsNullOrEmpty(token))
						continue;

					if (newcols[k] is DoubleColumn)
					{
						double val;
						if (double.TryParse(token, System.Globalization.NumberStyles.Any, numberFormatInfo, out val))
							((DoubleColumn)newcols[k])[i] = val;
					}
					else if (newcols[k] is DateTimeColumn)
					{
						DateTime val;
						if (DateTime.TryParse(token, dateTimeFormat, System.Globalization.DateTimeStyles.NoCurrentDateDefault, out val))
							((DateTimeColumn)newcols[k])[i] = val;
					}
					else if (newcols[k] is TextColumn)
					{
						((TextColumn)newcols[k])[i] = token.Trim();
					}
					else if (null == newcols[k] || newcols[k] is DBNullColumn)
					{
						bool bConverted = false;
						double val = Double.NaN;
						DateTime valDateTime = DateTime.MinValue;

						try
						{
							val = System.Convert.ToDouble(token);
							bConverted = true;
						}
						catch
						{
						}
						if (bConverted)
						{
							DoubleColumn newc = new DoubleColumn();
							newc[i] = val;
							newcols.Replace(k, newc);
						}
						else
						{
							try
							{
								valDateTime = System.Convert.ToDateTime(token);
								bConverted = true;
							}
							catch
							{
							}
							if (bConverted)
							{
								DateTimeColumn newc = new DateTimeColumn();
								newc[i] = valDateTime;

								newcols.Replace(k, newc);
							}
							else
							{
								TextColumn newc = new TextColumn();
								newc[i] = token;
								newcols.Replace(k, newc);
							}
						} // end outer if null==newcol
					}
				} // end of for all cols
			} // end of for all lines

			// insert the new columns or replace the old ones
			using (var suspendToken = dataTable.SuspendGetToken())
			{
				bool tableWasEmptyBefore = dataTable.DataColumns.ColumnCount == 0;
				for (int i = 0; i < newcols.ColumnCount; i++)
				{
					if (newcols[i] is DBNullColumn) // if the type is undefined, use a new DoubleColumn
						dataTable.DataColumns.CopyOrReplaceOrAdd(i, new DoubleColumn(), newcols.GetColumnName(i));
					else
						dataTable.DataColumns.CopyOrReplaceOrAdd(i, newcols[i], newcols.GetColumnName(i));

					// set the first column as x-column if the table was empty before, and there are more than one column
					if (i == 0 && tableWasEmptyBefore && newcols.ColumnCount > 1)
						dataTable.DataColumns.SetColumnKind(0, ColumnKind.X);
				} // end for loop

				// add the property columns
				for (int i = 0, j = 0; i < newpropcols.ColumnCount; i++)
				{
					if (newpropcols[i].Count == 0)
						continue;
					dataTable.PropCols.CopyOrReplaceOrAdd(j, newpropcols[i], newpropcols.GetColumnName(i));
					++j;
				}

				dataTable.Notes.Write(notesHeader.ToString());

				suspendToken.Dispose();
			}
		} // end of function ImportAscii
Example #22
0
		/// <summary>
		/// Compiles every combination of separation strategy, number format and date/time format that is to be
		/// tried during line analysis, and stores the result in <c>_lineAnalysisOptionsToTest</c>.
		/// </summary>
		/// <param name="importOptions">Import options. A value that is already set here (number format culture, date/time format culture, separation strategy) becomes the only candidate of its category.</param>
		/// <param name="analysisOptions">Delivers the candidate number formats and date/time formats for those categories that are not fixed by <paramref name="importOptions"/>.</param>
		private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// Number formats: a culture given by the import options wins; otherwise use the candidates of
			// the analysis options, falling back to the invariant culture if there are none.
			var numberCultures = new List<System.Globalization.CultureInfo>();
			if (null == importOptions.NumberFormatCulture)
			{
				numberCultures.AddRange(analysisOptions.NumberFormatsToTest);
				if (numberCultures.Count == 0)
					numberCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
			}
			else
			{
				numberCultures.Add(importOptions.NumberFormatCulture);
			}

			// Date/time formats: same rule as for the number formats.
			var dateTimeCultures = new List<System.Globalization.CultureInfo>();
			if (null == importOptions.DateTimeFormatCulture)
			{
				dateTimeCultures.AddRange(analysisOptions.DateTimeFormatsToTest);
				if (dateTimeCultures.Count == 0)
					dateTimeCultures.Add(System.Globalization.CultureInfo.InvariantCulture);
			}
			else
			{
				dateTimeCultures.Add(importOptions.DateTimeFormatCulture);
			}

			// Separation strategies: a given strategy is used exclusively; otherwise the candidates are
			// derived from what the global structure analysis has found in the body lines.
			var strategies = new List<IAsciiSeparationStrategy>();
			if (null == importOptions.SeparationStrategy)
			{
				if (_globalStructure.ContainsTabs)
					strategies.Add(new SingleCharSeparationStrategy('\t'));
				if (_globalStructure.ContainsCommas)
					strategies.Add(new SingleCharSeparationStrategy(','));
				if (_globalStructure.ContainsSemicolons)
					strategies.Add(new SingleCharSeparationStrategy(';'));

				if (_globalStructure.FixedBoundaries != null)
				{
					IAsciiSeparationStrategy fixedWidthStrategy;
					if (_globalStructure.RecognizedTabSize == 1)
						fixedWidthStrategy = new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries);
					else
						fixedWidthStrategy = new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize);
					strategies.Add(fixedWidthStrategy);
				}

				// nothing specific was found: split at white space
				if (strategies.Count == 0)
					strategies.Add(new SkipWhiteSpaceSeparationStrategy());

				// this separation strategy must always be considered
				strategies.Add(new SingleLineSeparationStrategy());
			}
			else
			{
				strategies.Add(importOptions.SeparationStrategy);
			}

			// Cross product of the three categories; the hash set drops duplicates.
			var combinations =
				from strategy in strategies
				from numberCulture in numberCultures
				from dateTimeCulture in dateTimeCultures
				select new AsciiLineAnalysisOption(strategy, numberCulture, dateTimeCulture);
			var optionsToTest = new HashSet<AsciiLineAnalysisOption>(combinations);

			// Drop every combination in which the separator char of a single char separation strategy
			// is equal to the decimal separator of the number format.
			optionsToTest.RemoveWhere(option =>
			{
				if (!(option.SeparationStrategy is SingleCharSeparationStrategy))
					return false;

				string separator = ((SingleCharSeparationStrategy)option.SeparationStrategy).SeparatorChar.ToString();
				return separator == option.NumberFormat.NumberFormat.NumberDecimalSeparator;
			});

			_lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(optionsToTest);
		}
Example #23
0
		/// <summary>
		/// Imports a couple of ASCII files into one (!) newly created table, vertically, i.e. the rows of each file are written below
		/// the rows of the files imported before. If the names of the subsequently imported table columns match, the data
		/// will be written in the matching column. Otherwise new columns with the unmatched column names are created.
		/// Property values are taken from the first imported table; for each subsequent table, a property value that differs from the
		/// value already stored is set to empty. The first column of the created table ("FilePath") receives the file name in the
		/// row where the data of that file start.
		/// </summary>
		/// <param name="fileNames">The file names to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">Options used to import the Ascii files. This parameter can be <c>null</c>.
		/// It is only read or assigned if <paramref name="determineImportOptionsSeparatelyForEachFile"/> is <c>false</c>: as long as it is <c>null</c> or not fully specified,
		/// it is assigned by the import of the current file; once it is fully specified, it is used unchanged for the remaining files.</param>
		/// <param name="determineImportOptionsSeparatelyForEachFile">
		/// If <c>true</c>, each file is imported by the overload that takes no import options. In this case <paramref name="importOptions"/> is neither used nor modified by this method.
		/// If <c>false</c>, the import options are either provided by the parameter <paramref name="importOptions"/> (if not null and fully specified), or determined during the import of a file and then reused.
		/// </param>
		/// <param name="errors">Null if no error occurs, or an error description. The current implementation never appends to the error list, so this is always <c>null</c>; import failures surface as exceptions of the called import functions.</param>
		/// <returns>A newly created table (not included in the project) with the imported data.</returns>
		private static DataTable InternalImportMultipleFilesVerticallyIntoNewTable(IEnumerable<string> fileNames, bool sortFileNames, ref AsciiImportOptions importOptions, bool determineImportOptionsSeparatelyForEachFile, out string errors)
		{
			System.Text.StringBuilder errorList = new System.Text.StringBuilder();
			var dataTable = new DataTable();

			int lastDestinationRow = 0;
			int numberOfImportedTables = 0;

			// make sure that a text column named "FilePath" (kind: label) exists at position 0, and that it is empty
			TextColumn filePathCol = (TextColumn)dataTable.Col.EnsureExistenceAtPositionStrictly(0, "FilePath", typeof(TextColumn), ColumnKind.Label, 0);
			filePathCol.Clear();

			if (sortFileNames)
				fileNames = fileNames.OrderBy(x => x);

			// destination columns that were already cleared once (every destination column is cleared before its first use)
			var clearedColumns = new HashSet<DataColumn>();

			foreach (string fileName in fileNames)
			{
				DataTable srcTable = new DataTable();
				if (determineImportOptionsSeparatelyForEachFile)
					ImportFromAsciiFile(srcTable, fileName);
				else if (null != importOptions && importOptions.IsFullySpecified)
					ImportFromAsciiFile(srcTable, fileName, importOptions);
				else
					ImportFromAsciiFile(srcTable, fileName, out importOptions);

				if (srcTable.DataColumns.ColumnCount == 0)
					continue; // this file delivered no data columns

				// True while the first table that delivered data is processed. The flag must hold for ALL columns of that table
				// (it was formerly reset after the first column, so that the remaining columns of the first table were treated
				// like columns of a subsequent table and matched by name only).
				bool isFirstImportedTable = 0 == numberOfImportedTables;

				// mark the beginning of the new file with the file path
				filePathCol[lastDestinationRow] = fileName;

				// transfer the data columns
				for (int srcDataColIdx = 0; srcDataColIdx < srcTable.DataColumns.ColumnCount; srcDataColIdx++)
				{
					var srcDataCol = srcTable.DataColumns[srcDataColIdx];

					DataColumn destDataCol = null;
					if (isFirstImportedTable)
					{
						// Position must be +1, because the first column is the column with the file paths
						destDataCol = dataTable.DataColumns.EnsureExistenceAtPositionStrictly(srcDataColIdx + 1, srcTable.DataColumns.GetColumnName(srcDataColIdx), srcTable.DataColumns[srcDataColIdx].GetType(), srcTable.DataColumns.GetColumnKind(srcDataColIdx), srcTable.DataColumns.GetColumnGroup(srcDataColIdx));
					}
					else // not the first table: match the destination column by name
					{
						destDataCol = dataTable.DataColumns.EnsureExistence(srcTable.DataColumns.GetColumnName(srcDataColIdx), srcTable.DataColumns[srcDataColIdx].GetType(), srcTable.DataColumns.GetColumnKind(srcDataColIdx), srcTable.DataColumns.GetColumnGroup(srcDataColIdx));
					}

					if (!clearedColumns.Contains(destDataCol))
					{
						clearedColumns.Add(destDataCol);
						destDataCol.Clear();
					}

					int destDataColIdx = dataTable.DataColumns.GetColumnNumber(destDataCol);

					// transfer the data of one data column
					for (int j = 0; j < srcDataCol.Count; j++)
						destDataCol[lastDestinationRow + j] = srcDataCol[j];

					// now also process the property columns
					for (int srcPropColIdx = 0; srcPropColIdx < srcTable.PropCols.ColumnCount; srcPropColIdx++)
					{
						var destPropCol = dataTable.PropCols.EnsureExistence(srcTable.PropCols.GetColumnName(srcPropColIdx), srcTable.PropCols[srcPropColIdx].GetType(), srcTable.PropCols.GetColumnKind(srcPropColIdx), srcTable.PropCols.GetColumnGroup(srcPropColIdx));

						if (isFirstImportedTable)
						{
							// the first table determines the property value
							destPropCol[destDataColIdx] = srcTable.PropCols[srcPropColIdx][srcDataColIdx];
						}
						else if (destPropCol[destDataColIdx] != srcTable.PropCols[srcPropColIdx][srcDataColIdx])
						{
							// contradicting property values in different files: leave the property empty
							destPropCol.SetElementEmpty(destDataColIdx);
						}
					}
				}

				lastDestinationRow += srcTable.DataColumns.RowCount;
				numberOfImportedTables++;
			} // foreach file

			errors = errorList.Length == 0 ? null : errorList.ToString();
			return dataTable;
		}
Example #24
0
		/// <summary>
		/// Analyzes the first lines of the ascii stream (the number of lines is given by <c>analysisOptions.NumberOfLinesToAnalyze</c>)
		/// and writes the result of the analysis into <paramref name="importOptions"/>.
		/// </summary>
		/// <param name="importOptions">The import options. This can already contain known values; a separation strategy, number format culture or date/time format culture
		/// that is already set restricts the analysis to that value, and a known number of header lines or caption line index is taken over.
		/// On return, the number of main header lines, the index of the caption line, the separation strategy, both cultures and the recognized structure are set.
		/// If no body lines could be read from the stream, the method returns early and this instance is left unchanged.</param>
		/// <param name="stream">The ascii stream to analyze.</param>
		/// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
		/// <exception cref="ArgumentNullException">If <paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
		public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
		{
			// the string given to ArgumentNullException is the parameter name, thus it has to match the parameter exactly
			if (null == stream)
				throw new ArgumentNullException("stream");
			if (null == analysisOptions)
				throw new ArgumentNullException("analysisOptions");
			if (null == importOptions)
				throw new ArgumentNullException("importOptions");

			// Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
			ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

			if (_bodyLines.Count == 0)
				return; // there is nothing to analyze; importOptions stays as it was

			// Analyze the whitespace structure of the body lines, find out if there is a fixed column width
			_globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

			// Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
			SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

			// Analyze each of the first few lines with all possible separation strategies
			_lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

			// Do the analysis itself in parallel for each of the lines (every iteration writes only to its own array element)
			System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

			// for debugging activate the next line and paste the data into notepad:
			// PutRecognizedStructuresToClipboard(result, separationStrategies);

			EvaluateScoringOfAllLineAnalysisOptions();

			// Evaluate the best line analysis option; the result is used below via _highestScoredLineAnalysisOption
			EvaluateHighestScoredLineAnalysisOption();

			// look how many header lines are in the file (only if the number is not already given by the import options)
			if (null == importOptions.NumberOfMainHeaderLines)
				EvaluateNumberOfMainHeaderLines();
			else
				_numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;

			// get the index of the caption line (only if it is not already given by the import options)
			if (null == importOptions.IndexOfCaptionLine)
				EvaluateIndexOfCaptionLine();
			else
				_indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;

			// finally, publish the results of the analysis in the import options
			importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
			importOptions.IndexOfCaptionLine = _indexOfCaptionLine;

			importOptions.SeparationStrategy = _highestScoredLineAnalysisOption.SeparationStrategy;
			importOptions.NumberFormatCulture = _highestScoredLineAnalysisOption.NumberFormat;
			importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

			importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
		}
Example #25
0
		/// <summary>
		/// Imports a couple of ASCII files into one (!) table, vertically. The files are first imported into a temporary table,
		/// which is then transferred to <paramref name="dataTable"/>. Afterwards the Ascii import data source of
		/// <paramref name="dataTable"/> is added or updated with the file names and the import options.
		/// </summary>
		/// <param name="dataTable">The table the data should be imported to.</param>
		/// <param name="fileNames">The file names to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">Options used to import the Ascii files. This parameter can be <c>null</c>. If it is still <c>null</c> after the import
		/// (which is the case if it was <c>null</c> on entry and <paramref name="determineImportOptionsSeparatelyForEachFile"/> is <c>true</c>), a new, unspecified
		/// instance is created. On return, the instance has <c>ImportMultipleStreamsVertically</c> set to <c>true</c>.</param>
		/// <param name="determineImportOptionsSeparatelyForEachFile">
		/// If <c>true</c>, the import options are determined for each file separately.
		/// If <c>false</c>, the import options are either provided by the parameter <paramref name="importOptions"/> (if not null and fully specified), or determined during the import.
		/// </param>
		/// <returns>Null if no error occurs, or an error description.</returns>
		private static string InternalImportFromMultipleAsciiFilesVertically(this DataTable dataTable, IEnumerable<string> fileNames, bool sortFileNames, ref AsciiImportOptions importOptions, bool determineImportOptionsSeparatelyForEachFile)
		{
			string errors;
			var temporaryTable = InternalImportMultipleFilesVerticallyIntoNewTable(fileNames, sortFileNames, ref importOptions, determineImportOptionsSeparatelyForEachFile, out errors);

			if (null != temporaryTable)
			{
				TransferTemporaryTable(temporaryTable, dataTable);

				// The import options are allowed to be null and are not necessarily assigned during the import.
				// Without this guard the next statement would throw a NullReferenceException in that case.
				if (null == importOptions)
					importOptions = new AsciiImportOptions();

				// finally set or change the data source of the table
				importOptions.ImportMultipleStreamsVertically = true;
				AddOrUpdateAsciiImportDataSource(dataTable, fileNames, importOptions);
			}

			return errors;
		}
Example #26
0
		/// <summary>
		/// Initializes a new instance of the <see cref="AsciiImportDataSource"/> class.
		/// </summary>
		/// <param name="fileNames">The names of the Ascii files of this data source. Each name is wrapped into an <c>AbsoluteAndRelativeFileName</c>.</param>
		/// <param name="options">The Ascii import options. They are handed to <c>ChildCopyToMember</c> in order to set the options member of this instance.</param>
		/// <exception cref="ArgumentNullException">If <paramref name="fileNames"/> is <c>null</c>.</exception>
		public AsciiImportDataSource(IEnumerable<string> fileNames, AsciiImportOptions options)
		{
			// fail with a meaningful exception instead of a NullReferenceException out of the loop below
			if (null == fileNames)
				throw new ArgumentNullException("fileNames");

			_asciiFiles = new List<AbsoluteAndRelativeFileName>();
			foreach (var fileName in fileNames)
			{
				_asciiFiles.Add(new AbsoluteAndRelativeFileName(fileName));
			}
			ChildCopyToMember(ref _asciiImportOptions, options);
			_importOptions = new DataSourceImportOptions() { ParentObject = this };
		}
Example #27
0
		/// <summary>
		/// Imports multiple Ascii files into the provided table in vertical order, i.e. in new rows. The provided <paramref name="importOptions"/> are used to import the files.
		/// If the names of the subsequently imported table columns match, the data will be written in the matching column. Otherwise new columns with the unmatched column names are created.
		/// </summary>
		/// <param name="dataTable">The table the data should be imported to.</param>
		/// <param name="fileNames">The file names to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">Options used to import the Ascii files. This parameter must not be null, and the options must be fully specified.</param>
		/// <returns>Null if no error occurs, or an error description.</returns>
		/// <exception cref="ArgumentNullException">If <paramref name="dataTable"/>, <paramref name="fileNames"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
		/// <exception cref="ArgumentException">If <paramref name="importOptions"/> is not fully specified.</exception>
		public static string ImportFromMultipleAsciiFilesVertically(this DataTable dataTable, IEnumerable<string> fileNames, bool sortFileNames, AsciiImportOptions importOptions)
		{
			// Note: the constructor of ArgumentNullException that takes one string expects the name of the parameter, not a message.
			if (null == dataTable)
				throw new ArgumentNullException("dataTable");
			if (null == fileNames)
				throw new ArgumentNullException("fileNames");
			if (importOptions == null)
				throw new ArgumentNullException("importOptions");
			if (!importOptions.IsFullySpecified)
				throw new ArgumentException("Argument importOptions: importOptions must be fully specified, i.e. all elements of importOptions must be valid. Please run a document analysis in-before to get appropriate values.", "importOptions");

			// The options are passed by reference to the internal function; since the parameter is not a ref parameter here,
			// a replacement of the instance inside would not be visible to the caller of this function.
			return InternalImportFromMultipleAsciiFilesVertically(dataTable, fileNames, sortFileNames, ref importOptions, false);
		}
Example #28
0
        /// <summary>
        /// Determines which combinations of separation strategy, number format and date/time format
        /// have to be tested, and stores them in <c>_lineAnalysisOptionsToTest</c>.
        /// </summary>
        /// <param name="importOptions">Import options. Values that are already set here are used exclusively for their category.</param>
        /// <param name="analysisOptions">Provides the number formats and date/time formats to test if <paramref name="importOptions"/> does not fix them.</param>
        private void SetLineAnalysisOptionsToTest(AsciiImportOptions importOptions, AsciiDocumentAnalysisOptions analysisOptions)
        {
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            // number formats: either the one already known, or the ones from the analysis options (at least the invariant culture)
            var numberFormats = new List<System.Globalization.CultureInfo>();
            if (null == importOptions.NumberFormatCulture)
            {
                numberFormats.AddRange(analysisOptions.NumberFormatsToTest);
                if (numberFormats.Count == 0)
                {
                    numberFormats.Add(invariantCulture);
                }
            }
            else
            {
                numberFormats.Add(importOptions.NumberFormatCulture);
            }

            // date/time formats: either the one already known, or the ones from the analysis options (at least the invariant culture)
            var dateTimeFormats = new List<System.Globalization.CultureInfo>();
            if (null == importOptions.DateTimeFormatCulture)
            {
                dateTimeFormats.AddRange(analysisOptions.DateTimeFormatsToTest);
                if (dateTimeFormats.Count == 0)
                {
                    dateTimeFormats.Add(invariantCulture);
                }
            }
            else
            {
                dateTimeFormats.Add(importOptions.DateTimeFormatCulture);
            }

            // separation strategies: a given strategy is the only candidate, otherwise collect the plausible ones
            var candidates = new List<IAsciiSeparationStrategy>();
            if (null == importOptions.SeparationStrategy)
            {
                if (_globalStructure.ContainsTabs)
                {
                    candidates.Add(new SingleCharSeparationStrategy('\t'));
                }
                if (_globalStructure.ContainsCommas)
                {
                    candidates.Add(new SingleCharSeparationStrategy(','));
                }
                if (_globalStructure.ContainsSemicolons)
                {
                    candidates.Add(new SingleCharSeparationStrategy(';'));
                }
                if (_globalStructure.FixedBoundaries != null)
                {
                    bool tabSizeIsOne = _globalStructure.RecognizedTabSize == 1;
                    if (tabSizeIsOne)
                    {
                        candidates.Add(new FixedColumnWidthWithoutTabSeparationStrategy(_globalStructure.FixedBoundaries));
                    }
                    else
                    {
                        candidates.Add(new FixedColumnWidthWithTabSeparationStrategy(_globalStructure.FixedBoundaries, _globalStructure.RecognizedTabSize));
                    }
                }
                if (candidates.Count == 0)
                {
                    // no separator char and no fixed column width found: separate at white space
                    candidates.Add(new SkipWhiteSpaceSeparationStrategy());
                }

                // this separation strategy must always be considered
                candidates.Add(new SingleLineSeparationStrategy());
            }
            else
            {
                candidates.Add(importOptions.SeparationStrategy);
            }

            // combine each strategy with each number format and each date/time format
            var optionsToTest = new HashSet<AsciiLineAnalysisOption>();
            foreach (var strategy in candidates)
            {
                foreach (var numberFormat in numberFormats)
                {
                    foreach (var dateTimeFormat in dateTimeFormats)
                    {
                        optionsToTest.Add(new AsciiLineAnalysisOption(strategy, numberFormat, dateTimeFormat));
                    }
                }
            }

            // collect (into an array, because the set is modified afterwards) all those options where the char of the
            // single char separation strategy is equal to the number format's decimal separator ...
            var conflictingOptions = optionsToTest
                .Where(option =>
                    option.SeparationStrategy is SingleCharSeparationStrategy &&
                    ((SingleCharSeparationStrategy)option.SeparationStrategy).SeparatorChar.ToString() == option.NumberFormat.NumberFormat.NumberDecimalSeparator)
                .ToArray();

            // ... and remove them
            foreach (AsciiLineAnalysisOption conflictingOption in conflictingOptions)
            {
                optionsToTest.Remove(conflictingOption);
            }

            _lineAnalysisOptionsToTest = new List<AsciiLineAnalysisOption>(optionsToTest);
        }
Example #29
0
		/// <summary>
		/// Creates a new <see cref="AsciiImportOptions"/> instance and fills it by calling <c>CopyFrom</c> with this instance as source.
		/// </summary>
		/// <returns>The newly created instance.</returns>
		public AsciiImportOptions Clone()
		{
			var copy = new AsciiImportOptions();
			copy.CopyFrom(this);

			return copy;
		}
Example #30
0
        /// <summary>
        /// Analyzes the first lines of the ascii stream (the number of lines is given by <c>analysisOptions.NumberOfLinesToAnalyze</c>)
        /// and writes the result of the analysis into <paramref name="importOptions"/>.
        /// </summary>
        /// <param name="importOptions">The import options. This can already contain known values; a separation strategy, number format culture or date/time format culture
        /// that is already set restricts the analysis to that value, and a known number of header lines or caption line index is taken over.
        /// On return, the number of main header lines, the index of the caption line, the separation strategy, both cultures and the recognized structure are set.
        /// If no body lines could be read from the stream, the method returns early and this instance is left unchanged.</param>
        /// <param name="stream">The ascii stream to analyze.</param>
        /// <param name="analysisOptions">Options that specify how many lines are analyzed, and what number formats and date/time formats will be tested.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="stream"/>, <paramref name="analysisOptions"/> or <paramref name="importOptions"/> is <c>null</c>.</exception>
        public void InternalAnalyze(AsciiImportOptions importOptions, System.IO.Stream stream, AsciiDocumentAnalysisOptions analysisOptions)
        {
            // the string given to ArgumentNullException is the parameter name, thus it has to match the parameter exactly
            if (null == stream)
            {
                throw new ArgumentNullException("stream");
            }
            if (null == analysisOptions)
            {
                throw new ArgumentNullException("analysisOptions");
            }
            if (null == importOptions)
            {
                throw new ArgumentNullException("importOptions");
            }

            // Read-in the lines into _bodyLines. If the number of header lines is already known, those header lines are read into _headerLines
            ReadLinesToAnalyze(stream, analysisOptions.NumberOfLinesToAnalyze, importOptions.NumberOfMainHeaderLines);

            if (_bodyLines.Count == 0)
            {
                return; // there is nothing to analyze; importOptions stays as it was
            }

            // Analyze the whitespace structure of the body lines, find out if there is a fixed column width
            _globalStructure = new AsciiGlobalStructureAnalysis(_bodyLines);

            // Sets all separation strategies to test for. If importOptions already contain a separation strategy, only this separation strategy is set
            SetLineAnalysisOptionsToTest(importOptions, analysisOptions);

            // Analyze each of the first few lines with all possible separation strategies
            _lineAnalysisOfBodyLines = new AsciiLineAnalysis[_bodyLines.Count];

            // Do the analysis itself in parallel for each of the lines (every iteration writes only to its own array element)
            System.Threading.Tasks.Parallel.For(0, _bodyLines.Count, (i) => _lineAnalysisOfBodyLines[i] = new AsciiLineAnalysis(i, _bodyLines[i], _lineAnalysisOptionsToTest));

            // for debugging activate the next line and paste the data into notepad:
            // PutRecognizedStructuresToClipboard(result, separationStrategies);

            EvaluateScoringOfAllLineAnalysisOptions();

            // Evaluate the best line analysis option; the result is used below via _highestScoredLineAnalysisOption
            EvaluateHighestScoredLineAnalysisOption();

            // look how many header lines are in the file (only if the number is not already given by the import options)
            if (null == importOptions.NumberOfMainHeaderLines)
            {
                EvaluateNumberOfMainHeaderLines();
            }
            else
            {
                _numberOfMainHeaderLines = importOptions.NumberOfMainHeaderLines.Value;
            }

            // get the index of the caption line (only if it is not already given by the import options)
            if (null == importOptions.IndexOfCaptionLine)
            {
                EvaluateIndexOfCaptionLine();
            }
            else
            {
                _indexOfCaptionLine = importOptions.IndexOfCaptionLine.Value;
            }

            // finally, publish the results of the analysis in the import options
            importOptions.NumberOfMainHeaderLines = _numberOfMainHeaderLines;
            importOptions.IndexOfCaptionLine      = _indexOfCaptionLine;

            importOptions.SeparationStrategy    = _highestScoredLineAnalysisOption.SeparationStrategy;
            importOptions.NumberFormatCulture   = _highestScoredLineAnalysisOption.NumberFormat;
            importOptions.DateTimeFormatCulture = _highestScoredLineAnalysisOption.DateTimeFormat;

            importOptions.RecognizedStructure = _lineAnalysisOptionsScoring[_highestScoredLineAnalysisOption].LineStructure;
        }
Example #31
0
    /// <summary>
    /// Analyzes the first <code>nLines</code> of the ascii stream and derives from them the delimiter,
    /// the column structure, the number of header lines and the decimal separator statistics.
    /// </summary>
    /// <param name="nLines">The maximum number of lines to analyze. It is not an error if the stream contains fewer lines than provided here.</param>
    /// <param name="defaultImportOptions">The default import options. They are cloned, and the analysis results are written to the clone. Must not be null if the stream contains data.</param>
    /// <returns>Import options that can be used in a following step to read in the ascii stream. Null is returned if the stream contains no data.</returns>
    public AsciiImportOptions Analyze(int nLines, AsciiImportOptions defaultImportOptions)
    {
      stream.Position = 0;
      // NOTE: the reader is not disposed here, because disposing it would also close the underlying stream.
      System.IO.StreamReader sr = new System.IO.StreamReader(stream,System.Text.Encoding.Default,true);
      System.Collections.ArrayList result = new System.Collections.ArrayList();

      // analyze line by line, until either nLines are read or the stream ends
      for(int i=0;i<nLines;i++)
      {
        string sLine = sr.ReadLine();
        if(null==sLine)
          break;
        result.Add(new AsciiLineAnalyzer(i,sLine));
      }

      if(result.Count==0)
        return null; // there is nothing to analyze

      // evaluate the three separator candidates (index 0: tab, 1: comma, 2: semicolon, see the delimiter mapping below)
      NumberAndStructure[] st = new NumberAndStructure[3];
      for(int i=0;i<3;i++)
      {
        st[i].nLines = GetPriorityOf(result,(AsciiLineAnalyzer.Separation)i,ref st[i].structure);
      }

      // look for the candidate with the highest (number of lines * structure priority);
      // if two candidates have the same product, the one with more lines wins
      int nMaxLines = int.MinValue;
      double maxprtylines=0;
      int nBestSeparator = int.MinValue;
      for(int i=0;i<3;i++)
      {
        double prtylines = (double)st[i].nLines * st[i].structure.Priority;
        if(prtylines==maxprtylines)
        {
          if(st[i].nLines > nMaxLines)
          {
            nMaxLines = st[i].nLines;
            nBestSeparator = i;
          }
        }
        else if(prtylines>maxprtylines)
        {
          maxprtylines = prtylines;
          nBestSeparator = i;
          nMaxLines=st[i].nLines;
        }
      }

      AsciiImportOptions opt = defaultImportOptions.Clone();

      opt.bDelimited = true;
      opt.cDelimiter = nBestSeparator==0 ? '\t' : (nBestSeparator==1 ? ',' : ';');
      opt.recognizedStructure = st[nBestSeparator].structure;

      // look how many header lines are in the file by comparing the structure of the first lines with the recognized structure:
      // the index of the first compatible line is the number of header lines
      // (if no line is compatible, the value stays at the index of the last analyzed line)
      for(int i=0;i<result.Count;i++)
      {
        opt.nMainHeaderLines=i;
        if(((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].IsCompatibleWith(opt.recognizedStructure))
          break;
      }

      // calculate the total statistics of decimal separators over all analyzed lines
      opt.m_DecimalSeparatorCommaCount=0;
      opt.m_DecimalSeparatorDotCount=0;
      for(int i=0;i<result.Count;i++)
      {
        opt.m_DecimalSeparatorDotCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorDotCount;
        opt.m_DecimalSeparatorCommaCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorCommaCount;
      }

      return opt;
    }
Example #32
0
        /// <summary>
        /// Analyzes the first <code>nLines</code> of the ascii stream and derives from them the delimiter,
        /// the column structure, the number of header lines and the decimal separator statistics.
        /// </summary>
        /// <param name="nLines">The maximum number of lines to analyze. It is not an error if the stream contains fewer lines than provided here.</param>
        /// <param name="defaultImportOptions">The default import options. They are cloned, and the analysis results are written to the clone. Must not be null if the stream contains data.</param>
        /// <returns>Import options that can be used in a following step to read in the ascii stream. Null is returned if the stream contains no data.</returns>
        public AsciiImportOptions Analyze(int nLines, AsciiImportOptions defaultImportOptions)
        {
            stream.Position = 0;
            // NOTE: the reader is not disposed here, because disposing it would also close the underlying stream.
            System.IO.StreamReader       sr     = new System.IO.StreamReader(stream, System.Text.Encoding.Default, true);
            System.Collections.ArrayList result = new System.Collections.ArrayList();

            // analyze line by line, until either nLines are read or the stream ends
            for (int i = 0; i < nLines; i++)
            {
                string sLine = sr.ReadLine();
                if (null == sLine)
                {
                    break;
                }
                result.Add(new AsciiLineAnalyzer(i, sLine));
            }

            if (result.Count == 0)
            {
                return(null); // there is nothing to analyze
            }

            // evaluate the three separator candidates (index 0: tab, 1: comma, 2: semicolon, see the delimiter mapping below)
            NumberAndStructure[] st = new NumberAndStructure[3];

            for (int i = 0; i < 3; i++)
            {
                st[i].nLines = GetPriorityOf(result, (AsciiLineAnalyzer.Separation)i, ref st[i].structure);
            }

            // look for the candidate with the highest (number of lines * structure priority);
            // if two candidates have the same product, the one with more lines wins
            int    nMaxLines      = int.MinValue;
            double maxprtylines   = 0;
            int    nBestSeparator = int.MinValue;

            for (int i = 0; i < 3; i++)
            {
                double prtylines = (double)st[i].nLines * st[i].structure.Priority;
                if (prtylines == maxprtylines)
                {
                    if (st[i].nLines > nMaxLines)
                    {
                        nMaxLines      = st[i].nLines;
                        nBestSeparator = i;
                    }
                }
                else if (prtylines > maxprtylines)
                {
                    maxprtylines   = prtylines;
                    nBestSeparator = i;
                    nMaxLines      = st[i].nLines;
                }
            }

            AsciiImportOptions opt = defaultImportOptions.Clone();

            opt.bDelimited          = true;
            opt.cDelimiter          = nBestSeparator == 0 ? '\t' : (nBestSeparator == 1 ? ',' : ';');
            opt.recognizedStructure = st[nBestSeparator].structure;

            // look how many header lines are in the file by comparing the structure of the first lines with the recognized structure:
            // the index of the first compatible line is the number of header lines
            // (if no line is compatible, the value stays at the index of the last analyzed line)
            for (int i = 0; i < result.Count; i++)
            {
                opt.nMainHeaderLines = i;
                if (((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].IsCompatibleWith(opt.recognizedStructure))
                {
                    break;
                }
            }

            // calculate the total statistics of decimal separators over all analyzed lines
            opt.m_DecimalSeparatorCommaCount = 0;
            opt.m_DecimalSeparatorDotCount   = 0;
            for (int i = 0; i < result.Count; i++)
            {
                opt.m_DecimalSeparatorDotCount   += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorDotCount;
                opt.m_DecimalSeparatorCommaCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorCommaCount;
            }

            return(opt);
        }
Example #33
0
		/// <summary>
		/// Imports multiple Ascii files into newly created tables (each file into a separate table). The tables are named and stored in the project file in the provided project folder.
		/// </summary>
		/// <param name="projectFolder">The project folder in which the new tables should be created.</param>
		/// <param name="fileNames">The names of the files to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">Options used to import ASCII. This parameter must not be null, and the options must be fully specified.</param>
		/// <returns>The list of tables created during the import.</returns>
		/// <exception cref="ArgumentNullException">If <paramref name="projectFolder"/>, <paramref name="fileNames"/> or <paramref name="importOptions"/> is null.</exception>
		/// <exception cref="ArgumentException">If <paramref name="importOptions"/> is not fully specified.</exception>
		public static IList<DataTable> ImportFilesIntoSeparateNewTables(this ProjectFolder projectFolder, IEnumerable<string> fileNames, bool sortFileNames, AsciiImportOptions importOptions)
		{
			if (null == projectFolder)
				throw new ArgumentNullException("projectFolder");
			if (null == fileNames)
				throw new ArgumentNullException("fileNames"); // must match the parameter name exactly
			if (null == importOptions)
				throw new ArgumentNullException("importOptions");
			if (!importOptions.IsFullySpecified)
				throw new ArgumentException("Argument importOptions: importOptions must be fully specified, i.e. all elements of importOptions must be valid. Please run a document analysis in-before to get appropriate values.");

			// the options are validated above, so they are used as-is for all files
			return InternalImportFilesIntoSeparateNewTables(projectFolder, fileNames, sortFileNames, ref importOptions, false);
		}
Example #34
0
 /// <summary>
 /// Creates a data source for a single ascii file. The file name is wrapped into a one-element array
 /// and forwarded, together with the options, to another constructor of this class.
 /// </summary>
 /// <param name="fileName">The name of the ascii file.</param>
 /// <param name="options">The import options, passed on unchanged.</param>
 public AsciiImportDataSource(string fileName, AsciiImportOptions options)
     : this(new[] { fileName }, options)
 {
 }
Example #35
0
		/// <summary>
		/// Imports multiple Ascii files into newly created tables (each file into a separate table). The tables are named and stored in the project file in the provided project folder.
		/// </summary>
		/// <param name="projectFolder">The project folder in which the new tables should be created.</param>
		/// <param name="fileNames">The names of the files to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">On return, contains the options used to import the first Ascii files. These options are also used to import all other Ascii files.</param>
		/// <returns>The list of tables created during the import.</returns>
		/// <exception cref="ArgumentNullException">If <paramref name="projectFolder"/> or <paramref name="fileNames"/> is null.</exception>
		public static IList<DataTable> ImportFilesIntoSeparateNewTables(this ProjectFolder projectFolder, IEnumerable<string> fileNames, bool sortFileNames, out AsciiImportOptions importOptions)
		{
			if (null == projectFolder)
				throw new ArgumentNullException("projectFolder");
			if (null == fileNames)
				throw new ArgumentNullException("fileNames"); // must match the parameter name exactly

			// start without options, so that they are determined during the import
			importOptions = null;
			return InternalImportFilesIntoSeparateNewTables(projectFolder, fileNames, sortFileNames, ref importOptions, false);
		}
Example #36
0
		/// <summary>
		/// Imports multiple Ascii files into the provided table in vertical order, i.e. in new rows. The import options are determined from the first file, and then used to import all other files.
		/// If the names of the subsequently imported table columns match, the data will be written in the matching column. Otherwise new columns with the unmatched column names are created.
		/// Property columns will only be imported from the first table.
		/// </summary>
		/// <param name="dataTable">The table the data should be imported to.</param>
		/// <param name="fileNames">An array of filenames to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted before usage in ascending order using the default string comparator.</param>
		/// <param name="importOptions">On return, contains the options used to import the first Ascii files. These options are also used to import all other Ascii files.</param>
		/// <returns>Null if no error occurs, or an error description.</returns>
		/// <exception cref="ArgumentNullException">If <paramref name="dataTable"/> or <paramref name="fileNames"/> is null.</exception>
		public static string ImportFromMultipleAsciiFilesVertically(this DataTable dataTable, IEnumerable<string> fileNames, bool sortFileNames, out AsciiImportOptions importOptions)
		{
			// the single-argument constructor of ArgumentNullException expects the parameter name, not a message
			if (null == dataTable)
				throw new ArgumentNullException("dataTable");
			if (null == fileNames)
				throw new ArgumentNullException("fileNames");

			// start without options, so that they are determined during the import
			importOptions = null;
			return InternalImportFromMultipleAsciiFilesVertically(dataTable, fileNames, sortFileNames, ref importOptions, false);
		}
Example #37
0
		/// <summary>
		/// Analyzes the provided ascii stream and, if the analysis yields import options, imports the stream into a newly created table.
		/// </summary>
		/// <param name="stream">The stream to import ascii from. This function itself does not close the stream.</param>
		/// <param name="streamOriginHint">A short hint where the provided stream originates from; passed on to the import. Can be <c>Null</c> if the origin is unknown.</param>
		/// <param name="defaultImportOptions">The import options used as base for the analysis. If <c>null</c>, a new <see cref="AsciiImportOptions"/> instance is used instead.</param>
		/// <returns>The new table containing the imported data, or null if the analysis returned no import options.</returns>
		private static DataTable InternalImportStreamIntoNewTable(Stream stream, string streamOriginHint, AsciiImportOptions defaultImportOptions)
		{
			var baseOptions = defaultImportOptions ?? new AsciiImportOptions();
			var analysisOptions = GetDefaultAsciiDocumentAnalysisOptions(null);
			var analyzedOptions = AsciiDocumentAnalysis.Analyze(baseOptions, stream, analysisOptions);

			// without import options there is nothing that can be imported
			if (analyzedOptions == null)
				return null;

			var newTable = new DataTable();
			ImportFromAsciiStream(newTable, stream, streamOriginHint, analyzedOptions);
			return newTable;
		}
Example #38
0
		/// <summary>
		/// Imports multiple Ascii files, each one into its own newly created table. Every table is named by the project folder name
		/// followed by the file name without extension, and a new worksheet is created for it before the file is imported.
		/// </summary>
		/// <param name="projectFolder">The project folder whose name is used as prefix for the names of the new tables.</param>
		/// <param name="fileNames">The names of the files to import.</param>
		/// <param name="sortFileNames">If <c>true</c>, the fileNames are sorted in ascending order (default comparer) before they are imported.</param>
		/// <param name="importOptions">
		/// The import options. May be null or not fully specified; in this case (and if <paramref name="determineImportOptionsSeparatelyForEachFile"/> is <c>false</c>)
		/// the argument is assigned by the import of a file and then re-evaluated for the following files.
		/// </param>
		/// <param name="determineImportOptionsSeparatelyForEachFile">
		/// If <c>true</c>, every file is imported without passing import options; <paramref name="importOptions"/> is then neither used nor assigned by this function.
		/// If <c>false</c>, <paramref name="importOptions"/> is used if it is not null and fully specified; otherwise the import of the file assigns it.
		/// </param>
		/// <returns>The list of tables created during the import, in the order of import.</returns>
		private static IList<DataTable> InternalImportFilesIntoSeparateNewTables(ProjectFolder projectFolder, IEnumerable<string> fileNames, bool sortFileNames, ref AsciiImportOptions importOptions, bool determineImportOptionsSeparatelyForEachFile)
		{
			IEnumerable<string> namesToImport = sortFileNames ? fileNames.OrderBy(name => name) : fileNames;
			var createdTables = new List<DataTable>();

			foreach (var currentFile in namesToImport)
			{
				string tableName = projectFolder.Name + Path.GetFileNameWithoutExtension(currentFile);
				var newTable = new DataTable(tableName);
				Current.ProjectService.CreateNewWorksheet(newTable);

				if (determineImportOptionsSeparatelyForEachFile)
				{
					// no options are passed for this file
					ImportFromAsciiFile(newTable, currentFile);
				}
				else if (null != importOptions && importOptions.IsFullySpecified)
				{
					// usable options are available
					ImportFromAsciiFile(newTable, currentFile, importOptions);
				}
				else
				{
					// no usable options yet: this import assigns them
					ImportFromAsciiFile(newTable, currentFile, out importOptions);
				}

				createdTables.Add(newTable);
			}

			return createdTables;
		}
Example #39
0
    /// <summary>
    /// Imports the ascii stream into the provided table. The stream is read from its beginning: first the header lines
    /// (the first one provides the column names, the following ones are stored in property columns), then the data lines.
    /// </summary>
    /// <param name="impopt">The import options: delimiter, number of header lines, recognized column structure and decimal separator statistics.</param>
    /// <param name="table">The table that receives the imported data columns and property columns.</param>
    public void ImportAscii(AsciiImportOptions impopt, Altaxo.Data.DataTable table)
    {
      string sLine;
      stream.Position=0; // rewind the stream to the beginning
      // NOTE: the reader is not disposed here, because disposing it would also close the underlying stream.
      System.IO.StreamReader sr = new System.IO.StreamReader(stream,System.Text.Encoding.Default,true);
      Altaxo.Data.DataColumnCollection newcols = new Altaxo.Data.DataColumnCollection();

      Altaxo.Data.DataColumnCollection newpropcols = new Altaxo.Data.DataColumnCollection();

      // in case a structure is provided, allocate the columns in advance;
      // columns of unrecognized type start as DBNullColumn and get their type from the first non-empty value

      if(null!=impopt.recognizedStructure)
      {
        for(int i=0;i<impopt.recognizedStructure.Count;i++)
        {
          if(impopt.recognizedStructure[i]==typeof(Double))
            newcols.Add(new Altaxo.Data.DoubleColumn());
          else if(impopt.recognizedStructure[i]==typeof(DateTime))
            newcols.Add(new Altaxo.Data.DateTimeColumn());
          else if(impopt.recognizedStructure[i]==typeof(string))
            newcols.Add(new Altaxo.Data.TextColumn());
          else
            newcols.Add(new Altaxo.Data.DBNullColumn());
        }
      }

      // add also additional property columns if not enough there
      if(impopt.nMainHeaderLines>1) // if there are more than one header line, allocate also property columns
      {
        int toAdd = impopt.nMainHeaderLines-1;
        for(int i=0;i<toAdd;i++)
          newpropcols.Add(new Data.TextColumn());
      }

      // if decimal separator statistics is provided by impopt, create a number format info object
      System.Globalization.NumberFormatInfo numberFormatInfo=null;
      if(impopt.m_DecimalSeparatorCommaCount>0 || impopt.m_DecimalSeparatorDotCount>0)
      {
        numberFormatInfo = (System.Globalization.NumberFormatInfo)System.Globalization.NumberFormatInfo.CurrentInfo.Clone();

        // analyse the statistics (if both counts are equal, the cloned current format is used unchanged)
        if(impopt.m_DecimalSeparatorCommaCount>impopt.m_DecimalSeparatorDotCount) // the comma is the decimal separator
        {
          numberFormatInfo.NumberDecimalSeparator=",";
          if(numberFormatInfo.NumberGroupSeparator==numberFormatInfo.NumberDecimalSeparator)
            numberFormatInfo.NumberGroupSeparator=""; // in case now the group separator is also comma, remove the group separator
        }
        else if(impopt.m_DecimalSeparatorCommaCount<impopt.m_DecimalSeparatorDotCount) // the dot is the decimal separator
        {
          numberFormatInfo.NumberDecimalSeparator=".";
          if(numberFormatInfo.NumberGroupSeparator==numberFormatInfo.NumberDecimalSeparator)
            numberFormatInfo.NumberGroupSeparator=""; // in case now the group separator is also dot, remove the group separator
        }
      }
      else // no decimal separator statistics is provided, so use the number format of the current thread
      {
        numberFormatInfo = System.Globalization.NumberFormatInfo.CurrentInfo;
      }

      // the same number styles that System.Convert.ToDouble(string, IFormatProvider) applies
      const System.Globalization.NumberStyles numberStyles = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

      char [] splitchar = new char[]{impopt.cDelimiter};

      // first of all, read the header if existent
      for(int i=0;i<impopt.nMainHeaderLines;i++)
      {
        sLine = sr.ReadLine();
        if(null==sLine) break;

        string[] substr = sLine.Split(splitchar);
        int cnt = substr.Length;
        for(int k=0;k<cnt;k++)
        {
          if(substr[k].Length==0)
            continue;

          if(k>=newcols.ColumnCount)
            continue; // ignore header items for which no data column exists

          if(i==0) // is it the column name line
          {
            newcols.SetColumnName(k, substr[k]);
          }
          else // these are treated as additional properties
          {
            ((Data.DataColumn)newpropcols[i-1])[k] = substr[k]; // set the properties
          }
        }
      }

      // now read the data lines; i is the row index (empty lines also consume a row index)
      for(int i=0;true;i++)
      {
        sLine = sr.ReadLine();
        if(null==sLine) break;

        string[] substr = sLine.Split(splitchar);
        int cnt = Math.Min(substr.Length,newcols.ColumnCount);
        for(int k=0;k<cnt;k++)
        {
          if(substr[k].Length==0)
            continue;

          if(newcols[k] is Altaxo.Data.DoubleColumn)
          {
            // values that can not be parsed are skipped, the cell is left unset
            double parsedDouble;
            if(double.TryParse(substr[k], numberStyles, numberFormatInfo, out parsedDouble))
              ((Altaxo.Data.DoubleColumn)newcols[k])[i] = parsedDouble;
          }
          else if( newcols[k] is Altaxo.Data.DateTimeColumn)
          {
            // values that can not be parsed are skipped, the cell is left unset
            DateTime parsedDateTime;
            if(DateTime.TryParse(substr[k], System.Globalization.CultureInfo.CurrentCulture, System.Globalization.DateTimeStyles.None, out parsedDateTime))
              ((Altaxo.Data.DateTimeColumn)newcols[k])[i] = parsedDateTime;
          }
          else if( newcols[k] is Altaxo.Data.TextColumn)
          {
            ((Altaxo.Data.TextColumn)newcols[k])[i] = substr[k];
          }
          else if(null==newcols[k] || newcols[k] is Altaxo.Data.DBNullColumn)
          {
            // the column type is not determined yet: try number first, then date/time, and fall back to text
            double val;
            DateTime valDateTime;

            // use the same number format as for the columns of known type, so that the detected decimal separator is honored here, too
            if(double.TryParse(substr[k], numberStyles, numberFormatInfo, out val))
            {
              Altaxo.Data.DoubleColumn newc = new Altaxo.Data.DoubleColumn();
              newc[i]=val;
              newcols.Replace(k,newc);
            }
            else if(DateTime.TryParse(substr[k], System.Globalization.CultureInfo.CurrentCulture, System.Globalization.DateTimeStyles.None, out valDateTime))
            {
              Altaxo.Data.DateTimeColumn newc = new Altaxo.Data.DateTimeColumn();
              newc[i]=valDateTime;
              newcols.Replace(k, newc);
            }
            else
            {
              Altaxo.Data.TextColumn newc = new Altaxo.Data.TextColumn();
              newc[i]=substr[k];
              newcols.Replace(k,newc);
            }
          }
        } // end of for all cols
      } // end of for all lines

      // insert the new columns or replace the old ones
      table.Suspend();
      try
      {
        bool tableWasEmptyBefore = table.DataColumns.ColumnCount==0;
        for(int i=0;i<newcols.ColumnCount;i++)
        {
          if(newcols[i] is Altaxo.Data.DBNullColumn) // if the type is undefined, use a new DoubleColumn
            table.DataColumns.CopyOrReplaceOrAdd(i,new Altaxo.Data.DoubleColumn(), newcols.GetColumnName(i));
          else
            table.DataColumns.CopyOrReplaceOrAdd(i,newcols[i], newcols.GetColumnName(i));

          // set the first column as x-column if the table was empty before, and there are more than one column
          if(i==0 && tableWasEmptyBefore && newcols.ColumnCount>1)
            table.DataColumns.SetColumnKind(0,Altaxo.Data.ColumnKind.X);
        } // end for loop

        // add the property columns
        for(int i=0;i<newpropcols.ColumnCount;i++)
        {
          table.PropCols.CopyOrReplaceOrAdd(i,newpropcols[i], newpropcols.GetColumnName(i));
        }
      }
      finally
      {
        // make sure the table is resumed even if inserting the columns fails
        table.Resume();
      }
    } // end of function ImportAscii