/// <summary>
/// Analyzes the first <code>nLines</code> lines of the ascii stream. The analysis selects the
/// best matching column delimiter (tab, comma or semicolon), the recognized line structure,
/// the number of main header lines, and the decimal separator statistics.
/// </summary>
/// <param name="nLines">The maximum number of lines to analyze. It is not an error if the stream contains fewer lines than specified here.</param>
/// <param name="defaultImportOptions">The default import options. A clone of them (obtained via <c>Clone()</c>) is filled in and returned.</param>
/// <returns>Import options that can be used in a following step to read in the ascii stream. Null is returned if no line could be read (empty stream, or <paramref name="nLines"/> is not positive).</returns>
public AsciiImportOptions Analyze(int nLines, AsciiImportOptions defaultImportOptions)
{
    // Candidate separators are indexed 0 = tab, 1 = comma, 2 = semicolon (see the delimiter mapping below).
    const int separatorCount = 3;

    // Always analyze from the very beginning of the stream.
    stream.Position = 0;

    // The third argument enables detection of the encoding from byte order marks;
    // the system default encoding is used otherwise.
    // NOTE(review): the reader is not disposed here. Disposing a StreamReader created with this
    // constructor also closes the underlying stream, which presumably has to stay open for the
    // following import step - confirm against the callers before adding a using block.
    System.IO.StreamReader reader = new System.IO.StreamReader(stream, System.Text.Encoding.Default, true);

    // Run the per-line analysis on at most nLines lines.
    System.Collections.ArrayList result = new System.Collections.ArrayList();
    for (int i = 0; i < nLines; i++)
    {
        string sLine = reader.ReadLine();
        if (null == sLine)
        {
            break; // end of stream reached before nLines lines were read
        }

        result.Add(new AsciiLineAnalyzer(i, sLine));
    }

    if (result.Count == 0)
    {
        return null; // there is nothing to analyze
    }

    // For every candidate separator, determine its preferred line structure and the line count returned by GetPriorityOf.
    NumberAndStructure[] st = new NumberAndStructure[separatorCount];
    for (int i = 0; i < separatorCount; i++)
    {
        st[i].nLines = GetPriorityOf(result, (AsciiLineAnalyzer.Separation)i, ref st[i].structure);
    }

    // Select the separator with the highest weighted score (line count * structure priority).
    // When a score equals the best score so far, the candidate with the larger line count wins.
    // NOTE(review): if every candidate had a negative (or NaN) score, nBestSeparator would stay
    // int.MinValue and the array access below would throw - presumably Priority is never negative; confirm.
    int nMaxLines = int.MinValue;
    double maxprtylines = 0;
    int nBestSeparator = int.MinValue;
    for (int i = 0; i < separatorCount; i++)
    {
        double prtylines = (double)st[i].nLines * st[i].structure.Priority;
        if (prtylines == maxprtylines)
        {
            if (st[i].nLines > nMaxLines)
            {
                nMaxLines = st[i].nLines;
                nBestSeparator = i;
            }
        }
        else if (prtylines > maxprtylines)
        {
            maxprtylines = prtylines;
            nBestSeparator = i;
            nMaxLines = st[i].nLines;
        }
    }

    AsciiImportOptions opt = defaultImportOptions.Clone();
    opt.bDelimited = true;
    opt.cDelimiter = nBestSeparator == 0 ? '\t' : (nBestSeparator == 1 ? ',' : ';');
    opt.recognizedStructure = st[nBestSeparator].structure;

    // Count the header lines: these are the lines preceding the first line whose structure is
    // compatible with the recognized structure. If no analyzed line is compatible, the value
    // ends up as the index of the last analyzed line.
    for (int i = 0; i < result.Count; i++)
    {
        opt.nMainHeaderLines = i;
        if (((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].IsCompatibleWith(opt.recognizedStructure))
        {
            break;
        }
    }

    // Accumulate the decimal separator statistics over all analyzed lines.
    opt.m_DecimalSeparatorCommaCount = 0;
    opt.m_DecimalSeparatorDotCount = 0;
    for (int i = 0; i < result.Count; i++)
    {
        opt.m_DecimalSeparatorDotCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorDotCount;
        opt.m_DecimalSeparatorCommaCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorCommaCount;
    }

    return opt;
}
/// <summary>
/// Analyzes the first <code>nLines</code> lines of the ascii stream. The analysis selects the
/// best matching column delimiter (tab, comma or semicolon), the recognized line structure,
/// the number of main header lines, and the decimal separator statistics.
/// </summary>
/// <param name="nLines">The maximum number of lines to analyze. It is not an error if the stream contains fewer lines than specified here.</param>
/// <param name="defaultImportOptions">The default import options. A clone of them (obtained via <c>Clone()</c>) is filled in and returned.</param>
/// <returns>Import options that can be used in a following step to read in the ascii stream. Null is returned if no line could be read (empty stream, or <paramref name="nLines"/> is not positive).</returns>
public AsciiImportOptions Analyze(int nLines, AsciiImportOptions defaultImportOptions)
{
    // Candidate separators are indexed 0 = tab, 1 = comma, 2 = semicolon (see the delimiter mapping below).
    const int separatorCount = 3;

    // Always analyze from the very beginning of the stream.
    stream.Position = 0;

    // The third argument enables detection of the encoding from byte order marks;
    // the system default encoding is used otherwise.
    // NOTE(review): the reader is not disposed here. Disposing a StreamReader created with this
    // constructor also closes the underlying stream, which presumably has to stay open for the
    // following import step - confirm against the callers before adding a using block.
    System.IO.StreamReader reader = new System.IO.StreamReader(stream, System.Text.Encoding.Default, true);

    // Run the per-line analysis on at most nLines lines.
    System.Collections.ArrayList result = new System.Collections.ArrayList();
    for (int i = 0; i < nLines; i++)
    {
        string sLine = reader.ReadLine();
        if (null == sLine)
        {
            break; // end of stream reached before nLines lines were read
        }

        result.Add(new AsciiLineAnalyzer(i, sLine));
    }

    if (result.Count == 0)
    {
        return null; // there is nothing to analyze
    }

    // For every candidate separator, determine its preferred line structure and the line count returned by GetPriorityOf.
    NumberAndStructure[] st = new NumberAndStructure[separatorCount];
    for (int i = 0; i < separatorCount; i++)
    {
        st[i].nLines = GetPriorityOf(result, (AsciiLineAnalyzer.Separation)i, ref st[i].structure);
    }

    // Select the separator with the highest weighted score (line count * structure priority).
    // When a score equals the best score so far, the candidate with the larger line count wins.
    // NOTE(review): if every candidate had a negative (or NaN) score, nBestSeparator would stay
    // int.MinValue and the array access below would throw - presumably Priority is never negative; confirm.
    int nMaxLines = int.MinValue;
    double maxprtylines = 0;
    int nBestSeparator = int.MinValue;
    for (int i = 0; i < separatorCount; i++)
    {
        double prtylines = (double)st[i].nLines * st[i].structure.Priority;
        if (prtylines == maxprtylines)
        {
            if (st[i].nLines > nMaxLines)
            {
                nMaxLines = st[i].nLines;
                nBestSeparator = i;
            }
        }
        else if (prtylines > maxprtylines)
        {
            maxprtylines = prtylines;
            nBestSeparator = i;
            nMaxLines = st[i].nLines;
        }
    }

    AsciiImportOptions opt = defaultImportOptions.Clone();
    opt.bDelimited = true;
    opt.cDelimiter = nBestSeparator == 0 ? '\t' : (nBestSeparator == 1 ? ',' : ';');
    opt.recognizedStructure = st[nBestSeparator].structure;

    // Count the header lines: these are the lines preceding the first line whose structure is
    // compatible with the recognized structure. If no analyzed line is compatible, the value
    // ends up as the index of the last analyzed line.
    for (int i = 0; i < result.Count; i++)
    {
        opt.nMainHeaderLines = i;
        if (((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].IsCompatibleWith(opt.recognizedStructure))
        {
            break;
        }
    }

    // Accumulate the decimal separator statistics over all analyzed lines.
    opt.m_DecimalSeparatorCommaCount = 0;
    opt.m_DecimalSeparatorDotCount = 0;
    for (int i = 0; i < result.Count; i++)
    {
        opt.m_DecimalSeparatorDotCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorDotCount;
        opt.m_DecimalSeparatorCommaCount += ((AsciiLineAnalyzer)result[i]).structure[nBestSeparator].DecimalSeparatorCommaCount;
    }

    return opt;
}