/// <summary>
/// Builds the list of characters that look like field delimiters in the sample data.
/// </summary>
/// <remarks>
/// Every character that is neither a letter, a digit nor a space is counted across all
/// analysed lines. A character is kept as a possible delimiter when its total count is at
/// least the number of analysed lines. For each kept character the indicators are computed
/// through <c>Indicators.CalculateByDelimiter</c>, and it is returned only when its average
/// is above 1 and its deviation is below <c>MinDelimitedDeviation</c>.
/// </remarks>
/// <param name="data">Sample data; the first entry of every inner array is skipped as a possible header line.</param>
/// <returns>The accepted delimiter candidates; empty when no non-empty line was analysed.</returns>
private List<DelimiterInfo> GetDelimiters(string[][] data)
{
    var charCounts = new Dictionary<char, int>();
    int analysedLines = 0;

    foreach (string[] sample in data)
    {
        // Start at index 1: the first line of each sample may be a header and is never analysed.
        for (int row = 1; row < sample.Length; row++)
        {
            string text = sample[row];

            // Empty lines do not count towards the analysed total.
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            analysedLines++;

            foreach (char ch in text)
            {
                // Letters, digits and spaces are never considered as delimiters.
                if (ch == ' ' || char.IsLetterOrDigit(ch))
                {
                    continue;
                }

                // TryGetValue leaves 'seen' at 0 for a character not met before.
                int seen;
                charCounts.TryGetValue(ch, out seen);
                charCounts[ch] = seen + 1;
            }
        }
    }

    var result = new List<DelimiterInfo>();

    // Nothing was analysed, so there is nothing to rank.
    if (analysedLines == 0)
    {
        return result;
    }

    foreach (var entry in charCounts)
    {
        // Drop characters with a low occurrence count (fewer occurrences than analysed lines).
        if (entry.Value < analysedLines)
        {
            continue;
        }

        char candidate = entry.Key;
        var stats = Indicators.CalculateByDelimiter(candidate, data, QuotedChar);

        // Adjust the deviation based on the number of lines when the sample is small.
        if (analysedLines < MinSampleData)
        {
            double scale = Math.Min(1, ((double)analysedLines) / MinSampleData);
            stats.Deviation = stats.Deviation * scale;
        }

        if (stats.Avg > 1 && stats.Deviation < MinDelimitedDeviation)
        {
            result.Add(new DelimiterInfo(candidate, stats.Avg, stats.Max, stats.Min, stats.Deviation));
        }
    }

    return result;
}
/// <summary>
/// Calculates statistics from the sample data for the supplied delimiter.
/// </summary>
/// <param name="data">Sample data handed to <c>Indicators.CalculateByDelimiter</c>.</param>
/// <param name="delimiter">Delimiter character to evaluate.</param>
/// <returns>
/// A <c>DelimiterInfo</c> for <paramref name="delimiter"/> carrying the computed
/// average, maximum, minimum and deviation values.
/// </returns>
private DelimiterInfo GetDelimiterInfo(string[][] data, char delimiter)
{
    // QuotedChar is passed along to the indicator calculation together with the sample data.
    var stats = Indicators.CalculateByDelimiter(delimiter, data, QuotedChar);

    return new DelimiterInfo(
        delimiter,
        stats.Avg,
        stats.Max,
        stats.Min,
        stats.Deviation);
}