Esempio n. 1
0
        /// <summary>
        /// Scans the sample data for characters that could serve as field
        /// delimiters and returns the ones whose statistics pass the
        /// acceptance thresholds.
        /// </summary>
        /// <param name="data">
        /// Sample text as an array of string arrays. Element 0 of every inner
        /// array is skipped (treated as a possible header line); the remaining
        /// elements are analysed as individual lines.
        /// </param>
        /// <returns>
        /// The accepted delimiter candidates. The list is empty when no
        /// non-empty line was available for analysis.
        /// </returns>
        private List<DelimiterInfo> GetDelimiters(string[][] data)
        {
            var occurrences = new Dictionary<char, int>();
            int analysedLines = 0;

            foreach (string[] sample in data)
            {
                // Start at 1: index 0 may be a header line and is never analysed.
                for (int row = 1; row < sample.Length; row++)
                {
                    string text = sample[row];
                    if (string.IsNullOrEmpty(text))
                    {
                        continue;
                    }

                    analysedLines++;

                    foreach (char symbol in text)
                    {
                        // Letters, digits and spaces are never counted as delimiter candidates.
                        if (symbol == ' ' || char.IsLetterOrDigit(symbol))
                        {
                            continue;
                        }

                        // TryGetValue leaves 'seen' at 0 for a character not met before,
                        // so the same statement handles both the first and later sightings.
                        int seen;
                        occurrences.TryGetValue(symbol, out seen);
                        occurrences[symbol] = seen + 1;
                    }
                }
            }

            var accepted = new List<DelimiterInfo>();

            // Nothing was analysed, so there is nothing to evaluate.
            if (analysedLines == 0)
            {
                return accepted;
            }

            foreach (KeyValuePair<char, int> entry in occurrences)
            {
                // Drop characters whose total count is below the number of analysed lines.
                if (entry.Value < analysedLines)
                {
                    continue;
                }

                char delimiter = entry.Key;
                var stats = Indicators.CalculateByDelimiter(delimiter, data, QuotedChar);

                // With fewer lines than MinSampleData, scale the deviation by the
                // fraction of the minimum sample size that was actually available.
                if (analysedLines < MinSampleData)
                {
                    double sampleRatio = ((double)analysedLines) / MinSampleData;
                    stats.Deviation *= Math.Min(1, sampleRatio);
                }

                bool passesThresholds = stats.Avg > 1 &&
                                        stats.Deviation < MinDelimitedDeviation;
                if (passesThresholds)
                {
                    accepted.Add(new DelimiterInfo(delimiter, stats.Avg, stats.Max, stats.Min, stats.Deviation));
                }
            }

            return accepted;
        }
Esempio n. 2
0
        /// <summary>
        /// Computes the statistics of the supplied delimiter over the sample
        /// data and packages them as a <see cref="DelimiterInfo"/>.
        /// </summary>
        /// <param name="data">Sample data passed through to <c>Indicators.CalculateByDelimiter</c>.</param>
        /// <param name="delimiter">The delimiter character to evaluate.</param>
        /// <returns>
        /// A <see cref="DelimiterInfo"/> built from the delimiter and the
        /// calculated average, maximum, minimum and deviation values.
        /// </returns>
        private DelimiterInfo GetDelimiterInfo(string[][] data, char delimiter)
        {
            var stats = Indicators.CalculateByDelimiter(delimiter, data, QuotedChar);

            var info = new DelimiterInfo(delimiter, stats.Avg, stats.Max, stats.Min, stats.Deviation);
            return info;
        }