Example #1
0
        /// <summary>
        /// Counts how often the candidate header values reappear, position by position,
        /// in the rows after the first one, and reports whether that count is high
        /// enough to suggest the "header" is really ordinary data.
        /// </summary>
        /// <param name="info">Supplies the delimiter character used to split each row.</param>
        /// <param name="headerValues">The already-split values of the candidate header line.</param>
        /// <param name="rows">The sampled lines; the first element is never compared.</param>
        /// <returns>
        /// <c>true</c> when the number of position-wise matches is at least 25% of
        /// <c>rows.Length</c>; otherwise <c>false</c>. An empty <paramref name="rows"/>
        /// yields <c>true</c> because 0 &gt;= 0.
        /// </returns>
        bool HeadersInData(DelimiterInfo info, string[] headerValues, string[] rows)
        {
            // The separator is the same for every row, so allocate it once.
            var separators = new char[] { info.Delimiter };
            var duplicate  = 0;

            // Start at index 1: the first row is skipped.
            for (int rowIndex = 1; rowIndex < rows.Length; rowIndex++)
            {
                var values = rows[rowIndex].Split(separators);
                if (values.Length != headerValues.Length)
                {
                    // A different column count cannot be compared position by position.
                    continue;
                }

                for (int i = 0; i < values.Length; i++)
                {
                    if (values[i] == headerValues[i])
                    {
                        duplicate++;
                    }
                }
            }

            // The threshold is relative to the total row count, skipped first row included.
            return duplicate >= rows.Length * 0.25;
        }
        /// <summary>
        /// Re-weights the confidence of a detected format according to its delimiter:
        /// characters that commonly occur inside data are penalized, the usual field
        /// separators are boosted, and every other delimiter is left untouched.
        /// </summary>
        /// <param name="format">Format whose <c>mConfidence</c> is overwritten from its <c>Confidence</c>.</param>
        /// <param name="info">Supplies the delimiter character being judged.</param>
        private void AdjustConfidence(RecordFormatInfo format, DelimiterInfo info)
        {
            var delimiter = info.Delimiter;

            if (delimiter == '"' || delimiter == '\'')
            {
                // Quote identifiers: strongest penalty.
                format.mConfidence = (int)(format.Confidence * 0.2);
            }
            else if (delimiter == '/' || delimiter == '.')
            {
                // Date/URL separator and decimal separator.
                format.mConfidence = (int)(format.Confidence * 0.4);
            }
            else if (delimiter == '@' || delimiter == '&' || delimiter == '=')
            {
                // Characters typical of e-mail addresses and URLs.
                format.mConfidence = (int)(format.Confidence * 0.6);
            }
            else if (delimiter == '-')
            {
                // The other common date separator.
                format.mConfidence = (int)(format.Confidence * 0.7);
            }
            else if (delimiter == ',' || delimiter == ';' || delimiter == '\t' || delimiter == '~')
            {
                // Favour the usual separators, capping the result at 100.
                format.mConfidence = (int)Math.Min(100, format.Confidence * 1.15);
            }
        }
Example #3
0
 /// <summary>
 /// Tells whether the first line of every sample matches the first line of the
 /// first sample, as judged by <c>SameHeaders</c>.
 /// </summary>
 /// <param name="info">Delimiter information forwarded to <c>SameHeaders</c>.</param>
 /// <param name="sampleData">One array of lines per sample.</param>
 /// <returns><c>false</c> at the first mismatch; <c>true</c> otherwise, including when there are fewer than two samples.</returns>
 bool SameFirstLine(DelimiterInfo info, string[][] sampleData)
 {
     var index = 1;
     while (index < sampleData.Length)
     {
         var matchesFirst = SameHeaders(info, sampleData[0][0], sampleData[index][0]);
         if (!matchesFirst)
         {
             return false;
         }
         index++;
     }

     return true;
 }
Example #4
0
        /// <summary>
        /// Decides whether the sampled data starts with a header line.
        /// </summary>
        /// <param name="info">Supplies the delimiter used to split the first line.</param>
        /// <param name="sampleData">One array of lines per sample.</param>
        /// <returns>
        /// With two or more samples, the result of <c>SameFirstLine</c>. With exactly one
        /// sample, <c>true</c> only when <c>AreAllHeaders</c> accepts the first line and
        /// <c>HeadersInData</c> does not find its values in the sample. With no samples, <c>false</c>.
        /// </returns>
        bool DetectIfContainsHeaders(DelimiterInfo info, string[][] sampleData)
        {
            if (sampleData.Length == 0)
            {
                return false;
            }

            if (sampleData.Length > 1)
            {
                return SameFirstLine(info, sampleData);
            }

            // Exactly one sample: inspect its first line directly.
            var candidateHeaders = sampleData[0][0].Split(new char[] { info.Delimiter });

            // Original note: headers that start with numbers are almost surely data, so no header is present.
            if (AreAllHeaders(candidateHeaders) == false)
            {
                return false;
            }

            return !HeadersInData(info, candidateHeaders, sampleData[0]);
        }
        /// <summary>
        /// Re-weights the confidence of a detected format according to its delimiter.
        /// Delimiters that often appear inside data get a penalty factor, the usual
        /// separators (comma, semicolon, tab) get a capped boost, and any other
        /// delimiter leaves the format unchanged.
        /// </summary>
        /// <param name="format">Format whose <c>mConfidence</c> is overwritten from its <c>Confidence</c>.</param>
        /// <param name="info">Supplies the delimiter character being judged.</param>
        private void AdjustConfidence(RecordFormatInfo format, DelimiterInfo info)
        {
            double penalty;

            switch (info.Delimiter)
            {
                case ',':  // Help the , ; tab to be confident
                case ';':
                case '\t':
                    // The boost is the only adjustment that is capped at 100.
                    format.mConfidence = (int)Math.Min(100, format.Confidence * 1.15);
                    return;

                case '"':  // quote identifier
                case '\'': // quote identifier
                    penalty = 0.2;
                    break;

                case '/': // date delimiter, also found in URLs
                case '.': // decimal separator
                    penalty = 0.4;
                    break;

                case '@': // e-mail separator
                case '&': // sits next to letters and appears in URLs
                case '=': // appears in URLs
                    penalty = 0.6;
                    break;

                case '-': // the other date separator
                    penalty = 0.7;
                    break;

                default:
                    // Any other delimiter keeps its confidence as is.
                    return;
            }

            format.mConfidence = (int)(format.Confidence * penalty);
        }
Example #6
0
 /// <summary>
 /// Compares two lines after removing every occurrence of the delimiter and
 /// trimming leading and trailing whitespace.
 /// </summary>
 /// <param name="info">Supplies the delimiter to strip from both lines.</param>
 /// <param name="line1">First line to compare.</param>
 /// <param name="line2">Second line to compare.</param>
 /// <returns><c>true</c> when the stripped, trimmed lines are equal.</returns>
 bool SameHeaders(DelimiterInfo info, string line1, string line2)
 {
     var strippedFirst  = line1.Replace(info.Delimiter.ToString(), "").Trim();
     var strippedSecond = line2.Replace(info.Delimiter.ToString(), "").Trim();

     return strippedFirst == strippedSecond;
 }
 /// <summary>
 /// Tells whether two lines are equal once the delimiter has been removed from
 /// both and the surrounding whitespace trimmed.
 /// </summary>
 /// <param name="info">Supplies the delimiter to strip from both lines.</param>
 /// <param name="line1">First line to compare.</param>
 /// <param name="line2">Second line to compare.</param>
 /// <returns><c>true</c> when the stripped, trimmed lines are equal.</returns>
 bool SameHeaders(DelimiterInfo info, string line1, string line2)
 {
     var delimiterText = info.Delimiter.ToString ();
     var left = line1.Replace (delimiterText, "").Trim ();
     var right = line2.Replace (delimiterText, "").Trim ();
     return string.Equals (left, right);
 }
 /// <summary>
 /// Checks that the first line of each sample after the first matches the first
 /// line of the first sample, as judged by <c>SameHeaders</c>.
 /// </summary>
 /// <param name="info">Delimiter information forwarded to <c>SameHeaders</c>.</param>
 /// <param name="sampleData">One array of lines per sample.</param>
 /// <returns><c>true</c> when no mismatch is found, including when there are fewer than two samples.</returns>
 bool SameFirstLine(DelimiterInfo info, string[][] sampleData)
 {
     var allMatch = true;

     // The flag in the loop condition stops the scan at the first mismatch.
     for (int sample = 1; allMatch && sample < sampleData.Length; sample++) {
         allMatch = SameHeaders (info, sampleData [0][0], sampleData [sample][0]);
     }

     return allMatch;
 }
        /// <summary>
        /// Reports whether the candidate header values show up again, in the same
        /// column positions, in the rows that follow the first one.
        /// </summary>
        /// <param name="info">Supplies the delimiter character used to split each row.</param>
        /// <param name="headerValues">The already-split values of the candidate header line.</param>
        /// <param name="rows">The sampled lines; the first element is never compared.</param>
        /// <returns>
        /// <c>true</c> when the count of position-wise matches reaches 25% of
        /// <c>rows.Length</c>; otherwise <c>false</c>.
        /// </returns>
        bool HeadersInData(DelimiterInfo info, string[] headerValues, string[] rows)
        {
            // Built once: the separator does not change from row to row.
            var separator = new char[]{ info.Delimiter };
            var duplicate = 0;

            // Index 0 is skipped, so the scan starts at 1.
            for (int r = 1; r < rows.Length; r++) {
                var values = rows [r].Split (separator);

                // Only rows with the same column count as the header are compared.
                if (values.Length == headerValues.Length) {
                    for (int i = 0; i < values.Length; i++) {
                        if (values [i] == headerValues [i]) {
                            duplicate++;
                        }
                    }
                }
            }

            // The threshold is measured against all rows, the skipped first one included.
            return duplicate >= rows.Length * 0.25;
        }
        /// <summary>
        /// Decides whether the sampled data begins with a header line.
        /// </summary>
        /// <param name="info">Supplies the delimiter used to split the first line.</param>
        /// <param name="sampleData">One array of lines per sample.</param>
        /// <returns>
        /// No samples: <c>false</c>. One sample: <c>true</c> only when <c>AreAllHeaders</c>
        /// accepts the first line and <c>HeadersInData</c> does not find its values in the
        /// sample. Two or more samples: the result of <c>SameFirstLine</c>.
        /// </returns>
        bool DetectIfContainsHeaders(DelimiterInfo info, string[][] sampleData)
        {
            switch (sampleData.Length) {
            case 0:
                return false;

            case 1:
                var headerCandidate = sampleData [0] [0].Split (new char[]{ info.Delimiter });

                // Original note: headers that start with numbers are almost surely data, so no header is present.
                if (AreAllHeaders (headerCandidate) == false) {
                    return false;
                }

                return !HeadersInData (info, headerCandidate, sampleData [0]);

            default:
                // Several samples: compare their first lines with each other.
                return SameFirstLine (info, sampleData);
            }
        }