/// <summary>
/// Hook for quoted-field detection. Only the fixed-length early exit exists so far;
/// no detection logic follows it.
/// </summary>
/// <param name="format">Candidate format whose class builder is inspected.</param>
/// <param name="data">Sample data; not read by the current implementation.</param>
private void DetectQuoted(RecordFormatInfo format, string[][] data)
 {
     // Fixed-length builders exit immediately; other builders fall through
     // to the (currently empty) remainder of the method.
     bool isFixedLength = format.ClassBuilder is FixedLengthClassBuilder;
     if (isFixedLength)
     {
         return;
     }
 }
Exemple #2
0
        /// <summary>
        /// Appends one delimited-format candidate to <paramref name="res"/> for every
        /// delimiter under consideration.
        /// </summary>
        /// <param name="sampleData">Sample data handed to the delimiter analysis helpers.</param>
        /// <param name="res">List that receives the generated candidates.</param>
        /// <param name="delimiter">
        /// Delimiter to evaluate, or '\0' to let <c>GetDelimiters</c> choose the candidates.
        /// </param>
        private void CreateDelimiterOptions(string[][] sampleData, List<RecordFormatInfo> res, char delimiter)
        {
            List<DelimiterInfo> candidates;
            if (delimiter != '\0')
            {
                // An explicit delimiter yields exactly one candidate.
                candidates = new List<DelimiterInfo>();
                candidates.Add(GetDelimiterInfo(sampleData, delimiter));
            }
            else
            {
                candidates = GetDelimiters(sampleData);
            }

            foreach (DelimiterInfo candidate in candidates)
            {
                // Base score derived from the deviation, then tuned per delimiter character.
                RecordFormatInfo option = new RecordFormatInfo();
                option.mConfidence = (int)((1 - candidate.Deviation) * 100);
                AdjustConfidence(option, candidate);

                string separator = candidate.Delimiter.ToString();
                DelimitedClassBuilder classBuilder = new DelimitedClassBuilder("AutoDetectedClass", separator);
                classBuilder.AddFields(candidate.AvergeByLine + 1);

                option.mClassBuilder = classBuilder;
                res.Add(option);
            }
        }
        /// <summary>
        /// Scales the confidence of <paramref name="format"/> according to the delimiter
        /// character reported by <paramref name="info"/>. Delimiters not listed here leave
        /// the confidence untouched.
        /// </summary>
        /// <param name="format">Candidate whose confidence is rewritten in place.</param>
        /// <param name="info">Supplies the delimiter character being judged.</param>
        private void AdjustConfidence(RecordFormatInfo format, DelimiterInfo info)
        {
            double factor;
            switch (info.Delimiter)
            {
            case '"':      // Quote identifiers
            case '\'':
                factor = 0.2;
                break;

            case '/':      // Date delimiters and URLs
            case '.':      // Decimal separator
                factor = 0.4;
                break;

            case '@':      // Mail addresses
            case '&':      // Sits next to letters and appears in URLs
            case '=':      // Appears in URLs
                factor = 0.6;
                break;

            case '-':      // The other date separator
                factor = 0.7;
                break;

            case ',':      // Favour , ; tab ~
            case ';':
            case '\t':
            case '~':
                factor = 1.15;
                break;

            default:
                // Any other delimiter keeps its current confidence.
                return;
            }

            double scaled = format.Confidence * factor;
            if (factor > 1)
            {
                // Only the boost is capped at 100; penalties are applied as-is.
                scaled = Math.Min(100, scaled);
            }

            format.mConfidence = (int)scaled;
        }
Exemple #4
0
        // DELIMITED

        /// <summary>
        /// Appends one delimited-format candidate to <paramref name="res"/> for every
        /// delimiter under consideration, naming the fields after the header entries when
        /// the file is treated as having headers.
        /// </summary>
        /// <param name="sampleData">
        /// Sample data handed to the delimiter analysis helpers. Its first entry
        /// (<c>sampleData[0][0]</c>) is used as the header line when headers are present.
        /// </param>
        /// <param name="res">List that receives the generated candidates.</param>
        /// <param name="delimiter">
        /// Delimiter to evaluate, or '\0' (the default) to let <c>GetDelimiters</c> choose the candidates.
        /// </param>
        private void CreateDelimiterOptions(string[][] sampleData, List<RecordFormatInfo> res, char delimiter = '\0')
        {
            List<DelimiterInfo> delimiters;
            if (delimiter == '\0')
            {
                delimiters = GetDelimiters(sampleData);
            }
            else
            {
                delimiters = new List<DelimiterInfo> { GetDelimiterInfo(sampleData, delimiter) };
            }

            foreach (var info in delimiters)
            {
                var format = new RecordFormatInfo
                {
                    mConfidence = (int)((1 - info.Deviation) * 100)
                };
                AdjustConfidence(format, info);

                // An explicit FileHasHeaders setting wins; otherwise fall back to detection.
                bool fileHasHeaders = FileHasHeaders.HasValue
                    ? FileHasHeaders.Value
                    : DetectIfContainsHeaders(info, sampleData);

                var builder = new DelimitedClassBuilder("AutoDetectedClass", info.Delimiter.ToString())
                {
                    IgnoreFirstLines = fileHasHeaders ? 1 : 0
                };

                // Header names are only needed when the first line is a header row. Splitting
                // lazily avoids wasted work and keeps an empty sample from throwing on the
                // sampleData[0][0] access.
                string[] headerNames = new string[0];
                if (fileHasHeaders
                    && sampleData != null
                    && sampleData.Length > 0
                    && sampleData[0] != null
                    && sampleData[0].Length > 0
                    && sampleData[0][0] != null)
                {
                    headerNames = sampleData[0][0].Split(info.Delimiter);
                }

                for (int i = 0; i < info.Max + 1; i++)
                {
                    // Generated fallback name: "Field 001", "Field 002", ...
                    string name = "Field " + (i + 1).ToString().PadLeft(3, '0');
                    if (i < headerNames.Length)
                    {
                        name = headerNames[i];
                    }

                    var field = builder.AddField(StringHelper.ToValidIdentifier(name));

                    // Fields at an index above info.Min are flagged optional.
                    if (i > info.Min)
                    {
                        field.FieldOptional = true;
                    }
                }

                format.mClassBuilder = builder;

                res.Add(format);
            }
        }
Exemple #5
0
        // FIXED LENGTH
        /// <summary>
        /// Appends a single fixed-length candidate to <paramref name="res"/>. Its confidence
        /// is <c>max(0, 1 - Deviation / Avg) * 100</c>, using the statistics returned by
        /// <c>Indicators.CalculateAsFixedSize</c>.
        /// </summary>
        /// <param name="data">Sample data passed to the statistics and field-creation helpers.</param>
        /// <param name="res">List that receives the generated candidate.</param>
        private void CreateFixedLengthOptions(string[][] data, List<RecordFormatInfo> res)
        {
            var format = new RecordFormatInfo();
            var stats  = Indicators.CalculateAsFixedSize(data);

            // Dividing by a zero average gives NaN/infinity (or throws for integral types), and
            // casting NaN to int is unspecified. Report zero confidence in that case instead.
            // NOTE(review): assumes Avg is never legitimately negative - confirm against Indicators.
            int confidence = 0;
            if (stats.Avg > 0)
            {
                confidence = (int)(Math.Max(0, 1 - stats.Deviation / stats.Avg) * 100);
            }

            format.mConfidence = confidence;

            var builder = new FixedLengthClassBuilder("AutoDetectedClass");

            CreateFixedLengthFields(data, builder);

            format.mClassBuilder = builder;

            res.Add(format);
        }
        // FIXED LENGTH
        /// <summary>
        /// Appends a single fixed-length candidate to <paramref name="res"/>. Its confidence
        /// is <c>max(0, 1 - deviation / average) * 100</c>, computed from the line-width
        /// helpers.
        /// </summary>
        /// <param name="data">Sample data passed to the width and field-creation helpers.</param>
        /// <param name="res">List that receives the generated candidate.</param>
        private void CreateFixedLengthOptions(string[][] data, List<RecordFormatInfo> res)
        {
            var    format    = new RecordFormatInfo();
            double average   = CalculateAverageLineWidth(data);
            double deviation = CalculateDeviationLineWidth(data, average);

            // A zero (or NaN) average makes deviation / average NaN or infinite, and casting
            // NaN to int is unspecified. Report zero confidence in that case instead.
            int confidence = 0;
            if (average > 0)
            {
                confidence = (int)(Math.Max(0, 1 - deviation / average) * 100);
            }

            format.mConfidence = confidence;

            var builder = new FixedLengthClassBuilder("AutoDetectedClass");

            CreateFixedLengthFields(data, builder);

            format.mClassBuilder = builder;

            res.Add(format);
        }
        /// <summary>
        /// Checks the detected formats: how many there are, the first format's minimum
        /// confidence, and that the first format uses a delimited builder with the expected
        /// delimiter and field count.
        /// </summary>
        /// <param name="formats">Detected formats; only the first entry is inspected in detail.</param>
        /// <param name="delimiter">Delimiter the first format's builder must report.</param>
        /// <param name="fields">Field count the first format's builder must report.</param>
        /// <param name="confidence">Minimum confidence of the first format; 0 or less skips the check.</param>
        /// <param name="numFormats">Exact number of formats expected; 0 or less only requires at least one.</param>
        private void AssertFormat(RecordFormatInfo[] formats,
            string delimiter,
            int fields,
            int confidence,
            int numFormats)
        {
            if (numFormats <= 0)
            {
                Assert.IsTrue(formats.Length > 0);
            }
            else
            {
                Assert.AreEqual(numFormats, formats.Length);
            }

            // Everything below concerns the first format only.
            RecordFormatInfo best = formats[0];

            if (confidence > 0)
            {
                Assert.IsTrue(best.Confidence >= confidence);
            }

            Assert.IsTrue(best.ClassBuilder is DelimitedClassBuilder);

            DelimitedClassBuilder delimited = (DelimitedClassBuilder) best.ClassBuilder;
            Assert.AreEqual(delimiter, delimited.Delimiter);
            Assert.AreEqual(fields, best.ClassBuilder.FieldCount);
        }
 /// <summary>
 /// Empty placeholder: the body contains no statements, so calling it has no effect.
 /// NOTE(review): judging by the name it is presumably meant to detect optional fields - confirm.
 /// </summary>
 private void DetectOptionals(RecordFormatInfo option, string[][] data)
 {
 }
        // FIXED LENGTH
        /// <summary>
        /// Appends a single fixed-length candidate to <paramref name="res"/>. Its confidence
        /// is <c>max(0, 1 - deviation / average) * 100</c>, computed from the line-width
        /// helpers.
        /// </summary>
        /// <param name="data">Sample data passed to the width and field-creation helpers.</param>
        /// <param name="res">List that receives the generated candidate.</param>
        private void CreateFixedLengthOptions(string[][] data, List<RecordFormatInfo> res)
        {
            RecordFormatInfo format = new RecordFormatInfo();
            double average = CalculateAverageLineWidth(data);
            double deviation = CalculateDeviationLineWidth(data, average);

            // A zero (or NaN) average makes deviation / average NaN or infinite, and casting
            // NaN to int is unspecified. Report zero confidence in that case instead.
            int confidence = 0;
            if (average > 0)
            {
                confidence = (int)(Math.Max(0, 1 - deviation / average) * 100);
            }

            format.mConfidence = confidence;

            FixedLengthClassBuilder builder = new FixedLengthClassBuilder("AutoDetectedClass");
            CreateFixedLengthFields(data, builder);

            format.mClassBuilder = builder;

            res.Add(format);
        }
        /// <summary>
        /// Appends one delimited-format candidate to <paramref name="res"/> for every
        /// delimiter under consideration, naming the fields after the header entries when
        /// <c>FileHasHeaders</c> is set.
        /// </summary>
        /// <param name="sampleData">
        /// Sample data handed to the delimiter analysis helpers. Its first entry
        /// (<c>sampleData[0][0]</c>) is used as the header line when headers are present.
        /// </param>
        /// <param name="res">List that receives the generated candidates.</param>
        /// <param name="delimiter">
        /// Delimiter to evaluate, or '\0' to let <c>GetDelimiters</c> choose the candidates.
        /// </param>
        private void CreateDelimiterOptions(string[][] sampleData, List<RecordFormatInfo> res, char delimiter)
        {
            List<DelimiterInfo> delimiters = new List<DelimiterInfo>();

            if (delimiter == '\0')
            {
                delimiters = GetDelimiters(sampleData);
            }
            else
            {
                delimiters.Add(GetDelimiterInfo(sampleData, delimiter));
            }

            foreach (DelimiterInfo info in delimiters)
            {
                RecordFormatInfo format = new RecordFormatInfo();
                format.mConfidence = (int)((1 - info.Deviation) * 100);
                AdjustConfidence(format, info);

                DelimitedClassBuilder builder = new DelimitedClassBuilder("AutoDetectedClass", info.Delimiter.ToString());
                builder.IgnoreFirstLines = FileHasHeaders ? 1 : 0;

                // Header names are only needed when the first line is a header row. Splitting
                // lazily avoids wasted work and keeps an empty sample from throwing on the
                // sampleData[0][0] access.
                string[] headerNames = new string[0];
                if (FileHasHeaders
                    && sampleData != null
                    && sampleData.Length > 0
                    && sampleData[0] != null
                    && sampleData[0].Length > 0
                    && sampleData[0][0] != null)
                {
                    headerNames = sampleData[0][0].Split(info.Delimiter);
                }

                for (int i = 0; i < info.Max + 1; i++)
                {
                    // Generated fallback name: "Field 001", "Field 002", ...
                    string name = "Field " + (i + 1).ToString().PadLeft(3, '0');
                    if (i < headerNames.Length)
                    {
                        name = headerNames[i];
                    }

                    var field = builder.AddField(StringHelper.ToValidIdentifier(name));

                    // Fields at an index above info.Min are flagged optional.
                    if (i > info.Min)
                    {
                        field.FieldOptional = true;
                    }
                }

                format.mClassBuilder = builder;

                res.Add(format);
            }
        }
        /// <summary>
        /// Scales the confidence of <paramref name="format"/> according to the delimiter
        /// character reported by <paramref name="info"/>. Delimiters not listed here leave
        /// the confidence untouched.
        /// </summary>
        /// <param name="format">Candidate whose confidence is rewritten in place.</param>
        /// <param name="info">Supplies the delimiter character being judged.</param>
        private void AdjustConfidence(RecordFormatInfo format, DelimiterInfo info)
        {
            var separator = info.Delimiter;

            if (separator == '"' || separator == '\'')
            {
                // Quote identifiers make poor delimiters.
                format.mConfidence = (int)(format.Confidence * 0.2);
            }
            else if (separator == '/' || separator == '.')
            {
                // Date/URL separators and the decimal point.
                format.mConfidence = (int)(format.Confidence * 0.4);
            }
            else if (separator == '@' || separator == '&' || separator == '=')
            {
                // Characters common in mail addresses and URLs.
                format.mConfidence = (int)(format.Confidence * 0.6);
            }
            else if (separator == '-')
            {
                // The other date separator.
                format.mConfidence = (int)(format.Confidence * 0.7);
            }
            else if (separator == ',' || separator == ';' || separator == '\t')
            {
                // Favour the usual delimiters, capping the boosted value at 100.
                format.mConfidence = (int)Math.Min(100, format.Confidence * 1.15);
            }
        }
 /// <summary>
 /// Empty placeholder: the body contains no statements, so calling it has no effect.
 /// NOTE(review): judging by the name it is presumably meant to detect optional fields - confirm.
 /// </summary>
 private void DetectOptionals(RecordFormatInfo option, string[][] data)
 {
 }
 /// <summary>
 /// Placeholder for optional-field detection. Not implemented yet: the body holds only
 /// a TODO, so calling it has no effect.
 /// </summary>
 private void DetectOptionals(RecordFormatInfo option, string[][] data)
 {
     // TODO: Try to detect optional fields
 }
 /// <summary>
 /// Hook for quoted-field detection. Only the fixed-length early exit exists so far;
 /// no detection logic follows it.
 /// </summary>
 /// <param name="format">Candidate format whose class builder is inspected.</param>
 /// <param name="data">Sample data; not read by the current implementation.</param>
 private void DetectQuoted(RecordFormatInfo format, string[][] data)
 {
     // Fixed-length builders exit immediately; other builders fall through
     // to the (currently empty) remainder of the method.
     bool isFixedLength = format.ClassBuilder is FixedLengthClassBuilder;
     if (isFixedLength)
     {
         return;
     }
 }
Exemple #15
0
 /// <summary>
 /// Placeholder for optional-field detection. Not implemented yet: the body holds only
 /// a TODO, so calling it has no effect.
 /// </summary>
 private void DetectOptionals(RecordFormatInfo option, string[][] data)
 {
     // TODO: Try to detect optional fields
 }
Exemple #16
0
 /// <summary>
 /// Placeholder for field-type detection. Not implemented yet: the body holds only
 /// a TODO, so calling it has no effect.
 /// </summary>
 private void DetectTypes(RecordFormatInfo format, string[][] data)
 {
     // TODO: Try to detect possible formats (mostly numbers or dates)
 }
        // FIXED LENGTH
        /// <summary>
        /// Appends a single fixed-length candidate to <paramref name="res"/>. Its confidence
        /// is <c>max(0, 1 - Deviation / Avg) * 100</c>, using the statistics returned by
        /// <c>Indicators.CalculateAsFixedSize</c>.
        /// </summary>
        /// <param name="data">Sample data passed to the statistics and field-creation helpers.</param>
        /// <param name="res">List that receives the generated candidate.</param>
        private void CreateFixedLengthOptions(string[][] data, List<RecordFormatInfo> res)
        {
            var format = new RecordFormatInfo();
            var stats = Indicators.CalculateAsFixedSize(data);

            // Dividing by a zero average gives NaN/infinity (or throws for integral types), and
            // casting NaN to int is unspecified. Report zero confidence in that case instead.
            // NOTE(review): assumes Avg is never legitimately negative - confirm against Indicators.
            int confidence = 0;
            if (stats.Avg > 0)
            {
                confidence = (int)(Math.Max(0, 1 - stats.Deviation / stats.Avg) * 100);
            }

            format.mConfidence = confidence;

            var builder = new FixedLengthClassBuilder("AutoDetectedClass");
            CreateFixedLengthFields(data, builder);

            format.mClassBuilder = builder;

            res.Add(format);
        }
 /// <summary>
 /// Placeholder for field-type detection. Not implemented yet: the body holds only
 /// a TODO, so calling it has no effect.
 /// </summary>
 private void DetectTypes(RecordFormatInfo format, string[][] data)
 {
     // TODO: Try to detect possible formats (mostly numbers or dates)
 }
 /// <summary>
 /// Empty placeholder: the body contains no statements, so calling it has no effect.
 /// NOTE(review): judging by the name it is presumably meant to detect field types - confirm.
 /// </summary>
 private void DetectTypes(RecordFormatInfo format, string[][] data)
 {
 }
 /// <summary>
 /// Empty placeholder: the body contains no statements, so calling it has no effect.
 /// NOTE(review): judging by the name it is presumably meant to detect field types - confirm.
 /// </summary>
 private void DetectTypes(RecordFormatInfo format, string[][] data)
 {
 }