예제 #1
0
        private ParseFileResult ParseDataFile(Configuration configuration, byte[] csvBytes)
        {
            var csvText = ReadTextFromBytes(configuration, csvBytes);

            var rowParser = new RowParser(configuration, ResultsAppender, LocationInfo);

            var(prefaceLines, dataRowReader) = ExtractPrefaceLines(configuration, csvText);

            rowParser.AddPrefaceLines(prefaceLines);

            if (!rowParser.IsPrefaceValid())
            {
                return(ParseFileResult.CannotParse());
            }

            var dataRowCount = 0;

            using (var reader = dataRowReader)
            {
                var separator   = configuration.Separator ?? DefaultFieldSeparator;
                var fieldParser = GetCsvParser(reader, separator);

                while (!fieldParser.EndOfData)
                {
                    rowParser.LineNumber = rowParser.PrefaceLineCount + fieldParser.LineNumber;

                    string[] fields = null;

                    try
                    {
                        fields = fieldParser.ReadFields();
                    }
                    catch (Exception)
                    {
                        if (dataRowCount == 0)
                        {
                            // We'll hit this when the plugin tries to parse a text file that is not CSV, like a JSON document.
                            return(ParseFileResult.CannotParse());
                        }
                    }

                    if (fields == null)
                    {
                        continue;
                    }

                    if (fields.Length > 0 && fields.All(string.IsNullOrEmpty))
                    {
                        continue;
                    }

                    if (fields.Length > 0 && !string.IsNullOrEmpty(configuration.CommentLinePrefix) && fields[0].StartsWith(configuration.CommentLinePrefix))
                    {
                        continue;
                    }

                    if (dataRowCount == 0 && configuration.IsHeaderRowRequired)
                    {
                        try
                        {
                            if (rowParser.IsHeaderFullyParsed(fields))
                            {
                                ++dataRowCount;
                            }

                            continue;
                        }
                        catch (Exception exception)
                        {
                            // Most CSV files have a header.
                            // So a problem mapping the header is a strong indicator that this CSV file is not intended for us.
                            if (exception is AllHeadersMissingException)
                            {
                                // When all headers are missing, then we should exit without logging anything special.
                                // We'll just let the other plugins have a turn
                            }
                            else
                            {
                                // Some of the headers matched, so log a warning before returning CannotParse.
                                // This might be the only hint in the log that the configuration is incorrect.
                                Log.Info($"Partial headers detected: {exception.Message}");
                            }

                            return(ParseFileResult.CannotParse());
                        }
                    }

                    if (IsFooterDetected(configuration, () => string.Join(separator, fields)))
                    {
                        break;
                    }

                    try
                    {
                        rowParser.Parse(fields);
                    }
                    catch (Exception exception)
                    {
                        if (!ResultsAppender.AnyResultsAppended)
                        {
                            throw;
                        }

                        Log.Error($"Ignoring invalid CSV row: {exception.Message}");
                    }

                    ++dataRowCount;
                }

                if (dataRowCount == 0)
                {
                    if (configuration.NoDataRowsExpected)
                    {
                        // When all the data comes from the preface, then this will parse the preface context
                        rowParser.Parse(new string[0]);
                    }
                    else
                    {
                        // No rows were parsed.
                        if (rowParser.RemainingHeaderLines > 0)
                        {
                            return(ParseFileResult.CannotParse("No matching header row was detected."));
                        }

                        return(ParseFileResult.CannotParse());
                    }
                }

                Log.Info($"Configuration='{configuration.Id}' Priority={configuration.Priority} parsed {ResultsAppender.SummaryInfo()}.");

                return(ParseFileResult.SuccessfullyParsedAndDataValid());
            }
        }