Example #1
0
        /// <summary>
        /// Parses the given raw CSV rows in parallel and maps each one into a
        /// <see cref="CsvMappingResult{TEntity}"/> via the configured tokenizer and mapping.
        /// </summary>
        /// <param name="csvData">The raw CSV rows to parse. Must not be <see langword="null"/>.</param>
        /// <returns>A PLINQ query yielding one mapping result per non-empty, non-comment row.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="csvData"/> is <see langword="null"/>.</exception>
        public ParallelQuery <CsvMappingResult <TEntity> > Parse(IEnumerable <Row> csvData)
        {
            if (csvData == null)
            {
                throw new ArgumentNullException(nameof(csvData));
            }

            var query = csvData
                        .Skip(options.SkipHeader ? 1 : 0)
                        .AsParallel();

            // If you want to get the same order as in the CSV file, this option needs to be set:
            if (options.KeepOrder)
            {
                query = query.AsOrdered();
            }

            query = query
                    .WithDegreeOfParallelism(options.DegreeOfParallelism)
                    .Where(row => !string.IsNullOrWhiteSpace(row.Data));

            // Ignore lines that start with a comment character. Ordinal comparison is
            // used deliberately (CA1310): a comment marker is not linguistic text, and
            // the culture-sensitive StartsWith overload can mis-match under some
            // cultures (e.g. tr-TR casing/collation rules):
            if (!string.IsNullOrWhiteSpace(options.CommentCharacter))
            {
                query = query.Where(line => !line.Data.StartsWith(options.CommentCharacter, StringComparison.Ordinal));
            }

            return(query
                   .Select(line => new TokenizedRow(line.Index, options.Tokenizer.Tokenize(line.Data)))
                   .Select(fields => mapping.Map(fields)));
        }
Example #2
0
        /// <summary>
        /// Tokenizes a single flat string of values and maps every group of
        /// <paramref name="numberOfProperties"/> consecutive tokens as one row.
        /// </summary>
        /// <param name="rowData">The raw data holding all tokens back-to-back. Must not be <see langword="null"/>.</param>
        /// <param name="numberOfProperties">How many tokens make up one logical row.</param>
        /// <returns>One mapping result per reconstructed row.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="rowData"/> is <see langword="null"/>.</exception>
        public IEnumerable <CsvMappingResult <TEntity> > Parse(string rowData, int numberOfProperties)
        {
            if (rowData == null)
            {
                throw new ArgumentNullException(nameof(rowData));
            }

            // NOTE(review): the entire token array is materialized in memory here,
            // which may be too large for huge inputs; a streaming tokenizer would
            // avoid that.
            var allTokens = options.Tokenizer.Tokenize(rowData);

            // Batch chops the flat token sequence into row-sized groups; the row
            // index is derived from the group's position in the original data.
            var tokenizedRows = allTokens
                                .Batch(numberOfProperties)
                                .Select((group, groupIndex) => new TokenizedRow(groupIndex * numberOfProperties, group.ToArray()));

            return tokenizedRows.Select(tokenizedRow => mapping.Map(tokenizedRow));
        }