Beispiel #1
0
        /// <summary>
        /// Reads JSON objects from the stream and turns each one into a row.
        /// </summary>
        /// <remarks>
        /// Two layouts are accepted: a single JSON array of objects, or one or more top-level
        /// objects written one after another. Empty input produces no rows.
        /// </remarks>
        /// <param name="context">
        /// The context. Not used by this method.
        /// </param>
        /// <param name="rowBuilder">
        /// The row builder that creates a row from the key/value pairs of each object.
        /// </param>
        /// <param name="reader">
        /// The stream to read the JSON text from.
        /// </param>
        /// <param name="fields">
        /// The fields, or <c>null</c> to retrieve all fields.
        /// </param>
        /// <returns>
        /// The rows, produced lazily while the stream is being read.
        /// </returns>
        public IEnumerable<Row> Read(IFileFormatExecutionContext context, IRowBuilder rowBuilder, StreamReader reader, HashSet<string> fields)
        {
            var idx          = 0L;
            var serializer   = JsonSerializer.Create();
            var filterFields = fields == null
                                   ? (Func<KeyValuePair<string, object>, bool>)(kv => true)
                                   : kv => fields.Contains(kv.Key);

            using (var jsonTextReader = new JsonTextReader(reader))
            {
                // Needed so that several top-level objects in one stream can be read in sequence.
                jsonTextReader.SupportMultipleContent = true;

                // No token at all means empty input. Without this guard the code below would try to
                // build the first row from an object that does not exist and fail.
                if (!jsonTextReader.Read())
                {
                    yield break;
                }

                if (jsonTextReader.TokenType == JsonToken.StartArray)
                {
                    // Array layout: step onto each element; the loop ends at the closing bracket.
                    while (jsonTextReader.Read() && jsonTextReader.TokenType == JsonToken.StartObject)
                    {
                        yield return rowBuilder.CreateRow(idx++, serializer.Deserialize<Dictionary<string, object>>(jsonTextReader).Where(filterFields));
                    }
                }
                else
                {
                    // Concatenated layout: the reader is already positioned on the first value.
                    do
                    {
                        yield return rowBuilder.CreateRow(idx++, serializer.Deserialize<Dictionary<string, object>>(jsonTextReader).Where(filterFields));
                    }
                    while (jsonTextReader.Read() && jsonTextReader.TokenType == JsonToken.StartObject);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        ///     Reads a reader as separated values, using the first line as the header.
        /// </summary>
        /// <remarks>
        ///     The separator is taken from the context default "SEPARATOR" and falls back to a comma.
        ///     Lines whose number of values differs from the number of headers are skipped.
        /// </remarks>
        /// <param name="context">
        ///     The context, used to look up the separator.
        /// </param>
        /// <param name="rowBuilder">
        ///     The row builder that creates a row from header/value pairs.
        /// </param>
        /// <param name="reader">
        ///     The stream.
        /// </param>
        /// <param name="fields">
        ///     The fields, or <c>null</c> to retrieve all fields. Not used by this method.
        /// </param>
        /// <returns>
        ///     The rows, produced lazily while the stream is being read.
        /// </returns>
        public IEnumerable<Row> Read(IFileFormatExecutionContext context, IRowBuilder rowBuilder, StreamReader reader, HashSet<string> fields)
        {
            var separator = context.GetDefault("SEPARATOR", false) as string ?? ",";
            var splitter  = GetSplitter(separator);
            var headers   = GetHeaders(splitter, reader, separator);

            // A single empty header means there is nothing to read.
            if (headers.Length == 1 && string.IsNullOrEmpty(headers[0]))
            {
                yield break;
            }

            var idx = 0L;
            string rawLine;

            // ReadLine returns null at the end of the stream. Stopping there (instead of reading once
            // unconditionally) avoids turning that null into an empty value, which would produce a
            // phantom row for a single-column file that contains only its header.
            while ((rawLine = reader.ReadLine()) != null)
            {
                // The separator is appended so the last value is terminated like every other value.
                var line = splitter.Matches($"{rawLine}{separator}")
                           .Cast<Match>()
                           .Select(match => match.Groups[1].Value)
                           .Select(value => value.Trim())

                           // Escaped values lose their first and last character and have doubled quotes collapsed.
                           .Select(value => EscapedString.IsMatch(value) ? value.Substring(1, value.Length - 2).Replace("\"\"", "\"") : value)
                           .ToArray();

                if (line.Length == headers.Length)
                {
                    // ReSharper disable once AccessToDisposedClosure
                    yield return rowBuilder.CreateRow(idx++, headers.Select((header, i) => new KeyValuePair<string, object>(header, line[i])));
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Writes the footer to the file: a single closing "]".
        /// </summary>
        /// <param name="context">
        /// The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer that receives the footer.
        /// </param>
        public void WriteFooter(IFileFormatExecutionContext context, StreamWriter writer)
        {
            //// If no rows were written the writer is still registered in newStreams; drop it here
            //// so the collection does not keep a reference to a finished stream.
            this.newStreams.Remove(writer);

            writer.Write("]");
        }
Beispiel #4
0
        /// <summary>
        ///     Writes rows to the worksheet that is currently active for the writer's stream.
        /// </summary>
        /// <remarks>
        ///     When the active worksheet is full, its columns are auto-fitted, a new worksheet named
        ///     "Data (n)" is added with the same bold header row, and writing continues there.
        /// </remarks>
        /// <param name="context">
        ///     The context, used to log a warning the first time the data spills onto a second sheet.
        /// </param>
        /// <param name="writer">
        ///     The stream writer; its base stream identifies the package to write to.
        /// </param>
        /// <param name="rows">
        ///     The rows to write.
        /// </param>
        /// <param name="upsert">
        ///     True to UPSERT, false to INSERT. Not used by this method.
        /// </param>
        /// <returns>
        ///     The number of rows that were written.
        /// </returns>
        public long WriteRows(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable<Row> rows, bool upsert)
        {
            // Records are written to worksheet row (position + 1) because row 1 holds the header, so
            // a record at this position would land beyond the 1048576 rows a worksheet can hold.
            const int FirstPositionBeyondSheet = 1048576;

            var package = this.packages[writer.BaseStream];
            var sheet   = package.ExcelPackage.Workbook.Worksheets[package.Sheet];

            // Counted as long to match the return type.
            var count = 0L;

            foreach (var row in rows)
            {
                package.RecordPosition++;
                count++;

                if (package.RecordPosition == FirstPositionBeyondSheet)
                {
                    // Finish the full sheet; only the first 100 rows are measured for the column widths.
                    sheet.Cells[1, 1, Math.Min(100, sheet.Dimension.Rows), sheet.Dimension.Columns].AutoFitColumns();

                    // Warn only once, when the second sheet is about to be created.
                    if (package.ExcelPackage.Workbook.Worksheets.Count == 1)
                    {
                        context.Logger.Warning("More than 1048575 records found, exporting to multiple sheets.");
                    }

                    package.Sheet = $"Data ({package.ExcelPackage.Workbook.Worksheets.Count + 1})";
                    package.ExcelPackage.Workbook.Worksheets.Add(package.Sheet);
                    package.RecordPosition = 1;

                    sheet = package.ExcelPackage.Workbook.Worksheets[package.Sheet];

                    // Repeat the bold header row on the new sheet.
                    var idx = 0;

                    foreach (var field in package.Header)
                    {
                        sheet.Cells[1, ++idx].Value         = field;
                        sheet.Cells[1, idx].Style.Font.Bold = true;
                    }
                }

                var col = 0;
                foreach (var column in row.ColumnNames)
                {
                    var value = row[column];
                    var cell  = sheet.Cells[package.RecordPosition + 1, ++col];

                    cell.Value = value;

                    if (value is string)
                    {
                        // "@" is the text format, so strings are not reinterpreted as numbers or dates.
                        cell.Style.Numberformat.Format = "@";
                    }
                    else if (value is DateTime)
                    {
                        cell.Style.Numberformat.Format = "m/d/yy h:mm";
                    }
                    else if (value is Error)
                    {
                        // Error values are highlighted in bold.
                        cell.Style.Font.Bold = true;
                    }
                }
            }

            return count;
        }
Beispiel #5
0
        /// <summary>
        ///     Writes each row as one comma-separated line.
        /// </summary>
        /// <param name="context">
        ///     The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer that receives the lines.
        /// </param>
        /// <param name="rows">
        ///     The rows to write.
        /// </param>
        /// <param name="upsert">
        ///     True to upsert, false to insert. Not used by this method.
        /// </param>
        /// <returns>
        ///     The number of rows that were written.
        /// </returns>
        public long WriteRows(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable<Row> rows, bool upsert)
        {
            long written = 0;

            foreach (var row in rows)
            {
                // Escape every value in column order, then emit them as a single line.
                var cells = row.ColumnNames.Select(name => Escape(row[name]));
                var text  = string.Join(",", cells);

                writer.WriteLine(text);
                written++;
            }

            return written;
        }
Beispiel #6
0
        /// <summary>
        ///     Finishes the workbook: auto-fits the columns of the active sheet, saves the package to
        ///     the writer's base stream and releases the package.
        /// </summary>
        /// <param name="context">
        ///     The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer; its base stream identifies the package and receives the workbook.
        /// </param>
        public void WriteFooter(IFileFormatExecutionContext context, StreamWriter writer)
        {
            var stream  = writer.BaseStream;
            var package = this.packages[stream];

            try
            {
                var sheet = package.ExcelPackage.Workbook.Worksheets[package.Sheet];

                // Dimension is null when the sheet has no cells, in which case there is nothing to fit.
                // Only the first 100 rows are measured for the column widths.
                if (sheet.Dimension != null)
                {
                    sheet.Cells[1, 1, Math.Min(100, sheet.Dimension.Rows), sheet.Dimension.Columns].AutoFitColumns();
                }

                package.ExcelPackage.SaveAs(stream);
            }
            finally
            {
                // Release the package even when fitting or saving fails, so it is neither leaked nor
                // left registered for a stream that is finished.
                package.ExcelPackage.Dispose();
                this.packages.Remove(stream);
            }

            stream.Flush();
        }
Beispiel #7
0
        /// <summary>
        ///     Starts a new workbook for the writer's stream and writes the field names as a bold
        ///     header row on a sheet named "Data".
        /// </summary>
        /// <param name="context">
        ///     The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer; its base stream is the key under which the package is stored.
        /// </param>
        /// <param name="fields">
        ///     The fields that make up the header row.
        /// </param>
        public void WriteHeader(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable<string> fields)
        {
            var stream = writer.BaseStream;

            var package = new Package
            {
                ExcelPackage = new ExcelPackage(),
                Sheet        = "Data",
                Header       = fields.ToArray()
            };

            // Register the package so later calls for the same stream can find it.
            this.packages[stream] = package;

            var headerSheet = package.ExcelPackage.Workbook.Worksheets.Add(package.Sheet);
            var column      = 1;

            foreach (var field in package.Header)
            {
                var cell = headerSheet.Cells[1, column];

                cell.Value           = field;
                cell.Style.Font.Bold = true;

                column++;
            }
        }
Beispiel #8
0
        /// <summary>
        ///     Reads the rows of the first worksheet, using its first row as the header.
        /// </summary>
        /// <remarks>
        ///     Header names are taken from row 1, left to right, up to the first empty cell. Every
        ///     following row up to the last used row becomes one row; empty cells are reported as
        ///     <c>null</c> values. A workbook without worksheets, an empty worksheet, or a worksheet
        ///     without header produces no rows.
        /// </remarks>
        /// <param name="context">
        ///     The context. Not used by this method.
        /// </param>
        /// <param name="rowBuilder">
        ///     The row builder that creates a row from header/value pairs.
        /// </param>
        /// <param name="reader">
        ///     The stream reader; the workbook is loaded from its base stream.
        /// </param>
        /// <param name="fields">
        ///     The fields, or <c>null</c> to retrieve all fields. Not used by this method.
        /// </param>
        /// <returns>
        ///     The rows, produced lazily.
        /// </returns>
        public IEnumerable<Row> Read(IFileFormatExecutionContext context, IRowBuilder rowBuilder, StreamReader reader, HashSet<string> fields)
        {
            using (var package = new ExcelPackage(reader.BaseStream))
            {
                var sheet = package.Workbook.Worksheets.FirstOrDefault();

                // Dimension is null for a worksheet without any cells.
                if (sheet == null || sheet.Dimension == null)
                {
                    yield break;
                }

                var lastColumn = sheet.Dimension.End.Column;
                var lastRow    = sheet.Dimension.End.Row;
                var headers    = Enumerable.Range(1, lastColumn).Select(col => sheet.Cells[1, col].Value?.ToString()).TakeWhile(header => header != null).ToArray();

                // Without header names there are no columns to report.
                if (headers.Length == 0)
                {
                    yield break;
                }

                long idx = 0;

                for (var rowNumber = 2; rowNumber <= lastRow; rowNumber++)
                {
                    // Each cell is addressed by its column index. Enumerating a cell range only yields
                    // cells that hold a value, which would shift values under the wrong header as
                    // soon as a row contains an empty cell.
                    var values = new KeyValuePair<string, object>[headers.Length];

                    for (var i = 0; i < headers.Length; i++)
                    {
                        values[i] = new KeyValuePair<string, object>(headers[i], sheet.Cells[rowNumber, i + 1].Value);
                    }

                    yield return rowBuilder.CreateRow(idx++, values);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Writes rows to the file as comma-separated JSON objects.
        /// </summary>
        /// <remarks>
        /// May be called several times for the same writer. Only the very first row written to a
        /// stream is emitted without a leading comma.
        /// </remarks>
        /// <param name="context">
        /// The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer that receives the JSON text.
        /// </param>
        /// <param name="rows">
        ///     The rows to write.
        /// </param>
        /// <param name="upsert">
        ///     True to upsert, false to insert. Not used by this method.
        /// </param>
        /// <returns>
        /// The number of rows that were written.
        /// </returns>
        public long WriteRows(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable<Row> rows, bool upsert)
        {
            var count      = 0L;
            var serializer = JsonSerializer.Create();

            foreach (var row in rows)
            {
                // The writer is registered in newStreams until its first row is written. It is removed
                // here, when a row is actually about to be written, rather than before the loop:
                // otherwise a call with no rows would use up the marker and the next batch would
                // start with a stray comma.
                if (count > 0 || !this.newStreams.Remove(writer))
                {
                    writer.Write(',');
                }

                serializer.Serialize(writer, row.ToDictionary());
                count++;
            }

            return count;
        }
Beispiel #10
0
        /// <summary>
        ///     Gets the descriptor for this data source from the header row of the first worksheet.
        /// </summary>
        /// <remarks>
        ///     Column names are taken from row 1, left to right, up to the first empty cell; every
        ///     column is described with type <see cref="object" />. A workbook without worksheets
        ///     yields a dynamic data source; an empty worksheet yields a descriptor without columns.
        /// </remarks>
        /// <param name="alias">
        ///     The alias of the data source.
        /// </param>
        /// <param name="context">
        ///     The execution context. Not used by this method.
        /// </param>
        /// <param name="reader">
        ///     The reader; the workbook is loaded from its base stream.
        /// </param>
        /// <returns>
        ///     A completed <see cref="System.Threading.Tasks.Task" /> holding the descriptor.
        /// </returns>
        public Task<IDataSourceDescriptor> GetDataSourceDescriptorAsync(string alias, IFileFormatExecutionContext context, StreamReader reader)
        {
            using (var package = new ExcelPackage(reader.BaseStream))
            {
                var sheet = package.Workbook.Worksheets.FirstOrDefault();

                if (sheet == null)
                {
                    return Task.FromResult(Descriptor.DynamicDataSource(alias));
                }

                // Dimension is null for a worksheet without any cells; treat that as zero columns
                // instead of failing with a null reference.
                var columnCount = sheet.Dimension?.End.Column ?? 0;

                // Materialized before the package is disposed at the end of the using block.
                var headers = Enumerable.Range(1, columnCount).Select(col => sheet.Cells[1, col].Value?.ToString()).TakeWhile(header => header != null).ToArray();

                return Task.FromResult(Descriptor.ForDataSource(alias, headers.Select(h => Descriptor.ForColumn(h, typeof(object)))));
            }
        }
Beispiel #11
0
 /// <summary>
 ///     Checks if the file reader can read this file, judging by the ".csv" file extension.
 /// </summary>
 /// <param name="context">
 ///     The context. Not used by this method.
 /// </param>
 /// <param name="fileName">
 ///     The file name.
 /// </param>
 /// <param name="firstBytes">
 ///     The first bytes of the file. Not used by this method.
 /// </param>
 /// <returns>
 ///     <c>true</c> if the file name ends with ".csv" (ignoring case), <c>false</c> otherwise.
 /// </returns>
 public bool CanReadThisFile(IFileFormatExecutionContext context, string fileName, byte[] firstBytes)
 {
     var hasCsvExtension = fileName.EndsWith(".csv", StringComparison.OrdinalIgnoreCase);

     return hasCsvExtension;
 }
Beispiel #12
0
 /// <summary>
 ///     Writes the footer to the file. This implementation writes nothing.
 /// </summary>
 /// <param name="context">
 ///     The context. Not used by this method.
 /// </param>
 /// <param name="writer">
 ///     The stream writer. Not used by this method.
 /// </param>
 public void WriteFooter(IFileFormatExecutionContext context, StreamWriter writer)
 {
     // Intentionally empty: nothing is written after the rows.
 }
Beispiel #13
0
        /// <summary>
        ///     Gets the descriptor for this data source from the header line of the file.
        /// </summary>
        /// <remarks>
        ///     The separator is taken from the context default "SEPARATOR" and falls back to a comma.
        ///     Empty header names are skipped; every column is described with type <see cref="string" />.
        /// </remarks>
        /// <param name="alias">
        ///     The alias.
        /// </param>
        /// <param name="context">
        ///     The execution context, used to look up the separator.
        /// </param>
        /// <param name="reader">
        ///     The reader the headers are read from.
        /// </param>
        /// <returns>
        ///     A completed <see cref="System.Threading.Tasks.Task" /> holding the descriptor.
        /// </returns>
        public Task<IDataSourceDescriptor> GetDataSourceDescriptorAsync(string alias, IFileFormatExecutionContext context, StreamReader reader)
        {
            var separator = context.GetDefault("SEPARATOR", false) as string ?? ",";
            var splitter  = GetSplitter(separator);

            var columns = GetHeaders(splitter, reader, separator)
                .Where(name => name.Length > 0)
                .Select(name => Descriptor.ForColumn(name, typeof(string)));

            var descriptor = Descriptor.ForDataSource(alias, columns);

            return Task.FromResult(descriptor);
        }
Beispiel #14
0
 /// <summary>
 /// Checks if the file reader can read this file, judging by the ".json" file extension or by
 /// the first byte of the content.
 /// </summary>
 /// <param name="context">
 /// The context. Not used by this method.
 /// </param>
 /// <param name="fileName">
 /// The file name.
 /// </param>
 /// <param name="firstBytes">
 /// The first bytes of the file; only the first one is inspected.
 /// </param>
 /// <returns>
 /// <c>true</c> if the file name ends with ".json" (ignoring case) or the content starts with
 /// an opening brace or bracket, <c>false</c> otherwise.
 /// </returns>
 public bool CanReadThisFile(IFileFormatExecutionContext context, string fileName, byte[] firstBytes)
 {
     // The extension alone is sufficient.
     if (fileName.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
     {
         return true;
     }

     // No content to sniff.
     if (firstBytes.Length == 0)
     {
         return false;
     }

     var leadingByte = firstBytes[0];

     return leadingByte == '{' || leadingByte == '[';
 }
Beispiel #15
0
        /// <summary>
        /// Writes the header to the file: a single opening "[", and registers the writer as a
        /// stream that has not received any rows yet.
        /// </summary>
        /// <param name="context">
        /// The context. Not used by this method.
        /// </param>
        /// <param name="writer">
        ///     The stream writer that receives the header.
        /// </param>
        /// <param name="fields">
        ///     The fields. Not used by this method.
        /// </param>
        public void WriteHeader(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable <string> fields)
        {
            writer.Write("[");

            // Remember the writer so the row writer can tell whether a row is the first one on
            // this stream (the first row is written without a leading comma).
            this.newStreams.Add(writer);
        }
Beispiel #16
0
 /// <summary>
 ///     Writes the header to the file: one line with all field names, each enclosed in double
 ///     quotes and separated by commas.
 /// </summary>
 /// <remarks>
 ///     Double quotes inside a field name are doubled, so the name can be read back unchanged
 ///     by a reader that collapses doubled quotes.
 /// </remarks>
 /// <param name="context">
 ///     The context. Not used by this method.
 /// </param>
 /// <param name="writer">
 ///     The stream writer that receives the header line.
 /// </param>
 /// <param name="fields">
 ///     The fields.
 /// </param>
 public void WriteHeader(IFileFormatExecutionContext context, StreamWriter writer, IEnumerable<string> fields)
 {
     // A null name is written as an empty quoted value, as before.
     var quoted = fields.Select(c => "\"" + (c ?? string.Empty).Replace("\"", "\"\"") + "\"");

     writer.WriteLine(string.Join(",", quoted));
 }
Beispiel #17
0
 /// <summary>
 ///     Checks if the file reader can read this file, judging by the ".xlsx" file extension.
 /// </summary>
 /// <param name="context">
 ///     The context. Not used by this method.
 /// </param>
 /// <param name="fileName">
 ///     The file name.
 /// </param>
 /// <param name="firstBytes">
 ///     The first bytes of the file. Not used by this method.
 /// </param>
 /// <returns>
 ///     <c>true</c> if the file name ends with ".xlsx" (ignoring case), <c>false</c> otherwise.
 /// </returns>
 public bool CanReadThisFile(IFileFormatExecutionContext context, string fileName, byte[] firstBytes)
 {
     // Ordinal, case-insensitive comparison: a file extension is an identifier, not linguistic
     // text, and "REPORT.XLSX" names the same kind of file as "report.xlsx".
     return fileName.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase);
 }
Beispiel #18
0
        /// <summary>
        /// Gets the descriptor for this data source by sampling the JSON objects at the start of
        /// the stream.
        /// </summary>
        /// <remarks>
        /// At most <c>context.MaxRowsToScan</c> objects are inspected, whether they are elements of
        /// a single JSON array or top-level objects written one after another. A field keeps the
        /// runtime type of its values as long as every sampled value has that type; otherwise it
        /// is reported as <see cref="object"/>. Empty input yields a descriptor without columns.
        /// </remarks>
        /// <param name="alias">
        /// The alias.
        /// </param>
        /// <param name="context">
        ///     The execution context, which supplies the maximum number of rows to scan.
        /// </param>
        /// <param name="reader">
        ///     The reader the JSON text is read from.
        /// </param>
        /// <returns>
        /// A completed <see cref="Task"/> holding the descriptor.
        /// </returns>
        public Task<IDataSourceDescriptor> GetDataSourceDescriptorAsync(string alias, IFileFormatExecutionContext context, StreamReader reader)
        {
            var maxRowsToScan = context.MaxRowsToScan;

            using (var jsonReader = new JsonTextReader(reader))
            {
                var fields     = new HashSet<string>();
                var serializer = new JsonSerializer();
                var types      = new Dictionary<string, Type>();
                var lines      = 0;

                // Needed so that several top-level objects in one stream can be read in sequence.
                jsonReader.SupportMultipleContent = true;

                // Read() returns false when the input holds no token at all; there is nothing to
                // sample then, and the descriptor is returned without columns.
                if (jsonReader.Read())
                {
                    if (jsonReader.TokenType == JsonToken.StartArray)
                    {
                        // Array layout: step onto each element until the limit or the closing bracket.
                        while (jsonReader.Read() && jsonReader.TokenType == JsonToken.StartObject && ++lines <= maxRowsToScan)
                        {
                            MergeFieldTypes(serializer.Deserialize<Dictionary<string, object>>(jsonReader), fields, types);
                        }
                    }
                    else
                    {
                        // Concatenated layout: the reader is already positioned on the first value.
                        // The limit is checked the same way as for arrays, so both layouts sample
                        // the same number of rows.
                        do
                        {
                            if (++lines > maxRowsToScan)
                            {
                                break;
                            }

                            MergeFieldTypes(serializer.Deserialize<Dictionary<string, object>>(jsonReader), fields, types);
                        }
                        while (jsonReader.Read() && jsonReader.TokenType == JsonToken.StartObject);
                    }
                }

                return Task.FromResult(Descriptor.ForDataSource(alias, fields.Select(f => Descriptor.ForColumn(f, types[f]))));
            }
        }

        /// <summary>
        /// Folds the fields of one sampled object into the field names and types seen so far.
        /// </summary>
        /// <param name="row">
        /// The deserialized object, or <c>null</c> if there was none.
        /// </param>
        /// <param name="fields">
        /// The field names seen so far; new names are added.
        /// </param>
        /// <param name="types">
        /// The type recorded per field; widened to <see cref="object"/> when values disagree.
        /// </param>
        private static void MergeFieldTypes(Dictionary<string, object> row, HashSet<string> fields, Dictionary<string, Type> types)
        {
            if (row == null)
            {
                return;
            }

            foreach (var kv in row)
            {
                // A null value carries no type information and counts as object.
                var fieldType = kv.Value?.GetType() ?? typeof(object);

                if (fields.Add(kv.Key))
                {
                    types[kv.Key] = fieldType;
                }
                else
                {
                    var type = types[kv.Key];

                    if (type != fieldType && type != typeof(object))
                    {
                        types[kv.Key] = typeof(object);
                    }
                }
            }
        }