Beispiel #1
0
        /// <summary>
        ///  Compare the distinct values of one column across two tabular files and write
        ///  the values which appear in only one of them. Each output row holds an "In"
        ///  marker ("-" for values only in the old file, "+" for values only in the new
        ///  file) followed by the value itself. Summary counts are reported via Trace.
        /// </summary>
        /// <param name="oldFilePath">File Path of the old (left) file</param>
        /// <param name="newFilePath">File Path of the new (right) file</param>
        /// <param name="outputFilePath">File Path to write the differences to</param>
        /// <param name="columnIdentifier">Identifier of the column to compare, resolved with ColumnIndex on each reader</param>
        private static void Compare(string oldFilePath, string newFilePath, string outputFilePath, string columnIdentifier)
        {
            // A single block backs the copies of every value kept in the sets
            String8Block copies = new String8Block();
            HashSet<String8> inOld = new HashSet<String8>();
            HashSet<String8> inNew = new HashSet<String8>();

            // Collect the distinct column values from the old file
            using (ITabularReader left = TabularFactory.BuildReader(oldFilePath))
            {
                int columnIndex = left.ColumnIndex(columnIdentifier);

                while (left.NextRow())
                {
                    String8 copy = copies.GetCopy(left.Current(columnIndex));
                    inOld.Add(copy);
                }

                Trace.WriteLine(String.Format("Old: {0:n0} values for \"{1}\" in {2:n0} rows.", inOld.Count, columnIdentifier, left.RowCountRead));
            }

            // Collect the distinct column values from the new file
            using (ITabularReader right = TabularFactory.BuildReader(newFilePath))
            {
                int columnIndex = right.ColumnIndex(columnIdentifier);

                while (right.NextRow())
                {
                    String8 copy = copies.GetCopy(right.Current(columnIndex));
                    inNew.Add(copy);
                }

                Trace.WriteLine(String.Format("New: {0:n0} values for \"{1}\" in {2:n0} rows.", inNew.Count, columnIdentifier, right.RowCountRead));
            }

            // Set differences in each direction
            HashSet<String8> missingFromNew = new HashSet<String8>(inOld);
            missingFromNew.ExceptWith(inNew);

            HashSet<String8> missingFromOld = new HashSet<String8>(inNew);
            missingFromOld.ExceptWith(inOld);

            Trace.WriteLine(String.Format("{0:n0} values were only in \"{1}\".\r\n{2:n0} values were only in \"{3}\".", missingFromNew.Count, oldFilePath, missingFromOld.Count, newFilePath));

            String8 oldOnlyMarker = String8.Convert("-", new byte[1]);
            String8 newOnlyMarker = String8.Convert("+", new byte[1]);

            using (ITabularWriter output = TabularFactory.BuildWriter(outputFilePath))
            {
                output.SetColumns(new[] { "In", columnIdentifier });

                // Old-only values first, then new-only values
                foreach (String8 removed in missingFromNew)
                {
                    output.Write(oldOnlyMarker);
                    output.Write(removed);
                    output.NextRow();
                }

                foreach (String8 added in missingFromOld)
                {
                    output.Write(newOnlyMarker);
                    output.Write(added);
                    output.NextRow();
                }
            }
        }
Beispiel #2
0
        /// <summary>
        ///  Write ten rows to a writer built over the given stream, using each Write and
        ///  WriteValuePart overload (int, bool, byte, DateTime, String8), and assert that
        ///  RowCountWritten and BytesWritten track the rows and the stream position.
        /// </summary>
        /// <param name="stream">Stream the writer will write to</param>
        /// <param name="buildWriter">Factory which builds the ITabularWriter under test for a stream</param>
        public void Writer_WriteValidUsingAllOverloads(Stream stream, Func <Stream, ITabularWriter> buildWriter)
        {
            String8 nameList = String8.Convert("Jeff,Bill,Todd,\\Barry\\", new byte[30]);
            String8Set names = String8Set.Split(nameList, UTF8.Comma, new int[5]);

            // The same two dates are written on every row
            DateTime today = new DateTime(2017, 05, 03, 0, 0, 0, DateTimeKind.Utc);
            DateTime descriptionDate = new DateTime(2017, 05, 01, 0, 0, 0, DateTimeKind.Utc);

            using (ITabularWriter writer = buildWriter(stream))
            {
                Assert.AreEqual(0, writer.RowCountWritten);
                writer.SetColumns(new[] { "ID", "IsEven", "Backslash", "Today", "Name", "Description" });
                Assert.AreEqual(0, writer.RowCountWritten);

                for (int row = 0; row < 10; row++)
                {
                    // Whole-value overloads
                    writer.Write(row);
                    writer.Write(row % 2 == 0);
                    writer.Write(UTF8.Backslash);
                    writer.Write(today);
                    writer.Write(names[row % names.Count]);

                    // Multi-part value overloads, all contributing to the "Description" column
                    writer.WriteValueStart();
                    writer.WriteValuePart(row + 1);
                    writer.WriteValuePart(row % 2 == 1);
                    writer.WriteValuePart(UTF8.Quote);
                    writer.WriteValuePart(descriptionDate);
                    writer.WriteValuePart(names[row % names.Count]);
                    writer.WriteValueEnd();

                    // The row count only advances once NextRow is called
                    Assert.AreEqual(row, writer.RowCountWritten);
                    writer.NextRow();
                    Assert.AreEqual(row + 1, writer.RowCountWritten);

                    Assert.AreEqual(stream.Position, writer.BytesWritten);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        ///  Write a sample file, copy it value by value from a reader to a writer, and
        ///  assert that the copy has exactly the same text as the original.
        /// </summary>
        /// <param name="buildReader">Factory which builds a reader for a file path (the bool argument is passed as true here)</param>
        /// <param name="buildWriter">Factory which builds a writer over a stream</param>
        public void Reader_Roundtrip(Func <string, bool, ITabularReader> buildReader, Func <Stream, ITabularWriter> buildWriter)
        {
            string filePath = "ValidSample.xsv";
            string copyPath = filePath + ".new";

            // Produce a valid file containing some values which require CSV escaping
            FileStream sampleStream = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite);
            WriteValidSample(sampleStream, buildWriter);

            // Copy reader to writer directly - each value is unescaped on read and escaped again on write
            using (ITabularReader source = buildReader(filePath, true))
            using (ITabularWriter target = buildWriter(new FileStream(copyPath, FileMode.Create, FileAccess.ReadWrite)))
            {
                target.SetColumns(source.Columns);

                while (source.NextRow())
                {
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        String8 cell = source.Current(column).ToString8();
                        target.Write(cell);
                    }

                    target.NextRow();
                }
            }

            // The original and the copy must be identical
            string expected = File.ReadAllText(filePath);
            string actual = File.ReadAllText(copyPath);

            Assert.AreEqual(expected, actual);
        }
Beispiel #4
0
        /// <summary>
        ///  Write this instance as one row to the writer. The column names are set
        ///  first if the writer has not written any rows yet.
        /// </summary>
        /// <param name="writer">Writer to write the row to</param>
        /// <param name="block">Block used to hold String8 copies of the string values; it is cleared before use</param>
        public void WriteTo(ITabularWriter writer, String8Block block)
        {
            // Declare the columns before the first row only
            if (writer.RowCountWritten == 0)
            {
                string[] columnNames =
                {
                    "UserID",
                    "UserGuid",
                    "EmailAddress",
                    "Region",
                    "Browser",
                    "OS",
                    "IsPremiumUser",
                    "JoinDate"
                };

                writer.SetColumns(columnNames);
            }

            // Start from an empty block for this row's string copies
            block.Clear();

            writer.Write(this.ID);

            String8 guid = block.GetCopy(this.Guid.ToString());
            writer.Write(guid);

            String8 emailAddress = block.GetCopy(this.EmailAddress);
            writer.Write(emailAddress);

            String8 region = block.GetCopy(this.Region);
            writer.Write(region);

            String8 browser = block.GetCopy(this.Browser);
            writer.Write(browser);

            String8 os = block.GetCopy(this.OS);
            writer.Write(os);

            writer.Write(this.IsPremiumUser);
            writer.Write(this.JoinDate);

            writer.NextRow();
        }
Beispiel #5
0
        /// <summary>
        ///  Copy rows from an input file to an output file, stopping once the number of
        ///  rows written equals the row limit. With the default limit (-1) the written
        ///  row count never matches, so every row is copied.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="rowLimit">Number of rows to write before stopping</param>
        private static void Copy(string inputFilePath, string outputFilePath, int rowLimit = -1)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                target.SetColumns(source.Columns);

                // Stop reading as soon as the limit is hit; no further row is requested from the reader
                bool limitReached = false;
                while (!limitReached && source.NextRow())
                {
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        String8 cell = source.Current(column).ToString8();
                        target.Write(cell);
                    }

                    target.NextRow();
                    limitReached = (target.RowCountWritten == rowLimit);
                }

                WriteSizeSummary(source, target);
            }
        }
Beispiel #6
0
        /// <summary>
        ///  Write the sample file at s_sampleFilePath: 1,000 rows with the columns ID,
        ///  IsEven, Count (the running sum of the IDs so far), WhenAdded (one minute
        ///  later per row) and Name (cycling through four names).
        /// </summary>
        public void Initialize()
        {
            String8Block nameBlock = new String8Block();

            String8[] names = new String8[]
            {
                nameBlock.GetCopy("Scott"),
                nameBlock.GetCopy("Mike"),
                nameBlock.GetCopy("Jeff"),
                nameBlock.GetCopy("Sophie")
            };

            using (ITabularWriter writer = TabularFactory.BuildWriter(s_sampleFilePath))
            {
                writer.SetColumns(new[] { "ID", "IsEven", "Count", "WhenAdded", "Name" });

                DateTime firstAdded = new DateTime(2017, 05, 23);
                int runningTotal = 0;

                for (int id = 0; id < 1000; id++)
                {
                    runningTotal += id;

                    bool isEven = (id % 2 == 0);
                    DateTime whenAdded = firstAdded.AddMinutes(id);

                    writer.Write(id);
                    writer.Write(isEven);
                    writer.Write(runningTotal);
                    writer.Write(whenAdded);
                    writer.Write(names[id % names.Length]);

                    writer.NextRow();
                }
            }
        }
Beispiel #7
0
        /// <summary>
        ///  Copy to the output file only those rows where the value column does not
        ///  start with the content of the name column.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="valueColumnIdentifier">Identifier of the column whose value is tested</param>
        /// <param name="nameColumnIdentifier">Identifier of the column holding the prefix to test for</param>
        private static void NotStartsWith(string inputFilePath, string outputFilePath, string valueColumnIdentifier, string nameColumnIdentifier)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int valueIndex = source.ColumnIndex(valueColumnIdentifier);
                int nameIndex = source.ColumnIndex(nameColumnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(source.Columns);

                    while (source.NextRow())
                    {
                        String8 prefix = source.Current(nameIndex).ToString8();
                        String8 candidate = source.Current(valueIndex).ToString8();

                        // Rows where the value starts with the name are dropped
                        if (candidate.StartsWith(prefix))
                        {
                            continue;
                        }

                        for (int column = 0; column < source.CurrentRowColumns; column++)
                        {
                            String8 cell = source.Current(column).ToString8();
                            target.Write(cell);
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Beispiel #8
0
        /// <summary>
        ///  Copy an input file to an output file, passing the values of the named
        ///  columns through WriteHtmlEscaped and writing all other values unchanged.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="columnsDelimited">Comma-delimited column identifiers to process; each is trimmed and resolved with ColumnIndex</param>
        private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnsDelimited)
        {
            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                // Membership is tested for every cell of every row, so resolve the indices once
                // into a set (constant-time lookup) rather than scanning a list each time.
                // The HashSet constructor enumerates the query immediately, so all column
                // identifiers are still resolved before any row is read.
                HashSet <int> columnIndicesToEscape = new HashSet <int>(columnsDelimited.Split(',').Select((col) => reader.ColumnIndex(col.Trim())));

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
                {
                    writer.SetColumns(reader.Columns);

                    while (reader.NextRow())
                    {
                        for (int i = 0; i < reader.CurrentRowColumns; ++i)
                        {
                            if (columnIndicesToEscape.Contains(i))
                            {
                                WriteHtmlEscaped(reader.Current(i).ToString8(), writer);
                            }
                            else
                            {
                                writer.Write(reader.Current(i).ToString8());
                            }
                        }

                        writer.NextRow();
                    }

                    WriteSizeSummary(reader, writer);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        ///  Handle a suggest request: run the suggester for the required "q" parameter
        ///  (as of the optional "asof" date) and write the outcome as JSON. A valid query
        ///  with no further context produces a single "Valid" = true row; anything else,
        ///  including any exception, is written with WriteException.
        /// </summary>
        /// <param name="request">Request to read the "q" and "asof" parameters from</param>
        /// <param name="response">Response the JSON writer is built for</param>
        private void Suggest(IHttpRequest request, IHttpResponse response)
        {
            using (ITabularWriter json = WriterForFormat("json", response))
            {
                try
                {
                    string query = Require(request, "q");

                    DateTime asOfDate = ParseOrDefault(request.QueryString["asof"], _xDatabaseContext.RequestedAsOfDateTime);
                    SuggestResult suggestion = _suggester.Suggest(query, asOfDate);

                    // Invalid queries, and valid ones which still have context to report, get the detailed response
                    if (suggestion.IsValid != true || suggestion.Context != null)
                    {
                        WriteException(suggestion, json);
                    }
                    else
                    {
                        // Valid with nothing else to report: just say so
                        json.SetColumns(new[] { "Valid" });
                        json.Write(true);
                        json.NextRow();
                    }
                }
                catch (Exception ex)
                {
                    WriteException(ex, json, false);
                }
            }
        }
Beispiel #10
0
        /// <summary>
        ///  Copy only the named columns from an input file to an output file, in the
        ///  order they were listed.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="columnsDelimited">Comma-delimited column names to copy; each name is trimmed</param>
        private static void Copy(string inputFilePath, string outputFilePath, string columnsDelimited)
        {
            // Split the requested names and trim whitespace around each
            string[] rawNames = columnsDelimited.Split(',');
            List<string> columns = new List<string>();

            for (int n = 0; n < rawNames.Length; n++)
            {
                columns.Add(rawNames[n].Trim());
            }

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                // Resolve each requested name to its index in the input
                int[] sourceIndices = new int[columns.Count];
                for (int c = 0; c < sourceIndices.Length; c++)
                {
                    sourceIndices[c] = source.ColumnIndex(columns[c]);
                }

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(columns);

                    while (source.NextRow())
                    {
                        for (int c = 0; c < sourceIndices.Length; c++)
                        {
                            String8 cell = source.Current(sourceIndices[c]).ToString8();
                            target.Write(cell);
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Beispiel #11
0
        /// <summary>
        ///  Write each distinct value of one column of the input file to a
        ///  single-column output file, in order of first appearance.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="columnIdentifier">Identifier of the column to find distinct values in</param>
        private static void Distinct(string inputFilePath, string outputFilePath, string columnIdentifier)
        {
            // Values already seen; copies are taken from the block before being stored
            String8Block seenCopies = new String8Block();
            HashSet<String8> seen = new HashSet<String8>();

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int index = source.ColumnIndex(columnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(new[] { source.Columns[index] });

                    while (source.NextRow())
                    {
                        String8 current = source.Current(index).ToString8();

                        // Only the first occurrence of each value is written
                        if (seen.Contains(current))
                        {
                            continue;
                        }

                        seen.Add(seenCopies.GetCopy(current));
                        target.Write(current);
                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Beispiel #12
0
        /// <summary>
        ///  Copy an input file to an output file, passing the values of one column
        ///  through WriteHtmlEscaped and writing all other values unchanged.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="columnIdentifier">Identifier of the column to process</param>
        private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnIdentifier)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int escapedIndex = source.ColumnIndex(columnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(source.Columns);

                    while (source.NextRow())
                    {
                        for (int column = 0; column < source.CurrentRowColumns; column++)
                        {
                            String8 cell = source.Current(column).ToString8();

                            if (column != escapedIndex)
                            {
                                // Every other column is copied as-is
                                target.Write(cell);
                            }
                            else
                            {
                                WriteHtmlEscaped(cell, target);
                            }
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Beispiel #13
0
        /// <summary>
        ///  Write an ErrorContext as a single row with the columns Valid, Usage,
        ///  ItemCategory, ErrorMessage, Values, InvalidToken and InvalidTokenIndex.
        ///  The valid values, if any, are joined into one value using s_delimiter.
        /// </summary>
        /// <param name="context">Error details to write</param>
        /// <param name="isValid">Value to write in the "Valid" column</param>
        /// <param name="writer">Writer to write the columns and row to</param>
        private void WriteException(ErrorContext context, bool isValid, ITabularWriter writer)
        {
            String8Block scratch = new String8Block();

            string[] columnNames = { "Valid", "Usage", "ItemCategory", "ErrorMessage", "Values", "InvalidToken", "InvalidTokenIndex" };
            writer.SetColumns(columnNames);

            writer.Write(isValid);

            String8 usage = scratch.GetCopy(context.Usage);
            writer.Write(usage);

            String8 category = scratch.GetCopy(context.InvalidValueCategory);
            writer.Write(category);

            String8 errorMessage = scratch.GetCopy(context.ErrorMessage);
            writer.Write(errorMessage);

            // Join the valid values (if there are any) into a single delimited value
            String8 joinedValues = String8.Empty;
            if (context.ValidValues != null)
            {
                foreach (string validValue in context.ValidValues)
                {
                    joinedValues = scratch.Concatenate(joinedValues, s_delimiter, scratch.GetCopy(validValue));
                }
            }

            writer.Write(joinedValues);

            // A missing invalid token is written as an empty value
            String8 invalidToken = scratch.GetCopy(context.InvalidValue ?? "");
            writer.Write(invalidToken);
            writer.Write(context.InvalidTokenIndex);

            writer.NextRow();
        }
Beispiel #14
0
        /// <summary>
        ///  Copy an input file to an output file with a new leading "ID" column holding
        ///  a sequential integer per row.
        /// </summary>
        /// <param name="inputFilePath">File Path to input file</param>
        /// <param name="outputFilePath">File Path to output file</param>
        /// <param name="firstId">ID to assign to the first row; each later row gets the next integer</param>
        private static void RowId(string inputFilePath, string outputFilePath, int firstId = 1)
        {
            int nextId = firstId;

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                // Output columns are "ID" followed by every input column
                List<string> outputColumns = new List<string> { "ID" };
                outputColumns.AddRange(source.Columns);

                target.SetColumns(outputColumns);

                while (source.NextRow())
                {
                    target.Write(nextId++);

                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        String8 cell = source.Current(column).ToString8();
                        target.Write(cell);
                    }

                    target.NextRow();
                }

                WriteSizeSummary(source, target);
            }
        }
Beispiel #15
0
        /// <summary>
        ///  Persist table metadata under the table root path: a schema file (Name, Type
        ///  per column), a metadata file (currently a single RowCount row), and the query
        ///  text. The metadata is then added to s_Cache, keyed by stream provider and
        ///  table root path.
        /// </summary>
        /// <param name="streamProvider">Provider used to open the output streams and write the query text</param>
        /// <param name="tableRootPath">Root path of the table the files are written under</param>
        /// <param name="metadata">Metadata (Schema, RowCount, Query) to write</param>
        public static void Write(IStreamProvider streamProvider, string tableRootPath, TableMetadata metadata)
        {
            String8Block scratch = new String8Block();

            // Schema: one row per column
            using (ITabularWriter schemaWriter = TabularFactory.BuildWriter(streamProvider.OpenWrite(Path.Combine(tableRootPath, SchemaFileName)), SchemaFileName))
            {
                schemaWriter.SetColumns(new[] { "Name", "Type" });

                foreach (ColumnDetails details in metadata.Schema)
                {
                    String8 columnName = scratch.GetCopy(details.Name);
                    schemaWriter.Write(columnName);

                    String8 typeName = scratch.GetCopy(details.Type.Name.ToString());
                    schemaWriter.Write(typeName);

                    schemaWriter.NextRow();
                }
            }

            // Metadata: Name / Context / Value rows
            using (ITabularWriter metadataWriter = TabularFactory.BuildWriter(streamProvider.OpenWrite(Path.Combine(tableRootPath, MetadataFileName)), MetadataFileName))
            {
                metadataWriter.SetColumns(new[] { "Name", "Context", "Value" });

                String8 rowCountName = scratch.GetCopy("RowCount");
                metadataWriter.Write(rowCountName);
                metadataWriter.Write(String8.Empty);
                metadataWriter.Write(metadata.RowCount);
                metadataWriter.NextRow();
            }

            // Query text
            string queryPath = Path.Combine(tableRootPath, ConfigQueryPath);
            streamProvider.WriteAllText(queryPath, metadata.Query);

            string cacheKey = $"{streamProvider}|{tableRootPath}";
            s_Cache.Add(cacheKey, metadata);
        }
Beispiel #16
0
        /// <summary>
        ///  Write a three-column, three-row file to "NewlineVariations.xsv" in which the
        ///  row endings are altered directly on the underlying stream, then read it back
        ///  and assert that every row has three columns and an unclipped last value.
        /// </summary>
        /// <param name="buildWriter">Factory which builds the writer over the file stream</param>
        /// <param name="buildReader">Factory which builds the reader for the file path (the bool argument is passed as true here)</param>
        public void Reader_NewlineVariations(Func <Stream, ITabularWriter> buildWriter, Func <string, bool, ITabularReader> buildReader)
        {
            string xsvPath = "NewlineVariations.xsv";
            Stream stream  = new FileStream(xsvPath, FileMode.Create, FileAccess.ReadWrite);

            using (ITabularWriter w = buildWriter(stream))
            {
                w.SetColumns(new string[] { "One", "Two", "Three" });

                // Rows hold the values 1..9, three per row
                for (int row = 0; row < 3; ++row)
                {
                    w.Write(3 * row + 1);
                    w.Write(3 * row + 2);
                    w.Write(3 * row + 3);

                    // Write the end of row but then override it
                    // NOTE(review): relies on stream.Position reflecting everything written so far,
                    // i.e. the writer not holding unflushed data - confirm for each writer under test.
                    long position = stream.Position;
                    w.NextRow();

                    if (row == 0)
                    {
                        // Row 0 - newline only
                        stream.Seek(position, SeekOrigin.Begin);
                        stream.WriteByte(UTF8.Newline);
                    }
                    else if (row == 2)
                    {
                        // Row 2 - no end of line
                        stream.SetLength(position);
                    }

                    // Row 1 keeps whatever row ending the writer produced
                }
            }

            using (ITabularReader r = buildReader(xsvPath, true))
            {
                // Verify column heading not clipped even though no '\r'
                Assert.AreEqual("Three", r.Columns[2]);

                // First data row
                Assert.IsTrue(r.NextRow());
                Assert.AreEqual(3, r.CurrentRowColumns);

                // Verify last column doesn't have extra '\r' when terminated with '\r\n'
                Assert.AreEqual("3", r.Current(2).ToString());

                // Second data row
                Assert.IsTrue(r.NextRow());
                Assert.AreEqual(3, r.CurrentRowColumns);

                // Verify last column not clipped when terminated with '\n'
                Assert.AreEqual("6", r.Current(2).ToString());

                // Third data row
                Assert.IsTrue(r.NextRow());
                Assert.AreEqual(3, r.CurrentRowColumns);

                // Verify last column not clipped when unterminated [EOF]
                Assert.AreEqual("9", r.Current(2).ToString());

                Assert.IsFalse(r.NextRow(), "Reader didn't stop after last line without newline");
            }
        }
Beispiel #17
0
        /// <summary>
        ///  Write every value in the array, in order, as one row.
        /// </summary>
        /// <param name="writer">Writer to write the row to</param>
        /// <param name="values">Values making up the row, one per column</param>
        private static void WriteCombinedRow(ITabularWriter writer, String8[] values)
        {
            foreach (String8 cell in values)
            {
                writer.Write(cell);
            }

            writer.NextRow();
        }
Beispiel #18
0
        /// <summary>
        ///  Performance check: write 9,999 four-column rows to a MemoryStream fifty
        ///  times over and verify the throughput with Verify.PerformanceByBytes against
        ///  a 50 megabyte figure.
        /// </summary>
        /// <param name="buildWriter">Factory which builds the writer under test over a stream</param>
        public void Writer_Performance(Func <Stream, ITabularWriter> buildWriter)
        {
            // Text values are converted once, outside the measured code
            String8Block textBlock = new String8Block();
            String8 evenDescription = textBlock.GetCopy("Description 1");
            String8 oddDescription = textBlock.GetCopy("Description 2");
            String8 evenSource = textBlock.GetCopy("Source: Internal");
            String8 oddSource = textBlock.GetCopy("Source: External");

            using (MemoryStream buffer = new MemoryStream())
            {
                int passes = 50;
                long totalBytes = 0;
                int totalRows = 0;

                // Tsv Write goal: 100MB/sec [Surface Book]
                // NOTE: Tsv Write performance is very sensitive to the mix of text and numbers written. Writing integers is slower.
                Verify.PerformanceByBytes(50 * LongExtensions.Megabyte, () =>
                {
                    for (int pass = 0; pass < passes; pass++)
                    {
                        // Each pass overwrites the same buffer from the start
                        buffer.Seek(0, SeekOrigin.Begin);

                        ITabularWriter writer = buildWriter(buffer);
                        writer.SetColumns(new[] { "LineNumber", "Count", "Description", "Source" });

                        int runningTotal = 0;
                        for (int line = 1; line < 10000; line++)
                        {
                            runningTotal += line;

                            writer.Write(line);
                            writer.Write(runningTotal);

                            bool isEvenLine = (line % 2 == 0);
                            if (!isEvenLine)
                            {
                                writer.Write(oddDescription);
                                writer.Write(oddSource);
                            }
                            else
                            {
                                writer.Write(evenDescription);
                                writer.Write(evenSource);
                            }

                            writer.NextRow();
                        }

                        totalBytes += writer.BytesWritten;
                        totalRows += writer.RowCountWritten;
                    }

                    return totalBytes;
                });
            }
        }
Beispiel #19
0
        /// <summary>
        ///  Verify that a writer throws InvalidOperationException for each kind of
        ///  out-of-sequence call: writing before SetColumns, calling SetColumns twice,
        ///  ending a row with too few or too many values, and misusing the
        ///  WriteValueStart / WriteValuePart / WriteValueEnd sequence.
        ///  The valid calls in between move the writer into the state each check needs,
        ///  so the statement order matters.
        /// </summary>
        /// <param name="buildWriter">Factory which builds the writer under test over a stream</param>
        public void Writer_CheckValidation(Func <Stream, ITabularWriter> buildWriter)
        {
            using (MemoryStream s = new MemoryStream())
            {
                using (ITabularWriter w = buildWriter(s))
                {
                    // Write before SetColumns
                    Verify.Exception <InvalidOperationException>(() => w.Write(0));

                    w.SetColumns(new string[] { "ID", "IsEven" });

                    // SetColumns already called
                    Verify.Exception <InvalidOperationException>(() => w.SetColumns(new string[] { "Three", "Four" }));

                    // First of the two column values
                    w.Write(0);

                    // Not enough columns
                    Verify.Exception <InvalidOperationException>(() => w.NextRow());

                    // Second value; the row is now complete
                    w.Write(true);

                    // Too many columns
                    Verify.Exception <InvalidOperationException>(() => w.Write(String8.FromBoolean(false)));

                    w.NextRow();

                    // WriteValuePart without WriteValueStart
                    Verify.Exception <InvalidOperationException>(() => w.WriteValuePart(true));

                    // WriteValueEnd not in partial value
                    Verify.Exception <InvalidOperationException>(() => w.WriteValueEnd());

                    w.WriteValueStart();

                    // Write in partial value
                    Verify.Exception <InvalidOperationException>(() => w.Write(true));

                    w.WriteValueEnd();
                }
            }
        }
Beispiel #20
0
        /// <summary>
        ///  Copy every remaining row from the reader to the writer, value by value.
        ///  Columns are not set here.
        /// </summary>
        /// <param name="reader">Reader to copy rows from</param>
        /// <param name="writer">Writer to copy rows to</param>
        private static void CopyRows(ITabularReader reader, ITabularWriter writer)
        {
            while (reader.NextRow())
            {
                int column = 0;
                while (column < reader.CurrentRowColumns)
                {
                    String8 cell = reader.Current(column).ToString8();
                    writer.Write(cell);
                    column++;
                }

                writer.NextRow();
            }
        }
Beispiel #21
0
        /// <summary>
        ///  Write a three-column sample (LineNumber, Count, Description) for rows up to
        ///  10,000 which deliberately contains irregularities: rows where i % 100 == 99
        ///  have their values discarded by seeking the stream back, and row 5000 carries
        ///  a 100,000 character description. Count values are random and unseeded, so
        ///  the output differs between runs.
        /// </summary>
        /// <param name="stream">Stream to write to; it is also repositioned directly</param>
        /// <param name="buildWriter">Factory which builds the writer over the stream</param>
        private static void WriteSampleFileWithIssues(Stream stream, Func <Stream, ITabularWriter> buildWriter)
        {
            Random  r      = new Random();
            string  huge   = new string('Z', 100000);
            String8 huge8  = String8.Convert(huge, new byte[String8.GetLength(huge)]);
            String8 abcdef = String8.Convert("ABCDEF", new byte[6]);

            using (ITabularWriter writer = buildWriter(stream))
            {
                writer.SetColumns(new string[] { "LineNumber", "Count", "Description" });

                // Line numbers start one past the rows the writer reports as already written
                for (int i = writer.RowCountWritten + 1; i <= 10000; ++i)
                {
                    if (i % 100 == 99)
                    {
                        // Write an empty row (1/100)
                        long rowStartPosition = stream.Position;

                        // Make the writer think everything is ok (it'll throw if you don't write enough values)
                        writer.Write(String8.Empty);
                        writer.Write(String8.Empty);

                        // Third value, written as an empty multi-part value
                        writer.WriteValueStart();
                        writer.WriteValueEnd();

                        // Wipe out what was written
                        // NOTE(review): assumes the values above have reached the stream by now,
                        // so later writes overwrite them from rowStartPosition - confirm per writer.
                        stream.Seek(rowStartPosition, SeekOrigin.Begin);
                    }
                    else if (i == 5000)
                    {
                        // Write a huge row
                        writer.Write(i);

                        // Count written through the multi-part value calls
                        writer.WriteValueStart();
                        writer.WriteValuePart(r.Next(100000));
                        writer.WriteValueEnd();

                        writer.Write(huge8);
                    }
                    else
                    {
                        // Write a normal row
                        writer.Write(i);
                        writer.Write(r.Next(100000));
                        writer.Write(abcdef);
                    }

                    // Every branch ends its row here, including the wiped one
                    writer.NextRow();
                }
            }
        }
Beispiel #22
0
        /// <summary>
        ///  Run a query until it completes or the timeout passes and write a single
        ///  JSON row with Count, IsComplete and RuntimeMs. Nothing is written if the
        ///  query produces no pipeline. The pipeline is always disposed.
        /// </summary>
        /// <param name="query">Query to build the pipeline from</param>
        /// <param name="timeout">Time allowed for the run; replaced by TimeSpan.MaxValue while a debugger is attached</param>
        /// <param name="asOfDate">Moment in time to run the query for</param>
        /// <param name="response">Response the JSON writer is built for</param>
        private void CountWithinTimeout(string query, TimeSpan timeout, DateTime asOfDate, IHttpResponse response)
        {
            IXTable pipeline = null;

            try
            {
                // Use the shared context unless another moment in time was requested
                XDatabaseContext context = (asOfDate == _xDatabaseContext.RequestedAsOfDateTime)
                    ? _xDatabaseContext
                    : new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };

                // Build a Pipeline for the Query
                pipeline = context.Query(query);

                // No query means an empty result
                if (pipeline == null)
                {
                    return;
                }

                // Don't time out while stepping through in a debugger
                TimeSpan effectiveTimeout = Debugger.IsAttached ? TimeSpan.MaxValue : timeout;
                RunResult outcome = pipeline.RunUntilTimeout(effectiveTimeout);

                using (ITabularWriter json = WriterForFormat("json", response))
                {
                    json.SetColumns(new[] { "Count", "IsComplete", "RuntimeMs" });
                    json.Write(outcome.RowCount);
                    json.Write(outcome.IsComplete);
                    json.Write((int)outcome.Elapsed.TotalMilliseconds);
                    json.NextRow();
                }
            }
            finally
            {
                if (pipeline != null)
                {
                    pipeline.Dispose();
                }
            }
        }
Beispiel #23
0
        /// <summary>
        ///  Write one message row: the current UTC time, the message type, the source
        ///  component and the message text. An AssertFailed message also sets Failed.
        /// </summary>
        /// <param name="type">Type of the message</param>
        /// <param name="sourceComponent">Component the message came from</param>
        /// <param name="message">Message text</param>
        public void Write(MessageType type, string sourceComponent, string message)
        {
            bool isAssertFailure = (type == MessageType.AssertFailed);
            if (isAssertFailure)
            {
                this.Failed = true;
            }

            _writer.Write(DateTime.UtcNow);

            String8 typeName = _block.GetCopy(type.ToString());
            _writer.Write(typeName);

            String8 source = _block.GetCopy(sourceComponent);
            _writer.Write(source);

            String8 text = _block.GetCopy(message);
            _writer.Write(text);

            _writer.NextRow();

            // The copies are no longer needed once the row has been written
            _block.Clear();
        }
        /// <summary>
        ///  Get the next batch of rows from the source and write each of them to the
        ///  output. The writer is built on the first call, targeting the console when
        ///  the output path is "cout" (case-insensitive) and the output path otherwise.
        /// </summary>
        /// <param name="desiredCount">Number of rows requested from the source</param>
        /// <param name="cancellationToken">Token passed through to the source</param>
        /// <returns>Number of rows the source returned (and which were written); zero when there are no more</returns>
        /// <exception cref="InvalidOperationException">The writer must be built but no output file path is available</exception>
        public int Next(int desiredCount, CancellationToken cancellationToken)
        {
            // The writer is created lazily, only once rows are requested
            if (_writer == null)
            {
                if (_outputFilePath == null)
                {
                    throw new InvalidOperationException("TabularFileWriter can't reset when passed an ITabularWriter instance");
                }

                bool writeToConsole = _outputFilePath.Equals("cout", StringComparison.OrdinalIgnoreCase);
                if (!writeToConsole)
                {
                    _writer = TabularFactory.BuildWriter(_streamProvider.OpenWrite(_outputFilePath), _outputFilePath);
                }
                else
                {
                    _writer = new ConsoleTabularWriter();
                }

                _writer.SetColumns(_source.Columns.Select((col) => col.ColumnDetails.Name));
            }

            // Or smaller batch?
            int batchSize = _source.Next(desiredCount, cancellationToken);
            if (batchSize == 0)
            {
                return 0;
            }

            // Fetch the current batch for every column
            int columnCount = _stringColumnGetters.Length;
            XArray[] batch = new XArray[columnCount];
            for (int column = 0; column < _stringColumnGetters.Length; column++)
            {
                batch[column] = _stringColumnGetters[column]();
            }

            // Write the batch row by row
            for (int row = 0; row < batchSize; row++)
            {
                for (int column = 0; column < _stringColumnGetters.Length; column++)
                {
                    String8[] columnValues = (String8[])batch[column].Array;
                    int valueIndex = batch[column].Index(row);

                    _writer.Write(columnValues[valueIndex]);
                }

                _writer.NextRow();
            }

            return batchSize;
        }
Beispiel #25
0
        /// <summary>
        ///  Sanitize an input file into a given output file using this Sanitizer's configuration.
        ///  Each input column which has a handler is passed through that handler and the result
        ///  is written; columns without a handler are not written. When a sample column is
        ///  configured, rows whose sample value hashes above the inclusion cutoff are skipped.
        /// </summary>
        /// <param name="inputFile">File Path to input file</param>
        /// <param name="outputFile">File Path to output file</param>
        public void Sanitize(string inputFile, string outputFile)
        {
            using (ITabularReader reader = TabularFactory.BuildReader(inputFile))
            {
                // Decide what happens to each input column, and which columns appear in the output
                List<string> outputColumns;
                IColumnHandler[] handlersByColumn = GetHandlersByColumnIndex(reader.Columns, out outputColumns);

                // Locate the sample column (-1 when none is configured) and compute the hash cutoff for keeping a row
                int sampleColumnIndex = -1;
                if (!String.IsNullOrEmpty(this.SampleColumnName))
                {
                    sampleColumnIndex = reader.ColumnIndex(this.SampleColumnName);
                }

                uint sampleInclusionCutoff = (uint)(uint.MaxValue * this.SampleProbability);

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFile))
                {
                    writer.SetColumns(outputColumns);

                    while (reader.NextRow())
                    {
                        if (sampleColumnIndex >= 0)
                        {
                            // Hash with a fixed key of zero (not the hashkey) so the same rows are consistently included or excluded
                            String8 sampleValue = reader.Current(sampleColumnIndex).ToString8();
                            uint sampleValueHash = Hashing.Hash(sampleValue, 0);
                            if (sampleValueHash > sampleInclusionCutoff)
                            {
                                continue;
                            }
                        }

                        // Write the sanitized value for every column which has a handler
                        for (int columnIndex = 0; columnIndex < reader.CurrentRowColumns; ++columnIndex)
                        {
                            IColumnHandler columnHandler = handlersByColumn[columnIndex];
                            if (columnHandler == null)
                            {
                                continue;
                            }

                            String8 original = reader.Current(columnIndex).ToString8();
                            writer.Write(columnHandler.Sanitize(original));
                        }

                        writer.NextRow();
                    }
                }
            }
        }
        /// <summary>
        ///  Write a sample table with columns "Zip", "LastScan" and "IsArchived" to a stream,
        ///  using pseudo-random values generated from the given seed.
        /// </summary>
        /// <param name="stream">Stream the tabular writer is built over</param>
        /// <param name="seed">Seed for the random generator, so the same seed produces the same values</param>
        /// <param name="rowCount">Number of rows to write</param>
        private static void WriteSampleTsv(Stream stream, int seed, int rowCount)
        {
            DateTime start = new DateTime(2018, 01, 01, 0, 0, 0, DateTimeKind.Utc);

            Random r = new Random(seed);

            // Dispose the writer when done, as every other writer in this file is; previously it was never released.
            // NOTE(review): disposal presumably flushes buffered output and may also close 'stream' - confirm against callers.
            using (ITabularWriter writer = TabularFactory.BuildWriter(stream, "Unused.tsv"))
            {
                writer.SetColumns(new string[] { "Zip", "LastScan", "IsArchived" });
                for (int i = 0; i < rowCount; ++i)
                {
                    // Zip: integer from 10000 up to (but not including) 99999
                    writer.Write(r.Next(10000, 99999));

                    // LastScan: a moment within the 180 days before the fixed start date
                    writer.Write(start.AddDays(-180.0 * r.NextDouble()));

                    // IsArchived: true when a value drawn from 0-99 falls below 50
                    writer.Write(r.Next(100) < 50);

                    writer.NextRow();
                }
            }
        }
Beispiel #27
0
        /// <summary>
        ///  Copy the reader's current row to the writer. Does nothing when no writer is provided.
        ///  The writer's columns are set from the reader's columns while the writer has written no rows yet.
        /// </summary>
        /// <param name="reader">Reader positioned on the row to copy</param>
        /// <param name="writer">Writer to copy the row to, or null to skip writing</param>
        private static void EchoRow(ITabularReader reader, ITabularWriter writer)
        {
            if (writer == null)
            {
                return;
            }

            // No rows written yet: set the output columns from the input columns first
            if (writer.RowCountWritten == 0)
            {
                writer.SetColumns(reader.Columns);
            }

            int columnCount = reader.CurrentRowColumns;
            for (int columnIndex = 0; columnIndex < columnCount; ++columnIndex)
            {
                String8 cell = reader.Current(columnIndex).ToString8();
                writer.Write(cell);
            }

            writer.NextRow();
        }
Beispiel #28
0
        /// <summary>
        ///  Write an exception to the given writer. A UsageException is handed to the overload
        ///  which takes its Context; any other exception is written as a single row with
        ///  columns "Valid" (always false), "Message" and "Stack".
        /// </summary>
        /// <param name="ex">Exception to write</param>
        /// <param name="writer">Writer to write the exception to</param>
        /// <param name="isValid">Forwarded to the Context overload for a UsageException; not used otherwise</param>
        private void WriteException(Exception ex, ITabularWriter writer, bool isValid = false)
        {
            String8Block block = new String8Block();

            UsageException usage = ex as UsageException;
            if (usage != null)
            {
                WriteException(usage.Context, isValid, writer);
                return;
            }

            string[] columnNames = new string[] { "Valid", "Message", "Stack" };
            writer.SetColumns(columnNames);

            writer.Write(false);
            writer.Write(block.GetCopy(ex.Message));
            writer.Write(block.GetCopy(ex.StackTrace));
            writer.NextRow();
        }
Beispiel #29
0
        /// <summary>
        ///  Write a small sample table to a stream through a writer built by the given factory.
        ///  The table has columns "LineNumber", "Count" and "Description" and eight rows: each holds
        ///  an integer from 2 through 9, a simple string, and a string containing commas and quotes.
        /// </summary>
        /// <param name="stream">Stream passed to the writer factory</param>
        /// <param name="buildWriter">Factory which builds the ITabularWriter for the stream</param>
        private static void WriteValidSample(Stream stream, Func<Stream, ITabularWriter> buildWriter)
        {
            String8Block valueBlock = new String8Block();
            String8 plainValue = valueBlock.GetCopy("Simple");
            String8 valueNeedingEscaping = valueBlock.GetCopy("Value, but with \"quotes\" and commas");

            string[] columnNames = new string[] { "LineNumber", "Count", "Description" };

            using (ITabularWriter writer = buildWriter(stream))
            {
                writer.SetColumns(columnNames);

                for (int lineNumber = 2; lineNumber <= 9; lineNumber++)
                {
                    writer.Write(lineNumber);
                    writer.Write(plainValue);
                    writer.Write(valueNeedingEscaping);

                    writer.NextRow();
                }
            }
        }
Beispiel #30
0
        /// <summary>
        ///  Copy rows from an input file to an output file, keeping only rows whose value in the
        ///  identified column also appears in that column of a second ("only in") file.
        ///  WriteSizeSummary is called with the reader and writer once all rows are processed.
        /// </summary>
        /// <param name="inputFilePath">File to copy rows from</param>
        /// <param name="outputFilePath">File to write the matching rows to</param>
        /// <param name="onlyInInputFilePath">File providing the set of values to keep</param>
        /// <param name="onlyInColumnIdentifier">Identifier of the column to compare, looked up in both files</param>
        private static void OnlyIn(string inputFilePath, string outputFilePath, string onlyInInputFilePath, string onlyInColumnIdentifier)
        {
            String8Block copies = new String8Block();
            HashSet<String8> allowedValues = new HashSet<String8>();

            // Collect the column's values from the "only in" file; each is copied into the block before being stored
            using (ITabularReader filterReader = TabularFactory.BuildReader(onlyInInputFilePath))
            {
                int filterColumnIndex = filterReader.ColumnIndex(onlyInColumnIdentifier);
                while (filterReader.NextRow())
                {
                    String8 copy = copies.GetCopy(filterReader.Current(filterColumnIndex));
                    allowedValues.Add(copy);
                }
            }

            // Echo the input rows whose column value was collected above
            using (ITabularReader inputReader = TabularFactory.BuildReader(inputFilePath))
            {
                int matchColumnIndex = inputReader.ColumnIndex(onlyInColumnIdentifier);

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
                {
                    writer.SetColumns(inputReader.Columns);

                    while (inputReader.NextRow())
                    {
                        String8 candidate = inputReader.Current(matchColumnIndex).ToString8();
                        if (!allowedValues.Contains(candidate))
                        {
                            continue;
                        }

                        for (int columnIndex = 0; columnIndex < inputReader.CurrentRowColumns; ++columnIndex)
                        {
                            writer.Write(inputReader.Current(columnIndex).ToString8());
                        }

                        writer.NextRow();
                    }

                    WriteSizeSummary(inputReader, writer);
                }
            }
        }