Ejemplo n.º 1
0
        /// <summary>
        ///  Round-trip check: write a sample file, copy it cell by cell through a reader
        ///  and a writer, then assert that the text of the copy equals the text of the original.
        /// </summary>
        /// <param name="buildReader">Factory invoked with the file path and 'true' to open a reader</param>
        /// <param name="buildWriter">Factory which wraps a Stream in a writer</param>
        public void Reader_Roundtrip(Func <string, bool, ITabularReader> buildReader, Func <Stream, ITabularWriter> buildWriter)
        {
            string sourcePath = "ValidSample.xsv";
            string copyPath   = sourcePath + ".new";

            // Produce the source file (the sample includes values which need CSV escaping)
            WriteValidSample(new FileStream(sourcePath, FileMode.Create, FileAccess.ReadWrite), buildWriter);

            // Push every cell through the reader (unescape) and then the writer (escape again)
            using (ITabularReader source = buildReader(sourcePath, true))
            using (ITabularWriter target = buildWriter(new FileStream(copyPath, FileMode.Create, FileAccess.ReadWrite)))
            {
                target.SetColumns(source.Columns);

                while (source.NextRow())
                {
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        target.Write(source.Current(column).ToString8());
                    }

                    target.NextRow();
                }
            }

            // Both files must contain exactly the same text
            string expectedText = File.ReadAllText(sourcePath);
            string actualText   = File.ReadAllText(copyPath);

            Assert.AreEqual(expectedText, actualText);
        }
Ejemplo n.º 2
0
        /// <summary>
        ///  Copy a tabular file, passing the cells of the named columns through
        ///  WriteHtmlEscaped and copying every other cell unchanged.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="columnsDelimited">Comma-separated column identifiers; each one is trimmed before lookup</param>
        private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnsDelimited)
        {
            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                // Resolve the columns once into a set, so the per-cell membership test
                // below is a hash lookup rather than a linear scan of a list.
                HashSet <int> columnIndicesToEscape = new HashSet <int>(columnsDelimited.Split(',').Select((col) => reader.ColumnIndex(col.Trim())));

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
                {
                    writer.SetColumns(reader.Columns);

                    while (reader.NextRow())
                    {
                        for (int i = 0; i < reader.CurrentRowColumns; ++i)
                        {
                            if (columnIndicesToEscape.Contains(i))
                            {
                                WriteHtmlEscaped(reader.Current(i).ToString8(), writer);
                            }
                            else
                            {
                                writer.Write(reader.Current(i).ToString8());
                            }
                        }

                        writer.NextRow();
                    }

                    WriteSizeSummary(reader, writer);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        ///  Compare the distinct values of one column between two files. Values found only
        ///  in the old file are written with a "-" marker and values found only in the new
        ///  file with a "+" marker. Counts are reported through Trace.
        /// </summary>
        /// <param name="oldFilePath">Path of the old file</param>
        /// <param name="newFilePath">Path of the new file</param>
        /// <param name="outputFilePath">Path of the difference file to write</param>
        /// <param name="columnIdentifier">Identifier of the column to compare, resolved separately in each file</param>
        private static void Compare(string oldFilePath, string newFilePath, string outputFilePath, string columnIdentifier)
        {
            // Copies of the cell values are taken from this block before they are stored in the sets
            String8Block      copies      = new String8Block();
            HashSet <String8> valuesInOld = new HashSet <String8>();
            HashSet <String8> valuesInNew = new HashSet <String8>();

            // Collect the distinct values from the old file
            using (ITabularReader oldFile = TabularFactory.BuildReader(oldFilePath))
            {
                int oldColumn = oldFile.ColumnIndex(columnIdentifier);

                while (oldFile.NextRow())
                {
                    valuesInOld.Add(copies.GetCopy(oldFile.Current(oldColumn)));
                }

                Trace.WriteLine(String.Format("Old: {0:n0} values for \"{1}\" in {2:n0} rows.", valuesInOld.Count, columnIdentifier, oldFile.RowCountRead));
            }

            // Collect the distinct values from the new file
            using (ITabularReader newFile = TabularFactory.BuildReader(newFilePath))
            {
                int newColumn = newFile.ColumnIndex(columnIdentifier);

                while (newFile.NextRow())
                {
                    valuesInNew.Add(copies.GetCopy(newFile.Current(newColumn)));
                }

                Trace.WriteLine(String.Format("New: {0:n0} values for \"{1}\" in {2:n0} rows.", valuesInNew.Count, columnIdentifier, newFile.RowCountRead));
            }

            // Set differences in both directions
            HashSet <String8> removed = new HashSet <String8>(valuesInOld);
            removed.ExceptWith(valuesInNew);

            HashSet <String8> added = new HashSet <String8>(valuesInNew);
            added.ExceptWith(valuesInOld);

            Trace.WriteLine(String.Format("{0:n0} values were only in \"{1}\".\r\n{2:n0} values were only in \"{3}\".", removed.Count, oldFilePath, added.Count, newFilePath));

            String8 removedMarker = String8.Convert("-", new byte[1]);
            String8 addedMarker   = String8.Convert("+", new byte[1]);

            // Emit one row per differing value: marker first, then the value
            using (ITabularWriter output = TabularFactory.BuildWriter(outputFilePath))
            {
                output.SetColumns(new string[] { "In", columnIdentifier });

                foreach (String8 oldOnlyValue in removed)
                {
                    output.Write(removedMarker);
                    output.Write(oldOnlyValue);
                    output.NextRow();
                }

                foreach (String8 newOnlyValue in added)
                {
                    output.Write(addedMarker);
                    output.Write(newOnlyValue);
                    output.NextRow();
                }
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 ///  Run WhereMatcher.Where over a file with a null writer argument and
 ///  return the MatchCount of the result.
 /// </summary>
 /// <param name="inputPath">Path of the file to read</param>
 /// <param name="columnIdentifier">Column identifier passed through to WhereMatcher.Where</param>
 /// <param name="op">Operator text passed through to WhereMatcher.Where</param>
 /// <param name="value">Value text passed through to WhereMatcher.Where</param>
 /// <returns>MatchCount reported by the where result</returns>
 private static int WhereMatchCount(string inputPath, string columnIdentifier, string op, string value)
 {
     using (ITabularReader source = TabularFactory.BuildReader(inputPath))
     {
         var outcome = WhereMatcher.Where(source, columnIdentifier, op, value, null);
         return outcome.MatchCount;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        ///  Copy only the requested columns of a file, in the order they were requested.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="columnsDelimited">Comma-separated column identifiers; each one is trimmed</param>
        private static void Copy(string inputFilePath, string outputFilePath, string columnsDelimited)
        {
            // Split and trim the requested names before the input is opened
            string[]      rawNames = columnsDelimited.Split(',');
            List <string> columns  = new List <string>();

            for (int n = 0; n < rawNames.Length; n++)
            {
                columns.Add(rawNames[n].Trim());
            }

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                // Resolve each requested name to its index in the source file
                int[] sourceIndices = new int[columns.Count];
                for (int n = 0; n < sourceIndices.Length; n++)
                {
                    sourceIndices[n] = source.ColumnIndex(columns[n]);
                }

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(columns);

                    while (source.NextRow())
                    {
                        foreach (int sourceIndex in sourceIndices)
                        {
                            target.Write(source.Current(sourceIndex).ToString8());
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        ///  Copy the rows in which the cell of the value column does not start with
        ///  the cell of the name column. All other rows are omitted from the output.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="valueColumnIdentifier">Identifier of the column whose prefix is tested</param>
        /// <param name="nameColumnIdentifier">Identifier of the column holding the expected prefix</param>
        private static void NotStartsWith(string inputFilePath, string outputFilePath, string valueColumnIdentifier, string nameColumnIdentifier)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int valueColumn = source.ColumnIndex(valueColumnIdentifier);
                int nameColumn  = source.ColumnIndex(nameColumnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(source.Columns);

                    while (source.NextRow())
                    {
                        String8 prefix    = source.Current(nameColumn).ToString8();
                        String8 candidate = source.Current(valueColumn).ToString8();

                        // Rows where the value begins with the name are not copied
                        if (candidate.StartsWith(prefix))
                        {
                            continue;
                        }

                        for (int column = 0; column < source.CurrentRowColumns; column++)
                        {
                            target.Write(source.Current(column).ToString8());
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        ///  Copy a file row by row, stopping early once the writer's RowCountWritten
        ///  equals rowLimit.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="rowLimit">Written-row count at which to stop; defaults to -1</param>
        private static void Copy(string inputFilePath, string outputFilePath, int rowLimit = -1)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                target.SetColumns(source.Columns);

                while (source.NextRow())
                {
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        target.Write(source.Current(column).ToString8());
                    }

                    target.NextRow();

                    // The limit is checked after the row is completed
                    if (target.RowCountWritten == rowLimit)
                    {
                        break;
                    }
                }

                WriteSizeSummary(source, target);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        ///  Trace a "Read:" line for the reader and a "Wrote:" line for the writer.
        ///  Either argument may be null, in which case its line is skipped. The byte size
        ///  is included only when the reported byte count is greater than zero.
        /// </summary>
        /// <param name="reader">Reader to summarize, or null</param>
        /// <param name="writer">Writer to summarize, or null</param>
        private static void WriteSizeSummary(ITabularReader reader, ITabularWriter writer)
        {
            if (reader != null)
            {
                long readBytes = reader.BytesRead;

                string readLine = (readBytes > 0)
                    ? String.Format("Read: {0}, {1:n0} rows.", readBytes.SizeString(), reader.RowCountRead)
                    : String.Format("Read: {0:n0} rows.", reader.RowCountRead);

                Trace.WriteLine(readLine);
            }

            if (writer != null)
            {
                long writtenBytes = writer.BytesWritten;

                string wroteLine = (writtenBytes > 0)
                    ? String.Format("Wrote: {0}, {1:n0} rows.", writtenBytes.SizeString(), writer.RowCountWritten)
                    : String.Format("Wrote: {0:n0} rows.", writer.RowCountWritten);

                Trace.WriteLine(wroteLine);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        ///  Scan every remaining row, comparing the boolean in result.ColumnIndex to
        ///  result.Value using result.Op. Each match increments result.MatchCount and is
        ///  passed to EchoRow. Rows which are too short, or whose cell does not convert to
        ///  a boolean, are skipped.
        /// </summary>
        /// <param name="reader">Reader positioned before the rows to scan</param>
        /// <param name="writer">Writer handed to EchoRow for matching rows</param>
        /// <param name="result">Supplies the column, operator and value; receives the match count</param>
        private static void MatchBoolCompare(ITabularReader reader, ITabularWriter writer, WhereResult result)
        {
            bool expected = (bool)result.Value;

            while (reader.NextRow())
            {
                // Only rows which actually contain the column can match
                if (reader.CurrentRowColumns > result.ColumnIndex)
                {
                    bool cellValue;

                    // The cell must convert to a boolean and then satisfy the operator
                    if (reader.Current(result.ColumnIndex).ToString8().TryToBoolean(out cellValue) &&
                        result.Op.Matches(cellValue.CompareTo(expected)))
                    {
                        result.MatchCount++;
                        EchoRow(reader, writer);
                    }
                }
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        ///  Write each distinct value of one column, in order of first appearance,
        ///  to a single-column output file.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="columnIdentifier">Identifier of the column to take values from</param>
        private static void Distinct(string inputFilePath, string outputFilePath, string columnIdentifier)
        {
            String8Block      copies = new String8Block();
            HashSet <String8> seen   = new HashSet <String8>();

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int sourceColumn = source.ColumnIndex(columnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(new string[] { source.Columns[sourceColumn] });

                    while (source.NextRow())
                    {
                        String8 current = source.Current(sourceColumn).ToString8();

                        if (seen.Contains(current))
                        {
                            continue;
                        }

                        // A copy is taken only for values not seen before
                        seen.Add(copies.GetCopy(current));
                        target.Write(current);
                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        ///  Copy a tabular file, passing the cells of one column through
        ///  WriteHtmlEscaped and copying every other cell unchanged.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="columnIdentifier">Identifier of the column to pass through WriteHtmlEscaped</param>
        private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnIdentifier)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                int escapedColumn = source.ColumnIndex(columnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(source.Columns);

                    while (source.NextRow())
                    {
                        for (int column = 0; column < source.CurrentRowColumns; column++)
                        {
                            String8 cell = source.Current(column).ToString8();

                            if (column != escapedColumn)
                            {
                                target.Write(cell);
                            }
                            else
                            {
                                WriteHtmlEscaped(cell, target);
                            }
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        ///  Copy a file, adding a leading "ID" column whose value starts at firstId
        ///  and increases by one for each row.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="firstId">ID written for the first row; defaults to 1</param>
        private static void RowId(string inputFilePath, string outputFilePath, int firstId = 1)
        {
            int nextId = firstId;

            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                // Output columns are "ID" followed by all of the source columns
                List <string> outputColumns = new List <string> { "ID" };
                outputColumns.AddRange(source.Columns);

                target.SetColumns(outputColumns);

                while (source.NextRow())
                {
                    target.Write(nextId);
                    nextId++;

                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        target.Write(source.Current(column).ToString8());
                    }

                    target.NextRow();
                }

                WriteSizeSummary(source, target);
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        ///  Scan every remaining row for cells in result.ColumnIndex which contain
        ///  result.Value (IndexOf does not return -1). Each match increments
        ///  result.MatchCount and is passed to EchoRow. Rows which are too short are skipped.
        /// </summary>
        /// <param name="reader">Reader positioned before the rows to scan</param>
        /// <param name="writer">Writer handed to EchoRow for matching rows</param>
        /// <param name="result">Supplies the column and the value to find; receives the match count</param>
        private static void MatchContains(ITabularReader reader, ITabularWriter writer, WhereResult result)
        {
            // Convert the search text once, into a buffer sized for it
            string  searchText = (string)result.Value;
            byte[]  buffer     = new byte[String8.GetLength(searchText)];
            String8 needle     = String8.Convert(searchText, buffer);

            while (reader.NextRow())
            {
                // Only rows which actually contain the column can match
                if (reader.CurrentRowColumns > result.ColumnIndex)
                {
                    if (reader.Current(result.ColumnIndex).ToString8().IndexOf(needle) != -1)
                    {
                        result.MatchCount++;
                        EchoRow(reader, writer);
                    }
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        ///  Append the rows of one file, or of every matching file in a folder, to an
        ///  output file. The writer is created from the columns of the first input, and
        ///  every input must have the same column names (compared ignoring case).
        /// </summary>
        /// <param name="inputFileOrFolderPath">A file path, or a folder whose files are appended</param>
        /// <param name="outputFilePath">Path of the file to append to</param>
        /// <param name="inputFileNamePattern">Search pattern used for a folder; "*.*" when null or empty</param>
        /// <exception cref="InvalidOperationException">An input file's columns differ from the writer's columns</exception>
        private static void Append(string inputFileOrFolderPath, string outputFilePath, string inputFileNamePattern = null)
        {
            string[] sources;

            if (!Directory.Exists(inputFileOrFolderPath))
            {
                // Not a folder: treat the path as a single input file
                sources = new string[] { inputFileOrFolderPath };
            }
            else
            {
                string pattern = String.IsNullOrEmpty(inputFileNamePattern) ? "*.*" : inputFileNamePattern;
                sources = Directory.GetFiles(inputFileOrFolderPath, pattern);
            }

            ITabularWriter writer        = null;
            string         writerColumns = null;

            try
            {
                foreach (string sourcePath in sources)
                {
                    using (ITabularReader reader = TabularFactory.BuildReader(sourcePath))
                    {
                        string sourceColumns = String.Join(", ", reader.Columns);

                        // The first input determines the writer and the expected columns
                        if (writer == null)
                        {
                            writer        = TabularFactory.AppendWriter(outputFilePath, reader.Columns);
                            writerColumns = String.Join(", ", reader.Columns);
                        }

                        // Every input must have the same columns as the writer
                        if (string.Compare(writerColumns, sourceColumns, true) != 0)
                        {
                            throw new InvalidOperationException(string.Format("Can't append to \"{0}\" because the column names don't match.\r\nExpect: {1}\r\nActual: {2}", outputFilePath, writerColumns, sourceColumns));
                        }

                        CopyRows(reader, writer);

                        // Per-input summary
                        Trace.WriteLine($" {sourcePath}, {reader.RowCountRead:n0} rows; {reader.BytesRead.SizeString()}");
                    }
                }

                // Output summary
                WriteSizeSummary(null, writer);
            }
            finally
            {
                // The writer is created lazily, so it cannot be wrapped in a using block
                if (writer != null)
                {
                    writer.Dispose();
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        ///  Write three rows whose row endings are altered directly on the underlying stream,
        ///  then verify the reader still returns three columns per row, unclipped last cells,
        ///  and no row after the last one.
        /// </summary>
        /// <param name="buildWriter">Factory which wraps a Stream in a writer</param>
        /// <param name="buildReader">Factory invoked with the file path and 'true' to open a reader</param>
        public void Reader_NewlineVariations(Func <Stream, ITabularWriter> buildWriter, Func <string, bool, ITabularReader> buildReader)
        {
            string samplePath = "NewlineVariations.xsv";
            Stream output     = new FileStream(samplePath, FileMode.Create, FileAccess.ReadWrite);

            using (ITabularWriter writer = buildWriter(output))
            {
                writer.SetColumns(new string[] { "One", "Two", "Three" });

                for (int row = 0; row < 3; row++)
                {
                    writer.Write(3 * row + 1);
                    writer.Write(3 * row + 2);
                    writer.Write(3 * row + 3);

                    // Remember where the row ending starts, then let the writer emit it
                    long rowEndStart = output.Position;
                    writer.NextRow();

                    switch (row)
                    {
                    case 0:
                        // Row 0: go back and write a single newline byte over the row ending
                        output.Seek(rowEndStart, SeekOrigin.Begin);
                        output.WriteByte(UTF8.Newline);
                        break;

                    case 2:
                        // Row 2: truncate the stream so the row has no ending at all
                        output.SetLength(rowEndStart);
                        break;
                    }
                }
            }

            using (ITabularReader reader = buildReader(samplePath, true))
            {
                // The last column heading must be intact
                Assert.AreEqual("Three", reader.Columns[2]);

                // First data row: last cell is exactly "3"
                Assert.IsTrue(reader.NextRow());
                Assert.AreEqual(3, reader.CurrentRowColumns);
                Assert.AreEqual("3", reader.Current(2).ToString());

                // Second data row: last cell is exactly "6"
                Assert.IsTrue(reader.NextRow());
                Assert.AreEqual(3, reader.CurrentRowColumns);
                Assert.AreEqual("6", reader.Current(2).ToString());

                // Third data row, which ends at the end of the file: last cell is exactly "9"
                Assert.IsTrue(reader.NextRow());
                Assert.AreEqual(3, reader.CurrentRowColumns);
                Assert.AreEqual("9", reader.Current(2).ToString());

                Assert.IsFalse(reader.NextRow(), "Reader didn't stop after last line without newline");
            }
        }
Ejemplo n.º 16
0
 /// <summary>
 ///  Get the cell at the given index of the current row, falling back to
 ///  String8TabularValue.Empty when the row has too few columns.
 /// </summary>
 /// <param name="reader">Reader positioned on the row to read from</param>
 /// <param name="index">Zero-based column index</param>
 /// <returns>The cell value, or String8TabularValue.Empty if the row has no such column</returns>
 public static ITabularValue CurrentOrEmpty(this ITabularReader reader, int index)
 {
     // Short row: there is no cell to return
     if (reader.CurrentRowColumns <= index)
     {
         return String8TabularValue.Empty;
     }

     return reader.Current(index);
 }
Ejemplo n.º 17
0
 /// <summary>
 ///  Dispose the underlying reader, if there is one, and clear the reference
 ///  so that a later call does nothing.
 /// </summary>
 public void Dispose()
 {
     if (_reader == null)
     {
         return;
     }

     _reader.Dispose();
     _reader = null;
 }
Ejemplo n.º 18
0
        /// <summary>
        ///  Merge consecutive rows which share the same first-column value into one output row.
        ///  For every other column, values are joined with the delimiter; a value is skipped when
        ///  it equals the value last recorded for that column.
        /// </summary>
        /// <param name="inputFilePath">Path of the file to read</param>
        /// <param name="outputFilePath">Path of the file to write</param>
        /// <param name="delimiter">Text placed between concatenated values</param>
        private static void Concatenate(string inputFilePath, string outputFilePath, String8 delimiter)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                target.SetColumns(source.Columns);

                String8Block copies = new String8Block();

                // NOTE(review): both arrays are sized from CurrentRowColumns before the first NextRow() call;
                // presumably this equals the header column count at this point - confirm against the reader.
                String8[] previousCells = new String8[source.CurrentRowColumns];
                String8[] mergedCells   = new String8[source.CurrentRowColumns];

                while (source.NextRow())
                {
                    String8 rowKey = source.Current(0).ToString8();

                    if (source.RowCountRead == 2)
                    {
                        // RowCountRead is 2 for the first row seen here: just record its key
                        mergedCells[0] = copies.GetCopy(rowKey);
                    }
                    else if (rowKey.CompareTo(mergedCells[0], true) != 0)
                    {
                        // The key changed: emit the row accumulated for the previous key
                        WriteCombinedRow(target, mergedCells);

                        // Start over for the new key
                        copies.Clear();
                        mergedCells[0] = copies.GetCopy(rowKey);

                        for (int column = 1; column < mergedCells.Length; column++)
                        {
                            mergedCells[column] = String8.Empty;
                        }
                    }

                    // Add each cell to the row in progress unless it repeats the last recorded value
                    for (int column = 1; column < source.CurrentRowColumns; column++)
                    {
                        String8 cell = source.Current(column).ToString8();

                        if (previousCells[column] != cell)
                        {
                            // NOTE(review): the reader's value is stored without block.GetCopy, and previousCells
                            // is not reset when the key changes - verify both are intended.
                            previousCells[column] = cell;
                            mergedCells[column]   = copies.Concatenate(mergedCells[column], delimiter, cell);
                        }
                    }
                }

                // Emit whatever was accumulated for the final key
                WriteCombinedRow(target, mergedCells);
                WriteSizeSummary(source, target);
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        ///  Read rows from the reader and yield them in batches of up to BatchSize rows.
        ///  A single DataBlock (and its Value arrays) is allocated once and the same instance
        ///  is yielded for every batch, so each batch overwrites the contents of the previous one.
        /// </summary>
        /// <param name="reader">Reader to take rows from</param>
        /// <param name="columnNames">Names of the columns to copy into the block, in block column order</param>
        /// <returns>The shared DataBlock, yielded once per full batch and once more for a final partial batch</returns>
        private static IEnumerable <DataBlock> ReadAsDataBlockBatch(ITabularReader reader, IList <string> columnNames)
        {
            int       columnCount = columnNames.Count;
            DataBlock batch       = new DataBlock(columnNames, BatchSize);

            // Allocate one pre-filled Value array per column and attach it to the block
            Value[][] cellsByColumn = new Value[columnCount][];
            for (int column = 0; column < columnCount; column++)
            {
                Value[] cells = new Value[BatchSize];
                for (int row = 0; row < BatchSize; row++)
                {
                    cells[row] = Value.Create(null);
                }

                cellsByColumn[column] = cells;
                batch.SetColumn(column, cells);
            }

            // Map each requested column name to its index in the reader
            int[] sourceIndices = new int[columnCount];
            for (int column = 0; column < columnCount; column++)
            {
                sourceIndices[column] = reader.ColumnIndex(columnNames[column]);
            }

            int          rowsInBatch = 0;
            String8Block copies      = new String8Block();

            while (reader.NextRow())
            {
                for (int column = 0; column < columnCount; column++)
                {
                    // Copy the cell out of the reader and assign those bytes to the batch slot
                    String8 cell = copies.GetCopy(reader.Current(sourceIndices[column]).ToString8());
                    cellsByColumn[column][rowsInBatch].Assign(new ByteBlock(cell.Array, cell.Index, cell.Length));
                }

                rowsInBatch++;

                if (rowsInBatch == BatchSize)
                {
                    yield return batch;

                    // Start refilling the same block from the top
                    rowsInBatch = 0;
                    copies.Clear();
                }
            }

            // NOTE(review): for a partial final batch, slots at and after rowsInBatch still hold
            // whatever an earlier batch assigned - confirm how consumers learn the valid row count.
            if (rowsInBatch > 0)
            {
                yield return batch;
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        ///  Open a new reader over the file and rebuild the column and cell arrays to match
        ///  the reader's columns. Any reader left over from a previous call is disposed first,
        ///  so repeated calls do not leak the earlier reader.
        /// </summary>
        public void Reset()
        {
            // Release the previous reader before replacing it
            if (_reader != null)
            {
                _reader.Dispose();
                _reader = null;
            }

            _reader = TabularFactory.BuildReader(_streamProvider.OpenRead(_filePath), _filePath);

            int columnCount = _reader.Columns.Count;

            _columns = new TabularColumn[columnCount];
            _cells   = new String8[columnCount][];
            for (int i = 0; i < columnCount; ++i)
            {
                _columns[i] = new TabularColumn(this, _reader.Columns[i]);
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        ///  Resolve a column name (or an index already given as text) to a column index
        ///  by way of TryGetColumnIndex, throwing when it cannot be resolved.
        ///  Name lookup requires the file to have had a header row; names are case insensitive.
        /// </summary>
        /// <param name="reader">Reader whose columns are searched</param>
        /// <param name="columnNameOrIndex">Column name to look up, or an integer index as a string</param>
        /// <returns>Index of the column in the file</returns>
        /// <exception cref="ColumnNotFoundException">The column could not be resolved; the message lists the known columns</exception>
        public static int ColumnIndex(this ITabularReader reader, string columnNameOrIndex)
        {
            int resolvedIndex;

            if (!reader.TryGetColumnIndex(columnNameOrIndex, out resolvedIndex))
            {
                string knownColumns = String.Join(", ", reader.Columns);
                throw new ColumnNotFoundException(String.Format("Column Name \"{0}\" not found in file.\nKnown Columns: \"{1}\"", columnNameOrIndex, knownColumns));
            }

            return resolvedIndex;
        }
Ejemplo n.º 22
0
        /// <summary>
        ///  Copy every remaining row from the reader to the writer, cell by cell.
        ///  Column headings are not written here.
        /// </summary>
        /// <param name="reader">Reader to take rows from</param>
        /// <param name="writer">Writer to write rows to</param>
        private static void CopyRows(ITabularReader reader, ITabularWriter writer)
        {
            while (reader.NextRow())
            {
                int column = 0;

                while (column < reader.CurrentRowColumns)
                {
                    writer.Write(reader.Current(column).ToString8());
                    column++;
                }

                writer.NextRow();
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        ///  End-to-end test of the "sanitize" command: it returns -2 without a key, returns 0 with one,
        ///  keeps, drops and maps columns as the spec comments describe, and produces different
        ///  output for a different key.
        /// </summary>
        public void Sanitize_EndToEnd()
        {
            Assembly testAssembly = Assembly.GetExecutingAssembly();

            // Extract the sample input and spec from embedded resources
            Resource.SaveStreamTo("Xsv.Test.Sanitize.SanitizeSampleSource.csv", "SanitizeSampleSource.csv", testAssembly);
            Resource.SaveStreamTo("Xsv.Test.Sanitize.SanitizeSampleSource.sanispec", "SanitizeSampleSource.sanispec", testAssembly);

            // Without a key argument, Main must return -2
            string[] argsWithoutKey = new string[] { "sanitize", @"SanitizeSampleSource.csv", "SanitizeOutput.csv", @"SanitizeSampleSource.sanispec" };
            Assert.AreEqual(-2, Program.Main(argsWithoutKey));

            // With a key, a basic sanitize must succeed
            File.Delete("SanitizeOutput.csv");

            string[] argsWithKey1 = new string[] { "sanitize", @"SanitizeSampleSource.csv", "SanitizeOutput.csv", @"SanitizeSampleSource.sanispec", "Key1" };
            Assert.AreEqual(0, Program.Main(argsWithKey1));

            // Inspect the sanitized output
            using (ITabularReader output = TabularFactory.BuildReader("SanitizeOutput.csv"))
            {
                Assert.IsTrue(output.Columns.Contains("ID"), "ID column is kept (no spec line)");
                Assert.IsTrue(output.Columns.Contains("Path"), "Path column is kept (mapped)");
                Assert.IsTrue(output.Columns.Contains("IsEmptyPath"), "IsEmptyPath is kept (Keep line)");
                Assert.IsFalse(output.Columns.Contains("IsUnderXsv"), "IxUnderXsv column is dropped (Drop line)");

                int idColumn          = output.ColumnIndex("ID");
                int pathColumn        = output.ColumnIndex("Path");
                int isEmptyPathColumn = output.ColumnIndex("IsEmptyPath");

                while (output.NextRow())
                {
                    int    rowId      = output.Current(idColumn).ToInteger();
                    string mappedPath = output.Current(pathColumn).ToString();
                    bool   pathEmpty  = String.IsNullOrEmpty(mappedPath);

                    Assert.AreEqual(output.Current(isEmptyPathColumn).ToBoolean(), pathEmpty, "IsEmptyPath condition matches whether mapped path is empty");

                    if (rowId == 5)
                    {
                        Assert.AreEqual("Elfie", mappedPath, "'Elfie' is echoed (Echo in spec)");
                    }
                    else if (!pathEmpty)
                    {
                        Assert.IsTrue(mappedPath.StartsWith("WarmBeggedTruth\\"), "Verify path is mapped in parts, and 'Elfie' is consistently mapped.");
                    }
                }

                Assert.IsTrue(output.RowCountRead < 7, "Verify sample excluded at least one row.");
            }

            // A second key must also succeed...
            string[] argsWithKey2 = new string[] { "sanitize", @"SanitizeSampleSource.csv", "SanitizeOutput2.csv", @"SanitizeSampleSource.sanispec", "Key2" };
            Assert.AreEqual(0, Program.Main(argsWithKey2));

            // ...and must produce output which differs from the first key's output
            Assert.AreNotEqual(File.ReadAllText("SanitizeOutput2.csv"), File.ReadAllText("SanitizeOutput.csv"));
        }
Ejemplo n.º 24
0
        /// <summary>
        ///  Build a writer which appends rows to a file. If the file does not exist, a new
        ///  writer is built and the columns are set on it. If it exists, its column names must
        ///  match columnNames (compared ignoring case) and the file is opened in append mode
        ///  with a writer constructed with 'false' as its second argument.
        /// </summary>
        /// <param name="filePath">Path of the file to append to; must end in .csv or .tsv to append to an existing file</param>
        /// <param name="columnNames">Column names the file is expected to have</param>
        /// <returns>Writer ready to have rows appended; the caller must dispose it</returns>
        /// <exception cref="InvalidOperationException">The existing file's column names don't match columnNames</exception>
        /// <exception cref="NotSupportedException">The existing file's extension is not csv or tsv</exception>
        public static ITabularWriter AppendWriter(string filePath, IEnumerable <string> columnNames)
        {
            ITabularWriter writer;

            // If the file doesn't exist, make a new writer
            if (!File.Exists(filePath))
            {
                writer = BuildWriter(filePath);
                writer.SetColumns(columnNames);
                return(writer);
            }

            // Verify columns match
            string expectedColumns = string.Join(", ", columnNames);

            using (ITabularReader r = TabularFactory.BuildReader(filePath))
            {
                string actualColumns = string.Join(", ", r.Columns);
                if (string.Compare(expectedColumns, actualColumns, true) != 0)
                {
                    throw new InvalidOperationException(string.Format("Can't append to \"{0}\" because the column names don't match.\r\nExpect: {1}\r\nActual: {2}", filePath, expectedColumns, actualColumns));
                }
            }

            // Build the writer
            FileStream s = new FileStream(filePath, FileMode.Append, FileAccess.Write, FileShare.Read);

            try
            {
                string extension = Path.GetExtension(filePath).ToLowerInvariant().TrimStart('.');

                switch (extension)
                {
                case "csv":
                    writer = new CsvWriter(s, false);
                    break;

                case "tsv":
                    writer = new TsvWriter(s, false);
                    break;

                default:
                    throw new NotSupportedException(String.Format("Xsv does not know how to append to \"{0}\". Known Extensions: [csv, tsv]", extension));
                }

                // Set the columns so the writer knows the count (writers shouldn't write the columns if writeHeaderRow was false)
                writer.SetColumns(columnNames);

                return(writer);
            }
            catch
            {
                // No writer is returned on failure, so nothing else will ever close the
                // stream; release the file handle here before propagating the error.
                s.Dispose();
                throw;
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        ///  Copy the reader's current row to the writer. Does nothing if no writer was provided.
        /// </summary>
        /// <param name="reader">Reader positioned on the row to copy</param>
        /// <param name="writer">Writer to copy the row to, or null to skip writing</param>
        private static void EchoRow(ITabularReader reader, ITabularWriter writer)
        {
            // No output requested
            if (writer == null)
            {
                return;
            }

            // Pass the reader's columns to the writer before the first row is written
            if (writer.RowCountWritten == 0)
            {
                writer.SetColumns(reader.Columns);
            }

            // Copy each cell of the current row across
            int columnIndex = 0;
            while (columnIndex < reader.CurrentRowColumns)
            {
                writer.Write(reader.Current(columnIndex).ToString8());
                ++columnIndex;
            }

            writer.NextRow();
        }
Ejemplo n.º 26
0
        /// <summary>
        ///  Read each row of the input file, run every column which has a handler through
        ///  that handler, and write the handler output to the output file.
        ///  When a sample column is configured, only rows whose sample value hash is at or
        ///  below the inclusion cutoff are written.
        /// </summary>
        /// <param name="inputFile">File Path of the file to read</param>
        /// <param name="outputFile">File Path of the file to write</param>
        public void Sanitize(string inputFile, string outputFile)
        {
            using (ITabularReader reader = TabularFactory.BuildReader(inputFile))
            {
                // Look up the handler for each input column (null means no output for that column)
                // and the names of the columns which will be written.
                List<string> outputColumnNames;
                IColumnHandler[] handlersByColumn = GetHandlersByColumnIndex(reader.Columns, out outputColumnNames);

                // Resolve the sample column; -1 means no sample column is configured
                int sampleIndex = -1;
                if (!String.IsNullOrEmpty(this.SampleColumnName))
                {
                    sampleIndex = reader.ColumnIndex(this.SampleColumnName);
                }

                // Rows whose sample hash exceeds this cutoff are excluded
                uint hashCutoff = (uint)(uint.MaxValue * this.SampleProbability);

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFile))
                {
                    writer.SetColumns(outputColumnNames);

                    while (reader.NextRow())
                    {
                        if (sampleIndex > -1)
                        {
                            // The hash is computed with key 0 (not the hashkey) so the same rows are
                            // consistently included or excluded.
                            uint rowHash = Hashing.Hash(reader.Current(sampleIndex).ToString8(), 0);
                            if (rowHash > hashCutoff)
                            {
                                continue;
                            }
                        }

                        // Write the sanitized value for each column which has a handler
                        for (int column = 0; column < reader.CurrentRowColumns; ++column)
                        {
                            IColumnHandler columnHandler = handlersByColumn[column];
                            if (columnHandler == null)
                            {
                                continue;
                            }

                            String8 sanitized = columnHandler.Sanitize(reader.Current(column).ToString8());
                            writer.Write(sanitized);
                        }

                        writer.NextRow();
                    }
                }
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        ///  Verify WhereMatcher match counts against the sample file for each supported
        ///  operator and value type, the exceptions for invalid columns and operators,
        ///  and that matching rows are written when an output writer is provided.
        /// </summary>
        public void WhereMatcher_Basics()
        {
            Assert.AreEqual(1000, WhereMatchCount(s_sampleFilePath, "0", ">=", "0"), "Should match all rows (by column index)");

            // Typed comparisons
            Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "ID", ">=", "500"), "Should match half of rows (int)");
            Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "IsEven", "==", "true"), "Should match half of rows (boolean)");
            Assert.AreEqual(90, WhereMatchCount(s_sampleFilePath, "WhenAdded", "<", "2017-05-23 01:30:00 AM"), "Should match 90 rows (DateTime)");
            Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", "=", "Sophie"), "Should match 250 rows (string)");

            // String operators
            Assert.AreEqual(750, WhereMatchCount(s_sampleFilePath, "Name", "!=", "Sophie"), "Should match 750 rows (string !=)");
            Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", "|>", "Sop"), "Should match 250 rows (string StartsWith)");
            Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", ":", "ophie"), "Should match 250 rows (string Contains)");
            Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", ">", "Scott"), "Should match 250 rows (string >)");
            Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "Name", ">=", "Scott"), "Should match 500 rows (string >=)");
            Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "Name", "<", "Scott"), "Should match 500 rows (string <)");
            Assert.AreEqual(750, WhereMatchCount(s_sampleFilePath, "Name", "<=", "Scott"), "Should match 750 rows (string <=)");

            // Values in the "Name" column compared against bool, int, and DateTime values: no rows are expected to match
            Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "false"), "Should match 0 rows (bool, can't convert type)");
            Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "0"), "Should match 0 rows (int, can't convert type)");
            Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "2017-01-01"), "Should match 0 rows (DateTime, can't convert type)");

            // Column name doesn't exist
            Verify.Exception<ColumnNotFoundException>(() => WhereMatchCount(s_sampleFilePath, "MissingColumn", "==", "Jeff"));

            // Column index out of range
            Verify.Exception<ColumnNotFoundException>(() => WhereMatchCount(s_sampleFilePath, "-1", "==", "Jeff"));

            // Unknown operator
            Verify.Exception<UsageException>(() => WhereMatchCount(s_sampleFilePath, "Name", "->", "Jeff"));

            // Try with output enabled
            using (ITabularReader reader = TabularFactory.BuildReader(s_sampleFilePath))
            {
                using (ITabularWriter writer = TabularFactory.BuildWriter("Sample.Under2.csv"))
                {
                    WhereMatcher.Where(reader, "ID", "<", "2", writer);
                    Assert.AreEqual(2, writer.RowCountWritten);
                }

                // The writer is disposed before the output file is read back
                string content = File.ReadAllText("Sample.Under2.csv");
                Assert.IsTrue(content.Contains("\"0\""));
                Assert.IsTrue(content.Contains("\"1\""));
                Assert.IsFalse(content.Contains("\"2\""));
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        ///  Load a TableMetadata for the table under 'tableRootPath': the schema columns from
        ///  the schema file, the row count from the metadata file, and the query text from
        ///  the config query file.
        /// </summary>
        /// <param name="streamProvider">Provider used to open and read the table's files</param>
        /// <param name="tableRootPath">Root path under which the table's files are found</param>
        /// <returns>Populated TableMetadata</returns>
        /// <exception cref="NotImplementedException">The metadata file contains a Name other than "RowCount"</exception>
        private static TableMetadata Build(IStreamProvider streamProvider, string tableRootPath)
        {
            TableMetadata result = new TableMetadata();

            // Schema: one row per column, with "Name" and "Type" values
            string schemaPath = Path.Combine(tableRootPath, SchemaFileName);
            using (ITabularReader schemaReader = TabularFactory.BuildReader(streamProvider.OpenRead(schemaPath), SchemaFileName))
            {
                int nameColumn = schemaReader.ColumnIndex("Name");
                int typeColumn = schemaReader.ColumnIndex("Type");

                while (schemaReader.NextRow())
                {
                    string columnName = schemaReader.Current(nameColumn).ToString();
                    string typeName = schemaReader.Current(typeColumn).ToString();

                    result.Schema.Add(new ColumnDetails(columnName, TypeProviderFactory.Get(typeName).Type));
                }
            }

            // Metadata: rows with "Name", "Context", and "Value"; only "RowCount" is understood
            string metadataPath = Path.Combine(tableRootPath, MetadataFileName);
            using (ITabularReader metadataReader = TabularFactory.BuildReader(streamProvider.OpenRead(metadataPath), MetadataFileName))
            {
                int nameColumn = metadataReader.ColumnIndex("Name");
                int contextColumn = metadataReader.ColumnIndex("Context");
                int valueColumn = metadataReader.ColumnIndex("Value");

                while (metadataReader.NextRow())
                {
                    String8 name = metadataReader.Current(nameColumn).ToString8();

                    // The context value is read but not currently used
                    String8 context = metadataReader.Current(contextColumn).ToString8();
                    ITabularValue value = metadataReader.Current(valueColumn);

                    if (!name.Equals("RowCount"))
                    {
                        throw new NotImplementedException($"TableMetadataSerializer.Read doesn't know how to read Metadata '{name}'");
                    }

                    result.RowCount = value.ToInteger();
                }
            }

            string queryPath = Path.Combine(tableRootPath, ConfigQueryPath);
            result.Query = streamProvider.ReadAllText(queryPath);

            return result;
        }
Ejemplo n.º 29
0
        /// <summary>
        ///  Copy rows from the input file to the output file, keeping only rows whose value
        ///  in the identified column also appears in that column of 'onlyInInputFilePath'.
        /// </summary>
        /// <param name="inputFilePath">File to copy rows from</param>
        /// <param name="outputFilePath">File to write matching rows to</param>
        /// <param name="onlyInInputFilePath">File providing the set of values to keep</param>
        /// <param name="onlyInColumnIdentifier">Identifier of the column to compare, resolved in both files</param>
        private static void OnlyIn(string inputFilePath, string outputFilePath, string onlyInInputFilePath, string onlyInColumnIdentifier)
        {
            String8Block copyBlock = new String8Block();
            HashSet<String8> valuesToKeep = new HashSet<String8>();

            // First pass: collect a copy of each value of the column in the "only in" file
            using (ITabularReader filterReader = TabularFactory.BuildReader(onlyInInputFilePath))
            {
                int filterColumn = filterReader.ColumnIndex(onlyInColumnIdentifier);

                while (filterReader.NextRow())
                {
                    valuesToKeep.Add(copyBlock.GetCopy(filterReader.Current(filterColumn)));
                }
            }

            // Second pass: echo each input row whose column value was collected above
            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                int matchColumn = reader.ColumnIndex(onlyInColumnIdentifier);

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
                {
                    writer.SetColumns(reader.Columns);

                    while (reader.NextRow())
                    {
                        bool isKept = valuesToKeep.Contains(reader.Current(matchColumn).ToString8());
                        if (!isKept)
                        {
                            continue;
                        }

                        for (int column = 0; column < reader.CurrentRowColumns; ++column)
                        {
                            writer.Write(reader.Current(column).ToString8());
                        }

                        writer.NextRow();
                    }

                    WriteSizeSummary(reader, writer);
                }
            }
        }
Ejemplo n.º 30
0
        /// <summary>
        ///  Measure reader throughput: read the sample file 100 times, converting the
        ///  "LineNumber" and "Count" values to integers and the "Description" value to a
        ///  String8 for each row with at least two columns, and report the total bytes
        ///  read to Verify.PerformanceByBytes.
        /// </summary>
        /// <param name="sampleFilePath">Path of the file to read repeatedly</param>
        /// <param name="buildReader">Factory building the reader under test from a file path and a boolean flag</param>
        public void Reader_Performance(string sampleFilePath, Func<string, bool, ITabularReader> buildReader)
        {
            long totalRowsRead = 0;
            long fileLengthBytes = new FileInfo(sampleFilePath).Length;

            // Goal: 100MB/sec [Surface Book i7]
            Verify.PerformanceByBytes(50 * LongExtensions.Megabyte, () =>
            {
                int passCount = 100;

                for (int pass = 0; pass < passCount; ++pass)
                {
                    using (ITabularReader reader = buildReader(sampleFilePath, true))
                    {
                        int lineNumberColumn = reader.ColumnIndex("LineNumber");
                        int countColumn = reader.ColumnIndex("Count");
                        int descriptionColumn = reader.ColumnIndex("Description");

                        while (reader.NextRow())
                        {
                            totalRowsRead++;

                            // Only convert values for rows with at least two columns
                            if (reader.CurrentRowColumns >= 2)
                            {
                                // The converted values are discarded; only the conversion work is being timed
                                int lineNumber;
                                reader.Current(lineNumberColumn).TryToInteger(out lineNumber);

                                int count;
                                reader.Current(countColumn).TryToInteger(out count);

                                String8 description = reader.Current(descriptionColumn).ToString8();
                            }
                        }
                    }
                }

                // Total bytes read across all passes
                return passCount * fileLengthBytes;
            });
        }