/// <summary>
///  Round-trip check: write a sample file, copy it cell by cell through the
///  reader and writer under test, and verify the copy has the same text.
/// </summary>
/// <param name="buildReader">Factory for the reader under test; called with a file path and 'true'</param>
/// <param name="buildWriter">Factory for the writer under test; called with the output stream</param>
public void Reader_Roundtrip(Func<string, bool, ITabularReader> buildReader, Func<Stream, ITabularWriter> buildWriter)
{
    string sourcePath = "ValidSample.xsv";
    string copyPath = sourcePath + ".new";

    // Write a valid file with some values which require CSV escaping
    WriteValidSample(new FileStream(sourcePath, FileMode.Create, FileAccess.ReadWrite), buildWriter);

    // Copy straight from the reader to the writer, so every value is unescaped and then escaped again
    using (ITabularReader reader = buildReader(sourcePath, true))
    using (ITabularWriter writer = buildWriter(new FileStream(copyPath, FileMode.Create, FileAccess.ReadWrite)))
    {
        writer.SetColumns(reader.Columns);

        while (reader.NextRow())
        {
            for (int column = 0; column < reader.CurrentRowColumns; ++column)
            {
                writer.Write(reader.Current(column).ToString8());
            }

            writer.NextRow();
        }
    }

    // The copy must be identical to the original
    string textBefore = File.ReadAllText(sourcePath);
    string textAfter = File.ReadAllText(copyPath);
    Assert.AreEqual(textBefore, textAfter);
}
/// <summary>
///  Copy a tabular file, sending the cells of the listed columns through
///  WriteHtmlEscaped and writing every other cell unchanged.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="columnsDelimited">Comma-delimited column identifiers; each is trimmed and resolved with ColumnIndex</param>
private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnsDelimited)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        // Resolve the columns once, up front. A HashSet keeps the membership test
        // (run for every cell of every row) constant-time instead of a list scan.
        HashSet<int> columnIndicesToEscape = new HashSet<int>(
            columnsDelimited.Split(',').Select((col) => reader.ColumnIndex(col.Trim())));

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(reader.Columns);

            while (reader.NextRow())
            {
                for (int i = 0; i < reader.CurrentRowColumns; ++i)
                {
                    String8 cell = reader.Current(i).ToString8();

                    if (columnIndicesToEscape.Contains(i))
                    {
                        WriteHtmlEscaped(cell, writer);
                    }
                    else
                    {
                        writer.Write(cell);
                    }
                }

                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Compare the distinct values of one column between two files. Traces the
///  counts and writes an output file with one row per value found in only one
///  file, marked "-" (old file only) or "+" (new file only).
/// </summary>
/// <param name="oldFilePath">File path of the old file</param>
/// <param name="newFilePath">File path of the new file</param>
/// <param name="outputFilePath">File path to write the differences to</param>
/// <param name="columnIdentifier">Identifier of the column to compare, resolved with ColumnIndex in each file</param>
private static void Compare(string oldFilePath, string newFilePath, string outputFilePath, string columnIdentifier)
{
    // Values are copied into this block before they are kept in the sets
    String8Block valueStore = new String8Block();
    HashSet<String8> oldValues = new HashSet<String8>();
    HashSet<String8> newValues = new HashSet<String8>();

    // Collect the column's values from the old file
    using (ITabularReader oldReader = TabularFactory.BuildReader(oldFilePath))
    {
        int oldColumnIndex = oldReader.ColumnIndex(columnIdentifier);

        while (oldReader.NextRow())
        {
            oldValues.Add(valueStore.GetCopy(oldReader.Current(oldColumnIndex)));
        }

        Trace.WriteLine(String.Format("Old: {0:n0} values for \"{1}\" in {2:n0} rows.", oldValues.Count, columnIdentifier, oldReader.RowCountRead));
    }

    // Collect the column's values from the new file
    using (ITabularReader newReader = TabularFactory.BuildReader(newFilePath))
    {
        int newColumnIndex = newReader.ColumnIndex(columnIdentifier);

        while (newReader.NextRow())
        {
            newValues.Add(valueStore.GetCopy(newReader.Current(newColumnIndex)));
        }

        Trace.WriteLine(String.Format("New: {0:n0} values for \"{1}\" in {2:n0} rows.", newValues.Count, columnIdentifier, newReader.RowCountRead));
    }

    // Set differences in both directions
    HashSet<String8> onlyInOld = new HashSet<String8>(oldValues);
    onlyInOld.ExceptWith(newValues);

    HashSet<String8> onlyInNew = new HashSet<String8>(newValues);
    onlyInNew.ExceptWith(oldValues);

    Trace.WriteLine(String.Format("{0:n0} values were only in \"{1}\".\r\n{2:n0} values were only in \"{3}\".", onlyInOld.Count, oldFilePath, onlyInNew.Count, newFilePath));

    String8 oldMarker = String8.Convert("-", new byte[1]);
    String8 newMarker = String8.Convert("+", new byte[1]);

    using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
    {
        writer.SetColumns(new string[] { "In", columnIdentifier });

        foreach (String8 removed in onlyInOld)
        {
            writer.Write(oldMarker);
            writer.Write(removed);
            writer.NextRow();
        }

        foreach (String8 added in onlyInNew)
        {
            writer.Write(newMarker);
            writer.Write(added);
            writer.NextRow();
        }
    }
}
/// <summary>
///  Run WhereMatcher.Where over a file with no writer and return the match count.
/// </summary>
/// <param name="inputPath">File path of the file to read</param>
/// <param name="columnIdentifier">Column identifier passed to WhereMatcher.Where</param>
/// <param name="op">Operator text passed to WhereMatcher.Where</param>
/// <param name="value">Value text passed to WhereMatcher.Where</param>
/// <returns>MatchCount of the result returned by WhereMatcher.Where</returns>
private static int WhereMatchCount(string inputPath, string columnIdentifier, string op, string value)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputPath))
    {
        // A null writer is passed: only the count is wanted here
        var outcome = WhereMatcher.Where(reader, columnIdentifier, op, value, null);
        return outcome.MatchCount;
    }
}
/// <summary>
///  Copy only the listed columns of a file, in the order listed, to a new file.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="columnsDelimited">Comma-delimited column identifiers; each is trimmed before use</param>
private static void Copy(string inputFilePath, string outputFilePath, string columnsDelimited)
{
    // Split and trim the requested column identifiers
    string[] requested = columnsDelimited.Split(',');
    List<string> columns = new List<string>(requested.Length);
    for (int n = 0; n < requested.Length; ++n)
    {
        columns.Add(requested[n].Trim());
    }

    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        // Resolve each requested column to its index in the input
        int[] sourceIndices = new int[columns.Count];
        for (int n = 0; n < sourceIndices.Length; ++n)
        {
            sourceIndices[n] = reader.ColumnIndex(columns[n]);
        }

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(columns);

            while (reader.NextRow())
            {
                foreach (int sourceIndex in sourceIndices)
                {
                    writer.Write(reader.Current(sourceIndex).ToString8());
                }

                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Copy the rows for which the value column does not start with the name column.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="valueColumnIdentifier">Identifier of the column whose value is tested</param>
/// <param name="nameColumnIdentifier">Identifier of the column holding the prefix to test for</param>
private static void NotStartsWith(string inputFilePath, string outputFilePath, string valueColumnIdentifier, string nameColumnIdentifier)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        int valueColumnIndex = reader.ColumnIndex(valueColumnIdentifier);
        int nameColumnIndex = reader.ColumnIndex(nameColumnIdentifier);

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(reader.Columns);

            while (reader.NextRow())
            {
                String8 prefix = reader.Current(nameColumnIndex).ToString8();
                String8 candidate = reader.Current(valueColumnIndex).ToString8();

                // Rows where the value starts with the name are dropped
                if (candidate.StartsWith(prefix))
                {
                    continue;
                }

                for (int column = 0; column < reader.CurrentRowColumns; ++column)
                {
                    writer.Write(reader.Current(column).ToString8());
                }

                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Copy a file row by row, stopping once the writer's RowCountWritten equals rowLimit.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="rowLimit">Value of writer.RowCountWritten at which copying stops; defaults to -1</param>
private static void Copy(string inputFilePath, string outputFilePath, int rowLimit = -1)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
    {
        writer.SetColumns(reader.Columns);

        // The limit is checked after each row is written
        bool limitReached = false;
        while (!limitReached && reader.NextRow())
        {
            for (int column = 0; column < reader.CurrentRowColumns; ++column)
            {
                writer.Write(reader.Current(column).ToString8());
            }

            writer.NextRow();
            limitReached = (writer.RowCountWritten == rowLimit);
        }

        WriteSizeSummary(reader, writer);
    }
}
/// <summary>
///  Trace a one-line summary of what was read and what was written. The byte
///  size is included only when the reported byte count is greater than zero.
/// </summary>
/// <param name="reader">Reader to summarize, or null to skip the read summary</param>
/// <param name="writer">Writer to summarize, or null to skip the write summary</param>
private static void WriteSizeSummary(ITabularReader reader, ITabularWriter writer)
{
    if (reader != null)
    {
        long bytesRead = reader.BytesRead;

        string readSummary = (bytesRead > 0)
            ? String.Format("Read: {0}, {1:n0} rows.", bytesRead.SizeString(), reader.RowCountRead)
            : String.Format("Read: {0:n0} rows.", reader.RowCountRead);

        Trace.WriteLine(readSummary);
    }

    if (writer != null)
    {
        long bytesWritten = writer.BytesWritten;

        string writeSummary = (bytesWritten > 0)
            ? String.Format("Wrote: {0}, {1:n0} rows.", bytesWritten.SizeString(), writer.RowCountWritten)
            : String.Format("Wrote: {0:n0} rows.", writer.RowCountWritten);

        Trace.WriteLine(writeSummary);
    }
}
/// <summary>
///  Match rows by comparing a boolean column against result.Value. Each match
///  increments result.MatchCount and is passed to EchoRow.
/// </summary>
/// <param name="reader">Reader to take rows from</param>
/// <param name="writer">Writer handed to EchoRow for each matching row</param>
/// <param name="result">Supplies ColumnIndex, Op and Value (cast to bool); MatchCount is updated</param>
private static void MatchBoolCompare(ITabularReader reader, ITabularWriter writer, WhereResult result)
{
    bool target = (bool)result.Value;

    while (reader.NextRow())
    {
        // Rows without enough columns can't match
        if (reader.CurrentRowColumns > result.ColumnIndex)
        {
            // The cell must convert to a boolean, and the comparison must satisfy the operator
            bool cellValue;
            if (reader.Current(result.ColumnIndex).ToString8().TryToBoolean(out cellValue)
                && result.Op.Matches(cellValue.CompareTo(target)))
            {
                result.MatchCount++;
                EchoRow(reader, writer);
            }
        }
    }
}
/// <summary>
///  Write each distinct value of one column, in order of first appearance, to a
///  single-column output file.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="columnIdentifier">Identifier of the column to take values from</param>
private static void Distinct(string inputFilePath, string outputFilePath, string columnIdentifier)
{
    // Values already seen are copied into this block before they are kept in the set
    String8Block seenStore = new String8Block();
    HashSet<String8> seen = new HashSet<String8>();

    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        int columnIndex = reader.ColumnIndex(columnIdentifier);

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(new string[] { reader.Columns[columnIndex] });

            while (reader.NextRow())
            {
                String8 current = reader.Current(columnIndex).ToString8();

                if (seen.Contains(current))
                {
                    continue;
                }

                // First time this value appears: remember a copy and write it out
                seen.Add(seenStore.GetCopy(current));
                writer.Write(current);
                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Copy a tabular file, sending the cells of one column through
///  WriteHtmlEscaped and writing every other cell unchanged.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="columnIdentifier">Identifier of the column to pass to WriteHtmlEscaped</param>
private static void HtmlInnerText(string inputFilePath, string outputFilePath, string columnIdentifier)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        int escapedColumn = reader.ColumnIndex(columnIdentifier);

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(reader.Columns);

            while (reader.NextRow())
            {
                for (int column = 0; column < reader.CurrentRowColumns; ++column)
                {
                    String8 cell = reader.Current(column).ToString8();

                    if (column != escapedColumn)
                    {
                        writer.Write(cell);
                    }
                    else
                    {
                        WriteHtmlEscaped(cell, writer);
                    }
                }

                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Copy a file, adding a leading "ID" column holding a number that starts at
///  firstId and increases by one per row.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="firstId">ID written for the first row; defaults to 1</param>
private static void RowId(string inputFilePath, string outputFilePath, int firstId = 1)
{
    int nextId = firstId;

    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
    {
        // Output columns: "ID" followed by all of the input columns
        List<string> outputColumns = new List<string> { "ID" };
        outputColumns.AddRange(reader.Columns);
        writer.SetColumns(outputColumns);

        while (reader.NextRow())
        {
            writer.Write(nextId);
            nextId++;

            for (int column = 0; column < reader.CurrentRowColumns; ++column)
            {
                writer.Write(reader.Current(column).ToString8());
            }

            writer.NextRow();
        }

        WriteSizeSummary(reader, writer);
    }
}
/// <summary>
///  Match rows whose column value contains result.Value. Each match increments
///  result.MatchCount and is passed to EchoRow.
/// </summary>
/// <param name="reader">Reader to take rows from</param>
/// <param name="writer">Writer handed to EchoRow for each matching row</param>
/// <param name="result">Supplies ColumnIndex and Value (cast to string); MatchCount is updated</param>
private static void MatchContains(ITabularReader reader, ITabularWriter writer, WhereResult result)
{
    // Convert the search text to a String8 once, before scanning
    string searchText = (string)result.Value;
    byte[] searchBuffer = new byte[String8.GetLength(searchText)];
    String8 search = String8.Convert(searchText, searchBuffer);

    while (reader.NextRow())
    {
        // Rows without enough columns can't match
        if (reader.CurrentRowColumns > result.ColumnIndex)
        {
            if (reader.Current(result.ColumnIndex).ToString8().IndexOf(search) != -1)
            {
                result.MatchCount++;
                EchoRow(reader, writer);
            }
        }
    }
}
/// <summary>
///  Append the rows of one file, or of every matching file in a folder, to an
///  output file. The writer is built from the first input's columns; each input
///  must have the same column names (compared ignoring case).
/// </summary>
/// <param name="inputFileOrFolderPath">Path of a single input file, or of a folder of input files</param>
/// <param name="outputFilePath">File path of the file to append to</param>
/// <param name="inputFileNamePattern">Search pattern used when the input is a folder; "*.*" if null or empty</param>
/// <exception cref="InvalidOperationException">An input file's column names differ from the writer's</exception>
private static void Append(string inputFileOrFolderPath, string outputFilePath, string inputFileNamePattern = null)
{
    // Work out the list of input files
    string[] inputFilePaths;
    if (!Directory.Exists(inputFileOrFolderPath))
    {
        inputFilePaths = new string[] { inputFileOrFolderPath };
    }
    else
    {
        if (String.IsNullOrEmpty(inputFileNamePattern))
        {
            inputFileNamePattern = "*.*";
        }

        inputFilePaths = Directory.GetFiles(inputFileOrFolderPath, inputFileNamePattern);
    }

    ITabularWriter writer = null;
    string writerColumns = null;

    try
    {
        foreach (string inputFilePath in inputFilePaths)
        {
            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                string sourceColumns = String.Join(", ", reader.Columns);

                // The first file opened determines the writer and its columns
                if (writer == null)
                {
                    writer = TabularFactory.AppendWriter(outputFilePath, reader.Columns);
                    writerColumns = sourceColumns;
                }

                // Every file must have the same columns as the writer
                bool columnsMatch = (string.Compare(writerColumns, sourceColumns, true) == 0);
                if (!columnsMatch)
                {
                    throw new InvalidOperationException(string.Format("Can't append to \"{0}\" because the column names don't match.\r\nExpect: {1}\r\nActual: {2}", outputFilePath, writerColumns, sourceColumns));
                }

                CopyRows(reader, writer);

                // Per-input summary
                Trace.WriteLine($" {inputFilePath}, {reader.RowCountRead:n0} rows; {reader.BytesRead.SizeString()}");
            }
        }

        // Output summary
        WriteSizeSummary(null, writer);
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
            writer = null;
        }
    }
}
/// <summary>
///  Write three rows, altering the end of row 0 and removing the end of row 2
///  directly on the stream, then verify the reader still reports three columns
///  and the expected last value for every row.
/// </summary>
/// <param name="buildWriter">Factory for the writer under test; called with the output stream</param>
/// <param name="buildReader">Factory for the reader under test; called with a file path and 'true'</param>
public void Reader_NewlineVariations(Func<Stream, ITabularWriter> buildWriter, Func<string, bool, ITabularReader> buildReader)
{
    string xsvPath = "NewlineVariations.xsv";
    Stream stream = new FileStream(xsvPath, FileMode.Create, FileAccess.ReadWrite);

    using (ITabularWriter w = buildWriter(stream))
    {
        w.SetColumns(new string[] { "One", "Two", "Three" });

        for (int row = 0; row < 3; ++row)
        {
            // Values 1..9, three per row
            int firstValue = 3 * row + 1;
            w.Write(firstValue);
            w.Write(firstValue + 1);
            w.Write(firstValue + 2);

            // Capture the stream position, write the end of row, then override it
            long positionBeforeRowEnd = stream.Position;
            w.NextRow();

            switch (row)
            {
                case 0:
                    // Row 0 - newline only: seek back and write a single newline byte
                    stream.Seek(positionBeforeRowEnd, SeekOrigin.Begin);
                    stream.WriteByte(UTF8.Newline);
                    break;

                case 2:
                    // Row 2 - no end of line: truncate to the captured position
                    stream.SetLength(positionBeforeRowEnd);
                    break;
            }
        }
    }

    using (ITabularReader r = buildReader(xsvPath, true))
    {
        // Verify column heading not clipped even though no '\r'
        Assert.AreEqual("Three", r.Columns[2]);

        // Verify last column doesn't have extra '\r' when terminated with '\r\n'
        Assert.IsTrue(r.NextRow());
        Assert.AreEqual(3, r.CurrentRowColumns);
        Assert.AreEqual("3", r.Current(2).ToString());

        // Verify last column not clipped when terminated with '\n'
        Assert.IsTrue(r.NextRow());
        Assert.AreEqual(3, r.CurrentRowColumns);
        Assert.AreEqual("6", r.Current(2).ToString());

        // Verify last column not clipped when unterminated [EOF]
        Assert.IsTrue(r.NextRow());
        Assert.AreEqual(3, r.CurrentRowColumns);
        Assert.AreEqual("9", r.Current(2).ToString());

        Assert.IsFalse(r.NextRow(), "Reader didn't stop after last line without newline");
    }
}
/// <summary>
///  Return the cell at the given index of the current row, or
///  String8TabularValue.Empty if the row doesn't have that many columns.
/// </summary>
/// <param name="reader">ITabularReader</param>
/// <param name="index">Zero-based column index</param>
/// <returns>ITabularValue for column</returns>
public static ITabularValue CurrentOrEmpty(this ITabularReader reader, int index)
{
    // Row too short for this index: hand back the shared empty value
    if (index >= reader.CurrentRowColumns)
    {
        return String8TabularValue.Empty;
    }

    return reader.Current(index);
}
/// <summary>
///  Dispose the underlying reader, if there is one, and clear the reference so
///  that a repeated call does nothing.
/// </summary>
public void Dispose()
{
    if (_reader == null)
    {
        return;
    }

    _reader.Dispose();
    _reader = null;
}
/// <summary>
///  Merge consecutive rows that share the same first-column value (compared
///  with CompareTo(..., true)) into one output row. For every other column,
///  values are concatenated with the delimiter, skipping a value equal to the
///  last one seen for that column.
/// </summary>
/// <param name="inputFilePath">File path of the file to read</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="delimiter">Delimiter passed to String8Block.Concatenate between values</param>
private static void Concatenate(string inputFilePath, string outputFilePath, String8 delimiter)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
    {
        writer.SetColumns(reader.Columns);

        String8Block rowStore = new String8Block();

        // Both arrays are sized from CurrentRowColumns as reported before the first NextRow call
        String8[] previousValues = new String8[reader.CurrentRowColumns];
        String8[] mergedRow = new String8[reader.CurrentRowColumns];

        while (reader.NextRow())
        {
            String8 id = reader.Current(0).ToString8();

            if (reader.RowCountRead == 2)
            {
                // First Row - Get the first ID only
                mergedRow[0] = rowStore.GetCopy(id);
            }
            else if (id.CompareTo(mergedRow[0], true) != 0)
            {
                // A new ID (and not the first row): write the values merged for the previous ID
                WriteCombinedRow(writer, mergedRow);

                // Start over for this ID
                rowStore.Clear();
                mergedRow[0] = rowStore.GetCopy(id);

                for (int column = 1; column < mergedRow.Length; ++column)
                {
                    mergedRow[column] = String8.Empty;
                }
            }

            // Concatenate non-duplicate values onto the row in progress
            for (int column = 1; column < reader.CurrentRowColumns; ++column)
            {
                String8 cell = reader.Current(column).ToString8();

                // NOTE(review): previousValues keeps 'cell' without a GetCopy, unlike the
                // other values retained across rows here - confirm it stays valid after NextRow.
                if (previousValues[column] != cell)
                {
                    previousValues[column] = cell;
                    mergedRow[column] = rowStore.Concatenate(mergedRow[column], delimiter, cell);
                }
            }
        }

        // After the last row, write out the values merged so far
        WriteCombinedRow(writer, mergedRow);

        WriteSizeSummary(reader, writer);
    }
}
/// <summary>
///  Read the named columns from the reader in batches of up to BatchSize rows.
///  A single DataBlock is allocated and the same instance is yielded for every
///  batch, with its cells reassigned as rows are read.
/// </summary>
/// <param name="reader">Reader to take rows from</param>
/// <param name="columnNames">Names of the columns to read, resolved with ColumnIndex</param>
/// <returns>The DataBlock, yielded once per full batch and once more for a final partial batch</returns>
private static IEnumerable<DataBlock> ReadAsDataBlockBatch(ITabularReader reader, IList<string> columnNames)
{
    int columnCount = columnNames.Count;

    // Build the DataBlock which holds a batch of rows, with every cell pre-created
    DataBlock batch = new DataBlock(columnNames, BatchSize);
    Value[][] cellsByColumn = new Value[columnCount][];

    for (int column = 0; column < columnCount; ++column)
    {
        Value[] cells = new Value[BatchSize];
        for (int row = 0; row < BatchSize; ++row)
        {
            cells[row] = Value.Create(null);
        }

        cellsByColumn[column] = cells;
        batch.SetColumn(column, cells);
    }

    // Look up where each requested column is in the reader
    int[] sourceIndices = new int[columnCount];
    for (int column = 0; column < columnCount; ++column)
    {
        sourceIndices[column] = reader.ColumnIndex(columnNames[column]);
    }

    // Fill the batch row by row, yielding whenever it is full
    int rowsInBatch = 0;
    String8Block batchStore = new String8Block();

    while (reader.NextRow())
    {
        for (int column = 0; column < columnCount; ++column)
        {
            // Copy the cell into the batch's own block before handing its bytes to the Value
            String8 cell = batchStore.GetCopy(reader.Current(sourceIndices[column]).ToString8());
            cellsByColumn[column][rowsInBatch].Assign(new ByteBlock(cell.Array, cell.Index, cell.Length));
        }

        rowsInBatch++;

        if (rowsInBatch == BatchSize)
        {
            yield return batch;

            // Reset the row count and the copied bytes for the next batch
            rowsInBatch = 0;
            batchStore.Clear();
        }
    }

    // Yield the rows left over after the last full batch
    if (rowsInBatch > 0)
    {
        yield return batch;
    }
}
/// <summary>
///  (Re)open the file from the start: build a new reader over a fresh stream
///  from the stream provider and rebuild the column and cell arrays to match
///  the reader's column count.
/// </summary>
public void Reset()
{
    // Release the previous reader before replacing it; otherwise every Reset
    // after the first would leave the old reader un-disposed.
    if (_reader != null)
    {
        _reader.Dispose();
        _reader = null;
    }

    _reader = TabularFactory.BuildReader(_streamProvider.OpenRead(_filePath), _filePath);

    int columnCount = _reader.Columns.Count;
    _columns = new TabularColumn[columnCount];
    _cells = new String8[columnCount][];

    for (int i = 0; i < columnCount; ++i)
    {
        _columns[i] = new TabularColumn(this, _reader.Columns[i]);
    }
}
/// <summary>
///  Resolve a column name, or a value which is already an integer index, to a
///  column index using TryGetColumnIndex. Throws if it can't be resolved.
/// </summary>
/// <param name="reader">ITabularReader to look the column up in</param>
/// <param name="columnNameOrIndex">Column name for which to find column index, or already an integer index</param>
/// <returns>Index of the column as reported by TryGetColumnIndex</returns>
/// <exception cref="ColumnNotFoundException">TryGetColumnIndex did not find the column</exception>
public static int ColumnIndex(this ITabularReader reader, string columnNameOrIndex)
{
    int foundIndex;

    if (!reader.TryGetColumnIndex(columnNameOrIndex, out foundIndex))
    {
        // Include the known columns so the caller can see what was available
        string knownColumns = String.Join(", ", reader.Columns);
        throw new ColumnNotFoundException(String.Format("Column Name \"{0}\" not found in file.\nKnown Columns: \"{1}\"", columnNameOrIndex, knownColumns));
    }

    return foundIndex;
}
/// <summary>
///  Copy every remaining row from the reader to the writer, cell by cell.
///  Columns are not set on the writer here.
/// </summary>
/// <param name="reader">Reader to take rows from</param>
/// <param name="writer">Writer to write rows to</param>
private static void CopyRows(ITabularReader reader, ITabularWriter writer)
{
    while (reader.NextRow())
    {
        int column = 0;
        while (column < reader.CurrentRowColumns)
        {
            writer.Write(reader.Current(column).ToString8());
            ++column;
        }

        writer.NextRow();
    }
}
/// <summary>
///  End-to-end check of the "sanitize" command: running without a key returns
///  -2, running with a key returns 0 and produces the expected columns and
///  values, and running with a different key produces different output.
/// </summary>
public void Sanitize_EndToEnd()
{
    const string sourcePath = "SanitizeSampleSource.csv";
    const string specPath = "SanitizeSampleSource.sanispec";
    const string outputPath = "SanitizeOutput.csv";
    const string secondOutputPath = "SanitizeOutput2.csv";

    // Extract the sample source and spec from the test assembly's resources
    Assembly xsvTest = Assembly.GetExecutingAssembly();
    Resource.SaveStreamTo("Xsv.Test.Sanitize.SanitizeSampleSource.csv", sourcePath, xsvTest);
    Resource.SaveStreamTo("Xsv.Test.Sanitize.SanitizeSampleSource.sanispec", specPath, xsvTest);

    // Verify UsageException if no key is passed
    int noKeyExitCode = Program.Main(new string[] { "sanitize", sourcePath, outputPath, specPath });
    Assert.AreEqual(-2, noKeyExitCode);

    // Verify success for base sanitize
    File.Delete(outputPath);
    int firstKeyExitCode = Program.Main(new string[] { "sanitize", sourcePath, outputPath, specPath, "Key1" });
    Assert.AreEqual(0, firstKeyExitCode);

    // Validate the result
    using (ITabularReader r = TabularFactory.BuildReader(outputPath))
    {
        Assert.IsTrue(r.Columns.Contains("ID"), "ID column is kept (no spec line)");
        Assert.IsTrue(r.Columns.Contains("Path"), "Path column is kept (mapped)");
        Assert.IsTrue(r.Columns.Contains("IsEmptyPath"), "IsEmptyPath is kept (Keep line)");
        Assert.IsFalse(r.Columns.Contains("IsUnderXsv"), "IxUnderXsv column is dropped (Drop line)");

        int idColumnIndex = r.ColumnIndex("ID");
        int pathColumnIndex = r.ColumnIndex("Path");
        int isEmptyPathColumnIndex = r.ColumnIndex("IsEmptyPath");

        while (r.NextRow())
        {
            int id = r.Current(idColumnIndex).ToInteger();
            string path = r.Current(pathColumnIndex).ToString();
            bool pathIsEmpty = String.IsNullOrEmpty(path);

            Assert.AreEqual(r.Current(isEmptyPathColumnIndex).ToBoolean(), pathIsEmpty, "IsEmptyPath condition matches whether mapped path is empty");

            if (id == 5)
            {
                Assert.AreEqual("Elfie", path, "'Elfie' is echoed (Echo in spec)");
            }
            else if (!pathIsEmpty)
            {
                Assert.IsTrue(path.StartsWith("WarmBeggedTruth\\"), "Verify path is mapped in parts, and 'Elfie' is consistently mapped.");
            }
        }

        Assert.IsTrue(r.RowCountRead < 7, "Verify sample excluded at least one row.");
    }

    // Run with another key
    int secondKeyExitCode = Program.Main(new string[] { "sanitize", sourcePath, secondOutputPath, specPath, "Key2" });
    Assert.AreEqual(0, secondKeyExitCode);

    // Verify mappings are different
    Assert.AreNotEqual(File.ReadAllText(secondOutputPath), File.ReadAllText(outputPath));
}
/// <summary>
///  Build a writer which appends to the given file. If the file doesn't exist,
///  a new writer is built with BuildWriter and the columns are set on it. If it
///  exists, its columns must match columnNames (compared ignoring case) and the
///  file is opened in append mode with a CsvWriter or TsvWriter.
/// </summary>
/// <param name="filePath">File path of the file to append to</param>
/// <param name="columnNames">Column names the file is expected to have</param>
/// <returns>ITabularWriter with the columns set</returns>
/// <exception cref="InvalidOperationException">The existing file's column names don't match</exception>
/// <exception cref="NotSupportedException">The existing file's extension is not csv or tsv</exception>
public static ITabularWriter AppendWriter(string filePath, IEnumerable<string> columnNames)
{
    // No file yet: make a brand new writer
    if (!File.Exists(filePath))
    {
        ITabularWriter newWriter = BuildWriter(filePath);
        newWriter.SetColumns(columnNames);
        return newWriter;
    }

    // The existing file's columns must match the ones requested
    string expectedColumns = string.Join(", ", columnNames);
    using (ITabularReader existing = TabularFactory.BuildReader(filePath))
    {
        string actualColumns = string.Join(", ", existing.Columns);
        bool columnsMatch = (string.Compare(expectedColumns, actualColumns, true) == 0);

        if (!columnsMatch)
        {
            throw new InvalidOperationException(string.Format("Can't append to \"{0}\" because the column names don't match.\r\nExpect: {1}\r\nActual: {2}", filePath, expectedColumns, actualColumns));
        }
    }

    // Open the file for append and choose the writer by extension
    FileStream appendStream = new FileStream(filePath, FileMode.Append, FileAccess.Write, FileShare.Read);
    string extension = Path.GetExtension(filePath).ToLowerInvariant().TrimStart('.');

    ITabularWriter writer;
    if (extension == "csv")
    {
        writer = new CsvWriter(appendStream, false);
    }
    else if (extension == "tsv")
    {
        writer = new TsvWriter(appendStream, false);
    }
    else
    {
        // Unknown format: close the stream before reporting the problem
        appendStream.Dispose();
        throw new NotSupportedException(String.Format("Xsv does not know how to append to \"{0}\". Known Extensions: [csv, tsv]", extension));
    }

    // Set the columns so the writer knows the count (writers shouldn't write the columns if writeHeaderRow was false)
    writer.SetColumns(columnNames);
    return writer;
}
/// <summary>
///  Write the reader's current row to the writer, if there is a writer. The
///  columns are set on the writer while it reports no rows written.
/// </summary>
/// <param name="reader">Reader positioned on the row to echo</param>
/// <param name="writer">Writer to echo to, or null to do nothing</param>
private static void EchoRow(ITabularReader reader, ITabularWriter writer)
{
    if (writer == null)
    {
        return;
    }

    // Nothing written yet: set the columns first
    if (writer.RowCountWritten == 0)
    {
        writer.SetColumns(reader.Columns);
    }

    for (int column = 0; column < reader.CurrentRowColumns; ++column)
    {
        writer.Write(reader.Current(column).ToString8());
    }

    writer.NextRow();
}
/// <summary>
///  Sanitize an input file into a given output file using this Sanitizer's configuration.
///  Each input column with a handler has its value replaced by the handler's
///  output; columns without a handler are not written. If a sample column is
///  configured, rows whose sample value hashes above the cutoff are skipped.
/// </summary>
/// <param name="inputFile">File Path to input file</param>
/// <param name="outputFile">File Path to output file</param>
public void Sanitize(string inputFile, string outputFile)
{
    using (ITabularReader reader = TabularFactory.BuildReader(inputFile))
    {
        // Work out what to do with each input column, and which columns will be written
        List<string> columnsToOutput;
        IColumnHandler[] handlers = GetHandlersByColumnIndex(reader.Columns, out columnsToOutput);

        // Find the sample column, if one is configured (-1 means no sampling)
        int sampleColumnIndex = -1;
        if (!String.IsNullOrEmpty(this.SampleColumnName))
        {
            sampleColumnIndex = reader.ColumnIndex(this.SampleColumnName);
        }

        // Hashes above this cutoff are excluded
        uint sampleInclusionCutoff = (uint)(uint.MaxValue * this.SampleProbability);

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFile))
        {
            writer.SetColumns(columnsToOutput);

            while (reader.NextRow())
            {
                // Sample *without* the hashkey, so the same rows are consistently included or excluded
                if (sampleColumnIndex >= 0
                    && Hashing.Hash(reader.Current(sampleColumnIndex).ToString8(), 0) > sampleInclusionCutoff)
                {
                    continue;
                }

                // Run the handler for each input column which has one, and write its output
                for (int column = 0; column < reader.CurrentRowColumns; ++column)
                {
                    IColumnHandler handler = handlers[column];
                    if (handler == null)
                    {
                        continue;
                    }

                    String8 original = reader.Current(column).ToString8();
                    writer.Write(handler.Sanitize(original));
                }

                writer.NextRow();
            }
        }
    }
}
/// <summary>
///  Verify WhereMatcher match counts for each operator and value type against
///  the sample file, the exceptions for unknown columns and operators, and the
///  rows written when a writer is supplied.
/// </summary>
public void WhereMatcher_Basics()
{
    // Matches by column index and by each supported value type
    Assert.AreEqual(1000, WhereMatchCount(s_sampleFilePath, "0", ">=", "0"), "Should match all rows (by column index)");
    Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "ID", ">=", "500"), "Should match half of rows (int)");
    Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "IsEven", "==", "true"), "Should match half of rows (boolean)");
    Assert.AreEqual(90, WhereMatchCount(s_sampleFilePath, "WhenAdded", "<", "2017-05-23 01:30:00 AM"), "Should match 90 rows (DateTime)");

    // String operators
    Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", "=", "Sophie"), "Should match 250 rows (string)");
    // The message previously said 250 rows although the assertion expects 750
    Assert.AreEqual(750, WhereMatchCount(s_sampleFilePath, "Name", "!=", "Sophie"), "Should match 750 rows (string !=)");
    Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", "|>", "Sop"), "Should match 250 rows (string StartsWith)");
    Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", ":", "ophie"), "Should match 250 rows (string Contains)");
    Assert.AreEqual(250, WhereMatchCount(s_sampleFilePath, "Name", ">", "Scott"), "Should match 250 rows (string >)");
    Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "Name", ">=", "Scott"), "Should match 500 rows (string >=)");
    Assert.AreEqual(500, WhereMatchCount(s_sampleFilePath, "Name", "<", "Scott"), "Should match 500 rows (string <)");
    Assert.AreEqual(750, WhereMatchCount(s_sampleFilePath, "Name", "<=", "Scott"), "Should match 750 rows (string <=)");

    // Values of other types compared against a column which can't convert to them
    Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "false"), "Should match 0 rows (bool, can't convert type)");
    Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "0"), "Should match 0 rows (int, can't convert type)");
    Assert.AreEqual(0, WhereMatchCount(s_sampleFilePath, "Name", "!=", "2017-01-01"), "Should match 0 rows (DateTime, can't convert type)");

    // Column name doesn't exist
    Verify.Exception<ColumnNotFoundException>(() => WhereMatchCount(s_sampleFilePath, "MissingColumn", "==", "Jeff"));

    // Column index out of range
    Verify.Exception<ColumnNotFoundException>(() => WhereMatchCount(s_sampleFilePath, "-1", "==", "Jeff"));

    // Unknown operator
    Verify.Exception<UsageException>(() => WhereMatchCount(s_sampleFilePath, "Name", "->", "Jeff"));

    // Try with output enabled
    using (ITabularReader reader = TabularFactory.BuildReader(s_sampleFilePath))
    {
        using (ITabularWriter writer = TabularFactory.BuildWriter("Sample.Under2.csv"))
        {
            WhereMatcher.Where(reader, "ID", "<", "2", writer);
            Assert.AreEqual(2, writer.RowCountWritten);
        }

        // The writer is disposed before the output file is read back
        string content = File.ReadAllText("Sample.Under2.csv");
        Assert.IsTrue(content.Contains("\"0\""));
        Assert.IsTrue(content.Contains("\"1\""));
        Assert.IsFalse(content.Contains("\"2\""));
    }
}
/// <summary>
///  Build a TableMetadata from the files under a table root: the schema file
///  (Name and Type columns), the metadata file (Name, Context and Value
///  columns), and the query text at ConfigQueryPath.
/// </summary>
/// <param name="streamProvider">Provider used to open and read the files</param>
/// <param name="tableRootPath">Folder path which the file names are combined with</param>
/// <returns>TableMetadata with Schema, RowCount and Query populated from the files</returns>
/// <exception cref="NotImplementedException">The metadata file has a row whose Name is not "RowCount"</exception>
private static TableMetadata Build(IStreamProvider streamProvider, string tableRootPath)
{
    TableMetadata metadata = new TableMetadata();

    // Schema: one ColumnDetails per row
    string schemaFilePath = Path.Combine(tableRootPath, SchemaFileName);
    using (ITabularReader schemaReader = TabularFactory.BuildReader(streamProvider.OpenRead(schemaFilePath), SchemaFileName))
    {
        int nameIndex = schemaReader.ColumnIndex("Name");
        int typeIndex = schemaReader.ColumnIndex("Type");

        while (schemaReader.NextRow())
        {
            string columnName = schemaReader.Current(nameIndex).ToString();
            string typeName = schemaReader.Current(typeIndex).ToString();

            metadata.Schema.Add(new ColumnDetails(columnName, TypeProviderFactory.Get(typeName).Type));
        }
    }

    // Metadata: named values; only "RowCount" is understood
    string metadataFilePath = Path.Combine(tableRootPath, MetadataFileName);
    using (ITabularReader metadataReader = TabularFactory.BuildReader(streamProvider.OpenRead(metadataFilePath), MetadataFileName))
    {
        int nameIndex = metadataReader.ColumnIndex("Name");
        int contextIndex = metadataReader.ColumnIndex("Context");
        int valueIndex = metadataReader.ColumnIndex("Value");

        while (metadataReader.NextRow())
        {
            String8 name = metadataReader.Current(nameIndex).ToString8();

            // The context is read but nothing here uses it
            String8 context = metadataReader.Current(contextIndex).ToString8();
            ITabularValue value = metadataReader.Current(valueIndex);

            if (!name.Equals("RowCount"))
            {
                throw new NotImplementedException($"TableMetadataSerializer.Read doesn't know how to read Metadata '{name}'");
            }

            metadata.RowCount = value.ToInteger();
        }
    }

    metadata.Query = streamProvider.ReadAllText(Path.Combine(tableRootPath, ConfigQueryPath));

    return metadata;
}
/// <summary>
///  Copy the rows of the input file whose column value also appears in the
///  same column of a second file.
/// </summary>
/// <param name="inputFilePath">File path of the file to filter</param>
/// <param name="outputFilePath">File path of the file to write</param>
/// <param name="onlyInInputFilePath">File path of the file supplying the set of allowed values</param>
/// <param name="onlyInColumnIdentifier">Identifier of the column, resolved with ColumnIndex in both files</param>
private static void OnlyIn(string inputFilePath, string outputFilePath, string onlyInInputFilePath, string onlyInColumnIdentifier)
{
    // Allowed values are copied into this block before they are kept in the set
    String8Block allowedStore = new String8Block();
    HashSet<String8> allowedValues = new HashSet<String8>();

    // Read the values in 'onlyInInputFilePath'
    using (ITabularReader allowedReader = TabularFactory.BuildReader(onlyInInputFilePath))
    {
        int allowedColumnIndex = allowedReader.ColumnIndex(onlyInColumnIdentifier);

        while (allowedReader.NextRow())
        {
            allowedValues.Add(allowedStore.GetCopy(allowedReader.Current(allowedColumnIndex)));
        }
    }

    // Copy from input to output where the column value is in the "only in" set
    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
    {
        int valueColumnIndex = reader.ColumnIndex(onlyInColumnIdentifier);

        using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
        {
            writer.SetColumns(reader.Columns);

            while (reader.NextRow())
            {
                if (!allowedValues.Contains(reader.Current(valueColumnIndex).ToString8()))
                {
                    continue;
                }

                for (int column = 0; column < reader.CurrentRowColumns; ++column)
                {
                    writer.Write(reader.Current(column).ToString8());
                }

                writer.NextRow();
            }

            WriteSizeSummary(reader, writer);
        }
    }
}
/// <summary>
///  Performance check: read the sample file 100 times, converting three cells
///  per row, and pass the total bytes read to Verify.PerformanceByBytes with a
///  50 MB argument.
/// </summary>
/// <param name="sampleFilePath">File path of the sample file to read</param>
/// <param name="buildReader">Factory for the reader under test; called with the file path and 'true'</param>
public void Reader_Performance(string sampleFilePath, Func<string, bool, ITabularReader> buildReader)
{
    long rowCountRead = 0;
    long xsvLengthBytes = new FileInfo(sampleFilePath).Length;

    // Goal: 100MB/sec [Surface Book i7]
    Verify.PerformanceByBytes(50 * LongExtensions.Megabyte, () =>
    {
        int iterations = 100;

        for (int pass = 0; pass < iterations; ++pass)
        {
            using (ITabularReader r = buildReader(sampleFilePath, true))
            {
                int lineNumberIndex = r.ColumnIndex("LineNumber");
                int countIndex = r.ColumnIndex("Count");
                int descriptionIndex = r.ColumnIndex("Description");

                while (r.NextRow())
                {
                    rowCountRead++;

                    // Only rows with at least two columns are converted
                    if (r.CurrentRowColumns >= 2)
                    {
                        // The converted values are discarded; the conversions are the work being timed
                        int lineNumber;
                        r.Current(lineNumberIndex).TryToInteger(out lineNumber);

                        int count;
                        r.Current(countIndex).TryToInteger(out count);

                        String8 description = r.Current(descriptionIndex).ToString8();
                    }
                }
            }
        }

        return iterations * xsvLengthBytes;
    });
}