/// <summary>
/// Benchmark body for the LumenWorks CSV reader: walks the file at <c>args.Path</c> record by record
/// and fetches field values from every record.
/// </summary>
/// <param name="args">
/// Benchmark settings. A negative <c>FieldIndex</c> fetches every field of each record; otherwise
/// only fields 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_LumenWorks(DelimitedRecordReaderBenchmarkArguments args)
{
    var input = new StreamReader(args.Path, args.Encoding, true, args.BufferSize);
    var trimming = args.TrimWhiteSpaces ? LW.ValueTrimmingOptions.All : LW.ValueTrimmingOptions.None;

    // The second constructor argument (false) is passed through unchanged from the original call.
    using (var csv = new LW.CsvReader(input, false, LW.CsvReader.DefaultDelimiter, LW.CsvReader.DefaultQuote, LW.CsvReader.DefaultEscape, LW.CsvReader.DefaultComment, trimming, args.BufferSize))
    {
        csv.SkipEmptyLines = args.SkipEmptyLines;

        // The fetched value is intentionally discarded; only the act of fetching it matters here.
        string text;

        if (args.FieldIndex >= 0)
        {
            int wanted = args.FieldIndex + 1;

            while (csv.ReadNextRecord())
            {
                for (int field = 0; field < wanted; field++)
                {
                    text = csv[field];
                }
            }
        }
        else
        {
            while (csv.ReadNextRecord())
            {
                for (int field = 0; field < csv.FieldCount; field++)
                {
                    text = csv[field];
                }
            }
        }
    }
}
/// <summary>
/// Benchmark body for <c>DelimitedRecordReader</c>: reads the file at <c>args.Path</c> until
/// <c>ReadResult.EndOfFile</c> and fetches the value of every column of each record.
/// </summary>
/// <param name="args">
/// Benchmark settings. When <c>FieldIndex</c> is greater than -1, dynamic column counting is turned off
/// and exactly <c>FieldIndex + 1</c> columns are declared up front; otherwise the reader's own column
/// collection is used as-is.
/// </param>
public static void ReadAll(DelimitedRecordReaderBenchmarkArguments args)
{
    using (var reader = new DelimitedRecordReader(new StreamReader(args.Path, args.Encoding, true, args.BufferSize), args.BufferSize))
    {
        reader.AdvancedEscapingEnabled = args.AdvancedEscapingEnabled;
        reader.DoubleQuoteEscapingEnabled = args.DoubleQuoteEscapingEnabled;
        reader.SkipEmptyLines = args.SkipEmptyLines;
        reader.TrimWhiteSpaces = args.TrimWhiteSpaces;

        if (args.FieldIndex > -1)
        {
            reader.DynamicColumnCount = false;

            for (int i = 0; i < args.FieldIndex + 1; i++)
            {
                reader.Columns.Add(new DelimitedRecordColumn(reader.GetDefaultColumnName(i)));
            }
        }

        // The fetched value is intentionally discarded; only the act of fetching it matters here.
        string s;

        while (reader.Read() != ReadResult.EndOfFile)
        {
            // Fetch ALL columns, including the last one. The previous bound (Columns.Count - 1) skipped
            // the final column, so this benchmark did less work per record than the sibling benchmarks,
            // which fetch FieldCount (or FieldIndex + 1) fields.
            for (int i = 0; i < reader.Columns.Count; i++)
            {
                s = reader[i];
            }
        }
    }
}
/// <summary>
/// LumenWorks benchmark: iterates over all records of the file at <c>args.Path</c> with
/// <c>ReadNextRecord</c> and fetches field values from each record.
/// </summary>
/// <param name="args">
/// Benchmark settings. With a negative <c>FieldIndex</c> all fields of a record are fetched;
/// otherwise fields 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_LumenWorks(DelimitedRecordReaderBenchmarkArguments args)
{
    var source = new StreamReader(args.Path, args.Encoding, true, args.BufferSize);
    var trimOptions = args.TrimWhiteSpaces ? LW.ValueTrimmingOptions.All : LW.ValueTrimmingOptions.None;

    using (var lumen = new LW.CsvReader(source, false, LW.CsvReader.DefaultDelimiter, LW.CsvReader.DefaultQuote, LW.CsvReader.DefaultEscape, LW.CsvReader.DefaultComment, trimOptions, args.BufferSize))
    {
        lumen.SkipEmptyLines = args.SkipEmptyLines;

        // Sink for fetched values; never read back.
        string fetched;

        bool everyField = args.FieldIndex < 0;

        if (everyField)
        {
            while (lumen.ReadNextRecord())
            {
                for (int column = 0; column < lumen.FieldCount; column++)
                {
                    fetched = lumen[column];
                }
            }
        }
        else
        {
            int limit = args.FieldIndex + 1;

            while (lumen.ReadNextRecord())
            {
                for (int column = 0; column < limit; column++)
                {
                    fetched = lumen[column];
                }
            }
        }
    }
}
/// <summary>
/// Benchmark body for <c>DelimitedRecordReader</c>: reads the file at <c>args.Path</c> until
/// <c>ReadResult.EndOfFile</c> and fetches the value of every column of each record.
/// </summary>
/// <param name="args">
/// Benchmark settings. When <c>FieldIndex</c> is greater than -1, dynamic column counting is turned off
/// and exactly <c>FieldIndex + 1</c> columns are declared up front; otherwise the reader's own column
/// collection is used as-is.
/// </param>
public static void ReadAll(DelimitedRecordReaderBenchmarkArguments args)
{
    using (var reader = new DelimitedRecordReader(new StreamReader(args.Path, args.Encoding, true, args.BufferSize), args.BufferSize))
    {
        reader.AdvancedEscapingEnabled = args.AdvancedEscapingEnabled;
        reader.DoubleQuoteEscapingEnabled = args.DoubleQuoteEscapingEnabled;
        reader.SkipEmptyLines = args.SkipEmptyLines;
        reader.TrimWhiteSpaces = args.TrimWhiteSpaces;

        if (args.FieldIndex > -1)
        {
            reader.DynamicColumnCount = false;

            for (int i = 0; i < args.FieldIndex + 1; i++)
            {
                reader.Columns.Add(new DelimitedRecordColumn(reader.GetDefaultColumnName(i)));
            }
        }

        // The fetched value is intentionally discarded; only the act of fetching it matters here.
        string s;

        while (reader.Read() != ReadResult.EndOfFile)
        {
            // Fetch ALL columns, including the last one. The previous bound (Columns.Count - 1) skipped
            // the final column, so this benchmark did less work per record than the sibling benchmarks,
            // which fetch FieldCount (or FieldIndex + 1) fields.
            for (int i = 0; i < reader.Columns.Count; i++)
            {
                s = reader[i];
            }
        }
    }
}
/// <summary>
/// Benchmark body for the DataStreams CSV reader: configures the reader from <c>args</c>, then
/// iterates over all records with <c>ReadRecord</c> and fetches field values from each one.
/// </summary>
/// <param name="args">
/// Benchmark settings. <c>AdvancedEscapingEnabled</c> selects backslash escaping instead of doubled
/// escaping. A negative <c>FieldIndex</c> fetches every column; otherwise columns 0 through
/// <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_DataStreams(DelimitedRecordReaderBenchmarkArguments args)
{
    using (var csv = new DS.CsvReader(new StreamReader(args.Path, args.Encoding, true, args.BufferSize)))
    {
        // Fixed settings, identical for every run.
        csv.Settings.CaptureRawRecord = false;
        csv.Settings.CaseSensitive = false;
        csv.Settings.SafetySwitch = false;
        csv.Settings.UseComments = true;

        // Settings driven by the benchmark arguments.
        csv.Settings.EscapeMode = args.AdvancedEscapingEnabled ? DS.EscapeMode.Backslash : DS.EscapeMode.Doubled;
        csv.Settings.SkipEmptyRecords = args.SkipEmptyLines;
        csv.Settings.TrimWhitespace = args.TrimWhiteSpaces;

        // Sink for fetched values; never read back.
        string text;

        if (args.FieldIndex >= 0)
        {
            int wanted = args.FieldIndex + 1;

            while (csv.ReadRecord())
            {
                for (int column = 0; column < wanted; column++)
                {
                    text = csv[column];
                }
            }
        }
        else
        {
            while (csv.ReadRecord())
            {
                for (int column = 0; column < csv.ColumnCount; column++)
                {
                    text = csv[column];
                }
            }
        }
    }
}
/// <summary>
/// Benchmark body for a regex-based CSV parser: reads the file at <c>args.Path</c> line by line,
/// matches each line against a field pattern and fetches the captured <c>field</c> group values.
/// </summary>
/// <param name="args">
/// Benchmark settings. A negative <c>FieldIndex</c> walks the whole match collection of each line;
/// otherwise fields 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_Regex(DelimitedRecordReaderBenchmarkArguments args)
{
    // regex from Jeffrey Friedl's Mastering Regular Expressions 2nd edition, p. 271
    // does NOT handle trimming and multiline fields
    const RegexOptions options = RegexOptions.Compiled | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace;
    var fieldPattern = new Regex(@" \G(^|,) ""(?<field> (?> [^""]*) (?> """" [^""]* )* )"" | (?<field> [^"",]* )", options);
    int fieldGroup = fieldPattern.GroupNumberFromName("field");

    using (var input = new StreamReader(args.Path, args.Encoding, true, args.BufferSize))
    {
        string line;

        // Sink for fetched values; never read back.
        string field;

        if (args.FieldIndex >= 0)
        {
            int wanted = args.FieldIndex + 1;

            while ((line = input.ReadLine()) != null)
            {
                MatchCollection matches = fieldPattern.Matches(line);

                // Field n is taken from match 2n, mirroring the step of 2 used when reading all fields.
                for (int n = 0; n < wanted; n++)
                {
                    field = matches[n * 2].Groups[fieldGroup].Value;
                }
            }
        }
        else
        {
            while ((line = input.ReadLine()) != null)
            {
                MatchCollection matches = fieldPattern.Matches(line);

                // Only every second match is read.
                // NOTE(review): presumably the pattern yields an extra match between fields - confirm.
                for (int m = 0; m < matches.Count; m += 2)
                {
                    field = matches[m].Groups[fieldGroup].Value;
                }
            }
        }
    }
}
/// <summary>
/// Benchmark body for CsvHelper: builds a configuration from <c>args</c>, then iterates over all
/// records with <c>Read</c> and fetches field values from each one.
/// </summary>
/// <param name="args">
/// Benchmark settings. A negative <c>FieldIndex</c> fetches every entry of <c>CurrentRecord</c>;
/// otherwise fields 0 through <c>FieldIndex</c> are fetched through the reader's indexer.
/// </param>
public static void ReadAll_CsvHelper(DelimitedRecordReaderBenchmarkArguments args)
{
    var settings = new CH.Configuration.CsvConfiguration
    {
        BufferSize = args.BufferSize,
        AllowComments = true,
        IgnoreBlankLines = args.SkipEmptyLines,
        HasHeaderRecord = false,
        DetectColumnCountChanges = true,
        TrimFields = args.TrimWhiteSpaces,
        TrimHeaders = args.TrimWhiteSpaces
    };

    var input = new StreamReader(args.Path, args.Encoding, true, args.BufferSize);

    using (var csv = new CH.CsvReader(input, settings))
    {
        // Sink for fetched values; never read back.
        string text;

        if (args.FieldIndex >= 0)
        {
            int wanted = args.FieldIndex + 1;

            while (csv.Read())
            {
                for (int field = 0; field < wanted; field++)
                {
                    text = csv[field];
                }
            }
        }
        else
        {
            while (csv.Read())
            {
                var current = csv.CurrentRecord;

                for (int field = 0; field < current.Length; field++)
                {
                    text = current[field];
                }
            }
        }
    }
}
/// <summary>
/// Benchmark body for the Jet OLE DB text provider: opens the directory of <c>args.Path</c> as the
/// data source, selects everything from the file and fetches field values from each row.
/// </summary>
/// <param name="args">
/// Benchmark settings. A negative <c>FieldIndex</c> fetches every field of a row; otherwise fields
/// 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_OleDb(DelimitedRecordReaderBenchmarkArguments args)
{
    // The directory is used as the data source and the file name as the table name.
    string directory = Path.GetDirectoryName(args.Path);
    string file = Path.GetFileName(args.Path);

    string connectionString = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + directory + @";Extended Properties=""Text;HDR=No;FMT=Delimited""";

    using (var connection = new OleDbConnection(connectionString))
    {
        using (var command = connection.CreateCommand())
        {
            command.CommandText = $"SELECT * FROM {file}";
            connection.Open();

            using (var rows = command.ExecuteReader(CommandBehavior.SequentialAccess))
            {
                // Sink for fetched values; never read back. Values that are not strings become null.
                string text;

                if (args.FieldIndex >= 0)
                {
                    int wanted = args.FieldIndex + 1;

                    while (rows.Read())
                    {
                        for (int field = 0; field < wanted; field++)
                        {
                            text = rows.GetValue(field) as string;
                        }
                    }
                }
                else
                {
                    while (rows.Read())
                    {
                        for (int field = 0; field < rows.FieldCount; field++)
                        {
                            text = rows.GetValue(field) as string;
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// DataStreams benchmark: applies the reader settings derived from <c>args</c>, then reads the file
/// at <c>args.Path</c> record by record and fetches field values from each record.
/// </summary>
/// <param name="args">
/// Benchmark settings. <c>AdvancedEscapingEnabled</c> switches the escape mode from doubled to
/// backslash. With a negative <c>FieldIndex</c> all columns are fetched; otherwise columns 0 through
/// <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_DataStreams(DelimitedRecordReaderBenchmarkArguments args)
{
    using (var streamReader = new DS.CsvReader(new StreamReader(args.Path, args.Encoding, true, args.BufferSize)))
    {
        streamReader.Settings.CaptureRawRecord = false;
        streamReader.Settings.CaseSensitive = false;
        streamReader.Settings.SafetySwitch = false;
        streamReader.Settings.UseComments = true;
        streamReader.Settings.EscapeMode = args.AdvancedEscapingEnabled
            ? DS.EscapeMode.Backslash
            : DS.EscapeMode.Doubled;
        streamReader.Settings.SkipEmptyRecords = args.SkipEmptyLines;
        streamReader.Settings.TrimWhitespace = args.TrimWhiteSpaces;

        // Sink for fetched values; never read back.
        string fetched;

        bool everyColumn = args.FieldIndex < 0;

        if (everyColumn)
        {
            while (streamReader.ReadRecord())
            {
                for (int index = 0; index < streamReader.ColumnCount; index++)
                {
                    fetched = streamReader[index];
                }
            }
        }
        else
        {
            int limit = args.FieldIndex + 1;

            while (streamReader.ReadRecord())
            {
                for (int index = 0; index < limit; index++)
                {
                    fetched = streamReader[index];
                }
            }
        }
    }
}
/// <summary>
/// Regex benchmark: reads the file at <c>args.Path</c> one line at a time, runs the field pattern
/// over each line and fetches the values captured by the <c>field</c> group.
/// </summary>
/// <param name="args">
/// Benchmark settings. With a negative <c>FieldIndex</c> the whole match collection of each line is
/// walked; otherwise fields 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_Regex(DelimitedRecordReaderBenchmarkArguments args)
{
    // regex from Jeffrey Friedl's Mastering Regular Expressions 2nd edition, p. 271
    // does NOT handle trimming and multiline fields
    const RegexOptions regexOptions = RegexOptions.Compiled | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace;
    var csvRegex = new Regex(@" \G(^|,) ""(?<field> (?> [^""]*) (?> """" [^""]* )* )"" | (?<field> [^"",]* )", regexOptions);
    int groupNumber = csvRegex.GroupNumberFromName("field");

    using (var lines = new StreamReader(args.Path, args.Encoding, true, args.BufferSize))
    {
        string current;

        // Sink for fetched values; never read back.
        string captured;

        bool everyField = args.FieldIndex < 0;

        if (everyField)
        {
            while ((current = lines.ReadLine()) != null)
            {
                MatchCollection found = csvRegex.Matches(current);

                // Steps through the matches two at a time.
                // NOTE(review): presumably every other match is not a field - confirm.
                for (int position = 0; position < found.Count; position += 2)
                {
                    captured = found[position].Groups[groupNumber].Value;
                }
            }
        }
        else
        {
            int limit = args.FieldIndex + 1;

            while ((current = lines.ReadLine()) != null)
            {
                MatchCollection found = csvRegex.Matches(current);

                // Field k lives at match 2k, consistent with the step of 2 in the other branch.
                for (int k = 0; k < limit; k++)
                {
                    captured = found[k * 2].Groups[groupNumber].Value;
                }
            }
        }
    }
}
/// <summary>
/// OLE DB benchmark: connects to the directory of <c>args.Path</c> through the Jet text provider,
/// selects all rows of the file and fetches field values from each row.
/// </summary>
/// <param name="args">
/// Benchmark settings. With a negative <c>FieldIndex</c> all fields of a row are fetched; otherwise
/// fields 0 through <c>FieldIndex</c> are fetched.
/// </param>
public static void ReadAll_OleDb(DelimitedRecordReaderBenchmarkArguments args)
{
    // Split the path: the folder becomes the data source, the file name becomes the table name.
    string directory = Path.GetDirectoryName(args.Path);
    string file = Path.GetFileName(args.Path);

    string jetConnection = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + directory + @";Extended Properties=""Text;HDR=No;FMT=Delimited""";

    using (var db = new OleDbConnection(jetConnection))
    {
        using (var query = db.CreateCommand())
        {
            query.CommandText = $"SELECT * FROM {file}";
            db.Open();

            using (var cursor = query.ExecuteReader(CommandBehavior.SequentialAccess))
            {
                // Sink for fetched values; never read back. Values that are not strings become null.
                string fetched;

                bool everyField = args.FieldIndex < 0;

                if (everyField)
                {
                    while (cursor.Read())
                    {
                        for (int ordinal = 0; ordinal < cursor.FieldCount; ordinal++)
                        {
                            fetched = cursor.GetValue(ordinal) as string;
                        }
                    }
                }
                else
                {
                    int limit = args.FieldIndex + 1;

                    while (cursor.Read())
                    {
                        for (int ordinal = 0; ordinal < limit; ordinal++)
                        {
                            fetched = cursor.GetValue(ordinal) as string;
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// CsvHelper benchmark: creates a reader configured from <c>args</c>, reads the file at
/// <c>args.Path</c> record by record and fetches field values from each record.
/// </summary>
/// <param name="args">
/// Benchmark settings. With a negative <c>FieldIndex</c> every entry of <c>CurrentRecord</c> is
/// fetched; otherwise fields 0 through <c>FieldIndex</c> are fetched via the reader's indexer.
/// </param>
public static void ReadAll_CsvHelper(DelimitedRecordReaderBenchmarkArguments args)
{
    var csvConfiguration = new CH.Configuration.CsvConfiguration
    {
        BufferSize = args.BufferSize,
        AllowComments = true,
        IgnoreBlankLines = args.SkipEmptyLines,
        HasHeaderRecord = false,
        DetectColumnCountChanges = true,
        TrimFields = args.TrimWhiteSpaces,
        TrimHeaders = args.TrimWhiteSpaces
    };

    var source = new StreamReader(args.Path, args.Encoding, true, args.BufferSize);

    using (var helper = new CH.CsvReader(source, csvConfiguration))
    {
        // Sink for fetched values; never read back.
        string fetched;

        bool everyField = args.FieldIndex < 0;

        if (everyField)
        {
            while (helper.Read())
            {
                var fields = helper.CurrentRecord;

                for (int index = 0; index < fields.Length; index++)
                {
                    fetched = fields[index];
                }
            }
        }
        else
        {
            int limit = args.FieldIndex + 1;

            while (helper.Read())
            {
                for (int index = 0; index < limit; index++)
                {
                    fetched = helper[index];
                }
            }
        }
    }
}
//TODO: for now, the main program code is tightly coupled to the IO benchmarks. It needs to be refactored to make it more modular.
// Program entry point. Optional command-line arguments, in order:
//   args[0] - benchmark tests to run (converted to BenchmarkTests; BenchmarkTests.All is passed as the default value)
//   args[1] - benchmark iteration count (the current value, initially 3, is passed as the default value)
//   args[2] - profiling flag (false is passed as the default value); when true, each test group uses a single file
// After all selected test groups have run, prints "Done" and waits for a line of console input.
static void Main(string[] args)
{
    AppDomain.CurrentDomain.UnhandledException += CurrentDomain_UnhandledException;

    var benchmarkOptions = new BenchmarkOptions
    {
        ActionIterationCount = 1,
        BenchmarkIterationCount = 3,
        ConcurrencyLevel = 1
    };

    BenchmarkTests tests = BenchmarkTests.All;
    bool profiling = false;

    var converter = new StringValueConverter();
    int argumentCount = args.Length;

    if (argumentCount > 0)
    {
        tests = (BenchmarkTests) converter.ConvertTo(args[0], TrimmingOptions.Both, typeof(BenchmarkTests), BenchmarkTests.All);
    }

    if (argumentCount > 1)
    {
        benchmarkOptions.BenchmarkIterationCount = converter.ConvertToInt64(args[1], TrimmingOptions.Both, benchmarkOptions.BenchmarkIterationCount, null);
    }

    if (argumentCount > 2)
    {
        profiling = converter.ConvertToBoolean(args[2], TrimmingOptions.Both, false, null);
    }

    #region FixedWidthReader

    if (tests.HasFlag(BenchmarkTests.FixedWidthReader))
    {
        string[] fixedWidthFiles;

        if (profiling)
        {
            fixedWidthFiles = new[] { @"IO\Text\files\fixed.txt" };
        }
        else
        {
            fixedWidthFiles = new[] { @"IO\Text\files\fixed.txt", @"IO\Text\files\test1.csv", @"IO\Text\files\test2.csv" };
        }

        foreach (var path in fixedWidthFiles)
        {
            Console.WriteLine("--- FixedWidthReader - {0} ---", path);

            var benchmarkArgs = new FixedRecordReaderBenchmarkArguments { Path = path };

            Benchmark.Execute("NLight", benchmarkOptions, benchmarkArgs, OutputResults, FixedRecordReaderBenchmarks.ReadAll);
            Benchmark.Execute("DataStreams", benchmarkOptions, benchmarkArgs, OutputResults, FixedRecordReaderBenchmarks.ReadAll_DataStreams);
        }
    }

    #endregion

    #region DelimitedReader

    if (tests.HasFlag(BenchmarkTests.DelimitedReader))
    {
        string[] delimitedFiles;

        if (profiling)
        {
            delimitedFiles = new[] { @"IO\Text\files\test1.csv" };
        }
        else
        {
            delimitedFiles = new[] { @"IO\Text\files\test1.csv", @"IO\Text\files\test2.csv", @"IO\Text\files\test3.csv", @"IO\Text\files\test4.csv", @"IO\Text\files\test5.csv" };
        }

        foreach (var path in delimitedFiles)
        {
            Console.WriteLine("--- DelimitedReader - {0} ---", path);

            var benchmarkArgs = new DelimitedRecordReaderBenchmarkArguments
            {
                Path = path,
                TrimWhiteSpaces = true
            };

            Benchmark.Execute("LumenWorks", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_LumenWorks);
            Benchmark.Execute("NLight", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll);
            Benchmark.Execute("DataStreams", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_DataStreams);
            Benchmark.Execute("CsvHelper", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_CsvHelper);
            //Benchmark.Execute("OleDb", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_OleDb);
            //Benchmark.Execute("Regex", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_Regex);
        }
    }

    #endregion

    #region DelimitedReaderAdvancedEscaping

    if (tests.HasFlag(BenchmarkTests.DelimitedReaderAdvancedEscaping))
    {
        string[] escapingFiles;

        if (profiling)
        {
            escapingFiles = new[] { @"IO\Text\files\test4.csv" };
        }
        else
        {
            escapingFiles = new[] { @"IO\Text\files\test3.csv", @"IO\Text\files\test4.csv" };
        }

        foreach (var path in escapingFiles)
        {
            Console.WriteLine("--- DelimitedReader with advanced escaping - {0} ---", path);

            var benchmarkArgs = new DelimitedRecordReaderBenchmarkArguments
            {
                Path = path,
                TrimWhiteSpaces = true,
                AdvancedEscapingEnabled = true
            };

            Benchmark.Execute("NLight", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll);
            Benchmark.Execute("DataStreams", benchmarkOptions, benchmarkArgs, OutputResults, DelimitedRecordReaderBenchmarks.ReadAll_DataStreams);
        }
    }

    #endregion

    Console.WriteLine("\nDone");
    Console.ReadLine();
}