/// <summary>
/// Sorts a fixed width file given an alphanumeric key.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of the file to be sorted</param>
/// <param name="getKey">Function to construct the key</param>
/// <param name="dataFilter">Function to filter out a data line (true to include the line, false to exclude it)</param>
/// <param name="dataTransportation">Defines the data transportation method.</param>
/// <param name="destinationFolder">Folder path where the sorted and/or duplicate files will be placed. (Uses the folder of sourcefilePath when null)</param>
/// <param name="hasHeader">Whether the file has a header row</param>
/// <param name="isUniqueKey">If true, duplicates will not be included in the sorted file.</param>
/// <param name="returnDuplicates">If true, duplicates will be written out to a file, but only when isUniqueKey is true.</param>
/// <param name="sortDir">The sort direction of the key.</param>
/// <param name="progress">A method to report progress</param>
/// <param name="maxBatchSize">Controls the maximum insert batch size</param>
public static SortResults SortFixedWidthByAlphaNumKey(string sourcefilePath,
    Func<string, string> getKey,
    Func<string, bool> dataFilter = null,
    DataTransportation dataTransportation = null,
    string destinationFolder = null,
    bool hasHeader = true,
    bool isUniqueKey = false,
    bool returnDuplicates = false,
    SortDirection sortDir = SortDirection.Ascending,
    Action<SortProgress> progress = null,
    int maxBatchSize = 250000)
{
    SortDefinitions sortDefs = new SortDefinitions();
    sortDefs.Add(new SortDefinition { DataType = KeyType.AlphaNumeric, Direction = sortDir, IsUniqueKey = isUniqueKey });

    return SortFile.SortFixedWidthByKeyDefinitions(
        sourcefilePath: sourcefilePath,
        sortDefinitions: sortDefs,
        dataFilter: dataFilter,
        setKeys: (line, keyValues) => keyValues[0] = getKey(line),
        dataTransportation: dataTransportation,
        destinationFolder: destinationFolder,
        hasHeader: hasHeader,
        returnDuplicates: returnDuplicates,
        progress: progress,
        maxBatchSize: maxBatchSize);
}
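// Illustrative usage sketch for the method above. The file path and the substring key
// positions are assumptions for the example only, not part of the library:
//
//   SortResults results = SortFile.SortFixedWidthByAlphaNumKey(
//       sourcefilePath: @"C:\temp\accounts.dat",
//       getKey: line => line.Substring(0, 10),   // key = first 10 characters of each line
//       isUniqueKey: true,
//       returnDuplicates: true,
//       sortDir: SortDirection.Ascending);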
/// <summary>
/// Sorts a delimited file given an alphanumeric key.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of the file to be sorted</param>
/// <param name="dataFilter">Function to filter out a data line (true to include the line, false to exclude it)</param>
/// <param name="dataTransportation">Defines the data transportation method.</param>
/// <param name="destinationFolder">Folder path where the sorted and/or duplicate files will be placed. (Uses the folder of sourcefilePath when null)</param>
/// <param name="delimiter">Character delimiter</param>
/// <param name="hasHeader">Whether the file has a header row</param>
/// <param name="keyColumn">The zero based column number to be used as the sort key</param>
/// <param name="keyLength">The length of the key; values shorter than this are right justified and padded with zeros</param>
/// <param name="isUniqueKey">If true, duplicates will not be included in the sorted file.</param>
/// <param name="returnDuplicates">If true, duplicates will be written out to a file, but only when isUniqueKey is true.</param>
/// <param name="sortDir">The sort direction of the key.</param>
/// <param name="progress">A method to report progress</param>
/// <param name="maxBatchSize">Controls the maximum insert batch size</param>
public static SortResults SortDelimitedByAlphaNumKey(string sourcefilePath,
    Func<string[], string, bool> dataFilter = null,
    DataTransportation dataTransportation = null,
    string destinationFolder = null,
    string delimiter = Constants.Delimiters.Comma,
    bool hasHeader = true,
    int keyColumn = 0,
    int keyLength = 15,
    bool isUniqueKey = false,
    bool returnDuplicates = false,
    SortDirection sortDir = SortDirection.Ascending,
    Action<SortProgress> progress = null,
    int maxBatchSize = 250000)
{
    SortDefinitions sortDefs = new SortDefinitions();
    sortDefs.Add(new SortDefinition { DataType = KeyType.AlphaNumeric, Direction = sortDir, IsUniqueKey = isUniqueKey });

    return SortDelimitedByKeyDefCore(sourcefilePath: sourcefilePath,
        sortDefinitions: sortDefs,
        setKeys: (fields, line, keyValues) => keyValues[0] = fields[keyColumn].PadKeyWithZero(keyLength),
        dataFilter: dataFilter,
        destinationFolder: destinationFolder,
        delimiter: delimiter,
        hasHeader: hasHeader,
        returnDuplicates: returnDuplicates,
        dataTransportation: dataTransportation,
        progress: progress,
        maxBatchSize: maxBatchSize);
}
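// Illustrative usage sketch for the method above. The file path, delimiter, and key column
// are assumptions for the example only: sort a pipe-delimited file on column 2, with the key
// padded to 15 characters and sorted descending.
//
//   SortResults results = SortFile.SortDelimitedByAlphaNumKey(
//       sourcefilePath: @"C:\temp\orders.txt",
//       delimiter: "|",
//       keyColumn: 2,
//       keyLength: 15,
//       sortDir: SortDirection.Descending);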
/// <summary>
/// Sorts a delimited file by a defined set of key definitions.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of the file to be sorted</param>
/// <param name="sortDefinitions">Defines the key values and sort directions</param>
/// <param name="setKeys">Action method to set the key values</param>
/// <param name="dataFilter">Function to filter out a data line (true to include the line, false to exclude it)</param>
/// <param name="dataTransportation">Defines the data transportation method.</param>
/// <param name="destinationFolder">Folder path where the sorted and/or duplicate files will be placed. (Uses the folder of sourcefilePath when null)</param>
/// <param name="delimiter">Character delimiter</param>
/// <param name="hasHeader">Whether the file has a header row</param>
/// <param name="returnDuplicates">If true, duplicates will be written out to a file, but only when isUniqueKey is true in any of the key definitions.</param>
/// <param name="progress">A method to report progress</param>
/// <param name="maxBatchSize">Controls the maximum insert batch size</param>
/// <returns>A SortResults instance describing the sort.</returns>
public static SortResults SortDelimitedByKeyDefinitions(
    string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string[], string, string[]> setKeys,
    Func<string[], string, bool> dataFilter = null,
    DataTransportation dataTransportation = null,
    string destinationFolder = null,
    string delimiter = Constants.Delimiters.Comma,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    int maxBatchSize = 250000)
{
    return SortDelimitedByKeyDefCore(
        sourcefilePath: sourcefilePath,
        sortDefinitions: sortDefinitions,
        setKeys: setKeys,
        dataFilter: dataFilter,
        destinationFolder: destinationFolder,
        delimiter: delimiter,
        hasHeader: hasHeader,
        returnDuplicates: returnDuplicates,
        progress: progress,
        maxBatchSize: maxBatchSize,
        dataTransportation: dataTransportation);
}
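// Illustrative usage sketch for a multi-key sort with the method above. The file path and the
// column indices used in setKeys are assumptions for the example only:
//
//   SortDefinitions defs = new SortDefinitions();
//   defs.Add(new SortDefinition { DataType = KeyType.AlphaNumeric, Direction = SortDirection.Ascending, IsUniqueKey = false });
//   defs.Add(new SortDefinition { DataType = KeyType.AlphaNumeric, Direction = SortDirection.Descending, IsUniqueKey = false });
//
//   SortResults results = SortFile.SortDelimitedByKeyDefinitions(
//       sourcefilePath: @"C:\temp\orders.csv",
//       sortDefinitions: defs,
//       setKeys: (fields, line, keyValues) =>
//       {
//           keyValues[0] = fields[0];   // first key from column 0
//           keyValues[1] = fields[3];   // second key from column 3
//       });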
// Returns true when the sorted output should be written to a file, i.e. when no
// DataTransportation is supplied or its transport type includes File.
private bool WriteoutSortedFile(DataTransportation dataTransportation)
{
    if (dataTransportation == null
        || dataTransportation.TransportType == DataTransport.File
        || dataTransportation.TransportType == (DataTransport.File | DataTransport.Passthrough))
    {
        return true;
    }

    return false;
}
// Invokes the passthrough action for a sorted line when the DataTransportation
// transport type includes Passthrough.
private void DoDataTransportPassthrough(DataTransportation dataTransportation, string sqlLiteoutLine)
{
    if (dataTransportation != null
        && (dataTransportation.TransportType == DataTransport.Passthrough
            || dataTransportation.TransportType == (DataTransport.File | DataTransport.Passthrough)))
    {
        if (dataTransportation.PassthroughAction != null)
        {
            dataTransportation.PassthroughAction(sqlLiteoutLine);
        }
    }
}
// Core fixed width sort: reads the source file, bulk inserts each line's key values and
// compressed data into the SQLite working database, then writes the sorted (and optionally
// duplicate) output.
internal static SortResults SortFixedWidthByKeyDefCore(string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string, string[]> setKeys,
    Func<string, bool> dataFilter = null,
    string destinationFolder = null,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate(sourcefilePath, setKeys, destinationFolder);
    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();

    try
    {
        srtResults.DeleteDuplicatesFile();
        int lineCount = 1;

        // Read phase: filter lines, build the key values, and bulk insert into SQLite.
        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();

            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);

                if (dataFilter == null || dataFilter(line))
                {
                    string[] keyValues = new string[sortDefinitions.GetKeys().Count];
                    setKeys(line, keyValues);
                    sortBulkInserter.Add(new SortKeyData { KeyValues = keyValues, Data = (line + Constants.Common.PreserveCharacter).Compress() });
                    lineCount++;
                }
                else
                {
                    srtResults.IncrementFiltered();
                }
            }

            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }

        // Write phase: stream the ordered rows back out of SQLite.
        srtProgress.InitWriting();
        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(dbConnPath: srtVars.DbConnPath,
                header: srtVars.Header,
                sortDefinitions: sortDefinitions,
                delimiter: Constants.Delimiters.Tab,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                compressed: true,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    if (progress != null) { progress(srtProgress); }
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            srtResults.Header = srtVars.Header;
        }

        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        CleanUp(srtVars, srtResults);
        srtProgress = null;
        throw;
    }

    return srtResults;
}
/// <summary>
/// Sorts a delimited file given a numeric or string key.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of the file to be sorted</param>
/// <param name="getKey">Function to construct the key</param>
/// <param name="dataFilter">Function to filter out a data line (true to include the line, false to exclude it)</param>
/// <param name="destinationFolder">Folder path where the sorted and/or duplicate files will be placed. (Uses the folder of sourcefilePath when null)</param>
/// <param name="delimiter">Character delimiter</param>
/// <param name="hasHeader">Whether the file has a header row</param>
/// <param name="isUniqueKey">If true, duplicates will not be included in the sorted file.</param>
/// <param name="returnDuplicates">If true, duplicates will be written out to a file, but only when isUniqueKey is true.</param>
/// <param name="sortDir">The sort direction of the key.</param>
/// <param name="progress">A method to report progress</param>
/// <param name="dataTransportation">Defines the data transportation method.</param>
/// <param name="maxBatchSize">Controls the maximum insert batch size</param>
internal static SortResults SortDelimitedByKeyCore<T>(
    string sourcefilePath,
    Func<string[], string, T> getKey,
    Func<string[], string, bool> dataFilter = null,
    string destinationFolder = null,
    string delimiter = Constants.Delimiters.Comma,
    bool hasHeader = true,
    bool isUniqueKey = false,
    bool returnDuplicates = false,
    SortDirection sortDir = SortDirection.Ascending,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate<T>(sourcefilePath, getKey, delimiter, destinationFolder, maxBatchSize);
    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();

    try
    {
        srtResults.DeleteDuplicatesFile();
        int lineCount = 1;

        // Read phase: parse each delimited line, build the key, and bulk insert into SQLite.
        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortKeyBulkInserter<T> sortBulkInserter = new SqliteSortKeyBulkInserter<T>(srtVars.DbConnPath, uniqueKey: isUniqueKey, maxBatchSize: maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();

            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);

                FileParser.ParseDelimitedString(new StringReader(line), (fields, lNum) =>
                {
                    if (dataFilter == null || dataFilter(fields, line))
                    {
                        T key = getKey(fields, line);
                        sortBulkInserter.Add(key, line + SortFileHelpers.EscapeByDelimiter(delimiter));
                        lineCount++;
                    }
                    else
                    {
                        srtResults.IncrementFiltered();
                    }
                }, delimiter);
            }

            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }

        // Write phase: stream the ordered rows back out of SQLite.
        srtProgress.InitWriting();
        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(srtVars.DbConnPath,
                srtVars.Header,
                sortDir,
                delimiter,
                hasUniqueIndex: isUniqueKey,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    if (progress != null) { progress(srtProgress); }
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            srtResults.Header = srtVars.Header;
        }

        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        CleanUp(srtVars, srtResults);
        srtProgress = null;
        throw;
    }

    return srtResults;
}
// Writes the sorted output (and optionally duplicates) for a single-key sort by reading the
// rows back from the SQLite working database in key order.
internal void WriteOutSorted(string dbConnPath,
    string header,
    SortDirection sortDir,
    string delimiter = Constants.Delimiters.Comma,
    bool hasUniqueIndex = false,
    bool returnDuplicates = false,
    string dupesFilePath = "",
    bool compressed = false,
    Action<int> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDb = true)
{
    bool writeSortedFile = WriteoutSortedFile(dataTransportation);
    if (writeSortedFile) { DeleteSortedFile(); }
    this.Header = header;

    StreamWriter dupeWriter = !string.IsNullOrEmpty(dupesFilePath) ? new StreamWriter(dupesFilePath) : null;
    StreamWriter sw = writeSortedFile ? new StreamWriter(SortedFilePath) : null;

    using (sw)
    using (dupeWriter)
    {
        if (!string.IsNullOrWhiteSpace(header))
        {
            if (sw != null) { sw.WriteLine(header); }
            if (returnDuplicates) { WriteHeaderForDuplicatesFile(true, header, dupeWriter); }
        }

        using (var cn = new SQLiteConnection(@"Data Source=" + dbConnPath))
        {
            string selectCmd = "SELECT * FROM FileData ORDER BY SortKey";
            if (sortDir == SortDirection.Descending) { selectCmd += " DESC"; }
            cn.Open();

            using (var cmd = new SQLiteCommand(selectCmd, cn))
            using (SQLiteDataReader rdr = cmd.ExecuteReader())
            {
                dynamic lastReadKey = null;
                while (rdr.Read())
                {
                    string sqlLiteData = (string)rdr["LineData"];
                    string sqlLiteoutLine = SortFileHelpers.UnEscapeByDelimiter(compressed ? sqlLiteData.Decompress() : sqlLiteData, delimiter);

                    // When the key has a unique index, rows repeating the previous key are treated
                    // as duplicates and skipped (optionally written to the duplicates file).
                    if (hasUniqueIndex)
                    {
                        dynamic sqlLiteKey = rdr["SortKey"];
                        if (sqlLiteKey.Equals(lastReadKey))
                        {
                            if (returnDuplicates)
                            {
                                dupeWriter.WriteLine(sqlLiteoutLine);
                                this.IncrementDuplicates();
                            }
                            continue;
                        }
                        lastReadKey = sqlLiteKey;
                    }

                    if (sw != null) { sw.WriteLine(sqlLiteoutLine); }
                    IncrementLinesSorted();
                    ReportProgress(progress, LinesSorted);
                    DoDataTransportPassthrough(dataTransportation, sqlLiteoutLine);
                }
            }

            cn.Close();
        }
    }

    if (deleteDb) { SortFileHelpers.DeleteFileIfExists(dbConnPath); }
}
// Writes the sorted output (and optionally duplicates) for a multi-key sort, ordering the
// SQLite rows by the clause built from the sort definitions.
internal void WriteOutSorted(string dbConnPath,
    string header,
    SortDefinitions sortDefinitions,
    string delimiter = Constants.Delimiters.Comma,
    bool returnDuplicates = false,
    string dupesFilePath = "",
    bool compressed = false,
    Action<int> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDb = true)
{
    bool writeSortedFile = WriteoutSortedFile(dataTransportation);
    if (writeSortedFile) { DeleteSortedFile(); }
    this.Header = header;

    StreamWriter dupeWriter = !string.IsNullOrEmpty(dupesFilePath) ? new StreamWriter(dupesFilePath) : null;
    StreamWriter sw = writeSortedFile ? new StreamWriter(SortedFilePath) : null;

    using (sw)
    using (dupeWriter)
    {
        if (!string.IsNullOrWhiteSpace(header))
        {
            if (sw != null) { sw.WriteLine(header); }
            if (returnDuplicates) { WriteHeaderForDuplicatesFile(true, header, dupeWriter); }
        }

        using (var cn = new SQLiteConnection(@"Data Source=" + dbConnPath))
        {
            string selectCmd = "SELECT * FROM FileData ORDER BY " + sortDefinitions.BuildOrderClause();
            cn.Open();

            using (var cmd = new SQLiteCommand(selectCmd, cn))
            using (SQLiteDataReader rdr = cmd.ExecuteReader())
            {
                var lastReadKeyList = GetNewDynamicListForKeys(sortDefinitions);
                while (rdr.Read())
                {
                    string sqlLiteData = (string)rdr["LineData"];
                    string sqlLiteoutLine = SortFileHelpers.UnEscapeByDelimiter(compressed ? sqlLiteData.Decompress() : sqlLiteData, delimiter);

                    // Duplicate detection: rows whose tracked key values match the previous row are
                    // treated as duplicates and skipped (optionally written to the duplicates file).
                    if (lastReadKeyList.Count > 0)
                    {
                        var currentReadKeyList = SetNewDynamicListForKeysValues(sortDefinitions, rdr);
                        if (KeysEqual(currentReadKeyList, lastReadKeyList))
                        {
                            if (returnDuplicates)
                            {
                                dupeWriter.WriteLine(sqlLiteoutLine);
                                this.IncrementDuplicates();
                            }
                            continue;
                        }
                        lastReadKeyList = currentReadKeyList;
                    }

                    if (sw != null) { sw.WriteLine(sqlLiteoutLine); }
                    IncrementLinesSorted();
                    ReportProgress(progress, LinesSorted);
                    DoDataTransportPassthrough(dataTransportation, sqlLiteoutLine);
                }
            }

            cn.Close();
        }
    }

    if (deleteDb) { SortFileHelpers.DeleteFileIfExists(dbConnPath); }
}