/// <summary>
/// Sorts a fixed width file using the supplied key definitions.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of file to be sorted</param>
/// <param name="sortDefinitions">The key definitions driving the sort</param>
/// <param name="setKeys">Action that populates the key values array for a given line</param>
/// <param name="dataFilter">Function to filter out a data line (true to include data or false to exclude data)</param>
/// <param name="destinationFolder">Folder path where sorted and/or duplicate files will be placed. (Uses folder of sourcefilePath when null)</param>
/// <param name="hasHeader">Does the file have a header row</param>
/// <param name="returnDuplicates">If true duplicates will be written out to file</param>
/// <param name="progress">A method to report progress</param>
/// <param name="dataTransportation">Define the data transportation method.</param>
/// <param name="deleteDbConnPath">Passed to WriteOutSorted as deleteDb; when true the working SQLite database is removed after the sorted output is written.</param>
/// <param name="writeOutSortFile">When false, skips writing the sorted output file (the header is still captured on the results).</param>
/// <param name="maxBatchSize">Control the max insert batch size</param>
/// <returns>The <see cref="SortResults"/> describing lines read, filtered, and output locations.</returns>
internal static SortResults SortFixedWidthByKeyDefCore(
    string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string, string[]> setKeys,
    Func<string, bool> dataFilter = null,
    string destinationFolder = null,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate(sourcefilePath, setKeys, destinationFolder);
    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();
    try
    {
        srtResults.DeleteDuplicatesFile();
        // Phase 1: read every line, extract its key values, and bulk-insert
        // compressed line data into the working SQLite database.
        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();
            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);
                if (dataFilter == null || dataFilter(line))
                {
                    string[] keyValues = new string[sortDefinitions.GetKeys().Count];
                    setKeys(line, keyValues);
                    // PreserveCharacter is appended so trailing whitespace survives
                    // the round trip; the payload is stored compressed.
                    sortBulkInserter.Add(new SortKeyData
                    {
                        KeyValues = keyValues,
                        Data = (line + Constants.Common.PreserveCharacter).Compress()
                    });
                }
                else
                {
                    srtResults.IncrementFiltered();
                }
            }
            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }
        // Phase 2: stream the rows back out in sorted order (unless the caller
        // only wants the populated database).
        srtProgress.InitWriting();
        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(
                dbConnPath: srtVars.DbConnPath,
                header: srtVars.Header,
                sortDefinitions: sortDefinitions,
                delimiter: Constants.Delimiters.Tab,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                compressed: true,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    progress?.Invoke(srtProgress);
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            srtResults.Header = srtVars.Header;
        }
        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        // Best-effort cleanup of partial output before propagating the failure.
        CleanUp(srtVars, srtResults);
        throw;
    }
    return srtResults;
}
/// <summary>
/// Sorts a delimited file given a numeric or string key.
/// </summary>
/// <typeparam name="T">Type of the sort key (numeric or string).</typeparam>
/// <param name="sourcefilePath">Full path and file name of file to be sorted</param>
/// <param name="getKey">Function to construct the key from the parsed fields and the raw line</param>
/// <param name="dataFilter">Function to filter out a data line (true to include data or false to exclude data)</param>
/// <param name="destinationFolder">Folder path where sorted and/or duplicate files will be placed. (Uses folder of sourcefilePath when null)</param>
/// <param name="delimiter">Character delimiter</param>
/// <param name="hasHeader">Does the file have a header row</param>
/// <param name="isUniqueKey">If true duplicates will not be included in the sorted file.</param>
/// <param name="returnDuplicates">If true duplicates will be written out to file only if isUniqueKey is true.</param>
/// <param name="sortDir">The sort direction of the key.</param>
/// <param name="progress">A method to report progress</param>
/// <param name="dataTransportation">Define the data transportation method.</param>
/// <param name="deleteDbConnPath">Passed to WriteOutSorted as deleteDb; when true the working SQLite database is removed after the sorted output is written.</param>
/// <param name="writeOutSortFile">When false, skips writing the sorted output file (the header is still captured on the results).</param>
/// <param name="maxBatchSize">Control the max insert batch size</param>
/// <returns>The <see cref="SortResults"/> describing lines read, filtered, and output locations.</returns>
internal static SortResults SortDelimitedByKeyCore<T>(
    string sourcefilePath,
    Func<string[], string, T> getKey,
    Func<string[], string, bool> dataFilter = null,
    string destinationFolder = null,
    string delimiter = Constants.Delimiters.Comma,
    bool hasHeader = true,
    bool isUniqueKey = false,
    bool returnDuplicates = false,
    SortDirection sortDir = SortDirection.Ascending,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate<T>(sourcefilePath, getKey, delimiter, destinationFolder, maxBatchSize);
    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();
    try
    {
        srtResults.DeleteDuplicatesFile();
        // Phase 1: parse each line's fields, derive its key, and bulk-insert
        // key + line into the working SQLite database.
        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortKeyBulkInserter<T> sortBulkInserter = new SqliteSortKeyBulkInserter<T>(srtVars.DbConnPath, uniqueKey: isUniqueKey, maxBatchSize: maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();
            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);
                // Each physical line is fed through the delimited parser so the
                // key and filter callbacks receive the split fields.
                FileParser.ParseDelimitedString(new StringReader(line), (fields, lNum) =>
                {
                    if (dataFilter == null || dataFilter(fields, line))
                    {
                        T key = getKey(fields, line);
                        // Escape embedded delimiters so the line survives storage intact.
                        sortBulkInserter.Add(key, line + SortFileHelpers.EscapeByDelimiter(delimiter));
                    }
                    else
                    {
                        srtResults.IncrementFiltered();
                    }
                }, delimiter);
            }
            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }
        // Phase 2: stream the rows back out in sorted order (unless the caller
        // only wants the populated database).
        srtProgress.InitWriting();
        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(
                srtVars.DbConnPath,
                srtVars.Header,
                sortDir,
                delimiter,
                hasUniqueIndex: isUniqueKey,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    progress?.Invoke(srtProgress);
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            srtResults.Header = srtVars.Header;
        }
        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        // Best-effort cleanup of partial output before propagating the failure.
        CleanUp(srtVars, srtResults);
        throw;
    }
    return srtResults;
}