/// <summary>
/// Builds the placeholder list used to track the last-read unique-key values:
/// one <c>null</c> entry is added for every sort definition whose
/// <c>IsUniqueKey</c> flag is set.
/// </summary>
/// <param name="sortDefinitions">Sort definitions whose keys are inspected.</param>
/// <returns>
/// A list containing one <c>null</c> per unique key; empty when no key is flagged unique.
/// </returns>
private List<dynamic> GetNewDynamicListForKeys(SortDefinitions sortDefinitions)
{
    // Fetch the keys once rather than re-invoking GetKeys() for every loop-condition check.
    List<SortDefinition> srtKeys = sortDefinitions.GetKeys();
    var dynList = new List<dynamic>();

    foreach (SortDefinition key in srtKeys)
    {
        if (key.IsUniqueKey)
        {
            dynList.Add(null);
        }
    }

    return dynList;
}
/// <summary>
/// Stores the sort definitions and batch size, opens a SQLite connection for the
/// given data source (journal mode off, version 3), and then calls
/// <c>CreateTable()</c> and <c>SetInsertBulkCommand()</c>.
/// </summary>
/// <param name="connStr">Value assigned to the connection string's data source.</param>
/// <param name="sortDefinitions">Sort definitions kept for later use by this inserter.</param>
/// <param name="maxBatchSize">Value stored in <c>MaxBatchSize</c>.</param>
public SqliteSortDefBulkInserter(string connStr, SortDefinitions sortDefinitions, int maxBatchSize = 250000)
{
    sortDefs = sortDefinitions;
    MaxBatchSize = maxBatchSize;
    sortKeyDataList = new List<SortKeyData>();

    SQLiteConnectionStringBuilder connBldr = new SQLiteConnectionStringBuilder();
    connBldr.DataSource = connStr;
    connBldr.JournalMode = SQLiteJournalModeEnum.Off;
    connBldr.Version = 3;

    dbConnection = new SQLiteConnection(connBldr.ConnectionString);
    try
    {
        dbConnection.Open();
        CreateTable();
        SetInsertBulkCommand();
    }
    catch
    {
        // A constructor that throws never hands the instance to the caller, so nobody
        // else could dispose the connection; release it here before propagating.
        dbConnection.Dispose();
        throw;
    }
}
/// <summary>
/// Sorts a fixed width file according to the supplied set of key definitions.
/// </summary>
/// <param name="sourcefilePath">Full path and file name of the file to be sorted.</param>
/// <param name="sortDefinitions">Defines the key values and their sort directions.</param>
/// <param name="setKeys">Action that sets the key values for a data line.</param>
/// <param name="dataFilter">Function used to filter out a data line (true to include the line, false to exclude it).</param>
/// <param name="dataTransportation">Defines the data transportation method.</param>
/// <param name="destinationFolder">Folder path where the sorted and/or duplicate files will be placed. (The folder of sourcefilePath is used when null.)</param>
/// <param name="hasHeader">Whether the file has a header row.</param>
/// <param name="returnDuplicates">If true, duplicates are written out to a file, but only when isUniqueKey is true in any of the key definitions.</param>
/// <param name="progress">Method used to report progress.</param>
/// <param name="maxBatchSize">Controls the maximum insert batch size.</param>
/// <returns>The results object produced by SortFixedWidthByKeyDefCore for this sort.</returns>
public static SortResults SortFixedWidthByKeyDefinitions(string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string, string[]> setKeys,
    Func<string, bool> dataFilter = null,
    DataTransportation dataTransportation = null,
    string destinationFolder = null,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    int maxBatchSize = 250000)
{
    // Pure pass-through: arguments are listed in the core method's declaration order.
    SortResults results = SortFixedWidthByKeyDefCore(
        sourcefilePath: sourcefilePath,
        sortDefinitions: sortDefinitions,
        setKeys: setKeys,
        dataFilter: dataFilter,
        destinationFolder: destinationFolder,
        hasHeader: hasHeader,
        returnDuplicates: returnDuplicates,
        progress: progress,
        dataTransportation: dataTransportation,
        maxBatchSize: maxBatchSize);

    return results;
}
/// <summary>
/// Reads the source file line by line, bulk-inserts each accepted line (with its key
/// values) into the SQLite work database, and then optionally writes the sorted output.
/// </summary>
/// <param name="sourcefilePath">Path of the file to read.</param>
/// <param name="sortDefinitions">Sort definitions; their key count sizes the key array handed to <paramref name="setKeys"/>.</param>
/// <param name="setKeys">Invoked for every accepted line with the line and the key array to fill.</param>
/// <param name="dataFilter">Optional predicate; a line is skipped (and counted as filtered) when it returns false.</param>
/// <param name="destinationFolder">Destination folder passed to the sort variables.</param>
/// <param name="hasHeader">Passed to <c>GetHeader</c> together with the reader.</param>
/// <param name="returnDuplicates">Forwarded to <c>WriteOutSorted</c>.</param>
/// <param name="progress">Optional callback invoked with the progress object while reading and writing.</param>
/// <param name="dataTransportation">Forwarded to <c>WriteOutSorted</c>.</param>
/// <param name="deleteDbConnPath">Forwarded to <c>WriteOutSorted</c> as its <c>deleteDb</c> argument.</param>
/// <param name="writeOutSortFile">When false, <c>WriteOutSorted</c> is skipped and only the header is copied onto the results.</param>
/// <param name="maxBatchSize">Batch size handed to the bulk inserter.</param>
/// <returns>The populated results object.</returns>
/// <remarks>On any exception <c>CleanUp</c> is called and the exception is rethrown.</remarks>
internal static SortResults SortFixedWidthByKeyDefCore(string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string, string[]> setKeys,
    Func<string, bool> dataFilter = null,
    string destinationFolder = null,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate(sourcefilePath, setKeys, destinationFolder);
    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();
    try
    {
        srtResults.DeleteDuplicatesFile();

        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();

            // The key count does not change while reading, so look it up once
            // instead of calling GetKeys() for every input line.
            int keyCount = sortDefinitions.GetKeys().Count;

            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);

                if (dataFilter == null || dataFilter(line))
                {
                    string[] keyValues = new string[keyCount];
                    setKeys(line, keyValues);
                    sortBulkInserter.Add(new SortKeyData
                    {
                        KeyValues = keyValues,
                        Data = (line + Constants.Common.PreserveCharacter).Compress()
                    });
                }
                else
                {
                    srtResults.IncrementFiltered();
                }
            }

            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }

        srtProgress.InitWriting();
        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(dbConnPath: srtVars.DbConnPath,
                header: srtVars.Header,
                sortDefinitions: sortDefinitions,
                delimiter: Constants.Delimiters.Tab,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                compressed: true,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    if (progress != null)
                    {
                        progress(srtProgress);
                    }
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            srtResults.Header = srtVars.Header;
        }

        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        CleanUp(srtVars, srtResults);
        throw;
    }

    return srtResults;
}
/// <summary>
/// Reads every row of the <c>FileData</c> table in the order given by the sort
/// definitions and emits each line to the sorted file and/or the data transportation
/// passthrough. A row whose unique-key values equal those of the previously emitted
/// row is treated as a duplicate: it is not emitted, and is written to the duplicates
/// file and counted only when <paramref name="returnDuplicates"/> is true.
/// </summary>
/// <param name="dbConnPath">Path used as the SQLite data source; the file is deleted afterwards when <paramref name="deleteDb"/> is true.</param>
/// <param name="header">Header stored on this instance and, when not blank, written as the first line.</param>
/// <param name="sortDefinitions">Supplies the ORDER BY clause and the unique-key definitions.</param>
/// <param name="delimiter">Delimiter passed to <c>SortFileHelpers.UnEscapeByDelimiter</c>.</param>
/// <param name="returnDuplicates">Whether duplicate rows are written to the duplicates file and counted.</param>
/// <param name="dupesFilePath">Path of the duplicates file; no duplicates writer is opened when null or empty.</param>
/// <param name="compressed">Whether the stored line data must be decompressed before use.</param>
/// <param name="progress">Optional callback given to <c>ReportProgress</c> with the number of lines sorted.</param>
/// <param name="dataTransportation">Decides (via <c>WriteoutSortedFile</c>) whether a sorted file is written, and receives each emitted line.</param>
/// <param name="deleteDb">Whether to delete the database file once writing has completed.</param>
internal void WriteOutSorted(string dbConnPath,
    string header,
    SortDefinitions sortDefinitions,
    string delimiter = Constants.Delimiters.Comma,
    bool returnDuplicates = false,
    string dupesFilePath = "",
    bool compressed = false,
    Action<int> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDb = true)
{
    bool writeSortedFile = WriteoutSortedFile(dataTransportation);
    if (writeSortedFile)
    {
        DeleteSortedFile();
    }
    this.Header = header;

    // Create each writer inside its own using so that a failure while opening the
    // second file still disposes the first. A null resource is allowed by using.
    using (StreamWriter dupeWriter = !string.IsNullOrEmpty(dupesFilePath) ? new StreamWriter(dupesFilePath) : null)
    using (StreamWriter sw = writeSortedFile ? new StreamWriter(SortedFilePath) : null)
    {
        if (!string.IsNullOrWhiteSpace(header))
        {
            if (sw != null)
            {
                sw.WriteLine(header);
            }
            if (returnDuplicates)
            {
                WriteHeaderForDuplicatesFile(true, header, dupeWriter);
            }
        }

        using (var cn = new SQLiteConnection(@"Data Source=" + dbConnPath))
        {
            string selectCmd = "SELECT * FROM FileData ORDER BY " + sortDefinitions.BuildOrderClause();
            cn.Open();
            using (var cmd = new SQLiteCommand(selectCmd, cn))
            using (SQLiteDataReader rdr = cmd.ExecuteReader())
            {
                var lastReadKeyList = GetNewDynamicListForKeys(sortDefinitions);
                while (rdr.Read())
                {
                    string sqlLiteData = (string)rdr["LineData"];
                    string sqlLiteoutLine = SortFileHelpers.UnEscapeByDelimiter(compressed ? sqlLiteData.Decompress() : sqlLiteData, delimiter);

                    // Duplicate detection only applies when at least one key is flagged unique.
                    if (lastReadKeyList.Count > 0)
                    {
                        var currentReadKeyList = SetNewDynamicListForKeysValues(sortDefinitions, rdr);
                        if (KeysEqual(currentReadKeyList, lastReadKeyList))
                        {
                            if (returnDuplicates)
                            {
                                // No writer exists when dupesFilePath was null/empty; the
                                // duplicate is still counted but cannot be written anywhere.
                                if (dupeWriter != null)
                                {
                                    dupeWriter.WriteLine(sqlLiteoutLine);
                                }
                                this.IncrementDuplicates();
                            }
                            continue;
                        }
                        lastReadKeyList = currentReadKeyList;
                    }

                    if (sw != null)
                    {
                        sw.WriteLine(sqlLiteoutLine);
                    }
                    IncrementLinesSorted();
                    ReportProgress(progress, LinesSorted);
                    DoDataTransportPassthrough(dataTransportation, sqlLiteoutLine);
                }
            }
        }
    }

    if (deleteDb)
    {
        SortFileHelpers.DeleteFileIfExists(dbConnPath);
    }
}