/// <summary>
/// Un-escapes a stored master record and splits it into fields using the master's delimiter.
/// </summary>
/// <typeparam name="T">Type argument of the master file source.</typeparam>
/// <param name="master">Master source; only its <c>Delimiter</c> is read here.</param>
/// <param name="data">Record text, run through <c>SortFileHelpers.UnEscapeByDelimiter</c> before parsing.</param>
/// <returns>
/// The field array handed to the parser callback. <c>null</c> when the callback is never
/// invoked; when it is invoked more than once, the fields from the last invocation are returned.
/// </returns>
private static string[] GetMasterFields<T>(MasterDelimitedFileSource<T> master, string data)
{
    string[] masterFields = null;

    string unescaped = SortFileHelpers.UnEscapeByDelimiter(data, master.Delimiter);
    StringReader unescapedReader = new StringReader(unescaped);

    FileParser.ParseDelimitedString(
        unescapedReader,
        (fields, lNum) =>
        {
            // Each callback overwrites the previous capture, so the last parsed row wins.
            masterFields = fields;
        },
        master.Delimiter);

    return masterFields;
}
/// <summary>
/// Merge/purges a delimited detail file against a fixed-width master file.
/// The master is first sorted via <c>SortFixedWidth</c>; every detail line is then parsed with the
/// detail delimiter and passed to <c>FixedWidthSortMurgePurge</c> together with the four action
/// writers (adds, deletes, updates, ignored). Finally the new master file is written out.
/// </summary>
/// <typeparam name="T">Type argument shared by the master and detail sources.</typeparam>
/// <param name="master">Fixed-width master source (path and sort direction are read here).</param>
/// <param name="detail">Delimited detail source (path, delimiter and header flag are read here).</param>
/// <param name="processData">Caller callback forwarded to <c>FixedWidthSortMurgePurge</c> for each detail line.</param>
/// <param name="destinationFolder">Optional output folder, forwarded to <c>SortVars</c>.</param>
/// <param name="processMode">
/// Processing mode. When it equals <c>DataMode.Active</c>, the sub files and their paths are
/// removed from the results after processing.
/// </param>
/// <returns>The populated <c>MergePurgeResults</c>.</returns>
/// <remarks>
/// On any exception inside the processing section, <c>ExceptionCleanUp</c> is called and the
/// exception is rethrown unchanged.
/// </remarks>
public static MergePurgeResults MergePurge<T>(
    MasterFixedWidthFileSource<T> master,
    DelimitedFileSource<T> detail,
    Action<MergePurgeParam> processData,
    string destinationFolder = null,
    DataMode processMode = DataMode.Passive)
{
    ArgumentValidation<T>(master, detail, processData, destinationFolder);

    MergePurgeResults purgeResults = new MergePurgeResults();
    SortVars masterSortVars = new SortVars(master.SourceFilePath, destinationFolder);
    SortResults sortedMaster = SortFixedWidth<T>(master, masterSortVars.DestFolder);
    purgeResults.InitFilePaths(master.SourceFilePath, detail.SourceFilePath, masterSortVars.DestFolder);

    try
    {
        // Block-form usings: the writers must be closed before the sub files are inspected below.
        using (StreamReader detailReader = new StreamReader(detail.SourceFilePath))
        using (StreamWriter addsWriter = new StreamWriter(purgeResults.AddsFilePath))
        using (StreamWriter deletesWriter = new StreamWriter(purgeResults.DeletesFilePath))
        using (StreamWriter updatesWriter = new StreamWriter(purgeResults.UpdatesFilePath))
        using (StreamWriter ignoredWriter = new StreamWriter(purgeResults.IgnoredFilePath))
        {
            // Order matters: adds, deletes, updates, ignored.
            StreamWriter[] actionWriters = new StreamWriter[]
            {
                addsWriter,
                deletesWriter,
                updatesWriter,
                ignoredWriter
            };

            string header = string.Empty;
            header = GetHeader(detail.HasHeader, detailReader);
            WriteHeaderToActionWriters(processMode, detail.HasHeader, header, actionWriters);

            for (string detailLine = detailReader.ReadLine();
                 detailLine != null;
                 detailLine = detailReader.ReadLine())
            {
                MergePurgeParam lineParam = new MergePurgeParam();

                FileParser.ParseDelimitedString(
                    new StringReader(detailLine),
                    (fields, lNum) =>
                    {
                        // Every parsed detail row starts out as "Ignore".
                        lineParam.DetailFields = fields;
                        lineParam.DataAction = MergePurgeAction.Ignore;
                    },
                    detail.Delimiter);

                FixedWidthSortMurgePurge<T>(
                    sortedMaster.DbConnPath,
                    detailLine,
                    master,
                    detail,
                    lineParam,
                    processData,
                    purgeResults,
                    processMode,
                    actionWriters);
            }
        }

        purgeResults.ClearSubFilesIfNoCount();

        if (processMode == DataMode.Active)
        {
            purgeResults.RemoveSubFilesAndFilePaths();
        }

        // The sorted output becomes the new master file.
        sortedMaster.SortedFilePath = purgeResults.NewMasterFilePath;
        sortedMaster.WriteOutSorted(
            dbConnPath: sortedMaster.DbConnPath,
            header: sortedMaster.Header,
            sortDir: master.SortDirection,
            delimiter: Constants.Delimiters.Tab,
            compressed: true,
            deleteDb: true);
    }
    catch (Exception)
    {
        ExceptionCleanUp(sortedMaster.DbConnPath, purgeResults);
        throw;
    }

    return purgeResults;
}
/// <summary>
/// Reads a delimited file line by line, extracts sort keys for each accepted line via
/// <paramref name="setKeys"/>, bulk-inserts the lines through a <c>SqliteSortDefBulkInserter</c>,
/// and optionally writes the sorted output.
/// </summary>
/// <param name="sourcefilePath">Path of the delimited file to read.</param>
/// <param name="sortDefinitions">Sort definitions; <c>GetKeys().Count</c> sizes the key array for each line.</param>
/// <param name="setKeys">Called with (fields, raw line, key array) to fill in the key values for a line.</param>
/// <param name="dataFilter">
/// Optional filter called with (fields, raw line). When it returns <c>false</c> the line is not
/// inserted and is counted as filtered. When <c>null</c>, every line is accepted.
/// </param>
/// <param name="destinationFolder">Optional output folder, forwarded to <c>SortVars</c>.</param>
/// <param name="delimiter">Field delimiter used for parsing and for the sorted output.</param>
/// <param name="hasHeader">Forwarded to <c>GetHeader</c> to decide whether a header is read.</param>
/// <param name="returnDuplicates">Forwarded to <c>WriteOutSorted</c>.</param>
/// <param name="progress">
/// Optional progress callback; forwarded to <c>ReportReadProgress</c> while reading and invoked
/// with the updated counter while the sorted output is written.
/// </param>
/// <param name="dataTransportation">Forwarded to <c>WriteOutSorted</c>.</param>
/// <param name="deleteDbConnPath">Forwarded to <c>WriteOutSorted</c> as <c>deleteDb</c>.</param>
/// <param name="writeOutSortFile">
/// When <c>true</c> the sorted output is written; when <c>false</c> only the header is copied
/// onto the results.
/// </param>
/// <param name="maxBatchSize">Batch size passed to the bulk inserter.</param>
/// <returns>The <c>SortResults</c> for this run.</returns>
/// <remarks>
/// On any exception inside the processing section, <c>CleanUp</c> is called and the exception is
/// rethrown unchanged.
/// </remarks>
internal static SortResults SortDelimitedByKeyDefCore(
    string sourcefilePath,
    SortDefinitions sortDefinitions,
    Action<string[], string, string[]> setKeys,
    Func<string[], string, bool> dataFilter = null,
    string destinationFolder = null,
    string delimiter = Constants.Delimiters.Comma,
    bool hasHeader = true,
    bool returnDuplicates = false,
    Action<SortProgress> progress = null,
    DataTransportation dataTransportation = null,
    bool deleteDbConnPath = true,
    bool writeOutSortFile = true,
    int maxBatchSize = 250000)
{
    ArgumentValidation.Validate(sourcefilePath, setKeys, delimiter, destinationFolder);

    SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
    SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
    SortProgress srtProgress = new SortProgress();

    try
    {
        srtResults.DeleteDuplicatesFile();

        // Block-form usings: reader and inserter are closed before the write-out phase starts.
        using (StreamReader reader = new StreamReader(sourcefilePath))
        using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
        {
            string line;
            srtVars.Header = GetHeader(hasHeader, reader);
            srtProgress.InitReading();

            while ((line = reader.ReadLine()) != null)
            {
                srtResults.IncrementLinesRead();
                ReportReadProgress(progress, srtProgress, srtResults.LinesRead);

                FileParser.ParseDelimitedString(
                    new StringReader(line),
                    (fields, lNum) =>
                    {
                        if (dataFilter == null || dataFilter(fields, line))
                        {
                            string[] keyValues = new string[sortDefinitions.GetKeys().Count];
                            setKeys(fields, line, keyValues);

                            // NOTE(review): the stored record is the raw line with the result of
                            // SortFileHelpers.EscapeByDelimiter(delimiter) appended - confirm this
                            // matches what the read-back side expects.
                            sortBulkInserter.Add(new SortKeyData
                            {
                                KeyValues = keyValues,
                                Data = line + SortFileHelpers.EscapeByDelimiter(delimiter)
                            });
                        }
                        else
                        {
                            srtResults.IncrementFiltered();
                        }
                    },
                    delimiter);
            }

            sortBulkInserter.InsertAnyLeftOvers();
            sortBulkInserter.AddUnUniqueIndex();
        }

        srtProgress.InitWriting();

        if (writeOutSortFile)
        {
            srtResults.WriteOutSorted(
                srtVars.DbConnPath,
                srtVars.Header,
                sortDefinitions,
                delimiter,
                returnDuplicates: returnDuplicates,
                dupesFilePath: srtResults.DuplicatesFilePath,
                progress: (counter) =>
                {
                    srtProgress.Counter = counter;
                    if (progress != null)
                    {
                        progress(srtProgress);
                    }
                },
                dataTransportation: dataTransportation,
                deleteDb: deleteDbConnPath);
        }
        else
        {
            // No output file requested: only carry the header over to the results.
            srtResults.Header = srtVars.Header;
        }

        srtResults.DeleteDuplicatesFileIfNoDuplicates();
    }
    catch (Exception)
    {
        CleanUp(srtVars, srtResults);
        throw;
    }

    return srtResults;
}