Exemplo n.º 1
0
        /// <summary>
        /// Reads the source file line by line, hands every accepted line and its key values to a
        /// <c>SqliteSortDefBulkInserter</c>, and optionally writes out the sorted result.
        /// </summary>
        /// <param name="sourcefilePath">Full path and file name of the file to be sorted.</param>
        /// <param name="sortDefinitions">Key definitions; the number of keys sizes the array handed to <paramref name="setKeys"/>.</param>
        /// <param name="setKeys">Callback that receives a data line and fills the supplied array with that line's key values.</param>
        /// <param name="dataFilter">Optional line filter (true to include the line, false to count it as filtered). Every line is included when null.</param>
        /// <param name="destinationFolder">Destination folder passed on to argument validation and <c>SortVars</c>; may be null.</param>
        /// <param name="hasHeader">Passed to <c>GetHeader</c> to indicate whether the file starts with a header row.</param>
        /// <param name="returnDuplicates">Forwarded to <c>WriteOutSorted</c> together with the duplicates file path.</param>
        /// <param name="progress">Optional callback that receives progress while reading and writing.</param>
        /// <param name="dataTransportation">Forwarded unchanged to <c>WriteOutSorted</c>.</param>
        /// <param name="deleteDbConnPath">Forwarded to <c>WriteOutSorted</c> as its <c>deleteDb</c> argument.</param>
        /// <param name="writeOutSortFile">When false, <c>WriteOutSorted</c> is skipped and only the header is copied onto the results.</param>
        /// <param name="maxBatchSize">Maximum batch size handed to the bulk inserter.</param>
        /// <returns>The <c>SortResults</c> instance populated during the run.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sortDefinitions"/> is null.</exception>
        internal static SortResults SortFixedWidthByKeyDefCore(string sourcefilePath,
                                   SortDefinitions sortDefinitions,
                                   Action<string, string[]> setKeys,
                                   Func<string, bool> dataFilter = null,
                                   string destinationFolder = null,
                                   bool hasHeader = true,
                                   bool returnDuplicates = false,
                                   Action<SortProgress> progress = null,
                                   DataTransportation dataTransportation = null,
                                   bool deleteDbConnPath = true,
                                   bool writeOutSortFile = true,
                                   int maxBatchSize = 250000)
        {
            ArgumentValidation.Validate(sourcefilePath, setKeys, destinationFolder);
            // Fail fast with a descriptive exception instead of a NullReferenceException mid-read.
            if (sortDefinitions == null)
            {
                throw new ArgumentNullException("sortDefinitions");
            }

            SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
            SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
            SortProgress srtProgress = new SortProgress();
            try
            {
                srtResults.DeleteDuplicatesFile();

                // The key count is the same for every line, so it is looked up once
                // (assumes the definitions are not modified while the file is being read).
                int keyCount = sortDefinitions.GetKeys().Count;

                using (StreamReader reader = new StreamReader(sourcefilePath))
                using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
                {
                    string line;
                    srtVars.Header = GetHeader(hasHeader, reader);
                    srtProgress.InitReading();
                    while ((line = reader.ReadLine()) != null)
                    {
                        srtResults.IncrementLinesRead();
                        ReportReadProgress(progress, srtProgress, srtResults.LinesRead);
                        if (dataFilter == null || dataFilter(line))
                        {
                            // A fresh array per line: the inserter keeps a reference to it.
                            string[] keyValues = new string[keyCount];
                            setKeys(line, keyValues);
                            // The line is stored with the preserve marker appended and compressed;
                            // WriteOutSorted below is told the data is compressed.
                            sortBulkInserter.Add(new SortKeyData { KeyValues = keyValues, Data = (line + Constants.Common.PreserveCharacter).Compress() });
                        }
                        else
                        {
                            srtResults.IncrementFiltered();
                        }
                    }
                    sortBulkInserter.InsertAnyLeftOvers();
                    sortBulkInserter.AddUnUniqueIndex();
                }
                srtProgress.InitWriting();
                if (writeOutSortFile)
                {
                    srtResults.WriteOutSorted(dbConnPath: srtVars.DbConnPath,
                                              header: srtVars.Header,
                                              sortDefinitions: sortDefinitions,
                                              delimiter: Constants.Delimiters.Tab,
                                              returnDuplicates: returnDuplicates,
                                              dupesFilePath: srtResults.DuplicatesFilePath,
                                              compressed: true,
                                              progress: (counter) => { srtProgress.Counter = counter; if (progress != null) { progress(srtProgress); } },
                                              dataTransportation: dataTransportation,
                                              deleteDb: deleteDbConnPath);
                }
                else
                {
                    srtResults.Header = srtVars.Header;
                }

                srtResults.DeleteDuplicatesFileIfNoDuplicates();
            }
            catch (Exception)
            {
                // Run the shared clean-up for this sort, then let the caller see the original exception.
                CleanUp(srtVars, srtResults);
                throw;
            }
            return srtResults;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Sorts a delimited file given a numeric or string key.
        /// </summary>
        /// <typeparam name="T">Type of the sort key produced by <paramref name="getKey"/>.</typeparam>
        /// <param name="sourcefilePath">Full path and file name of file to be sorted</param>
        /// <param name="getKey">Function to construct the key from the parsed fields and the raw line</param>
        /// <param name="dataFilter">Function to filter out a data line (true to include data or false to exclude data)</param>
        /// <param name="destinationFolder">Folder path where sorted and/or duplicate files will be placed. (Uses folder of sourcefilePath when null)</param>
        /// <param name="delimiter">Character delimiter</param>
        /// <param name="hasHeader">Does the file have a header row</param>
        /// <param name="isUniqueKey">If true duplicates will not be included in the sorted file.</param>
        /// <param name="returnDuplicates">If true duplicates will be written out to file only if isUniqueKey is true.</param>
        /// <param name="sortDir">The sort direction of the key.</param>
        /// <param name="progress">A method to report progress</param>
        /// <param name="dataTransportation">Define the data transportation method.</param>
        /// <param name="deleteDbConnPath">Forwarded to <c>WriteOutSorted</c> as its <c>deleteDb</c> argument.</param>
        /// <param name="writeOutSortFile">When false, <c>WriteOutSorted</c> is skipped and only the header is copied onto the results.</param>
        /// <param name="maxBatchSize">Control the max insert batch size</param>
        /// <returns>The <c>SortResults</c> instance populated during the run.</returns>
        internal static SortResults SortDelimitedByKeyCore<T>(
                                   string sourcefilePath,
                                   Func<string[], string, T> getKey,
                                   Func<string[], string, bool> dataFilter = null,
                                   string destinationFolder = null,
                                   string delimiter = Constants.Delimiters.Comma,
                                   bool hasHeader = true,
                                   bool isUniqueKey = false,
                                   bool returnDuplicates = false,
                                   SortDirection sortDir = SortDirection.Ascending,
                                   Action<SortProgress> progress = null,
                                   DataTransportation dataTransportation = null,
                                   bool deleteDbConnPath = true,
                                   bool writeOutSortFile = true,
                                   int maxBatchSize = 250000)
        {
            ArgumentValidation.Validate<T>(sourcefilePath, getKey, delimiter, destinationFolder, maxBatchSize);
            SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
            SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
            SortProgress srtProgress = new SortProgress();
            try
            {
                srtResults.DeleteDuplicatesFile();
                using (StreamReader reader = new StreamReader(sourcefilePath))
                using (SqliteSortKeyBulkInserter<T> sortBulkInserter = new SqliteSortKeyBulkInserter<T>(srtVars.DbConnPath, uniqueKey: isUniqueKey, maxBatchSize: maxBatchSize))
                {
                    string line;
                    srtVars.Header = GetHeader(hasHeader, reader);
                    srtProgress.InitReading();
                    while ((line = reader.ReadLine()) != null)
                    {
                        srtResults.IncrementLinesRead();
                        ReportReadProgress(progress, srtProgress, srtResults.LinesRead);

                        // Each physical line is parsed on its own; the reader is disposed once the
                        // parser has finished with it.
                        using (StringReader lineReader = new StringReader(line))
                        {
                            FileParser.ParseDelimitedString(lineReader, (fields, lNum) =>
                            {
                                if (dataFilter == null || dataFilter(fields, line))
                                {
                                    T key = getKey(fields, line);
                                    // The stored data is the raw line followed by the delimiter-specific
                                    // value returned by EscapeByDelimiter.
                                    sortBulkInserter.Add(key, line + SortFileHelpers.EscapeByDelimiter(delimiter));
                                }
                                else
                                {
                                    srtResults.IncrementFiltered();
                                }
                            }, delimiter);
                        }
                    }
                    sortBulkInserter.InsertAnyLeftOvers();
                    sortBulkInserter.AddUnUniqueIndex();
                }
                srtProgress.InitWriting();

                if (writeOutSortFile)
                {
                    srtResults.WriteOutSorted(srtVars.DbConnPath,
                                              srtVars.Header,
                                              sortDir,
                                              delimiter,
                                              hasUniqueIndex: isUniqueKey,
                                              returnDuplicates: returnDuplicates,
                                              dupesFilePath: srtResults.DuplicatesFilePath,
                                              progress: (counter) => { srtProgress.Counter = counter; if (progress != null) { progress(srtProgress); } },
                                              dataTransportation: dataTransportation,
                                              deleteDb: deleteDbConnPath);
                }
                else
                {
                    srtResults.Header = srtVars.Header;
                }

                srtResults.DeleteDuplicatesFileIfNoDuplicates();
            }
            catch (Exception)
            {
                // Run the shared clean-up for this sort, then let the caller see the original exception.
                CleanUp(srtVars, srtResults);
                throw;
            }
            return srtResults;
        }