예제 #1
0
        /// <summary>
        /// Builds a list containing one <c>null</c> entry for every sort definition
        /// whose <c>IsUniqueKey</c> flag is set.
        /// </summary>
        /// <param name="sortDefinitions">Sort definitions whose keys are inspected.</param>
        /// <returns>
        /// A new list with one <c>null</c> per unique key; empty when no key is flagged as unique.
        /// </returns>
        private List <dynamic> GetNewDynamicListForKeys(SortDefinitions sortDefinitions)
        {
            var dynList = new List <dynamic>();

            // Fetch the keys once and iterate that list, rather than calling GetKeys()
            // again on every loop test.
            List <SortDefinition> srtKeys = sortDefinitions.GetKeys();

            foreach (SortDefinition srtKey in srtKeys)
            {
                if (srtKey.IsUniqueKey)
                {
                    // Placeholder slot; no key value is known at this point.
                    dynList.Add(null);
                }
            }
            return(dynList);
        }
        /// <summary>
        /// Creates a bulk inserter: stores the sort definitions and batch size, opens a SQLite
        /// connection (version 3, journal mode off) to the given data source, then calls
        /// <c>CreateTable</c> and <c>SetInsertBulkCommand</c>.
        /// </summary>
        /// <param name="connStr">
        /// Value assigned to the connection string's <c>DataSource</c>
        /// (despite the name, it is not used as a complete connection string).
        /// </param>
        /// <param name="sortDefinitions">Sort definitions kept by this inserter.</param>
        /// <param name="maxBatchSize">Value stored in <c>MaxBatchSize</c>.</param>
        public SqliteSortDefBulkInserter(string connStr, SortDefinitions sortDefinitions, int maxBatchSize = 250000)
        {
            sortDefs        = sortDefinitions;
            MaxBatchSize    = maxBatchSize;
            sortKeyDataList = new List <SortKeyData>();

            var connBldr = new SQLiteConnectionStringBuilder
            {
                DataSource  = connStr,
                JournalMode = SQLiteJournalModeEnum.Off,
                Version     = 3
            };

            dbConnection = new SQLiteConnection(connBldr.ConnectionString);
            try
            {
                dbConnection.Open();
                CreateTable();
                SetInsertBulkCommand();
            }
            catch
            {
                // When a constructor throws, the caller never receives the instance, so a
                // surrounding using block cannot dispose it. Release the connection here
                // so the database file is not left held open.
                dbConnection.Dispose();
                throw;
            }
        }
예제 #3
0
        /// <summary>
        /// Sorts a fixed width file using a set of key definitions.
        /// This is the public entry point; it forwards every argument to
        /// <c>SortFixedWidthByKeyDefCore</c> and returns that call's result.
        /// </summary>
        /// <param name="sourcefilePath">Full path and file name of the file to be sorted.</param>
        /// <param name="sortDefinitions">Defines the key values and sort directions.</param>
        /// <param name="setKeys">Action that fills in the key values for a data line.</param>
        /// <param name="dataFilter">Function to filter out a data line (true to include the line, false to exclude it).</param>
        /// <param name="dataTransportation">Defines the data transportation method.</param>
        /// <param name="destinationFolder">Folder path where sorted and/or duplicate files will be placed. (Uses the folder of sourcefilePath when null.)</param>
        /// <param name="hasHeader">Whether the file has a header row.</param>
        /// <param name="returnDuplicates">If true, duplicates are written out to a file, but only when isUniqueKey is true in any of the key definitions.</param>
        /// <param name="progress">A method to report progress.</param>
        /// <param name="maxBatchSize">Controls the maximum insert batch size.</param>
        /// <returns>The <c>SortResults</c> returned by <c>SortFixedWidthByKeyDefCore</c>.</returns>
        public static SortResults SortFixedWidthByKeyDefinitions(string sourcefilePath,
                                   SortDefinitions sortDefinitions,
                                   Action<string, string[]> setKeys,
                                   Func<string, bool> dataFilter = null,
                                   DataTransportation dataTransportation = null,
                                   string destinationFolder = null,
                                   bool hasHeader = true,
                                   bool returnDuplicates = false,
                                   Action<SortProgress> progress = null,
                                   int maxBatchSize = 250000)
        {
            // Named arguments are used throughout because the core method declares its
            // optional parameters in a different order than this public overload.
            SortResults results = SortFixedWidthByKeyDefCore(
                sourcefilePath: sourcefilePath,
                sortDefinitions: sortDefinitions,
                setKeys: setKeys,
                dataFilter: dataFilter,
                destinationFolder: destinationFolder,
                hasHeader: hasHeader,
                returnDuplicates: returnDuplicates,
                progress: progress,
                dataTransportation: dataTransportation,
                maxBatchSize: maxBatchSize);

            return results;
        }
예제 #4
0
        /// <summary>
        /// Core implementation of the fixed width sort. Reads the source file line by line,
        /// hands each accepted line (with its key values) to a <c>SqliteSortDefBulkInserter</c>,
        /// and then optionally writes the sorted output via <c>SortResults.WriteOutSorted</c>.
        /// </summary>
        /// <param name="sourcefilePath">Path of the file to read.</param>
        /// <param name="sortDefinitions">Sort definitions; the number of keys sizes the key array passed to <paramref name="setKeys"/>.</param>
        /// <param name="setKeys">Callback invoked with each accepted line and the key array it should fill.</param>
        /// <param name="dataFilter">Optional predicate; a line is processed when this is null or returns true, otherwise it is counted as filtered.</param>
        /// <param name="destinationFolder">Destination folder passed to <c>SortVars</c>.</param>
        /// <param name="hasHeader">Passed to <c>GetHeader</c> to obtain the header from the reader.</param>
        /// <param name="returnDuplicates">Forwarded to <c>WriteOutSorted</c>.</param>
        /// <param name="progress">Optional progress callback used while reading and while writing.</param>
        /// <param name="dataTransportation">Forwarded to <c>WriteOutSorted</c>.</param>
        /// <param name="deleteDbConnPath">Forwarded to <c>WriteOutSorted</c> as its <c>deleteDb</c> argument.</param>
        /// <param name="writeOutSortFile">When false, <c>WriteOutSorted</c> is skipped and only the header is stored on the results.</param>
        /// <param name="maxBatchSize">Batch size passed to the bulk inserter.</param>
        /// <returns>The populated <c>SortResults</c>.</returns>
        /// <remarks>
        /// Any exception raised after argument validation triggers <c>CleanUp</c> and is then rethrown.
        /// </remarks>
        internal static SortResults SortFixedWidthByKeyDefCore(string sourcefilePath,
                                   SortDefinitions sortDefinitions,
                                   Action<string, string[]> setKeys,
                                   Func<string, bool> dataFilter = null,
                                   string destinationFolder = null,
                                   bool hasHeader = true,
                                   bool returnDuplicates = false,
                                   Action<SortProgress> progress = null,
                                   DataTransportation dataTransportation = null,
                                   bool deleteDbConnPath = true,
                                   bool writeOutSortFile = true,
                                   int maxBatchSize = 250000)
        {
            ArgumentValidation.Validate(sourcefilePath, setKeys, destinationFolder);
            SortVars srtVars = new SortVars(sourcefilePath, destinationFolder);
            SortResults srtResults = new SortResults(sourcefilePath, srtVars.DestFolder, srtVars.DbConnPath);
            SortProgress srtProgress = new SortProgress();
            try
            {
                srtResults.DeleteDuplicatesFile();
                using (StreamReader reader = new StreamReader(sourcefilePath))
                using (SqliteSortDefBulkInserter sortBulkInserter = new SqliteSortDefBulkInserter(srtVars.DbConnPath, sortDefinitions, maxBatchSize))
                {
                    srtVars.Header = GetHeader(hasHeader, reader);
                    srtProgress.InitReading();

                    // The key count is the same for every line, so look it up once
                    // instead of once per line read.
                    int keyCount = sortDefinitions.GetKeys().Count;

                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        srtResults.IncrementLinesRead();
                        ReportReadProgress(progress, srtProgress, srtResults.LinesRead);
                        if (dataFilter == null || dataFilter(line))
                        {
                            // A fresh array per line: the inserter keeps a reference to it.
                            string[] keyValues = new string[keyCount];
                            setKeys(line, keyValues);
                            sortBulkInserter.Add(new SortKeyData { KeyValues = keyValues, Data = (line + Constants.Common.PreserveCharacter).Compress() });
                        }
                        else
                        {
                            srtResults.IncrementFiltered();
                        }
                    }
                    sortBulkInserter.InsertAnyLeftOvers();
                    sortBulkInserter.AddUnUniqueIndex();
                }
                srtProgress.InitWriting();
                if (writeOutSortFile)
                {
                    // Data was stored compressed above, hence compressed: true.
                    srtResults.WriteOutSorted(dbConnPath: srtVars.DbConnPath,
                                              header: srtVars.Header,
                                              sortDefinitions: sortDefinitions,
                                              delimiter: Constants.Delimiters.Tab,
                                              returnDuplicates: returnDuplicates,
                                              dupesFilePath: srtResults.DuplicatesFilePath,
                                              compressed: true,
                                              progress: (counter) => { srtProgress.Counter = counter; if (progress != null) { progress(srtProgress); } },
                                              dataTransportation: dataTransportation,
                                              deleteDb: deleteDbConnPath);
                }
                else
                {
                    srtResults.Header = srtVars.Header;
                }

                srtResults.DeleteDuplicatesFileIfNoDuplicates();
            }
            catch (Exception)
            {
                CleanUp(srtVars, srtResults);
                throw;
            }
            return srtResults;
        }
예제 #5
0
        /// <summary>
        /// Reads every row of the <c>FileData</c> table in the order produced by
        /// <c>sortDefinitions.BuildOrderClause()</c> and emits each line to the sorted file
        /// (when one is written) and to the data transportation passthrough. When unique keys
        /// are defined, a row whose keys equal those of the last emitted row is treated as a
        /// duplicate: it is skipped, and written to the duplicates file when requested.
        /// </summary>
        /// <param name="dbConnPath">Path used as the SQLite data source; the file is deleted afterwards when <paramref name="deleteDb"/> is true.</param>
        /// <param name="header">Header line; stored in <c>Header</c> and written first when not blank.</param>
        /// <param name="sortDefinitions">Supplies the ORDER BY clause and the unique key definitions.</param>
        /// <param name="delimiter">Delimiter passed to <c>SortFileHelpers.UnEscapeByDelimiter</c>.</param>
        /// <param name="returnDuplicates">When true, duplicate rows are written to the duplicates file and counted.</param>
        /// <param name="dupesFilePath">Path of the duplicates file; no duplicates writer is opened when null or empty.</param>
        /// <param name="compressed">When true, <c>LineData</c> is decompressed before use.</param>
        /// <param name="progress">Optional callback passed to <c>ReportProgress</c> with the running sorted-line count.</param>
        /// <param name="dataTransportation">Passed to <c>WriteoutSortedFile</c> to decide whether a sorted file is written, and to <c>DoDataTransportPassthrough</c> for each emitted line.</param>
        /// <param name="deleteDb">When true, the database file is deleted once writing has finished.</param>
        internal void WriteOutSorted(string dbConnPath,
                                     string header,
                                     SortDefinitions sortDefinitions,
                                     string delimiter      = Constants.Delimiters.Comma,
                                     bool returnDuplicates = false,
                                     string dupesFilePath  = "",
                                     bool compressed       = false,
                                     Action <int> progress = null,
                                     DataTransportation dataTransportation = null,
                                     bool deleteDb = true)
        {
            bool writeSortedFile = WriteoutSortedFile(dataTransportation);

            if (writeSortedFile)
            {
                DeleteSortedFile();
            }

            this.Header = header;

            // Both writers are created inside using statements so that the first one is
            // still disposed if opening the second one throws. A null resource is allowed
            // by the using statement and is simply not disposed.
            using (StreamWriter dupeWriter = !string.IsNullOrEmpty(dupesFilePath) ? new StreamWriter(dupesFilePath) : null)
            using (StreamWriter sw = writeSortedFile ? new StreamWriter(SortedFilePath) : null)
            {
                if (!string.IsNullOrWhiteSpace(header))
                {
                    if (sw != null)
                    {
                        sw.WriteLine(header);
                    }

                    // Without a duplicates path there is no writer to put the header into.
                    if (returnDuplicates && dupeWriter != null)
                    {
                        WriteHeaderForDuplicatesFile(true, header, dupeWriter);
                    }
                }

                using (var cn = new SQLiteConnection(@"Data Source=" + dbConnPath))
                {
                    string selectCmd = "SELECT * FROM FileData ORDER BY " + sortDefinitions.BuildOrderClause();
                    cn.Open();
                    using (var cmd = new SQLiteCommand(selectCmd, cn))
                    using (SQLiteDataReader rdr = cmd.ExecuteReader())
                    {
                        // Keys of the last emitted row; empty when no unique keys are defined,
                        // in which case no duplicate detection takes place.
                        var lastReadKeyList = GetNewDynamicListForKeys(sortDefinitions);
                        while (rdr.Read())
                        {
                            string sqlLiteData    = (string)rdr["LineData"];
                            string sqlLiteoutLine = SortFileHelpers.UnEscapeByDelimiter(compressed ? sqlLiteData.Decompress() : sqlLiteData, delimiter);
                            if (lastReadKeyList.Count > 0)
                            {
                                var currentReadKeyList = SetNewDynamicListForKeysValues(sortDefinitions, rdr);
                                if (KeysEqual(currentReadKeyList, lastReadKeyList))
                                {
                                    if (returnDuplicates)
                                    {
                                        // The writer is null when no duplicates path was supplied;
                                        // the duplicate is still counted in that case.
                                        if (dupeWriter != null)
                                        {
                                            dupeWriter.WriteLine(sqlLiteoutLine);
                                        }
                                        this.IncrementDuplicates();
                                    }
                                    // Duplicates never reach the sorted output.
                                    continue;
                                }
                                lastReadKeyList = currentReadKeyList;
                            }
                            if (sw != null)
                            {
                                sw.WriteLine(sqlLiteoutLine);
                            }

                            IncrementLinesSorted();
                            ReportProgress(progress, LinesSorted);
                            DoDataTransportPassthrough(dataTransportation, sqlLiteoutLine);
                        }
                    }
                    // Leaving the using block disposes (and thereby closes) the connection.
                }
            }
            if (deleteDb)
            {
                SortFileHelpers.DeleteFileIfExists(dbConnPath);
            }
        }