Esempio n. 1
0
        /// <summary>
        /// Processes one input, given either as a local file (<paramref name="f"/>) or as a stream
        /// (<paramref name="s"/>) with its name (<paramref name="fname"/>).
        /// DEBUG builds process the input synchronously; other builds hand it to the first free
        /// slot of the task array, polling until a slot becomes available.
        /// </summary>
        /// <param name="f">Local file to process, or null when the input comes from <paramref name="s"/>.</param>
        /// <param name="s">Input stream, used only when <paramref name="f"/> is null.</param>
        /// <param name="fname">Name shown for (and passed along with) the stream input.</param>
        static void ProcessFile(FileInfo f, Stream s, string fname)
        {
#if DEBUG
            // To aid debugging, skip the thread pool (Task) and handle the files one by one.
            files++;
            string shownName = (f == null) ? fname : f.Name;
            Console.WriteLine("Processing (" + files.ToString() + "): " + shownName);

            InputFile inpfile = (f != null) ? new InputFile(f) : new InputFile(s, fname);
            fileprocstats fps = inpfile.Process();

            globals.totallinesread    += fps.linesread;
            globals.totallineswritten += fps.lineswritten;
#else
            int slot = 0;

            for (;;)
            {
                Task<fileprocstats> occupant = tasks[slot];

                // IsCompleted is true for every finished state (ran to completion, faulted or canceled).
                if (occupant != null && occupant.IsCompleted)
                {
                    ProcessTaskStatus(occupant);                    // collect the result or report the errors
                    occupant.Dispose();
                    tasks[slot] = null;                             // task is done, free the slot
                    occupant    = null;
                }

                if (occupant == null)
                {
                    // Free slot found: start the work there and leave.
                    files++;
                    string shownName = (f == null) ? fname : f.Name;
                    Console.WriteLine("Processing (" + files.ToString() + "): " + shownName);

                    tasks[slot] = Task.Run<fileprocstats>(() =>
                    {
                        InputFile inpfile = (f != null) ? new InputFile(f) : new InputFile(s, fname);
                        return inpfile.Process();
                    });
                    return;
                }

                slot++;
                if (slot >= globals.threads)
                {
                    // Every slot is busy: pause before scanning the array again from the start.
                    slot = 0;
                    Thread.Sleep(1000);
                }
            }
#endif
        }
Esempio n. 2
0
 /// <summary>
 /// Inspects a finished import task: reports its exceptions if it faulted, reports a
 /// cancellation, or adds its line counts to the global totals if it ran to completion.
 /// </summary>
 /// <param name="t">The finished task whose outcome is to be recorded.</param>
 static void ProcessTaskStatus(Task <fileprocstats> t)
 {
     if (t.IsFaulted)
     {
         foreach (var e in t.Exception.InnerExceptions)
         {
             Console.WriteLine("Import Task had an exception: " + e.Message);
         }
     }
     else if (t.IsCanceled)                                                              // IsCompleted is also true for a canceled task, and reading
     {                                                                                   // Result would then throw - so handle cancellation explicitly
         Console.WriteLine("Import Task was canceled.");
     }
     else if (t.IsCompleted)                                                             // if task completed successfully
     {
         fileprocstats fps = t.Result;
         globals.totallinesread    += fps.linesread;
         globals.totallineswritten += fps.lineswritten;
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Parses the input (a local file or a supplied stream, optionally gzip-compressed) character by
        /// character into DataTable rows and bulk copies those rows to the destination SQL table.
        /// </summary>
        /// <remarks>
        /// The next buffer is read asynchronously while the current one is parsed, and full batches of rows
        /// are handed to DoBulkCopy while parsing continues. Exceptions raised inside the processing block
        /// are reported on the console; the reader and streams are closed on every path.
        /// </remarks>
        /// <returns>A fileprocstats carrying the number of input rows read and the number of rows added for writing.</returns>
        public fileprocstats Process()
        {
            int           ind, charrd;
            StringBuilder colval = new StringBuilder(8000);                                         // string buffer to build column value
            bool          instr = false, escape = false, errorinrow = false;
            char          prevchnewline = ' ';
            Task          bctask        = null;
            Task <int>    rdtask        = null;
            GZipStream    gzfs          = null;
            StreamReader  sr            = null;

            char[] buffer;

            try
            {
                IOBuffer iobuffer = new IOBuffer();                                                 // allocate buffers for file IO
                dt = globals.dt.Clone();                                                            // set up a DataTable with the table schema
                dr = dt.NewRow();                                                                   // get a new row with the schema structure
                bc = new SqlBulkCopy(globals.constr, SqlBulkCopyOptions.TableLock | SqlBulkCopyOptions.KeepNulls | SqlBulkCopyOptions.KeepIdentity)
                {
                    DestinationTableName = "[" + globals.schema + "].[" + globals.tablename + "]"
                };
                bc.BulkCopyTimeout = 0;                                                             // never time out

                if (fi == null)                                                                     // file from Azure Blob Storage or ADL?
                {
                    if (gzfile)                                                                     // is the file a gzip?
                    {
                        gzfs = new GZipStream(str, CompressionMode.Decompress);                     // pull input stream through decompression stream
                        sr   = new StreamReader(gzfs, true);                                        // stream through the stream reader we want
                    }
                    else
                    {
                        sr = new StreamReader(str, true);                                           // not gzip, so just need a StreamReader
                    }
                }
                else
                {                                                                                   // local filesystem file
                    if (gzfile)                                                                     // is the file a gzip?
                    {
                        str  = fi.OpenRead();                                                       // open stream from FileInfo
                        gzfs = new GZipStream(str, CompressionMode.Decompress);                     // decompression stream
                        sr   = new StreamReader(gzfs, true);                                        // finally the stream reader we want
                    }
                    else
                    {
                        sr = new StreamReader(fi.FullName, true);                                   // not gzip, so just need a StreamReader
                    }
                }

                while (true)                                                                        // main loop
                {
                    if (rdtask == null)                                                             // do we have a previous read task?
                    {
                        charrd = sr.ReadBlock(iobuffer.CurrentBuffer(), 0, globals.buflen);         // the first time we need to synchronously read
                        if (charrd == 0)                                                            // if we get nothing on the first read, this file must be empty
                        {
                            Console.WriteLine("File " + filename + " is empty.");                   // put out a warning and move on
                            break;
                        }
                    }
                    else
                    {
                        try
                        {
                            rdtask.Wait();                                                          // wait for any previous buffer read to complete
                        }
                        catch (AggregateException)
                        {
                            // Wait() rethrows a failed read; the failure is reported from rdtask.Exception in the IsFaulted branch below
                        }
                        if (!rdtask.IsFaulted)                                                      // did the task have a fault (we assume it either completed or faulted, since we don't cancel tasks)
                        {
                            charrd = rdtask.Result;                                                 // grab the characters read from the completed async read block
                            iobuffer.Switch();                                                      // flip to the other buffer
                        }
                        else
                        {
                            foreach (var e in rdtask.Exception.InnerExceptions)                     // process any task exceptions
                            {
                                Console.WriteLine("Exception on read task of " + filename + ": " + e.Message);
                            }
                            break;
                        }
                        if (charrd == 0)
                        {
                            break;                                                                  // read and processed the whole file - so blow out of the outer while loop
                        }
                    }
                    rdtask = sr.ReadBlockAsync(iobuffer.OtherBuffer(), 0, globals.buflen);          // read the next part of the file into the other buffer while we are processing the current one
                    buffer = iobuffer.CurrentBuffer();                                              // simplify access to current character buffer for code below
                    for (ind = 0; ind < charrd; ind++)                                              // for each character in the current buffer;
                    {
                        if (buffer[ind] == '\\')                                                    // treat a backslash as an escape character
                        {
                            if (escape)                                                             // double escape is not required, so leave alone as two backslash characters
                            {
                                colval.Append("\\\\");                                              // append 2 backslashes
                                escape = false;                                                     // clear escape flag
                            }
                            else
                            {
                                escape = true;                                                      // check for escaped string delimiter or escaped column separator
                            }
                            prevchnewline = ' ';                                                    // have a non newline character, so clear flag
                        }
                        else if (globals.usestrdelim && buffer[ind] == globals.delim)               // using string delimiter and this character is one (otherwise fall through and just append delimiter)
                        {
                            if (escape)                                                             // previous character an "escape"?
                            {
                                colval.Append(globals.delim);                                       // add just the string delimiter (remove escape) to the string value
                                escape = false;                                                     // turn off flag
                            }
                            else
                            {
                                instr = !instr;                                                     // toggle: entering or leaving a delimited column
                            }
                            prevchnewline = ' ';                                                    // have a non newline character, so clear flag
                        }
                        else if (buffer[ind] == globals.colsep)                                     // character is a column separator character
                        {
                            if (escape || (globals.usestrdelim && instr))                           // if not using string delimiter then a column separator in a column value needs to be escaped to be part of the value
                            {
                                colval.Append(globals.colsep);                                      // append just the column separator character - removing the escape
                                escape = false;                                                     // turn off flag
                            }
                            else if (curcol >= dt.Columns.Count)                                    // already past the last column (warning was issued when the overflow started)
                            {
                                colval.Clear();                                                     // discard the surplus value instead of indexing past the schema;
                                curcol++;                                                           // keep counting so the end-of-line check reports the total and skips the row
                            }
                            else
                            {
                                if (colval.Length != 0 && globals.Columns[curcol].stringtype)                 // if we have a column value (vs. an empty string, i.e. null) and it is a character type column
                                {                                                                             // check if user wants any CR/LF substitution
                                    if (globals.CRsub != 0 && colval.ToString().ToUpper() != globals.nullstr) // no point in checking null values
                                    {
                                        colval.Replace(globals.CRsub, '\r');                                  // replace in column carriage return substitution characters with an actual CR
                                    }
                                    if (globals.LFsub != 0 && colval.ToString().ToUpper() != globals.nullstr)
                                    {
                                        colval.Replace(globals.LFsub, '\n');                        // replace in column line feed substitution characters with an actual LF
                                    }
                                }
                                if (CheckColumnValue(ref colval))                                             // at the end of a column - process this column value by checking the value and saving it
                                {
                                    if (colval.Length == 0 || colval.ToString().ToUpper() == globals.nullstr) // zero length columns are treated as nulls and user can specify a string to treat as null
                                    {
                                        dr.SetField <string>(curcol, null);                                   // set null value in DataRow for this column
                                    }
                                    else
                                    {
                                        dr.SetField <string>(curcol, colval.ToString());             // set the current or corrected value for this column in the DataRow
                                    }
                                }
                                else
                                {
                                    errorinrow = true;                                              // if the check of the value failed - set the error flag
                                }
                                colval.Clear();                                                     // set up for next column value
                                if (++curcol >= dt.Columns.Count)                                   // increment to the next column in the DataRow
                                {
                                    Console.WriteLine("File: " + filename + " Too many columns: Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString());
                                }
                            }
                            prevchnewline = ' ';                                                    // have a non newline character, so clear flag
                        }
                        else if (buffer[ind] == '\n' || buffer[ind] == '\r')                        // have a newline character (either a carriage return or line feed)
                        {
                            if (prevchnewline == ' ' || buffer[ind] == prevchnewline)               // count a line unless this is the second, different character of a CR+LF / LF+CR pair (i.e. 2 LFs = 2 lines)
                            {
                                filelines++;                                                        // increment actual line in file for debugging input values
                                prevchnewline = buffer[ind];                                        // save this character to compare to the next one
                            }
                            else
                            {
                                prevchnewline = ' ';                                                // we only want to increment line once for CR+LF or LF+CR sequences (since they are treated as a single line end)
                            }
                            if (escape || (globals.usestrdelim && instr))                           // if string delimiter turned off, new line characters in column values need to be escaped
                            {
                                colval.Append(buffer[ind]);                                         // append newline character to column value
                                escape = false;
                            }
                            else if (colval.Length == 0 && curcol == 0)
                            {
                                continue;                                                           // ignore blank lines (often at end of file) or second newline character
                            }
                            else
                            {                                                                       // otherwise we have the end of the line, so need to process the last column and the row
                                if (colval.Length != 0)                                             // if we have a column value (vs. an empty string, i.e. null)
                                {                                                                   // check if user wants any CR/LF substitution
                                    if (globals.CRsub != 0)
                                    {
                                        colval.Replace(globals.CRsub, '\r');                        // replace in column carriage return substitution characters with an actual CR
                                    }
                                    if (globals.LFsub != 0)
                                    {
                                        colval.Replace(globals.LFsub, '\n');                        // replace in column line feed substitution characters with an actual LF
                                    }
                                }
                                if (curcol >= dt.Columns.Count)                                     // too many columns?
                                {
                                    Console.WriteLine("File: " + filename + " Too many columns (" + curcol.ToString() + "): Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString());
                                }
                                else
                                {
                                    if (curcol != dt.Columns.Count - 1)                             // warn on not enough columns
                                    {
                                        Console.WriteLine("File: " + filename + " Too few columns (" + curcol.ToString() + "): Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString());
                                    }
                                    if (CheckColumnValue(ref colval))                                             // at the end of a column - process this column value by checking the value and saving it
                                    {
                                        if (colval.Length == 0 || colval.ToString().ToUpper() == globals.nullstr) // zero length columns are treated as nulls as well as user specified string
                                        {
                                            dr.SetField <string>(curcol, null);                                   // set null value in DataRow for this column
                                        }
                                        else
                                        {
                                            dr.SetField <string>(curcol, colval.ToString());         // set the current or corrected value for this column in the DataRow
                                        }
                                    }
                                    else
                                    {
                                        errorinrow = true;                                          // if the check of the value failed - set the error flag
                                    }
                                    if (!errorinrow)                                                // don't save the row if any of its column values failed the check
                                    {
                                        dt.Rows.Add(dr);                                            // add row to table
                                        lineswritten++;                                             // increment output line count
                                    }
                                }
                                colval.Clear();                                                     // set up for next row
                                errorinrow = false;                                                 // clear error flag for row
                                curcol     = 0;                                                     // start on new line
                                lines++;                                                            // increment "line" count in the input file - i.e. the input row count
                                if (--rowsperbcwrite == 0)                                          // decrement rows per SQL write counter until we hit zero
                                {
                                    if (bctask != null)
                                    {
                                        bctask.Wait();                                              // did we have a previous async SQL bulk copy transfer? If so, wait for completion...
                                    }
                                    olddt          = dt;                                            // save reference to current data table
                                    bctask         = DoBulkCopy(olddt);                             // asynchronously bulk write table to SQL table
                                    dt             = globals.dt.Clone();                            // clone table metadata only to reset DataTable
                                    rowsperbcwrite = globals.rowsperbcwrite;                        // reset count down counter for next SQL write
                                }
                                dr = dt.NewRow();                                                   // get a new row to play with in the new DataTable
                            }
                        }
                        else                                                                        // processing a "normal" character
                        {
                            if (escape)                                                             // was previous character an "escape"?
                            {
                                colval.Append('\\');                                                // was not an escape, just a backslash, so just append the character to the column value
                                escape = false;                                                     // turn off flag
                            }
                            colval.Append(buffer[ind]);                                             // append current character to column value
                            prevchnewline = ' ';                                                    // have a non newline character, so clear flag
                        }
                    }
                }
                if (curcol != 0 || colval.Length != 0)                                              // incomplete record at end of file?
                {
                    if (curcol < dt.Columns.Count)                                                  // only look up the column name when the index is inside the schema
                    {
                        Console.WriteLine("File: " + filename + " Too few columns (" + curcol.ToString() + "): Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString() + " Incomplete value for Column Name: " + globals.Columns[curcol].Name + " - ROW NOT SAVED");
                    }
                    else
                    {
                        Console.WriteLine("File: " + filename + " Too many columns (" + curcol.ToString() + "): Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString() + " - ROW NOT SAVED");
                    }
                }
                if (bctask != null)                                                                 // did we have a previous async SQL bulk copy transfer?
                {
                    bctask.Wait();                                                                  // wait for previous bulk copy to finish
                    olddt.Clear();                                                                  // clear data table we were writing
                    olddt.Dispose();
                }
                bc.WriteToServer(dt);                                                               // bulk write datatable to SQL Server
                bc.Close();
                dt.Clear();                                                                         // clean up table
            }
            catch (Exception e)
            {
                string prefix = "Exception: " + e.Message + "\nFile: " + filename + " Line: " + filelines.ToString() + " Row: " + (lines + 1).ToString();

                // Only look up column metadata when curcol is inside the schema, so that reporting an error can never throw itself.
                // NOTE(review): assumes globals.Columns has one entry per dt column, as the column-count checks above do - confirm.
                if (dt != null && curcol < dt.Columns.Count)
                {
                    Console.WriteLine(prefix + " Column: " + globals.Columns[curcol].Name + " Type: " + globals.Columns[curcol].Type + " Value Length: " + colval.Length.ToString() + " Column Length: " + globals.Columns[curcol].Length.ToString() + " Value: " + colval.ToString());
                }
                else
                {
                    Console.WriteLine(prefix + " Column index: " + curcol.ToString() + " Value Length: " + colval.Length.ToString() + " Value: " + colval.ToString());
                }
            }
            finally
            {
                if (sr != null)                                                                     // sr is still null when set-up failed before the reader was created
                {
                    sr.Close();                                                                     // close stream reader
                }
                if (gzfs != null)
                {
                    gzfs.Close();                                                                   // close gz stream decompressor, if we used one
                }
                if (str != null)
                {
                    str.Close();                                                                    // close the underlying input stream
                }
            }

            fileprocstats ret = new fileprocstats();                                                // return multiple values from the worker thread using a simple class structure

            ret.linesread    = lines;
            ret.lineswritten = lineswritten;
            return(ret);
        }