/// <summary>
/// DoWork handler that splits the input CSV into consecutive chunk files.
/// Every chunk receives up to <c>homer.rowsPerSample</c> data rows and, when the input has a
/// header, a copy of the cleaned header row. Files are named "subsampleN.csv" (N zero-padded)
/// inside <c>homer.GetOutputLocation()</c>.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler; used for progress reports and cancellation.</param>
/// <param name="e">
/// <c>Argument</c> must hold the <c>Homer</c> settings object. <c>Result</c> is set to "Cancelled"
/// when the user cancels or when reading/writing fails.
/// </param>
private void backgroundWorker_SplitIntoChunks(object sender, System.ComponentModel.DoWorkEventArgs e)
        {
            Homer            homer  = (Homer)e.Argument;
            BackgroundWorker worker = (BackgroundWorker)sender;

            // "D<k>" zero-pads the chunk number to the digit count of homer.numberOfSamples.
            string filenamePadding    = "D" + homer.numberOfSamples.ToString().Length.ToString();
            string quoteString        = homer.GetQuote().ToString();
            string escapedQuoteString = quoteString + quoteString;
            int    numCols            = homer.retainedIndices.Count;

            // Single place where a parsed row is turned into the text that gets written.
            // NOTE(review): "hoju" is not declared inside this method; it is kept exactly as the
            // original call sites had it. Presumably it is the same settings object as "homer" - confirm.
            System.Func<string[], string> cleanRow = row =>
                RowCleaner.CleanRow(row, homer.GetDelim(), quoteString, escapedQuoteString, numCols, hoju.retainedIndices);

            try
            {
                using (var fileStreamIn = File.OpenRead(homer.GetInputFile()))
                using (var streamReader = new StreamReader(fileStreamIn, encoding: homer.GetEncoding()))
                {
                    bool cancelled;

                    if (homer.HasHeader())
                    {
                        var csvDat = CsvParser.ParseHeadAndTail(streamReader, homer.GetDelim(), homer.GetQuote());
                        string headerRowToWriteString = cleanRow(csvDat.Item1.ToArray<string>());

                        cancelled = SplitIntoChunks_WriteChunks(csvDat.Item2, headerRowToWriteString, cleanRow, homer, filenamePadding, worker);
                    }
                    else
                    {
                        var csvDat = CsvParser.Parse(streamReader, homer.GetDelim(), homer.GetQuote());

                        cancelled = SplitIntoChunks_WriteChunks(csvDat, null, cleanRow, homer, filenamePadding, worker);
                    }

                    if (cancelled)
                    {
                        e.Result = "Cancelled";
                    }
                }
            }
            catch (Exception)
            {
                MessageBox.Show("There was an error in writing your output file(s). This often occurs when your output file is already open in another application.", "D'oh!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                e.Result = "Cancelled";
            }
        }

        /// <summary>
        /// Streams <paramref name="rows"/> into consecutive chunk files, starting a new file whenever
        /// the current one has received <c>homer.rowsPerSample</c> rows.
        /// </summary>
        /// <param name="rows">Parsed data rows, in input order.</param>
        /// <param name="headerRowToWriteString">Already-cleaned header text to put at the top of every chunk, or null for no header.</param>
        /// <param name="cleanRow">Converts one parsed row into the text to write.</param>
        /// <param name="homer">Settings object supplying output location, encoding, chunk size and total row count.</param>
        /// <param name="filenamePadding">Numeric format string used for the chunk number in the file name.</param>
        /// <param name="worker">Worker used to report progress and to poll for cancellation.</param>
        /// <returns>true when cancellation was observed and writing stopped early; otherwise false.</returns>
        private static bool SplitIntoChunks_WriteChunks<TRow>(
            System.Collections.Generic.IEnumerable<TRow> rows,
            string headerRowToWriteString,
            System.Func<string[], string> cleanRow,
            Homer homer,
            string filenamePadding,
            BackgroundWorker worker)
            where TRow : System.Collections.Generic.IEnumerable<string>
        {
            ulong sampleNumber     = 0;
            ulong rowsWritten      = 0;
            ulong rowsWrittenTotal = 0;

            FileStream   fileStreamOut = null;
            StreamWriter streamWriter  = null;

            try
            {
                foreach (TRow line in rows)
                {
                    // rowsWritten == 0 means no chunk file is currently open: start the next one.
                    if (rowsWritten == 0)
                    {
                        string filenameOut = Path.Combine(homer.GetOutputLocation(), "subsample" + (sampleNumber + 1).ToString(filenamePadding) + ".csv");
                        fileStreamOut = new FileStream(filenameOut, FileMode.Create, FileAccess.Write, FileShare.None);
                        streamWriter  = new StreamWriter(fileStreamOut, homer.GetEncoding());

                        if (headerRowToWriteString != null)
                        {
                            streamWriter.Write(headerRowToWriteString);
                        }
                    }

                    streamWriter.Write(cleanRow(line.ToArray<string>()));

                    rowsWritten++;
                    rowsWrittenTotal++;

                    if (rowsWritten == homer.rowsPerSample)
                    {
                        // Chunk is full. Disposing the writer flushes it and closes the underlying
                        // stream; clearing both references tells the finally block there is nothing left to close.
                        rowsWritten = 0;
                        sampleNumber++;
                        streamWriter.Dispose();
                        streamWriter  = null;
                        fileStreamOut = null;
                    }

                    // Report progress and poll for cancellation once every 1000 rows.
                    if (rowsWrittenTotal % 1000 == 0)
                    {
                        // Scaled so that 10000 corresponds to homer.GetRowCount() rows written.
                        int pctDone = (int)Math.Round((((double)rowsWrittenTotal / homer.GetRowCount()) * 10000), 0, MidpointRounding.AwayFromZero);
                        worker.ReportProgress(pctDone);

                        if (worker.CancellationPending)
                        {
                            return true;
                        }
                    }
                }

                return false;
            }
            finally
            {
                // Runs on normal completion, cancellation and failure alike, so a partially written
                // chunk is always flushed and its file handle released. A flush failure here
                // propagates to the caller instead of being silently dropped.
                try
                {
                    if (streamWriter != null)
                    {
                        streamWriter.Dispose();
                    }
                }
                finally
                {
                    if (fileStreamOut != null)
                    {
                        fileStreamOut.Dispose();
                    }
                }
            }
        }
// Example #2
// 0
        /// <summary>
        /// DoWork handler that copies a contiguous range of data rows from the input CSV into a
        /// single output file. Data rows are counted from 1; rows numbered job.startRow through
        /// job.endRow (both inclusive) are written, preceded by the header row when the input has one.
        /// </summary>
        /// <param name="sender">The BackgroundWorker running this handler.</param>
        /// <param name="e">Argument holds the Homer settings; Result becomes "Cancelled" on cancel or error.</param>
        private void backgroundWorker_TargetedSubsampling(object sender, System.ComponentModel.DoWorkEventArgs e)
        {
            Homer            job    = (Homer)e.Argument;
            BackgroundWorker worker = sender as BackgroundWorker;

            string quote        = job.GetQuote().ToString();
            string doubledQuote = job.GetQuote().ToString() + job.GetQuote().ToString();
            int    keptColumns  = job.retainedIndices.Count;

            try
            {
                using (FileStream outStream = new FileStream(job.GetOutputLocation(), FileMode.Create, FileAccess.Write, FileShare.None))
                using (StreamWriter writer = new StreamWriter(outStream, job.GetEncoding()))
                {
                    if (!job.HasHeader())
                    {
                        using (var inStream = File.OpenRead(job.GetInputFile()))
                        using (var reader = new StreamReader(inStream, encoding: job.GetEncoding()))
                        {
                            var records = CsvParser.Parse(reader, job.GetDelim(), job.GetQuote());

                            ulong position = 0;

                            foreach (var record in records)
                            {
                                position++;

                                // every 1000 rows: honour a pending cancel, otherwise report progress
                                if (position % 1000 == 0)
                                {
                                    if (worker.CancellationPending)
                                    {
                                        e.Result = "Cancelled";
                                        break;
                                    }

                                    double fraction = (double)position / job.endRow;
                                    worker.ReportProgress((int)Math.Round(fraction * 10000, 0, MidpointRounding.AwayFromZero));
                                }

                                // inside the requested window?
                                if (!(position < job.startRow || position > job.endRow))
                                {
                                    writer.Write(RowCleaner.CleanRow(record.ToArray<string>(), job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices));
                                }

                                // nothing past the last requested row is needed
                                if (position == job.endRow)
                                {
                                    break;
                                }
                            }
                        }
                    }
                    else
                    {
                        using (var inStream = new FileStream(job.GetInputFile(), FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                        using (var reader = new StreamReader(inStream, encoding: job.GetEncoding()))
                        {
                            var parsed = CsvParser.ParseHeadAndTail(reader, job.GetDelim(), job.GetQuote());

                            // the header always goes out first
                            string[] headerCells = parsed.Item1.ToArray<string>();
                            writer.Write(RowCleaner.CleanRow(headerCells, job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices));

                            ulong position = 0;

                            foreach (var record in parsed.Item2)
                            {
                                position++;

                                // every 1000 rows: honour a pending cancel, otherwise report progress
                                if (position % 1000 == 0)
                                {
                                    if (worker.CancellationPending)
                                    {
                                        e.Result = "Cancelled";
                                        break;
                                    }

                                    double fraction = (double)position / job.endRow;
                                    worker.ReportProgress((int)Math.Round(fraction * 10000, 0, MidpointRounding.AwayFromZero));
                                }

                                // inside the requested window?
                                if (!(position < job.startRow || position > job.endRow))
                                {
                                    writer.Write(RowCleaner.CleanRow(record.ToArray<string>(), job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices));
                                }

                                // nothing past the last requested row is needed
                                if (position == job.endRow)
                                {
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            catch
            {
                MessageBox.Show(genericProcessingError, "D'oh!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                e.Result = "Cancelled";
            }
        }
// Example #3
// 0
        /// <summary>
        /// DoWork handler that draws up to <c>homer.numberOfSamples</c> subsamples of
        /// <c>homer.rowsPerSample</c> rows each without replacement: the row numbers 1..row count are
        /// shuffled once and consecutive slices of that shuffled list become the samples, so no row
        /// lands in more than one output file. Sampling stops as soon as the rows are used up;
        /// the last sample may therefore be shorter than requested.
        /// </summary>
        /// <param name="sender">The <see cref="BackgroundWorker"/> running this handler; used for progress reports and cancellation.</param>
        /// <param name="e">
        /// <c>Argument</c> must hold the <c>Homer</c> settings object. <c>Result</c> is set to "Cancelled"
        /// when the user cancels or when reading/writing fails.
        /// </param>
        private void backgroundWorker_SubSampleWithoutReplacement(object sender, System.ComponentModel.DoWorkEventArgs e)
        {
            Homer            homer  = (Homer)e.Argument;
            BackgroundWorker worker = (BackgroundWorker)sender;

            // A seed string, when supplied, makes the shuffle reproducible.
            Random random = String.IsNullOrEmpty(homer.randSeedString)
                ? new Random()
                : new Random(int.Parse(homer.randSeedString));

            // "D<k>" zero-pads the sample number to the digit count of homer.numberOfSamples.
            string filenamePadding    = "D" + homer.numberOfSamples.ToString().Length.ToString();
            string quoteString        = homer.GetQuote().ToString();
            string escapedQuoteString = quoteString + quoteString;
            int    numCols            = homer.retainedIndices.Count;
            int    pctDone            = 0;

            // Single place where a parsed row is turned into the text that gets written.
            // NOTE(review): "hoju" is not declared inside this method; it is kept exactly as the
            // original call sites had it. Presumably it is the same settings object as "homer" - confirm.
            System.Func<string[], string> cleanRow = row =>
                RowCleaner.CleanRow(row, homer.GetDelim(), quoteString, escapedQuoteString, numCols, hoju.retainedIndices);

            #region Randomize order of sample
            ulong[] rowsToSample = new ulong[homer.GetRowCount()];
            for (ulong i = 0; i < (ulong)rowsToSample.LongLength; i++)
            {
                rowsToSample[i] = i + 1;
            }
            // Shuffle by sorting on a random key per row number.
            rowsToSample = rowsToSample.OrderBy(x => random.NextLong()).ToArray<ulong>();
            ulong totalRows = (ulong)rowsToSample.LongLength;
            #endregion

            // Outermost block of the worker: a timer that periodically publishes the latest pctDone.
            TimeSpan reportPeriod = TimeSpan.FromMinutes(0.01);
            using (new System.Threading.Timer(
                       _ => worker.ReportProgress(pctDone), null, reportPeriod, reportPeriod))
            {
                for (ulong sampleNumber = 0; sampleNumber < homer.numberOfSamples; sampleNumber++)
                {
                    if (worker.CancellationPending)
                    {
                        e.Result = "Cancelled";
                        break;
                    }

                    ulong skipToVal = sampleNumber * homer.rowsPerSample;

                    // ">=" rather than ">": when the previous samples consumed exactly every row there
                    // is nothing left to draw, and we must not emit an empty trailing file.
                    if (skipToVal >= totalRows)
                    {
                        break;
                    }

                    // The final sample may be shorter than requested.
                    ulong takeVal = Math.Min(homer.rowsPerSample, totalRows - skipToVal);

                    HashSet<ulong> rowsToKeep = new HashSet<ulong>(rowsToSample.Skip((int)skipToVal).Take((int)takeVal));

                    #region Get Busy Writin' or Get Busy Dyin'
                    string filenameOut;
                    if (String.IsNullOrEmpty(homer.randSeedString))
                    {
                        filenameOut = Path.Combine(homer.GetOutputLocation(), "subsample" + (sampleNumber + 1).ToString(filenamePadding) + ".csv");
                    }
                    else
                    {
                        filenameOut = Path.Combine(homer.GetOutputLocation(), homer.randSeedString + "_subsample" + (sampleNumber + 1).ToString(filenamePadding) + ".csv");
                    }

                    bool cancelled = false;

                    try
                    {
                        using (FileStream fileStreamOut = new FileStream(filenameOut, FileMode.Create, FileAccess.Write, FileShare.None))
                        using (StreamWriter streamWriter = new StreamWriter(fileStreamOut, homer.GetEncoding()))
                        using (var fileStreamIn = File.OpenRead(homer.GetInputFile()))
                        using (var streamReader = new StreamReader(fileStreamIn, encoding: homer.GetEncoding()))
                        {
                            if (homer.HasHeader())
                            {
                                var csvDat = CsvParser.ParseHeadAndTail(streamReader, homer.GetDelim(), homer.GetQuote());

                                //write the header row
                                streamWriter.Write(cleanRow(csvDat.Item1.ToArray<string>()));

                                cancelled = SubSampleWithoutReplacement_WriteSelectedRows(csvDat.Item2, rowsToKeep, streamWriter, cleanRow, homer, sampleNumber, worker, ref pctDone);
                            }
                            else
                            {
                                var csvDat = CsvParser.Parse(streamReader, homer.GetDelim(), homer.GetQuote());

                                cancelled = SubSampleWithoutReplacement_WriteSelectedRows(csvDat, rowsToKeep, streamWriter, cleanRow, homer, sampleNumber, worker, ref pctDone);
                            }
                        }
                    }
                    catch (Exception)
                    {
                        MessageBox.Show(genericProcessingError, "D'oh!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                        e.Result = "Cancelled";
                        return;
                    }
                    #endregion

                    if (cancelled)
                    {
                        e.Result = "Cancelled";
                        break;
                    }
                }
            }
        }

        /// <summary>
        /// Makes one pass over <paramref name="rows"/> and writes every row whose 1-based position is
        /// contained in <paramref name="rowsToKeep"/>, stopping as soon as all selected rows are written.
        /// </summary>
        /// <param name="rows">Parsed data rows, in input order.</param>
        /// <param name="rowsToKeep">1-based row numbers that belong to the current sample.</param>
        /// <param name="streamWriter">Open writer for the current sample file.</param>
        /// <param name="cleanRow">Converts one parsed row into the text to write.</param>
        /// <param name="homer">Settings object; its rowsPerSample and numberOfSamples feed the progress calculation.</param>
        /// <param name="sampleNumber">Zero-based index of the sample being written.</param>
        /// <param name="worker">Worker polled for cancellation.</param>
        /// <param name="pctDone">Progress value, refreshed every 1000 input rows via calcPctDone.</param>
        /// <returns>true when cancellation was observed and the pass stopped early; otherwise false.</returns>
        private bool SubSampleWithoutReplacement_WriteSelectedRows<TRow>(
            System.Collections.Generic.IEnumerable<TRow> rows,
            HashSet<ulong> rowsToKeep,
            StreamWriter streamWriter,
            System.Func<string[], string> cleanRow,
            Homer homer,
            ulong sampleNumber,
            BackgroundWorker worker,
            ref int pctDone)
            where TRow : System.Collections.Generic.IEnumerable<string>
        {
            ulong rowsToWrite = (ulong)rowsToKeep.Count;

            // Nothing selected means there is nothing to look for in the input.
            if (rowsToWrite == 0)
            {
                return false;
            }

            ulong rowNumber   = 0;
            ulong rowsWritten = 0;

            foreach (TRow line in rows)
            {
                rowNumber++;

                // Refresh progress and poll for cancellation once every 1000 input rows.
                if (rowNumber % 1000 == 0)
                {
                    pctDone = calcPctDone(rowsWritten, homer.rowsPerSample, sampleNumber, homer.numberOfSamples);
                    if (worker.CancellationPending)
                    {
                        return true;
                    }
                }

                if (rowsToKeep.Contains(rowNumber))
                {
                    streamWriter.Write(cleanRow(line.ToArray<string>()));
                    rowsWritten++;

                    // Compare against the actual selection size (not rowsPerSample) so a short
                    // final sample also stops early instead of reading to the end of the input.
                    if (rowsWritten == rowsToWrite)
                    {
                        break;
                    }
                }
            }

            return false;
        }
        /// <summary>
        /// DoWork handler that writes job.numberOfSamples subsample files, each built from
        /// job.rowsPerSample random draws with replacement. For every sample the draws are tallied per
        /// row number first; the input is then streamed once and each drawn row is written as many
        /// times as it was drawn.
        /// </summary>
        /// <param name="sender">The BackgroundWorker running this handler.</param>
        /// <param name="e">Argument holds the Homer settings; Result becomes "Cancelled" on cancel or error.</param>
        private void backgroundWorker_SubSampleWithReplacement(object sender, System.ComponentModel.DoWorkEventArgs e)
        {
            Homer            job    = (Homer)e.Argument;
            BackgroundWorker worker = sender as BackgroundWorker;

            // seeded generator when a seed string was supplied, time-seeded otherwise
            Random rng = String.IsNullOrEmpty(job.randSeedString)
                ? new Random()
                : new Random(int.Parse(job.randSeedString));

            string numberFormat = "D" + job.numberOfSamples.ToString().Length.ToString();
            string quote        = job.GetQuote().ToString();
            string doubledQuote = job.GetQuote().ToString() + job.GetQuote().ToString();
            int    keptColumns  = job.retainedIndices.Count;
            int    progress     = 0;

            // the timer wraps all the work and periodically publishes the latest progress value
            TimeSpan tick = TimeSpan.FromMinutes(0.01);

            using (new System.Threading.Timer(_ => worker.ReportProgress(progress), null, tick, tick))
            {
                for (ulong sampleIndex = 0; sampleIndex < job.numberOfSamples; sampleIndex++)
                {
                    // stop producing samples once the user has cancelled from the form
                    if (worker.CancellationPending)
                    {
                        e.Result = "Cancelled";
                        break;
                    }

                    // row number -> how many times that row was drawn for this sample
                    Dictionary<ulong, int> drawCounts = new Dictionary<ulong, int>();

                    for (ulong drawn = 0; drawn < job.rowsPerSample; drawn++)
                    {
                        // NOTE(review): whether the upper bound of NextLong is inclusive is not
                        // visible here - confirm the last row can actually be drawn.
                        ulong pick = rng.NextLong(1, job.GetRowCount());

                        int soFar;
                        drawCounts.TryGetValue(pick, out soFar); // leaves soFar at 0 for a first draw
                        drawCounts[pick] = soFar + 1;
                    }

                    ulong emitted = 0;

                    // output name: optional "<seed>_" prefix, then "subsample<N>.csv"
                    string fileStem = "subsample" + (sampleIndex + 1).ToString(numberFormat) + ".csv";
                    if (!String.IsNullOrEmpty(job.randSeedString))
                    {
                        fileStem = job.randSeedString + "_" + fileStem;
                    }
                    string outPath = Path.Combine(job.GetOutputLocation(), fileStem);

                    try
                    {
                        using (FileStream outStream = new FileStream(outPath, FileMode.Create, FileAccess.Write, FileShare.None))
                        using (StreamWriter writer = new StreamWriter(outStream, job.GetEncoding()))
                        {
                            if (!job.HasHeader())
                            {
                                using (var inStream = File.OpenRead(job.GetInputFile()))
                                using (var reader = new StreamReader(inStream, encoding: job.GetEncoding()))
                                {
                                    var records = CsvParser.Parse(reader, job.GetDelim(), job.GetQuote());

                                    ulong position = 0;

                                    foreach (var record in records)
                                    {
                                        position++;

                                        // refresh progress / poll for cancel every 1000 input rows
                                        if (position % 1000 == 0)
                                        {
                                            progress = calcPctDone(emitted, job.rowsPerSample, sampleIndex, job.numberOfSamples);
                                            if (worker.CancellationPending)
                                            {
                                                e.Result = "Cancelled";
                                                break;
                                            }
                                        }

                                        int copies;
                                        if (drawCounts.TryGetValue(position, out copies))
                                        {
                                            string cleaned = RowCleaner.CleanRow(record.ToArray<string>(), job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices);
                                            for (int c = 0; c < copies; c++)
                                            {
                                                writer.Write(cleaned);
                                            }

                                            emitted += (ulong)copies;

                                            // every draw has been written; the rest of the input is irrelevant
                                            if (emitted == job.rowsPerSample)
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                            else
                            {
                                using (var inStream = File.OpenRead(job.GetInputFile()))
                                using (var reader = new StreamReader(inStream, encoding: job.GetEncoding()))
                                {
                                    var parsed = CsvParser.ParseHeadAndTail(reader, job.GetDelim(), job.GetQuote());

                                    // the header always goes out first
                                    string[] headerCells = parsed.Item1.ToArray<string>();
                                    writer.Write(RowCleaner.CleanRow(headerCells, job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices));

                                    ulong position = 0;

                                    foreach (var record in parsed.Item2)
                                    {
                                        position++;

                                        // refresh progress / poll for cancel every 1000 input rows
                                        if (position % 1000 == 0)
                                        {
                                            progress = calcPctDone(emitted, job.rowsPerSample, sampleIndex, job.numberOfSamples);
                                            if (worker.CancellationPending)
                                            {
                                                e.Result = "Cancelled";
                                                break;
                                            }
                                        }

                                        int copies;
                                        if (drawCounts.TryGetValue(position, out copies))
                                        {
                                            string cleaned = RowCleaner.CleanRow(record.ToArray<string>(), job.GetDelim(), quote, doubledQuote, keptColumns, hoju.retainedIndices);
                                            for (int c = 0; c < copies; c++)
                                            {
                                                writer.Write(cleaned);
                                            }

                                            emitted += (ulong)copies;

                                            // every draw has been written; the rest of the input is irrelevant
                                            if (emitted == job.rowsPerSample)
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                    catch
                    {
                        MessageBox.Show(genericProcessingError, "D'oh!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                        e.Result = "Cancelled";
                        return;
                    }
                }
            }
        }