Example #1
0
        private void cmdUnion_Click(object sender, EventArgs e)
        {
            int N = 3; //Levenshtein Comparator Threshold
            LevenshteinDistanceStringComparer lev_comparator = new LevenshteinDistanceStringComparer(N);

            this.Cursor = Cursors.WaitCursor;

            if (lstHeaders.SelectedIndex == -1)
            {
                MessageBox.Show("You must select a 'Shared Header' to perform the intersection on.");
                return;
            }

            string match_header        = (string)lstHeaders.SelectedItem;
            List <FileStructure> files = listBox1.Items.Cast <FileStructure>().ToList();
            //List<string> headers = FileOperations.ReadHeadersCSV(files[0].FilePath);
            List <string> intersects = new List <string>();

            bool first_pass = true;

            foreach (FileStructure f_struct in files)
            {
                #region Retrieve data in column that matches header selected to intersect against

                //Get Index Match

                int index_header = 0;
                for (int i = 0; i < f_struct.Headers.Count; i++)
                {
                    FieldHeaderComparer comparer = new FieldHeaderComparer();

                    if (comparer.Equals(f_struct.Headers[i], match_header))
                    {
                        index_header = i;
                        break;
                    }
                }

                //Get Column Data From Matching Header
                List <string[]> entries     = FileOperations.ReadCSV(f_struct.FilePath);
                List <string>   data_column = new List <string>();
                foreach (string[] entry in entries)
                {
                    data_column.Add(entry[index_header].Trim());
                }

                #endregion

                #region Perform Intersect Operation

                if (!first_pass)
                {
                    //1 -> 256 Matches
                    //2 -> 253 Matches
                    //3 -> 242 Matches
                    //4 -> 223 Matches
                    //5 -> 208 Matches
                    //6 -> 195 Matches
                    intersects = intersects.Intersect(data_column, lev_comparator).ToList();
                }
                else
                {
                    intersects = data_column;
                    first_pass = false;
                }

                #endregion
            }

            //Find Largest DB File

            FileStructure output_db = files.OrderByDescending(x => x.Filesize).First();

            //Match Intersects with rows in file
            List <string[]> input_entries  = FileOperations.ReadCSV(output_db.FilePath);
            List <string[]> output_matches = new List <string[]>();

            int output_index_header = 0;
            for (int i = 0; i < output_db.Headers.Count; i++)
            {
                FieldHeaderComparer comparer = new FieldHeaderComparer();

                if (comparer.Equals(output_db.Headers[i], match_header))
                {
                    output_index_header = i;
                    break;
                }
            }

            foreach (string[] entry in input_entries)
            {
                bool is_match = false;
                foreach (string intersect_match in intersects)
                {
                    if (lev_comparator.Equals(entry[output_index_header], intersect_match))
                    {
                        is_match = true;
                        break;
                    }
                }

                if (is_match)
                {
                    output_matches.Add(entry);
                }
            }

            //Eliminate Empties and Duplicates

            List <string[]> output_matches_new = new List <string[]>();
            foreach (string[] entry in output_matches)
            {
                if (string.IsNullOrWhiteSpace(entry[output_index_header]))
                {
                    continue;
                }

                bool duplicate = false;
                foreach (string[] new_entry in output_matches_new)
                {
                    if (new_entry[output_index_header] == entry[output_index_header])
                    {
                        duplicate = true;
                        break;
                    }
                }

                if (!duplicate)
                {
                    output_matches_new.Add(entry);
                }
            }


            //Write Out

            using (StreamWriter writer = new StreamWriter(FilePath + "\\intersection.csv", false))
            {
                foreach (string[] entry in output_matches_new)
                {
                    StringBuilder builder = new StringBuilder();
                    foreach (string field in entry)
                    {
                        builder.Append(field + ",");
                    }
                    //builder.Remove(builder.Length - 1, 1);
                    writer.WriteLine(builder.ToString());
                }
            }

            this.Cursor = Cursors.Default;
        }
Example #2
0
        private void cmdAddFolder_Click(object sender, EventArgs e)
        {
            this.Cursor = Cursors.WaitCursor;
            //string FilePath = @"C:\Users\Shawn\Documents\5) Oaktree\2) Sweepstakes\Projects\2) 2017\3) Gene Excel Union";// folderBrowserDialog1.SelectedPath;
            bool convert_excel = chkConvert.Checked;

            //if(folderBrowserDialog1.ShowDialog(this) == System.Windows.Forms.DialogResult.OK)
            //{
            //string FileName = Path.GetFileName(FilePath);

            #region Convert Excel Files without Corresponding CSV

            if (convert_excel)
            {
                List <string> csv_files = Directory.GetFiles(FilePath, "*.csv", SearchOption.TopDirectoryOnly)
                                          .ToList();
                csv_files.ForEach(s => Path.GetFileNameWithoutExtension(s));
                List <string> excel_files = Directory.GetFiles(FilePath, "*.xlsx", SearchOption.TopDirectoryOnly)
                                            .ToList();

                for (int i = 0; i < excel_files.Count; i++)
                {
                    excel_files[i] = Path.GetFileNameWithoutExtension(excel_files[i]);
                }

                excel_files = excel_files.Where(s => !csv_files.Contains(s)).ToList();

                for (int i = 0; i < excel_files.Count; i++)
                {
                    excel_files[i] = FilePath + "\\" + excel_files[i] + ".xlsx";
                }

                foreach (string s in excel_files)
                {
                    FileOperations.SaveAsCSV(s, Path.GetFileNameWithoutExtension(s));
                }
            }
            #endregion

            List <string> files = Directory.GetFiles(FilePath, "*.csv", SearchOption.TopDirectoryOnly)
                                  .ToList();

            #region Get Extension and FileType

            FileStructure file_struct = null;
            foreach (string file in files)
            {
                string extension = Path.GetExtension(file);
                string filename  = Path.GetFileNameWithoutExtension(file);
                string csvfile   = file;
                //If not CSV, convert
                if (extension != ".csv")
                {
                    csvfile = FileOperations.SaveAsCSV(file, filename);
                }

                List <string> field_names = new List <string>();
                file_struct = new FileStructure(csvfile);

                //switch(extension)
                //{
                //    case ".csv":
                //        //FileType = SweepstakesOS.FileType.CSV;
                //        break;
                //    case ".xls":
                //    case ".xlsx":
                //        //FileType = SweepstakesOS.FileType.CSV;
                //        //FilePath = FileOperations.SaveAsCSV(FilePath, file);
                //        break;
                //    default:
                //        break;
                //}
                listBox1.Items.Add(file_struct);
            }

            #endregion

            //}
            GetSharedHeaders();
            this.Cursor = Cursors.Default;
        }