/// <summary>
/// Variant of the file-open routine: loads a delimited data file as the dataset type
/// currently selected in mDataSetType (Expressions, Protein Info, or Factors).
/// For Expressions, the selected columns are sent to R and the matrix read back from R
/// is what gets stored.
/// </summary>
/// <param name="filename">Path of the file to load</param>
/// <returns>
/// True if the dataset was loaded and stored via AddDataset2HashTable;
/// false if the file is missing, has a disallowed extension, could not be parsed,
/// the user cancelled a column-selection dialog, the transfer to R failed,
/// or the factor column names do not match the expression column names
/// </returns>
private bool OpenFile2(string filename)
{
    // R expression that compares the column names of the factors and Eset matrices
    const string factorsCheckCommand = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";

    var success = true;

    // Check that the requested file exists before opening it
    // (validate the path we were asked to open, not a previously loaded one)
    if (!File.Exists(filename))
    {
        return false;
    }

    // Keep the tracked file path in sync with the file being opened,
    // so the extension check and the load below refer to the same file
    if (!string.Equals(mstrLoadedfileName, filename))
    {
        mstrLoadedfileName = filename;
    }

    var fExt = Path.GetExtension(filename);
    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed.", "Error!");
        return false;
    }

    string rowID;
    bool factorsValid;

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions
            factorsValid = true;

            var mDTloaded = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTloaded == null)
            {
                return false;
            }
            mDTloaded.TableName = "AllEset";

            // Select columns
            // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly
            using (var mfrmSelectCols = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(mDTloaded, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            })
            {
                if (mfrmSelectCols.ShowDialog() == DialogResult.OK)
                {
                    rowID = mfrmSelectCols.RowIDColumn; // mass tags
                    var dataCols = mfrmSelectCols.DataColumns;

                    DataTable mDTselectedEset1;
                    try
                    {
                        // Create the datatable
                        mDTselectedEset1 = ArrangeDataTable(mDTloaded, rowID, dataCols);
                        mDTselectedEset1.TableName = "Eset";
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                        return false;
                    }

                    clsRarray.rowNamesID = "Row_ID";

                    // Duplicates are handled during the call 'SendTable2RmatrixNumeric'
                    if (mRConnector.SendTable2RmatrixNumeric("Eset", mDTselectedEset1))
                    {
                        try
                        {
                            // Only validate against factors if factors were already loaded
                            if (mhtDatasets.ContainsKey("Factors"))
                            {
                                mRConnector.EvaluateNoReturn(factorsCheckCommand);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!factorsValid)
                            {
                                success = false;
                            }
                            else
                            {
                                // Get the cleaned data matrix
                                mRConnector.GetTableFromRmatrix("Eset");
                                mDTselectedEset1 = mRConnector.DataTable.Copy();
                                mDTselectedEset1.TableName = "Eset";
                                mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                                AddDataset2HashTable(mDTselectedEset1);
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }
                    else
                    {
                        success = false;
                    }

                    // Protein info needs to be loaded?
                    if (mfrmSelectCols.Proteins && success)
                    {
                        var mdtProts = LoadProtColumns(mDTloaded, mfrmSelectCols.ProteinIDColumn, rowID);
                        mdtProts.TableName = "ProtInfo";
                        AddDataset2HashTable(mdtProts);
                    }
                }
                else
                {
                    // User cancelled the column selection dialog
                    success = false;
                }
            }
            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info
            var mDTtmp = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTtmp == null)
            {
                return false;
            }

            // Dispose the modal dialog once the selections have been read
            using (var mfrmSelectProts = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(mDTtmp, false)
            })
            {
                if (mfrmSelectProts.ShowDialog() == DialogResult.OK)
                {
                    rowID = mfrmSelectProts.RowIDColumn; // mass tags
                    var protIPI = mfrmSelectProts.ProteinIDColumn;
                    var mdtProts = LoadProtColumns(mDTtmp, protIPI, rowID);
                    mdtProts.TableName = "ProtInfo";
                    AddDataset2HashTable(mdtProts);
                }
                else
                {
                    // User cancelled the column selection dialog
                    success = false;
                }
            }
            #endregion
            break;

        case enmDataType.FACTORS:
            #region Load Factors
            factorsValid = true;

            var mDTFactors = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTFactors == null)
            {
                return false;
            }

            if (mDTFactors.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (mRConnector.SendTable2RmatrixNonNumeric("factors", mDTFactors))
            {
                try
                {
                    // Only validate against expressions if expressions were already loaded
                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        mRConnector.EvaluateNoReturn(factorsCheckCommand);
                        factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (!factorsValid)
                    {
                        success = false;
                    }
                    else
                    {
                        UpdateFactorInfoArray();
                        mDTFactors.Columns[0].ColumnName = "Factors";
                        mDTFactors.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }
            else
            {
                // The table never reached R; do not register it as a loaded dataset
                success = false;
            }

            if (success)
            {
                AddDataset2HashTable(mDTFactors);
            }
            #endregion
            break;

        default:
            break;
    }

    return success;
}
/// <summary>
/// Main routine that opens data files (Expressions, Protein Info, Factors)
/// in comma or tab delimited CSVs (and Excel as a future possibility).
/// The file is read with clsDataTable.LoadFile2DataTableFastCSVReader and handled
/// according to the dataset type currently selected in mDataSetType.
/// Duplicate rows in the Expressions table are removed in C#
/// (clsDataTable.RemoveDuplicateRows2) before the table is sent to R.
/// </summary>
/// <param name="filePath">Path of the file to load; also stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and stored via AddDataset2HashTable;
/// false if the file is missing, has no extension or a disallowed extension, could not be parsed,
/// the user cancelled a column-selection dialog, the transfer to R failed,
/// or the factor column names do not match the expression column names
/// </returns>
private bool OpenFile(string filePath)
{
    // R expression that compares the column names of the factors and Eset matrices
    const string factorsCheckCommand = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";

    string rowID;
    var success = true;
    bool factorsValid;

    // Check that the file exists before opening it
    if (!File.Exists(filePath))
    {
        return false;
    }

    // Track the file being opened
    if (!string.Equals(mstrLoadedfileName, filePath))
    {
        mstrLoadedfileName = filePath;
    }

    var fExt = Path.GetExtension(filePath);
    if (string.IsNullOrWhiteSpace(fExt))
    {
        MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type",
                        "Error!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
        return false;
    }

    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
        return false;
    }

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions
            factorsValid = true;

            // Subscribe to progress notifications only once
            if (!mProgressEventWired)
            {
                clsDataTable.OnProgress += clsDataTable_OnProgress;
                mProgressEventWired = true;
            }

            var mDTloaded = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTloaded == null)
            {
                return false;
            }
            mDTloaded.TableName = "AllEset";

            // Select columns
            // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly
            using (var mfrmSelectCols = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(mDTloaded, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            })
            {
                if (mfrmSelectCols.ShowDialog() == DialogResult.OK)
                {
                    rowID = mfrmSelectCols.RowIDColumn; // mass tags
                    var dataCols = mfrmSelectCols.DataColumns;

                    try
                    {
                        var mDTselectedEset1 = ArrangeDataTable(mDTloaded, rowID, dataCols);
                        mDTselectedEset1.Columns[0].ColumnName = "Row_ID";

                        // Handle duplicate rows
                        mDTselectedEset1 = clsDataTable.RemoveDuplicateRows2(mDTselectedEset1, mDTselectedEset1.Columns[0].ColumnName);
                        mDTselectedEset1.TableName = "Eset";

                        success = mRConnector.SendTable2RmatrixNumeric("Eset", mDTselectedEset1);

                        // Only compare against the factors when the new Eset actually reached R;
                        // otherwise the check would run against whatever Eset R held before
                        if (success && mhtDatasets.ContainsKey("Factors"))
                        {
                            mRConnector.EvaluateNoReturn(factorsCheckCommand);
                            factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                        }

                        if (!factorsValid)
                        {
                            success = false;
                        }

                        if (success)
                        {
                            AddDataset2HashTable(mDTselectedEset1);
                            mRConnector.EvaluateNoReturn("print(dim(Eset))");
                            mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                        return false;
                    }

                    // Protein info needs to be loaded?
                    if (mfrmSelectCols.Proteins && success)
                    {
                        var mdtProts = LoadProtColumns(mDTloaded, mfrmSelectCols.ProteinIDColumn, rowID);
                        mdtProts.TableName = "ProtInfo";
                        AddDataset2HashTable(mdtProts);
                    }
                }
                else
                {
                    // User cancelled the column selection dialog
                    success = false;
                }
            }
            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info
            var mDTtmp = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTtmp == null)
            {
                return false;
            }

            // Dispose the modal dialog once the selections have been read
            using (var mfrmSelectProts = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(mDTtmp, false)
            })
            {
                if (mfrmSelectProts.ShowDialog() == DialogResult.OK)
                {
                    rowID = mfrmSelectProts.RowIDColumn; // mass tags
                    var protIPI = mfrmSelectProts.ProteinIDColumn;
                    var mdtProts = LoadProtColumns(mDTtmp, protIPI, rowID);
                    mdtProts.TableName = "ProtInfo";
                    AddDataset2HashTable(mdtProts);
                }
                else
                {
                    // User cancelled the column selection dialog
                    success = false;
                }
            }
            #endregion
            break;

        case enmDataType.FACTORS:
            // Factor files can be CSV files or .txt files
            // The first row of the factor definitions file must have a column named Factor,
            // then column names that match the names in the originally loaded Expressions table
            // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
            //
            // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
            //
            // Factor       P10A  P10B  P11A  P11B  P12A  P12B
            // Time         0     0     5     5     10    10
            // Temperature  Hot   Cold  Hot   Cold  Hot   Cold

            #region Load Factors
            factorsValid = true;

            var mDTFactors = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (mDTFactors == null)
            {
                return false;
            }

            if (mDTFactors.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (mRConnector.SendTable2RmatrixNonNumeric("factors", mDTFactors))
            {
                try
                {
                    // Only validate against expressions if expressions were already loaded
                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        mRConnector.EvaluateNoReturn(factorsCheckCommand);
                        factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (!factorsValid)
                    {
                        success = false;
                    }
                    else
                    {
                        UpdateFactorInfoArray();
                        mDTFactors.Columns[0].ColumnName = "Factors";
                        mDTFactors.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }
            else
            {
                // The table never reached R; do not register it as a loaded dataset
                success = false;
            }

            if (success)
            {
                AddDataset2HashTable(mDTFactors);
            }
            #endregion
            break;

        default:
            break;
    }

    return success;
}