/// <summary>
/// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType),
/// lets the user pick columns where applicable, and sends the data to R.
/// Files are read with clsDataTable.LoadFile2DataTableFastCSVReader.
/// Duplicate expression rows are removed in C# (RemoveDuplicateRows2) before the data is sent to R.
/// </summary>
/// <param name="filePath">Path of the file to load; stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile(string filePath)
{
    string rowID;
    var success = true;

    // check that the file exists before opening it
    if (!File.Exists(filePath))
    {
        return false;
    }

    mstrLoadedfileName = filePath;

    var fExt = Path.GetExtension(filePath);
    if (string.IsNullOrWhiteSpace(fExt))
    {
        MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!",
                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
        return false;
    }

    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
        return false;
    }

    var dataLoader = new clsDataTable();
    dataLoader.OnError += clsDataTable_OnError;
    dataLoader.OnProgress += clsDataTable_OnProgress;

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            mfrmShowProgress.Reset("Loading data");

            var esetTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (esetTable == null)
            {
                var errorMessage = string.IsNullOrWhiteSpace(mfrmShowProgress.ErrorMessage)
                    ? "Unknown load error"
                    : "Load error: " + mfrmShowProgress.ErrorMessage;

                MessageBox.Show(errorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            esetTable.TableName = "AllEset";

            // Select columns
            var columnSelectionForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(esetTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (columnSelectionForm.ShowDialog() == DialogResult.OK)
            {
                rowID = columnSelectionForm.RowIDColumn; // mass tags
                var dataCols = columnSelectionForm.DataColumns.ToList();

                try
                {
                    var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols);

                    // Rename the first column from MassTagID (or whatever the user-supplied name is) to Row_ID
                    filteredDataTable.Columns[0].ColumnName = "Row_ID";

                    // Remove rows with no data or duplicate data
                    filteredDataTable = dataLoader.RemoveDuplicateRows2(
                        filteredDataTable, filteredDataTable.Columns[0].ColumnName);

                    // Copy the data into R
                    filteredDataTable.TableName = "Eset";
                    success = mRConnector.SendTable2RmatrixNumeric("Eset", filteredDataTable);

                    // Only compare against the factors when the new Eset matrix actually reached R;
                    // otherwise the comparison would run against whatever Eset R held before
                    if (success && mhtDatasets.ContainsKey("Factors"))
                    {
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        success = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (success)
                    {
                        AddDataset2HashTable(filteredDataTable);
                        mRConnector.EvaluateNoReturn("print(dim(Eset))");
                        mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                if (success &&
                    columnSelectionForm.Proteins &&
                    !string.IsNullOrWhiteSpace(columnSelectionForm.ProteinIDColumn))
                {
                    // Extract the protein info columns from the loaded table and register them
                    var proteinDataTable = LoadProtColumns(esetTable, columnSelectionForm.ProteinIDColumn, rowID,
                                                           columnSelectionForm.ProteinMetadataColumns);
                    proteinDataTable.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinDataTable);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinInfoTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (proteinInfoTable == null)
            {
                return false;
            }

            var proteinSelectionForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
            };

            if (proteinSelectionForm.ShowDialog() == DialogResult.OK)
            {
                rowID = proteinSelectionForm.RowIDColumn; // mass tags
                var proteinIdentifierColumn = proteinSelectionForm.ProteinIDColumn;

                var proteinDataTable = LoadProtColumns(proteinInfoTable, proteinIdentifierColumn, rowID,
                                                       proteinSelectionForm.ProteinMetadataColumns);
                proteinDataTable.TableName = "ProtInfo";
                AddDataset2HashTable(proteinDataTable);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            // Factor files can be CSV files or .txt files
            // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
            // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
            //
            // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
            //
            // Factor        P10A   P10B   P11A   P11B   P12A   P12B
            // Time          0      0      5      5      10     10
            // Temperature   Hot    Cold   Hot    Cold   Hot    Cold

            #region Load Factors

            var factorTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (factorTable == null)
            {
                return false;
            }

            if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show(
                    "Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                    "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        // Factor column names must match the expression column names exactly
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorTable.Columns[0].ColumnName = "Factors";
                        factorTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorTable);
            }

            #endregion
            break;
    }

    return success;
}
/// <summary>
/// Alternate load routine for Expressions, Protein Info, or Factors files (selected by mDataSetType).
/// Unlike OpenFile, the expression table is read back from R after it is sent
/// (GetTableFromRmatrix) and that copy is registered via AddDataset2HashTable.
/// </summary>
/// <param name="filename">Path of the file to load; stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile2(string filename)
{
    var success = true;

    // Check that the file being opened exists.
    // The existence check, the extension check, and the load all refer to the same file:
    // the one passed in by the caller (mirrors OpenFile, which stores its argument in mstrLoadedfileName)
    if (!File.Exists(filename))
    {
        return false;
    }

    mstrLoadedfileName = filename;

    var fExt = Path.GetExtension(filename);
    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed.", "Error!");
        return false;
    }

    string rowID;

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            var loadedTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (loadedTable == null)
            {
                return false;
            }

            loadedTable.TableName = "AllEset";

            // Select columns
            var columnForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(loadedTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (columnForm.ShowDialog() == DialogResult.OK)
            {
                rowID = columnForm.RowIDColumn; // mass tags
                var dataCols = columnForm.DataColumns;

                DataTable esetTable;
                try
                {
                    // Create the data table holding only the selected columns
                    esetTable = ArrangeDataTable(loadedTable, rowID, dataCols);
                    esetTable.TableName = "Eset";
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                clsRarray.rowNamesID = "Row_ID";

                // Duplicates are handled during the call 'SendTable2RmatrixNumeric'
                if (mRConnector.SendTable2RmatrixNumeric("Eset", esetTable))
                {
                    try
                    {
                        var validFactors = true;

                        if (mhtDatasets.ContainsKey("Factors"))
                        {
                            mRConnector.EvaluateNoReturn(
                                "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                            validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                        }

                        if (validFactors)
                        {
                            // Get the cleaned data matrix back from R
                            mRConnector.GetTableFromRmatrix("Eset");
                            esetTable = mRConnector.DataTable.Copy();
                            esetTable.TableName = "Eset";
                            mRConnector.EvaluateNoReturn("print(dim(Eset))");
                            mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                            AddDataset2HashTable(esetTable);
                        }
                        else
                        {
                            success = false;
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                        success = false;
                    }
                }
                else
                {
                    success = false;
                }

                if (success && columnForm.Proteins)
                {
                    // Protein info was requested: extract it from the loaded table and register it
                    var proteinTable = LoadProtColumns(loadedTable, columnForm.ProteinIDColumn, rowID);
                    proteinTable.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinTable);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinInfoTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (proteinInfoTable == null)
            {
                return false;
            }

            var proteinForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
            };

            if (proteinForm.ShowDialog() == DialogResult.OK)
            {
                rowID = proteinForm.RowIDColumn; // mass tags
                var proteinIdColumn = proteinForm.ProteinIDColumn;

                var proteinTable = LoadProtColumns(proteinInfoTable, proteinIdColumn, rowID);
                proteinTable.TableName = "ProtInfo";
                AddDataset2HashTable(proteinTable);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            #region Load Factors

            var factorTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (factorTable == null)
            {
                return false;
            }

            if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                // (same handling as a failed send in the Expressions case)
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorTable.Columns[0].ColumnName = "Factors";
                        factorTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorTable);
            }

            #endregion
            break;

        default:
            break;
    }

    return success;
}
/// <summary>
/// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType),
/// lets the user pick columns where applicable, and sends the data to R.
/// Files are read with clsDataTable.LoadFile2DataTableFastCSVReader.
/// Duplicate expression rows are removed in C# (RemoveDuplicateRows2) before the data is sent to R.
/// </summary>
/// <param name="filePath">Path of the file to load; stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile(string filePath)
{
    string rowID;
    var success = true;

    // check that the file exists before opening it
    if (!File.Exists(filePath))
    {
        return false;
    }

    mstrLoadedfileName = filePath;

    var fExt = Path.GetExtension(filePath);
    if (string.IsNullOrWhiteSpace(fExt))
    {
        MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!",
                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
        return false;
    }

    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
        return false;
    }

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            // clsDataTable.OnProgress is static; subscribe only once so the handler is not invoked repeatedly
            if (!mProgressEventWired)
            {
                clsDataTable.OnProgress += clsDataTable_OnProgress;
                mProgressEventWired = true;
            }

            var loadedTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (loadedTable == null)
            {
                return false;
            }

            loadedTable.TableName = "AllEset";

            // Select columns
            var columnForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(loadedTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (columnForm.ShowDialog() == DialogResult.OK)
            {
                rowID = columnForm.RowIDColumn; // mass tags
                var dataCols = columnForm.DataColumns;

                try
                {
                    var esetTable = ArrangeDataTable(loadedTable, rowID, dataCols);

                    // The row identifier column is always named Row_ID, whatever the user-selected column was called
                    esetTable.Columns[0].ColumnName = "Row_ID";

                    // Handle duplicate rows
                    esetTable = clsDataTable.RemoveDuplicateRows2(esetTable, esetTable.Columns[0].ColumnName);

                    esetTable.TableName = "Eset";
                    success = mRConnector.SendTable2RmatrixNumeric("Eset", esetTable);

                    // Only compare against the factors when the new Eset matrix actually reached R;
                    // otherwise the comparison would run against whatever Eset R held before
                    if (success && mhtDatasets.ContainsKey("Factors"))
                    {
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        success = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (success)
                    {
                        AddDataset2HashTable(esetTable);
                        mRConnector.EvaluateNoReturn("print(dim(Eset))");
                        mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                if (success && columnForm.Proteins)
                {
                    // Protein info was requested: extract it from the loaded table and register it
                    var proteinTable = LoadProtColumns(loadedTable, columnForm.ProteinIDColumn, rowID);
                    proteinTable.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinTable);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinInfoTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (proteinInfoTable == null)
            {
                return false;
            }

            var proteinForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
            };

            if (proteinForm.ShowDialog() == DialogResult.OK)
            {
                rowID = proteinForm.RowIDColumn; // mass tags
                var proteinIdColumn = proteinForm.ProteinIDColumn;

                var proteinTable = LoadProtColumns(proteinInfoTable, proteinIdColumn, rowID);
                proteinTable.TableName = "ProtInfo";
                AddDataset2HashTable(proteinTable);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            // Factor files can be CSV files or .txt files
            // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
            // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
            //
            // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
            //
            // Factor        P10A   P10B   P11A   P11B   P12A   P12B
            // Time          0      0      5      5      10     10
            // Temperature   Hot    Cold   Hot    Cold   Hot    Cold

            #region Load Factors

            var factorTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (factorTable == null)
            {
                return false;
            }

            if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        // Factor column names must match the expression column names exactly
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorTable.Columns[0].ColumnName = "Factors";
                        factorTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorTable);
            }

            #endregion
            break;

        default:
            break;
    }

    return success;
}
/// <summary>
/// Alternate load routine for Expressions, Protein Info, or Factors files (selected by mDataSetType).
/// Unlike OpenFile, the expression table is read back from R after it is sent
/// (GetTableFromRmatrix) and that copy is registered via AddDataset2HashTable.
/// </summary>
/// <param name="filename">Path of the file to load; stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile2(string filename)
{
    var success = true;

    // Check that the file being opened exists.
    // The existence check, the extension check, and the load all refer to the same file:
    // the one passed in by the caller (mirrors OpenFile, which stores its argument in mstrLoadedfileName)
    if (!File.Exists(filename))
    {
        return false;
    }

    mstrLoadedfileName = filename;

    var fExt = Path.GetExtension(filename);
    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed.", "Error!");
        return false;
    }

    string rowID;

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            var sourceTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (sourceTable == null)
            {
                return false;
            }

            sourceTable.TableName = "AllEset";

            // Select columns
            var selectColumnsForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(sourceTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (selectColumnsForm.ShowDialog() == DialogResult.OK)
            {
                rowID = selectColumnsForm.RowIDColumn; // mass tags
                var dataCols = selectColumnsForm.DataColumns;

                DataTable selectedEset;
                try
                {
                    // Create the data table holding only the selected columns
                    selectedEset = ArrangeDataTable(sourceTable, rowID, dataCols);
                    selectedEset.TableName = "Eset";
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                clsRarray.rowNamesID = "Row_ID";

                // Duplicates are handled during the call 'SendTable2RmatrixNumeric'
                if (mRConnector.SendTable2RmatrixNumeric("Eset", selectedEset))
                {
                    try
                    {
                        var validFactors = true;

                        if (mhtDatasets.ContainsKey("Factors"))
                        {
                            mRConnector.EvaluateNoReturn(
                                "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                            validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                        }

                        if (validFactors)
                        {
                            // Get the cleaned data matrix back from R
                            mRConnector.GetTableFromRmatrix("Eset");
                            selectedEset = mRConnector.DataTable.Copy();
                            selectedEset.TableName = "Eset";
                            mRConnector.EvaluateNoReturn("print(dim(Eset))");
                            mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                            AddDataset2HashTable(selectedEset);
                        }
                        else
                        {
                            success = false;
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                        success = false;
                    }
                }
                else
                {
                    success = false;
                }

                if (success && selectColumnsForm.Proteins)
                {
                    // Protein info was requested: extract it from the loaded table and register it
                    var proteinColumns = LoadProtColumns(sourceTable, selectColumnsForm.ProteinIDColumn, rowID);
                    proteinColumns.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinColumns);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinSource = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (proteinSource == null)
            {
                return false;
            }

            var selectProteinsForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinSource, false)
            };

            if (selectProteinsForm.ShowDialog() == DialogResult.OK)
            {
                rowID = selectProteinsForm.RowIDColumn; // mass tags
                var proteinIdColumn = selectProteinsForm.ProteinIDColumn;

                var proteinColumns = LoadProtColumns(proteinSource, proteinIdColumn, rowID);
                proteinColumns.TableName = "ProtInfo";
                AddDataset2HashTable(proteinColumns);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            #region Load Factors

            var factorsTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (factorsTable == null)
            {
                return false;
            }

            if (factorsTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorsTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                // (same handling as a failed send in the Expressions case)
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorsTable.Columns[0].ColumnName = "Factors";
                        factorsTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorsTable);
            }

            #endregion
            break;

        default:
            break;
    }

    return success;
}
/// <summary>
/// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType),
/// lets the user pick columns where applicable, and sends the data to R.
/// Files are read with clsDataTable.LoadFile2DataTableFastCSVReader.
/// Duplicate expression rows are removed in C# (RemoveDuplicateRows2) before the data is sent to R.
/// </summary>
/// <param name="filePath">Path of the file to load; stored in mstrLoadedfileName</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile(string filePath)
{
    string rowID;
    var success = true;

    // check that the file exists before opening it
    if (!File.Exists(filePath))
    {
        return false;
    }

    mstrLoadedfileName = filePath;

    var fExt = Path.GetExtension(filePath);
    if (string.IsNullOrWhiteSpace(fExt))
    {
        MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!",
                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
        return false;
    }

    if (!ValidExtension(fExt))
    {
        MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
        return false;
    }

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            // clsDataTable.OnProgress is static; subscribe only once so the handler is not invoked repeatedly
            if (!mProgressEventWired)
            {
                clsDataTable.OnProgress += clsDataTable_OnProgress;
                mProgressEventWired = true;
            }

            var sourceTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (sourceTable == null)
            {
                return false;
            }

            sourceTable.TableName = "AllEset";

            // Select columns
            var selectColumnsForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(sourceTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (selectColumnsForm.ShowDialog() == DialogResult.OK)
            {
                rowID = selectColumnsForm.RowIDColumn; // mass tags
                var dataCols = selectColumnsForm.DataColumns;

                try
                {
                    var selectedEset = ArrangeDataTable(sourceTable, rowID, dataCols);

                    // The row identifier column is always named Row_ID, whatever the user-selected column was called
                    selectedEset.Columns[0].ColumnName = "Row_ID";

                    // Handle duplicate rows
                    selectedEset = clsDataTable.RemoveDuplicateRows2(selectedEset, selectedEset.Columns[0].ColumnName);

                    selectedEset.TableName = "Eset";
                    success = mRConnector.SendTable2RmatrixNumeric("Eset", selectedEset);

                    // Only compare against the factors when the new Eset matrix actually reached R;
                    // otherwise the comparison would run against whatever Eset R held before
                    if (success && mhtDatasets.ContainsKey("Factors"))
                    {
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        success = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (success)
                    {
                        AddDataset2HashTable(selectedEset);
                        mRConnector.EvaluateNoReturn("print(dim(Eset))");
                        mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                if (success && selectColumnsForm.Proteins)
                {
                    // Protein info was requested: extract it from the loaded table and register it
                    var proteinColumns = LoadProtColumns(sourceTable, selectColumnsForm.ProteinIDColumn, rowID);
                    proteinColumns.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinColumns);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinSource = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (proteinSource == null)
            {
                return false;
            }

            var selectProteinsForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinSource, false)
            };

            if (selectProteinsForm.ShowDialog() == DialogResult.OK)
            {
                rowID = selectProteinsForm.RowIDColumn; // mass tags
                var proteinIdColumn = selectProteinsForm.ProteinIDColumn;

                var proteinColumns = LoadProtColumns(proteinSource, proteinIdColumn, rowID);
                proteinColumns.TableName = "ProtInfo";
                AddDataset2HashTable(proteinColumns);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            // Factor files can be CSV files or .txt files
            // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
            // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
            //
            // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
            //
            // Factor        P10A   P10B   P11A   P11B   P12A   P12B
            // Time          0      0      5      5      10     10
            // Temperature   Hot    Cold   Hot    Cold   Hot    Cold

            #region Load Factors

            var factorsTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
            if (factorsTable == null)
            {
                return false;
            }

            if (factorsTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                                "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorsTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        // Factor column names must match the expression column names exactly
                        mRConnector.EvaluateNoReturn(
                            "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))");
                        validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorsTable.Columns[0].ColumnName = "Factors";
                        factorsTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorsTable);
            }

            #endregion
            break;

        default:
            break;
    }

    return success;
}
/// <summary>
/// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType),
/// lets the user pick columns where applicable, and sends the data to R.
/// Files are read with clsDataTable.LoadFile2DataTableFastCSVReader.
/// Duplicate expression rows are removed in C# (RemoveDuplicateRows2) before the data is sent to R.
/// </summary>
/// <param name="filePath">Path of the file to load; stored in mLoadedFilename</param>
/// <returns>
/// True if the dataset was loaded and registered via AddDataset2HashTable;
/// false if the file is missing, has an invalid extension, cannot be loaded,
/// cannot be sent to R, fails factor validation, or the user cancels a dialog
/// </returns>
private bool OpenFile(string filePath)
{
    string rowID;
    var success = true;

    // check that the file exists before opening it
    if (!File.Exists(filePath))
    {
        return false;
    }

    mLoadedFilename = filePath;

    var fExt = Path.GetExtension(filePath);
    if (string.IsNullOrWhiteSpace(fExt))
    {
        MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!",
                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
        return false;
    }

    if (!ValidExtension(fExt))
    {
        MessageBox.Show("File type not allowed (must be csv, tsv, txt, xls, or xlsx)", "Error!");
        return false;
    }

    var dataLoader = new clsDataTable();
    dataLoader.OnError += clsDataTable_OnError;
    dataLoader.OnWarning += clsDataTable_OnWarning;
    dataLoader.OnProgress += clsDataTable_OnProgress;

    switch (mDataSetType)
    {
        case enmDataType.ESET:
            #region Load Expressions

            mProgressForm.Reset("Loading data");

            var esetTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
            if (esetTable == null)
            {
                var errorMessage = string.IsNullOrWhiteSpace(mProgressForm.ErrorMessage)
                    ? "Unknown load error"
                    : "Load error: " + mProgressForm.ErrorMessage;

                MessageBox.Show(errorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            // Show any warning recorded on the progress form after the load
            if (!string.IsNullOrWhiteSpace(mProgressForm.WarningMessage))
            {
                MessageBox.Show(mProgressForm.WarningMessage, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }

            esetTable.TableName = "AllEset";

            // Select columns
            var columnSelectionForm = new frmSelectColumns
            {
                PopulateListBox = clsDataTable.DataTableColumns(esetTable, false),
                Proteins = mhtDatasets.ContainsKey("Protein Info")
            };

            if (columnSelectionForm.ShowDialog() == DialogResult.OK)
            {
                rowID = columnSelectionForm.RowIDColumn; // mass tags
                var dataCols = columnSelectionForm.DataColumns.ToList();

                try
                {
                    var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols, false);

                    // Rename the first column from MassTagID (or whatever the user-supplied name is) to Row_ID
                    filteredDataTable.Columns[0].ColumnName = "Row_ID";

                    // Remove rows with no data or duplicate data
                    filteredDataTable = dataLoader.RemoveDuplicateRows2(
                        filteredDataTable, filteredDataTable.Columns[0].ColumnName);

                    // Copy the data into R
                    filteredDataTable.TableName = "Eset";
                    success = mRConnector.SendTable2RmatrixNumeric("Eset", filteredDataTable);

                    // Only validate against the factors when the new Eset matrix actually reached R;
                    // otherwise the check would run against whatever Eset R held before.
                    // ValidateFactors replaces an earlier exact-match check done in R:
                    //   identical(as.array(colnames(factors)), as.array(colnames(Eset)))
                    if (success && mhtDatasets.ContainsKey("Factors"))
                    {
                        success = ValidateFactors();
                    }

                    if (!string.IsNullOrWhiteSpace(mProgressForm.ErrorMessage))
                    {
                        MessageBox.Show(mProgressForm.ErrorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    }

                    if (success)
                    {
                        AddDataset2HashTable(filteredDataTable);
                        mRConnector.EvaluateNoReturn("print(dim(Eset))");
                        mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");

                        if (!string.IsNullOrWhiteSpace(mProgressForm.WarningMessage))
                        {
                            MessageBox.Show(mProgressForm.WarningMessage, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                        }
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while extracting data columns");
                    return false;
                }

                if (success &&
                    columnSelectionForm.Proteins &&
                    !string.IsNullOrWhiteSpace(columnSelectionForm.ProteinIDColumn))
                {
                    // Extract the protein info columns from the loaded table and register them
                    var proteinDataTable = LoadProtColumns(esetTable, columnSelectionForm.ProteinIDColumn, rowID,
                                                           columnSelectionForm.ProteinMetadataColumns);
                    proteinDataTable.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinDataTable);
                }
            }
            else
            {
                // User cancelled the column selection dialog
                success = false;
            }

            #endregion
            break;

        case enmDataType.PROTINFO:
            #region Load Protein Info

            var proteinInfoTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
            if (proteinInfoTable == null)
            {
                return false;
            }

            var proteinSelectionForm = new frmSelectProtInfo
            {
                PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
            };

            if (proteinSelectionForm.ShowDialog() == DialogResult.OK)
            {
                rowID = proteinSelectionForm.RowIDColumn; // mass tags
                var proteinIdentifierColumn = proteinSelectionForm.ProteinIDColumn;

                var proteinDataTable = LoadProtColumns(proteinInfoTable, proteinIdentifierColumn, rowID,
                                                       proteinSelectionForm.ProteinMetadataColumns);
                proteinDataTable.TableName = "ProtInfo";
                AddDataset2HashTable(proteinDataTable);
            }
            else
            {
                success = false;
            }

            #endregion
            break;

        case enmDataType.FACTORS:
            // Factor files can be CSV files or .txt files
            // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
            // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
            //
            // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
            //
            // Factor        P10A   P10B   P11A   P11B   P12A   P12B
            // Time          0      0      5      5      10     10
            // Temperature   Hot    Cold   Hot    Cold   Hot    Cold

            #region Load Factors

            var factorTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
            if (factorTable == null)
            {
                return false;
            }

            if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
            {
                MessageBox.Show(
                    "Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                    "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (factorTable.Columns.Count == 0)
            {
                MessageBox.Show(
                    "Factors file is empty; nothing to load",
                    "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (mhtDatasets.ContainsKey("Expressions"))
            {
                // Check for extra sample names in the factorTable and remove them
                // The various calls to R that utilize factors do not work properly when extra columns are present

                mRConnector.EvaluateNoReturn("esetColNames <- colnames(Eset)");
                var esetColNamesFromR = mRConnector.GetSymbolAsStrings("esetColNames");
                var esetColNames = new SortedSet<string>(esetColNamesFromR.Distinct());

                // Find the column that holds the factor names (case-insensitive match).
                // Both names are always tried, so a column named Factors wins over one named Factor.
                // The matched DataColumn itself is kept, so it does not have to be looked up by name again
                DataColumn factorColumn = null;
                foreach (var factorNameToFind in new[] { "Factor", "Factors" })
                {
                    foreach (DataColumn candidate in factorTable.Columns)
                    {
                        if (string.Equals(candidate.ColumnName, factorNameToFind, StringComparison.OrdinalIgnoreCase))
                        {
                            factorColumn = candidate;
                            break;
                        }
                    }
                }

                if (factorColumn == null)
                {
                    MessageBox.Show(
                        string.Format(
                            "The first column of the factors file must be named Factor or Factors; your file has {0}",
                            factorTable.Columns[0].ColumnName),
                        "Error", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    return false;
                }

                // Assure that the factor column is the first column
                if (factorColumn.Ordinal > 0)
                {
                    factorColumn.SetOrdinal(0);
                }

                // Sample columns in the factors file that do not correspond to any expression column.
                // The list is materialized before any column is removed, so the collection is not modified while enumerating
                var factorColumnName = factorColumn.ColumnName;
                var extraFactorCols = factorTable.Columns
                    .Cast<DataColumn>()
                    .Select(column => column.ColumnName)
                    .Where(columnName =>
                        !string.Equals(columnName, factorColumnName, StringComparison.OrdinalIgnoreCase) &&
                        !esetColNames.Contains(columnName))
                    .ToList();

                if (extraFactorCols.Count > 0)
                {
                    foreach (var colToRemove in extraFactorCols)
                    {
                        factorTable.Columns.Remove(colToRemove);
                    }

                    if (extraFactorCols.Count == 1)
                    {
                        MessageBox.Show(
                            string.Format(
                                "Removed 1 unknown sample name from the factors file (did not match any expression column names): {0}",
                                extraFactorCols.First()),
                            "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    }
                    else
                    {
                        MessageBox.Show(
                            string.Format(
                                "Removed {0} unknown sample names from the factors file (names did not match expression column names): {1}",
                                extraFactorCols.Count, string.Join(", ", extraFactorCols)),
                            "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    }
                }
            }

            if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
            {
                // The factors never reached R, so they must not be registered as a loaded dataset
                success = false;
            }
            else
            {
                try
                {
                    var validFactors = true;

                    if (mhtDatasets.ContainsKey("Expressions"))
                    {
                        validFactors = ValidateFactors();
                    }

                    if (validFactors)
                    {
                        UpdateFactorInfoArray();
                        factorTable.Columns[0].ColumnName = "Factors";
                        factorTable.TableName = "factors";
                        mRConnector.EvaluateNoReturn("print(factors)");
                        mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                    }
                    else
                    {
                        success = false;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                    success = false;
                }
            }

            if (success)
            {
                AddDataset2HashTable(factorTable);
            }

            #endregion
            break;
    }

    return success;
}