Esempio n. 1
0
        /// <summary>
        /// Main routine that opens data files (Expressions, Protein Info, Factors)
        /// in comma or tab delimited CSVs (and Excel as a future possibility).
        /// The file is read with clsDataTable.LoadFile2DataTableFastCSVReader;
        /// duplicate expression rows are removed in C# before the data is sent to R.
        /// </summary>
        /// <param name="filePath">
        /// Path of the file to load; cached in mstrLoadedfileName once the file is known to exist
        /// </param>
        /// <returns>
        /// False if the file is missing, has no extension or an unsupported one, cannot be loaded,
        /// cannot be sent to R, has column names that do not match the previously loaded
        /// factors/expressions, or the user cancels a selection dialog; otherwise true
        /// </returns>
        private bool OpenFile(string filePath)
        {
            // R expression that stores, in FactorsValid, whether the column names of
            // the factors matrix are identical to the column names of the Eset matrix
            const string factorsValidCommand =
                "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";

            string rowID;
            var success = true;
            bool validFactors;

            // check that the file exists before opening it
            if (!File.Exists(filePath))
            {
                return false;
            }

            if (!string.Equals(mstrLoadedfileName, filePath))
            {
                mstrLoadedfileName = filePath;
            }

            var fExt = Path.GetExtension(filePath);

            if (string.IsNullOrWhiteSpace(fExt))
            {
                MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type",
                                "Error!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!ValidExtension(fExt))
            {
                MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
                return false;
            }

            var dataLoader = new clsDataTable();

            dataLoader.OnError += clsDataTable_OnError;
            dataLoader.OnProgress += clsDataTable_OnProgress;

            switch (mDataSetType)
            {
                case enmDataType.ESET:

                    #region Load Expressions

                    validFactors = true;

                    mfrmShowProgress.Reset("Loading data");

                    var esetTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (esetTable == null)
                    {
                        var errorMessage = string.IsNullOrWhiteSpace(mfrmShowProgress.ErrorMessage)
                            ? "Unknown load error"
                            : "Load error: " + mfrmShowProgress.ErrorMessage;

                        MessageBox.Show(errorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }

                    esetTable.TableName = "AllEset";

                    // Select columns.
                    // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly.
                    using (var columnSelectionForm = new frmSelectColumns
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(esetTable, false),
                        Proteins = mhtDatasets.ContainsKey("Protein Info")
                    })
                    {
                        if (columnSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = columnSelectionForm.RowIDColumn; // mass tags
                        var dataCols = columnSelectionForm.DataColumns.ToList();

                        try
                        {
                            var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols);

                            // Rename the first column from MassTagID (or whatever the user-supplied name is) to Row_ID
                            filteredDataTable.Columns[0].ColumnName = "Row_ID";

                            // Remove rows with no data or duplicate data
                            filteredDataTable = dataLoader.RemoveDuplicateRows2(
                                filteredDataTable,
                                filteredDataTable.Columns[0].ColumnName);

                            // Copy the data into R
                            filteredDataTable.TableName = "Eset";
                            success = mRConnector.SendTable2RmatrixNumeric("Eset", filteredDataTable);

                            if (mhtDatasets.ContainsKey("Factors"))
                            {
                                // Factors were loaded earlier; their column names must match the new expression data
                                mRConnector.EvaluateNoReturn(factorsValidCommand);
                                validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!validFactors)
                            {
                                success = false;
                            }

                            if (success)
                            {
                                AddDataset2HashTable(filteredDataTable);
                                mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message,
                                            "Exception while extracting data columns");
                            return false;
                        }

                        if (columnSelectionForm.Proteins &&
                            !string.IsNullOrWhiteSpace(columnSelectionForm.ProteinIDColumn) && success)
                        {
                            // Extract the protein info columns from the same file and store them as their own dataset
                            var proteinDataTable = LoadProtColumns(esetTable, columnSelectionForm.ProteinIDColumn, rowID,
                                                                   columnSelectionForm.ProteinMetadataColumns);
                            proteinDataTable.TableName = "ProtInfo";
                            AddDataset2HashTable(proteinDataTable);
                        }
                    }

                    #endregion

                    break;

                case enmDataType.PROTINFO:

                    #region Load Protein Info

                    var proteinInfoTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (proteinInfoTable == null)
                    {
                        return false;
                    }

                    // Dispose the modal dialog explicitly; ShowDialog does not do it on close
                    using (var proteinSelectionForm = new frmSelectProtInfo
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
                    })
                    {
                        if (proteinSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = proteinSelectionForm.RowIDColumn; // mass tags
                        var proteinIdentifierColumn = proteinSelectionForm.ProteinIDColumn;
                        var proteinDataTable = LoadProtColumns(proteinInfoTable, proteinIdentifierColumn, rowID,
                                                               proteinSelectionForm.ProteinMetadataColumns);
                        proteinDataTable.TableName = "ProtInfo";
                        AddDataset2HashTable(proteinDataTable);
                    }

                    #endregion

                    break;

                case enmDataType.FACTORS:

                    // Factor files can be CSV files or .txt files
                    // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
                    // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
                    //
                    // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
                    //
                    // Factor       P10A  P10B  P11A  P11B  P12A  P12B
                    // Time         0     0     5     5     10    10
                    // Temperature  Hot   Cold  Hot   Cold  Hot   Cold

                    #region Load Factors

                    validFactors = true;
                    var factorTable = dataLoader.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (factorTable == null)
                    {
                        return false;
                    }

                    if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
                    {
                        MessageBox.Show(
                            "Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                            "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }

                    if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
                    {
                        // The table never reached R, so it must not be registered as a loaded dataset
                        success = false;
                    }
                    else
                    {
                        try
                        {
                            if (mhtDatasets.ContainsKey("Expressions"))
                            {
                                // Expressions were loaded earlier; the factor column names must match them
                                mRConnector.EvaluateNoReturn(factorsValidCommand);
                                validFactors = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!validFactors)
                            {
                                success = false;
                            }
                            else
                            {
                                UpdateFactorInfoArray();
                                factorTable.Columns[0].ColumnName = "Factors";
                                factorTable.TableName = "factors";
                                mRConnector.EvaluateNoReturn("print(factors)");
                                mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }

                    if (success)
                    {
                        AddDataset2HashTable(factorTable);
                    }

                    #endregion

                    break;
            }

            return success;
        }
        /// <summary>
        /// Loads the file named by mstrLoadedfileName as Expressions, Protein Info, or Factors
        /// (selected by mDataSetType) and stores the resulting table via AddDataset2HashTable.
        /// For expressions, duplicates are handled while sending the data to R, and the
        /// cleaned matrix is then read back from R before being stored.
        /// </summary>
        /// <remarks>
        /// NOTE(review): only the extension check uses <paramref name="filename"/>; the existence
        /// check and the actual load both use mstrLoadedfileName. Presumably callers set
        /// mstrLoadedfileName to the same path beforehand -- confirm against the callers.
        /// </remarks>
        /// <param name="filename">File name whose extension is validated with ValidExtension</param>
        /// <returns>
        /// False if the file is missing, has an unsupported extension, cannot be loaded,
        /// cannot be sent to R, has column names that do not match the previously loaded
        /// factors/expressions, or the user cancels a selection dialog; otherwise true
        /// </returns>
        private bool OpenFile2(string filename)
        {
            // R expression that stores, in FactorsValid, whether the column names of
            // the factors matrix are identical to the column names of the Eset matrix
            const string factorsValidCommand =
                "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";

            var success = true;

            // check that the file exists before opening it
            if (!File.Exists(mstrLoadedfileName))
            {
                return false;
            }

            var fExt = Path.GetExtension(filename);

            if (!ValidExtension(fExt))
            {
                MessageBox.Show("Filetype not allowed.", "Error!");
                return false;
            }

            string rowID;
            bool factorsValid;

            switch (mDataSetType)
            {
                case enmDataType.ESET:

                    #region Load Expressions

                    factorsValid = true;
                    var loadedTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (loadedTable == null)
                    {
                        return false;
                    }

                    loadedTable.TableName = "AllEset";

                    // Select columns.
                    // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly.
                    using (var columnSelectionForm = new frmSelectColumns
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(loadedTable, false),
                        Proteins = mhtDatasets.ContainsKey("Protein Info")
                    })
                    {
                        if (columnSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = columnSelectionForm.RowIDColumn; // mass tags
                        var dataCols = columnSelectionForm.DataColumns;

                        DataTable esetTable;
                        try
                        {
                            // create the datatable
                            esetTable = ArrangeDataTable(loadedTable, rowID, dataCols);
                            esetTable.TableName = "Eset";
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message,
                                            "Exception while extracting data columns");
                            return false;
                        }

                        clsRarray.rowNamesID = "Row_ID";

                        // Duplicates are handled during the call 'SendTable2RmatrixNumeric'
                        if (mRConnector.SendTable2RmatrixNumeric("Eset", esetTable))
                        {
                            try
                            {
                                if (mhtDatasets.ContainsKey("Factors"))
                                {
                                    // Factors were loaded earlier; their column names must match the new expression data
                                    mRConnector.EvaluateNoReturn(factorsValidCommand);
                                    factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                                }

                                if (!factorsValid)
                                {
                                    success = false;
                                }
                                else
                                {
                                    // Get the cleaned data matrix back from R and store that copy
                                    mRConnector.GetTableFromRmatrix("Eset");
                                    esetTable = mRConnector.DataTable.Copy();
                                    esetTable.TableName = "Eset";
                                    mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                    mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                                    AddDataset2HashTable(esetTable);
                                }
                            }
                            catch (Exception ex)
                            {
                                MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                                success = false;
                            }
                        }
                        else
                        {
                            success = false;
                        }

                        // Protein info needs to be loaded ?
                        if (columnSelectionForm.Proteins && success)
                        {
                            // Extract the protein columns from the same file and store them as their own dataset
                            var proteinTable = LoadProtColumns(loadedTable, columnSelectionForm.ProteinIDColumn, rowID);
                            proteinTable.TableName = "ProtInfo";
                            AddDataset2HashTable(proteinTable);
                        }
                    }

                    #endregion

                    break;

                case enmDataType.PROTINFO:

                    #region Load Protein Info

                    var proteinSourceTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (proteinSourceTable == null)
                    {
                        return false;
                    }

                    // Dispose the modal dialog explicitly; ShowDialog does not do it on close
                    using (var proteinSelectionForm = new frmSelectProtInfo
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(proteinSourceTable, false)
                    })
                    {
                        if (proteinSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = proteinSelectionForm.RowIDColumn; // mass tags
                        var proteinIdColumn = proteinSelectionForm.ProteinIDColumn;
                        var proteinTable = LoadProtColumns(proteinSourceTable, proteinIdColumn, rowID);
                        proteinTable.TableName = "ProtInfo";
                        AddDataset2HashTable(proteinTable);
                    }

                    #endregion

                    break;

                case enmDataType.FACTORS:

                    #region Load Factors

                    factorsValid = true;
                    var factorTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (factorTable == null)
                    {
                        return false;
                    }

                    if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
                    {
                        MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors", "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }

                    if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
                    {
                        // The table never reached R, so it must not be registered as a loaded dataset
                        success = false;
                    }
                    else
                    {
                        try
                        {
                            if (mhtDatasets.ContainsKey("Expressions"))
                            {
                                // Expressions were loaded earlier; the factor column names must match them
                                mRConnector.EvaluateNoReturn(factorsValidCommand);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!factorsValid)
                            {
                                success = false;
                            }
                            else
                            {
                                UpdateFactorInfoArray();
                                factorTable.Columns[0].ColumnName = "Factors";
                                factorTable.TableName = "factors";
                                mRConnector.EvaluateNoReturn("print(factors)");
                                mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }

                    if (success)
                    {
                        AddDataset2HashTable(factorTable);
                    }

                    #endregion

                    break;

                default:
                    break;
            }

            return success;
        }
        /// <summary>
        /// Main routine that opens data files (Expressions, Protein Info, Factors)
        /// in comma or tab delimited CSVs (and Excel as a future possibility).
        /// The file is read with clsDataTable.LoadFile2DataTableFastCSVReader;
        /// duplicate expression rows are removed in C# before the data is sent to R.
        /// </summary>
        /// <param name="filePath">
        /// Path of the file to load; cached in mstrLoadedfileName once the file is known to exist
        /// </param>
        /// <returns>
        /// False if the file is missing, has no extension or an unsupported one, cannot be loaded,
        /// cannot be sent to R, has column names that do not match the previously loaded
        /// factors/expressions, or the user cancels a selection dialog; otherwise true
        /// </returns>
        private bool OpenFile(string filePath)
        {
            // R expression that stores, in FactorsValid, whether the column names of
            // the factors matrix are identical to the column names of the Eset matrix
            const string factorsValidCommand =
                "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";

            string rowID;
            var success = true;
            bool factorsValid;

            // check that the file exists before opening it
            if (!File.Exists(filePath))
            {
                return false;
            }

            if (!string.Equals(mstrLoadedfileName, filePath))
            {
                mstrLoadedfileName = filePath;
            }

            var fExt = Path.GetExtension(filePath);

            if (string.IsNullOrWhiteSpace(fExt))
            {
                MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!ValidExtension(fExt))
            {
                MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
                return false;
            }

            switch (mDataSetType)
            {
                case enmDataType.ESET:

                    #region Load Expressions

                    factorsValid = true;

                    // Subscribe to the progress event only once; the flag guards against duplicate subscriptions
                    if (!mProgressEventWired)
                    {
                        clsDataTable.OnProgress += clsDataTable_OnProgress;
                        mProgressEventWired = true;
                    }

                    var loadedTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (loadedTable == null)
                    {
                        return false;
                    }

                    loadedTable.TableName = "AllEset";

                    // Select columns.
                    // A form shown with ShowDialog is not disposed when it closes, so dispose it explicitly.
                    using (var columnSelectionForm = new frmSelectColumns
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(loadedTable, false),
                        Proteins = mhtDatasets.ContainsKey("Protein Info")
                    })
                    {
                        if (columnSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = columnSelectionForm.RowIDColumn; // mass tags
                        var dataCols = columnSelectionForm.DataColumns;

                        try
                        {
                            var esetTable = ArrangeDataTable(loadedTable, rowID, dataCols);

                            // Rename the first column to Row_ID, whatever the user-selected column was called
                            esetTable.Columns[0].ColumnName = "Row_ID";

                            // handle duplicate rows
                            esetTable = clsDataTable.RemoveDuplicateRows2(esetTable,
                                                                          esetTable.Columns[0].ColumnName);

                            // Copy the data into R
                            esetTable.TableName = "Eset";
                            success = mRConnector.SendTable2RmatrixNumeric("Eset", esetTable);

                            if (mhtDatasets.ContainsKey("Factors"))
                            {
                                // Factors were loaded earlier; their column names must match the new expression data
                                mRConnector.EvaluateNoReturn(factorsValidCommand);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!factorsValid)
                            {
                                success = false;
                            }

                            if (success)
                            {
                                AddDataset2HashTable(esetTable);
                                mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message,
                                            "Exception while extracting data columns");
                            return false;
                        }

                        // Protein info needs to be loaded ?
                        if (columnSelectionForm.Proteins && success)
                        {
                            // Extract the protein columns from the same file and store them as their own dataset
                            var proteinTable = LoadProtColumns(loadedTable, columnSelectionForm.ProteinIDColumn, rowID);
                            proteinTable.TableName = "ProtInfo";
                            AddDataset2HashTable(proteinTable);
                        }
                    }

                    #endregion

                    break;

                case enmDataType.PROTINFO:

                    #region Load Protein Info

                    var proteinSourceTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (proteinSourceTable == null)
                    {
                        return false;
                    }

                    // Dispose the modal dialog explicitly; ShowDialog does not do it on close
                    using (var proteinSelectionForm = new frmSelectProtInfo
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(proteinSourceTable, false)
                    })
                    {
                        if (proteinSelectionForm.ShowDialog() != DialogResult.OK)
                        {
                            // User cancelled the column selection
                            return false;
                        }

                        rowID = proteinSelectionForm.RowIDColumn; // mass tags
                        var proteinIdColumn = proteinSelectionForm.ProteinIDColumn;
                        var proteinTable = LoadProtColumns(proteinSourceTable, proteinIdColumn, rowID);
                        proteinTable.TableName = "ProtInfo";
                        AddDataset2HashTable(proteinTable);
                    }

                    #endregion

                    break;

                case enmDataType.FACTORS:

                    // Factor files can be CSV files or .txt files
                    // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
                    // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
                    //
                    // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
                    //
                    // Factor       P10A  P10B  P11A  P11B  P12A  P12B
                    // Time         0     0     5     5     10    10
                    // Temperature  Hot   Cold  Hot   Cold  Hot   Cold

                    #region Load Factors

                    factorsValid = true;
                    var factorTable = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (factorTable == null)
                    {
                        return false;
                    }

                    if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
                    {
                        MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors", "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }

                    if (!mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
                    {
                        // The table never reached R, so it must not be registered as a loaded dataset
                        success = false;
                    }
                    else
                    {
                        try
                        {
                            if (mhtDatasets.ContainsKey("Expressions"))
                            {
                                // Expressions were loaded earlier; the factor column names must match them
                                mRConnector.EvaluateNoReturn(factorsValidCommand);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }

                            if (!factorsValid)
                            {
                                success = false;
                            }
                            else
                            {
                                UpdateFactorInfoArray();
                                factorTable.Columns[0].ColumnName = "Factors";
                                factorTable.TableName = "factors";
                                mRConnector.EvaluateNoReturn("print(factors)");
                                mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }

                    if (success)
                    {
                        AddDataset2HashTable(factorTable);
                    }

                    #endregion

                    break;

                default:
                    break;
            }

            return success;
        }
        private bool OpenFile2(string filename)
        {
            var success = true;

            // check that the file exists before opening it
            if (!File.Exists(mstrLoadedfileName))
            {
                return false;
            }

            var fExt = Path.GetExtension(filename);
            if (!ValidExtension(fExt))
            {
                MessageBox.Show("Filetype not allowed.", "Error!");
                return false;
            }

            string rowID;
            string rcmd;
            bool FactorsValid;
            switch (mDataSetType)
            {
                case enmDataType.ESET:

                    #region Load Expressions

                    FactorsValid = true;
                    var mDTloaded = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTloaded == null)
                    {
                        return false;
                    }
                    mDTloaded.TableName = "AllEset";

                    //Select columns
                    var mfrmSelectCols = new frmSelectColumns
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(mDTloaded, false),
                        Proteins = mhtDatasets.ContainsKey("Protein Info")
                    };

                    if (mfrmSelectCols.ShowDialog() == DialogResult.OK)
                    {
                        rowID = mfrmSelectCols.RowIDColumn; //mass tags
                        var dataCols = mfrmSelectCols.DataColumns;
                        DataTable mDTselectedEset1;
                        try
                        {
                            mDTselectedEset1 = ArrangeDataTable(mDTloaded, rowID, dataCols); // create the datatable
                            mDTselectedEset1.TableName = "Eset";
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message,
                                            "Exception while extracting data columns");
                            return false;
                        }

                        //clsRarray.rowNamesID = mDTselectedEset.Columns[0].ToString();
                        clsRarray.rowNamesID = "Row_ID";
                        if (mRConnector.SendTable2RmatrixNumeric("Eset", mDTselectedEset1))
                        // Duplicates are handled during the call 'SendTable2RmatrixNumeric'
                        {
                            try
                            {
                                if (mhtDatasets.ContainsKey("Factors"))
                                {
                                    rcmd = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";
                                    mRConnector.EvaluateNoReturn(rcmd);
                                    FactorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                                }
                                if (!FactorsValid)
                                {
                                    success = false;
                                }
                                else
                                {
                                    mRConnector.GetTableFromRmatrix("Eset"); // Get the cleaned data matrix
                                    mDTselectedEset1 = mRConnector.DataTable.Copy();
                                    mDTselectedEset1.TableName = "Eset";
                                    mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                    mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                                    AddDataset2HashTable(mDTselectedEset1);
                                }
                            }
                            catch (Exception ex)
                            {
                                MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                                success = false;
                            }
                        }
                        else
                        {
                            success = false;
                        }
                        if (mfrmSelectCols.Proteins && success) // Protein info needs to be loaded ?
                        {
                            // loads to mDataTableProtInfo and then sends to R
                            var mdtProts = LoadProtColumns(mDTloaded, mfrmSelectCols.ProteinIDColumn, rowID);
                            mdtProts.TableName = "ProtInfo";
                            AddDataset2HashTable(mdtProts);
                        }
                    }
                    else
                    {
                        success = false;
                    }

                    #endregion

                    break;
                case enmDataType.PROTINFO:

                    #region Load Protein Info

                    var mDTtmp = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTtmp == null)
                    {
                        return false;
                    }
                    var mfrmSelectProts = new frmSelectProtInfo
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(mDTtmp, false)
                    };

                    if (mfrmSelectProts.ShowDialog() == DialogResult.OK)
                    {
                        rowID = mfrmSelectProts.RowIDColumn; //mass tags
                        var protIPI = mfrmSelectProts.ProteinIDColumn;
                        var mdtProts = LoadProtColumns(mDTtmp, protIPI, rowID);
                        mdtProts.TableName = "ProtInfo";
                        AddDataset2HashTable(mdtProts);
                    }
                    else
                    {
                        success = false;
                    }

                    #endregion

                    break;
                case enmDataType.FACTORS:

                    #region Load Factors

                    FactorsValid = true;
                    var mDTFactors = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTFactors == null)
                    {
                        return false;
                    }
                    if (mDTFactors.Rows.Count > frmDefFactors.MAX_LEVELS)
                    {
                        MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors", "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }
                    if (mRConnector.SendTable2RmatrixNonNumeric("factors", mDTFactors))
                    {
                        try
                        {
                            if (mhtDatasets.ContainsKey("Expressions"))
                            {
                                rcmd = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";
                                mRConnector.EvaluateNoReturn(rcmd);
                                FactorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }
                            if (!FactorsValid)
                            {
                                success = false;
                            }
                            else
                            {
                                UpdateFactorInfoArray();
                                mDTFactors.Columns[0].ColumnName = "Factors";
                                mDTFactors.TableName = "factors";
                                mRConnector.EvaluateNoReturn("print(factors)");
                                mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }
                    if (success)
                    {
                        AddDataset2HashTable(mDTFactors);
                    }

                    #endregion

                    break;
                default:
                    break;
            }
            return success;
        }
        /// <summary>
        /// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType)
        /// and registers the resulting table(s) via AddDataset2HashTable.
        /// The file is read with clsDataTable.LoadFile2DataTableFastCSVReader.
        /// Duplicate expression rows are removed in C# (clsDataTable.RemoveDuplicateRows2)
        /// before the data is sent to R.
        /// </summary>
        /// <param name="filePath">Path of the file to load; it must exist and have an extension accepted by ValidExtension</param>
        /// <returns>
        /// False if the file is missing or has an unsupported extension, if the file cannot be loaded,
        /// if the transfer to R fails, if the user cancels a column selection dialog,
        /// or if the factor column names do not match the expression column names.
        /// True otherwise (including when mDataSetType is not one of the handled types, in which case nothing is loaded).
        /// </returns>
        private bool OpenFile(string filePath)
        {
            string rowID;
            string rcmd;
            var success = true;
            bool factorsValid;

            // check that the file exists before opening it
            if (!File.Exists(filePath))
            {
                return false;
            }

            // Remember the most recently requested file
            if (!string.Equals(mstrLoadedfileName, filePath))
            {
                mstrLoadedfileName = filePath;
            }

            var fExt = Path.GetExtension(filePath);
            if (string.IsNullOrWhiteSpace(fExt))
            {
                MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type", "Error!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!ValidExtension(fExt))
            {
                MessageBox.Show("Filetype not allowed (must be csv, txt, xls, or xlsx)", "Error!");
                return false;
            }

            switch (mDataSetType)
            {
                case enmDataType.ESET:

                    #region Load Expressions

                    factorsValid = true;

                    // Subscribe to the static progress event only once, no matter how many files get opened
                    if (!mProgressEventWired)
                    {
                        clsDataTable.OnProgress += clsDataTable_OnProgress;
                        mProgressEventWired = true;
                    }

                    var mDTloaded = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTloaded == null)
                    {
                        return false;
                    }
                    mDTloaded.TableName = "AllEset";

                    // Let the user select the row ID column and the data columns
                    var mfrmSelectCols = new frmSelectColumns
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(mDTloaded, false),
                        Proteins = mhtDatasets.ContainsKey("Protein Info")
                    };

                    if (mfrmSelectCols.ShowDialog() == DialogResult.OK)
                    {
                        rowID = mfrmSelectCols.RowIDColumn; //mass tags
                        var dataCols = mfrmSelectCols.DataColumns;
                        try
                        {
                            var mDTselectedEset1 = ArrangeDataTable(mDTloaded, rowID, dataCols);
                            mDTselectedEset1.Columns[0].ColumnName = "Row_ID";

                            // handle duplicate rows
                            mDTselectedEset1 = clsDataTable.RemoveDuplicateRows2(mDTselectedEset1,
                                                                                 mDTselectedEset1.Columns[0].ColumnName);

                            // Copy the data into R
                            mDTselectedEset1.TableName = "Eset";
                            success = mRConnector.SendTable2RmatrixNumeric("Eset", mDTselectedEset1);

                            // Only compare against previously loaded factors if the new Eset actually reached R;
                            // otherwise the comparison would run against whatever Eset R currently holds
                            if (success && mhtDatasets.ContainsKey("Factors"))
                            {
                                rcmd = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";
                                mRConnector.EvaluateNoReturn(rcmd);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }
                            if (!factorsValid)
                            {
                                success = false;
                            }
                            if (success)
                            {
                                AddDataset2HashTable(mDTselectedEset1);
                                mRConnector.EvaluateNoReturn("print(dim(Eset))");
                                mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message,
                                            "Exception while extracting data columns");
                            return false;
                        }

                        if (mfrmSelectCols.Proteins && success) // Protein info needs to be loaded ?
                        {
                            // Extract the protein columns from the loaded table and register them
                            var mdtProts = LoadProtColumns(mDTloaded, mfrmSelectCols.ProteinIDColumn, rowID);
                            mdtProts.TableName = "ProtInfo";
                            AddDataset2HashTable(mdtProts);
                        }
                    }
                    else
                    {
                        // The user cancelled the column selection dialog
                        success = false;
                    }

                    #endregion

                    break;
                case enmDataType.PROTINFO:

                    #region Load Protein Info

                    var mDTtmp = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTtmp == null)
                    {
                        return false;
                    }
                    var mfrmSelectProts = new frmSelectProtInfo
                    {
                        PopulateListBox = clsDataTable.DataTableColumns(mDTtmp, false)
                    };

                    if (mfrmSelectProts.ShowDialog() == DialogResult.OK)
                    {
                        rowID = mfrmSelectProts.RowIDColumn; //mass tags
                        var protIPI = mfrmSelectProts.ProteinIDColumn;
                        var mdtProts = LoadProtColumns(mDTtmp, protIPI, rowID);
                        mdtProts.TableName = "ProtInfo";
                        AddDataset2HashTable(mdtProts);
                    }
                    else
                    {
                        // The user cancelled the column selection dialog
                        success = false;
                    }

                    #endregion

                    break;
                case enmDataType.FACTORS:

                    // Factor files can be CSV files or .txt files
                    // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
                    // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
                    //
                    // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
                    //
                    // Factor       P10A  P10B  P11A  P11B  P12A  P12B
                    // Time         0     0     5     5     10    10
                    // Temperature  Hot   Cold  Hot   Cold  Hot   Cold

                    #region Load Factors

                    factorsValid = true;
                    var mDTFactors = clsDataTable.LoadFile2DataTableFastCSVReader(mstrLoadedfileName);
                    if (mDTFactors == null)
                    {
                        return false;
                    }
                    if (mDTFactors.Rows.Count > frmDefFactors.MAX_LEVELS)
                    {
                        MessageBox.Show("Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors", "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        return false;
                    }
                    if (mRConnector.SendTable2RmatrixNonNumeric("factors", mDTFactors))
                    {
                        try
                        {
                            // When expressions are already loaded, the factor columns must match the expression columns exactly
                            if (mhtDatasets.ContainsKey("Expressions"))
                            {
                                rcmd = "FactorsValid<-identical(as.array(colnames(factors)),as.array(colnames(Eset)))";
                                mRConnector.EvaluateNoReturn(rcmd);
                                factorsValid = mRConnector.GetSymbolAsBool("FactorsValid");
                            }
                            if (!factorsValid)
                            {
                                success = false;
                            }
                            else
                            {
                                UpdateFactorInfoArray();
                                mDTFactors.Columns[0].ColumnName = "Factors";
                                mDTFactors.TableName = "factors";
                                mRConnector.EvaluateNoReturn("print(factors)");
                                mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                            }
                        }
                        catch (Exception ex)
                        {
                            MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                            success = false;
                        }
                    }
                    else
                    {
                        // The factors never reached R, so the table must not be registered
                        // (it also has not been given its "factors" table name yet)
                        success = false;
                    }
                    if (success)
                    {
                        AddDataset2HashTable(mDTFactors);
                    }

                    #endregion

                    break;
                default:
                    break;
            }
            return success;
        }
        /// <summary>
        /// Opens a data file (Expressions, Protein Info, or Factors, as selected by mDataSetType)
        /// and registers the resulting table(s) via AddDataset2HashTable.
        /// The file is read with clsDataTable.LoadFile2DataTableFastCSVReader, using a loader instance
        /// whose OnError, OnWarning and OnProgress events are wired to this class.
        /// Duplicate expression rows are removed in C# (RemoveDuplicateRows2) before the data is sent to R.
        /// </summary>
        /// <param name="filePath">Path of the file to load; it must exist and have an extension accepted by ValidExtension</param>
        /// <returns>
        /// False if the file is missing or has an unsupported extension, if the file cannot be loaded,
        /// if the transfer to R fails, if the user cancels a column selection dialog,
        /// or if factor validation fails.
        /// True otherwise (including when mDataSetType is not one of the handled types, in which case nothing is loaded).
        /// </returns>
        private bool OpenFile(string filePath)
        {
            string rowID;
            var success = true;
            bool validFactors;

            // check that the file exists before opening it
            if (!File.Exists(filePath))
            {
                return false;
            }

            // Remember the most recently requested file
            if (!string.Equals(mLoadedFilename, filePath))
            {
                mLoadedFilename = filePath;
            }

            var fExt = Path.GetExtension(filePath);

            if (string.IsNullOrWhiteSpace(fExt))
            {
                MessageBox.Show("File does not have an extension (like .csv or .txt); unable to determine file type",
                                "Error!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return false;
            }

            if (!ValidExtension(fExt))
            {
                MessageBox.Show("File type not allowed (must be csv, tsv, txt, xls, or xlsx)", "Error!");
                return false;
            }

            // The loader is local to this call, so the handlers attached here do not accumulate across calls
            var dataLoader = new clsDataTable();

            dataLoader.OnError += clsDataTable_OnError;
            dataLoader.OnWarning += clsDataTable_OnWarning;
            dataLoader.OnProgress += clsDataTable_OnProgress;

            switch (mDataSetType)
            {
            case enmDataType.ESET:

                #region Load Expressions

                validFactors = true;

                mProgressForm.Reset("Loading data");

                var esetTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
                if (esetTable == null)
                {
                    string errorMessage;
                    if (string.IsNullOrWhiteSpace(mProgressForm.ErrorMessage))
                    {
                        errorMessage = "Unknown load error";
                    }
                    else
                    {
                        errorMessage = "Load error: " + mProgressForm.ErrorMessage;
                    }

                    MessageBox.Show(errorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    return false;
                }

                if (!string.IsNullOrWhiteSpace(mProgressForm.WarningMessage))
                {
                    MessageBox.Show(mProgressForm.WarningMessage, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }

                esetTable.TableName = "AllEset";

                // Let the user select the row ID column and the data columns
                var columnSelectionForm = new frmSelectColumns
                {
                    PopulateListBox = clsDataTable.DataTableColumns(esetTable, false),
                    Proteins = mhtDatasets.ContainsKey("Protein Info")
                };

                if (columnSelectionForm.ShowDialog() == DialogResult.OK)
                {
                    rowID = columnSelectionForm.RowIDColumn;     //mass tags
                    var dataCols = columnSelectionForm.DataColumns.ToList();
                    try
                    {
                        var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols, false);

                        // Rename the first column from MassTagID (or whatever the user-supplied name is) to Row_ID
                        filteredDataTable.Columns[0].ColumnName = "Row_ID";

                        // Remove rows with no data or duplicate data
                        filteredDataTable = dataLoader.RemoveDuplicateRows2(
                            filteredDataTable,
                            filteredDataTable.Columns[0].ColumnName);

                        // Copy the data into R
                        filteredDataTable.TableName = "Eset";
                        success = mRConnector.SendTable2RmatrixNumeric("Eset", filteredDataTable);

                        // Only validate against previously loaded factors if the new Eset actually reached R;
                        // otherwise the comparison would run against whatever Eset R currently holds
                        if (success && mhtDatasets.ContainsKey("Factors"))
                        {
                            // Compares column names, including notifying the user of missing columns
                            validFactors = ValidateFactors();
                        }

                        if (!validFactors)
                        {
                            success = false;
                        }

                        if (!string.IsNullOrWhiteSpace(mProgressForm.ErrorMessage))
                        {
                            MessageBox.Show(mProgressForm.ErrorMessage, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                        }

                        if (success)
                        {
                            AddDataset2HashTable(filteredDataTable);
                            mRConnector.EvaluateNoReturn("print(dim(Eset))");
                            mRConnector.EvaluateNoReturn("cat(\"Expressions loaded.\n\")");

                            if (!string.IsNullOrWhiteSpace(mProgressForm.WarningMessage))
                            {
                                MessageBox.Show(mProgressForm.WarningMessage, "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message,
                                        "Exception while extracting data columns");
                        return false;
                    }

                    if (columnSelectionForm.Proteins &&
                        !string.IsNullOrWhiteSpace(columnSelectionForm.ProteinIDColumn) && success)
                    {
                        // Extract the protein columns from the loaded table and register them
                        var proteinDataTable = LoadProtColumns(esetTable, columnSelectionForm.ProteinIDColumn, rowID,
                                                               columnSelectionForm.ProteinMetadataColumns);
                        proteinDataTable.TableName = "ProtInfo";
                        AddDataset2HashTable(proteinDataTable);
                    }
                }
                else
                {
                    // The user cancelled the column selection dialog
                    success = false;
                }

                #endregion

                break;

            case enmDataType.PROTINFO:

                #region Load Protein Info

                var proteinInfoTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
                if (proteinInfoTable == null)
                {
                    return false;
                }
                var proteinSelectionForm = new frmSelectProtInfo
                {
                    PopulateListBox = clsDataTable.DataTableColumns(proteinInfoTable, false)
                };

                if (proteinSelectionForm.ShowDialog() == DialogResult.OK)
                {
                    rowID = proteinSelectionForm.RowIDColumn;     //mass tags
                    var proteinIdentifierColumn = proteinSelectionForm.ProteinIDColumn;
                    var proteinDataTable = LoadProtColumns(proteinInfoTable, proteinIdentifierColumn, rowID,
                                                           proteinSelectionForm.ProteinMetadataColumns);
                    proteinDataTable.TableName = "ProtInfo";
                    AddDataset2HashTable(proteinDataTable);
                }
                else
                {
                    // The user cancelled the column selection dialog
                    success = false;
                }

                #endregion

                break;

            case enmDataType.FACTORS:

                // Factor files can be CSV files or .txt files
                // The first row of the factor definitions file must have a column named Factor, then column names that match the names in the originally loaded Expressions table
                // Each subsequent row of the factor file is a new factor name, then the factor value for each dataset
                //
                // Example factor file (.txt file should use a tab delimiter; CSV file would use commas)
                //
                // Factor       P10A  P10B  P11A  P11B  P12A  P12B
                // Time         0     0     5     5     10    10
                // Temperature  Hot   Cold  Hot   Cold  Hot   Cold

                #region Load Factors

                validFactors = true;
                var factorTable = dataLoader.LoadFile2DataTableFastCSVReader(mLoadedFilename);
                if (factorTable == null)
                {
                    return false;
                }

                if (factorTable.Rows.Count > frmDefFactors.MAX_LEVELS)
                {
                    MessageBox.Show(
                        "Factors file has too many factors; max allowed is " + frmDefFactors.MAX_LEVELS + " factors",
                        "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    return false;
                }

                if (factorTable.Columns.Count == 0)
                {
                    MessageBox.Show(
                        "Factors file is empty; nothing to load",
                        "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    return false;
                }

                if (mhtDatasets.ContainsKey("Expressions"))
                {
                    // Check for extra sample names in the factorTable and remove them
                    // The various calls to R that utilize factors do not work properly when extra columns are present

                    mRConnector.EvaluateNoReturn("esetColNames <- colnames(Eset)");
                    var esetColNamesFromR = mRConnector.GetSymbolAsStrings("esetColNames");

                    // The set constructor discards duplicate names
                    var esetColNames = new SortedSet<string>(esetColNamesFromR);

                    var extraFactorCols = new List<string>();
                    var factorColumnName = string.Empty;

                    // Look for a column named Factor or Factors (case-insensitive);
                    // if both are present, the match for the last name in this list wins
                    foreach (var factorNameToFind in new List<string> { "Factor", "Factors" })
                    {
                        foreach (DataColumn factorCol in factorTable.Columns)
                        {
                            if (string.Equals(factorCol.ColumnName, factorNameToFind, StringComparison.OrdinalIgnoreCase))
                            {
                                // Store the name exactly as it appears in the table so that the
                                // lookups by name below cannot miss because of differing case
                                factorColumnName = factorCol.ColumnName;
                                break;
                            }
                        }
                    }

                    if (string.IsNullOrWhiteSpace(factorColumnName))
                    {
                        MessageBox.Show(
                            string.Format(
                                "The first column of the factors file must be named Factor or Factors; your file has {0}",
                                factorTable.Columns[0].ColumnName),
                            "Error", MessageBoxButtons.OK, MessageBoxIcon.Information);

                        return false;
                    }

                    // Assure that the factor column is the first column
                    var factorColumn = factorTable.Columns[factorColumnName];
                    if (factorColumn.Ordinal > 0)
                    {
                        factorColumn.SetOrdinal(0);
                    }

                    // Collect the names first; columns cannot be removed while enumerating the collection
                    foreach (DataColumn factorCol in factorTable.Columns)
                    {
                        if (string.Equals(factorCol.ColumnName, factorColumnName, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        if (!esetColNames.Contains(factorCol.ColumnName))
                        {
                            extraFactorCols.Add(factorCol.ColumnName);
                        }
                    }

                    if (extraFactorCols.Count > 0)
                    {
                        foreach (var colToRemove in extraFactorCols)
                        {
                            factorTable.Columns.Remove(colToRemove);
                        }

                        if (extraFactorCols.Count == 1)
                        {
                            MessageBox.Show(
                                string.Format(
                                    "Removed 1 unknown sample name from the factors file (did not match any expression column names): {0}",
                                    extraFactorCols.First()),
                                "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                        }
                        else
                        {
                            MessageBox.Show(
                                string.Format(
                                    "Removed {0} unknown sample names from the factors file (names did not match expression column names): {1}",
                                    extraFactorCols.Count,
                                    string.Join(", ", extraFactorCols)),
                                "Warning", MessageBoxButtons.OK, MessageBoxIcon.Information);
                        }
                    }
                }

                if (mRConnector.SendTable2RmatrixNonNumeric("factors", factorTable))
                {
                    try
                    {
                        if (mhtDatasets.ContainsKey("Expressions"))
                        {
                            validFactors = ValidateFactors();
                        }

                        if (!validFactors)
                        {
                            success = false;
                        }
                        else
                        {
                            UpdateFactorInfoArray();
                            factorTable.Columns[0].ColumnName = "Factors";
                            factorTable.TableName = "factors";
                            mRConnector.EvaluateNoReturn("print(factors)");
                            mRConnector.EvaluateNoReturn("cat(\"Factors loaded.\n\")");
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: " + ex.Message, "Exception while talking to R");
                        success = false;
                    }
                }
                else
                {
                    // The factors never reached R, so the table must not be registered
                    // (it also has not been given its "factors" table name yet)
                    success = false;
                }

                if (success)
                {
                    AddDataset2HashTable(factorTable);
                }

                #endregion

                break;
            }

            return success;
        }