Exemple #1
0
        /// <summary>
        /// Builds the protein index and the sparse matrix for all result packages.
        /// The index receives one entry per distinct fasta sequence identifier found in
        /// <c>MyResultPackages</c>; the matrix receives one row per result package, holding for every
        /// indexed protein the number of alignments of that package whose <c>ProtIDs</c> contain it.
        /// </summary>
        /// <returns>A pair whose key is the index and whose value is the sparse matrix.</returns>
        public KeyValuePair <SparseMatrixIndexParserV2, SparseMatrix> GenerateIndexAndSparseMatrix()
        {
            //Get one fasta item per protein identified in any package. Grouping keeps the identifiers in
            //order of first appearance and the items inside a group in source order, so the first item
            //of each group is the one whose description is registered in the index.
            List <FastaItem> distinctFastaItems = (from pckg in MyResultPackages
                                                   from fst in pckg.MyPackage.MyFasta
                                                   group fst by fst.SequenceIdentifier into sameIdentifier
                                                   select sameIdentifier.First()).ToList();

            SparseMatrixIndexParserV2 smi = new SparseMatrixIndexParserV2();

            foreach (FastaItem fastaItem in distinctFastaItems)
            {
                smi.Add(fastaItem.SequenceIdentifier, fastaItem.Description);
            }


            SparseMatrix sm = new SparseMatrix();

            foreach (DirectoryClassDescription myDir in MyDirectoryDescriptionDictionary)
            {
                sm.ClassDescriptionDictionary.Add(myDir.ClassLabel, myDir.Description);
            }

            foreach (ThePackage rp in MyResultPackages)
            {
                sparseMatrixRow smr = new sparseMatrixRow(rp.MyClassLabel);
                smr.FileName = rp.MyFileInfo.FullName;

                List <int>    dims   = new List <int>();
                List <double> values = new List <double>();

                for (int i = 0; i < smi.TheIndexes.Count; i++)
                {
                    var proteinName = smi.TheIndexes[i].Name;

                    //Count the alignments of this package that mention the protein
                    int count = rp.MyPackage.Alignments.Count(a => a.ProtIDs.Contains(proteinName));

                    //Only proteins that were actually observed go into the sparse row
                    if (count > 0)
                    {
                        //Dimensions are written one-based (position in the index + 1)
                        dims.Add(i + 1);
                        values.Add(count);
                    }
                }

                smr.Dims   = dims;
                smr.Values = values;

                sm.addRow(smr);
            }


            return(new KeyValuePair <SparseMatrixIndexParserV2, SparseMatrix>(smi, sm));
        }
Exemple #2
0
        /// <summary>
        /// Handles the "Export to PLP" menu item. Asks the user for a target file and saves a
        /// PatternLab project built from <c>protPepDict</c>: the index holds every protein having more
        /// peptide quantitations than the value of <c>IntegerUpDown</c>, and the sparse matrix holds one
        /// row per marker whose class label is not negative, each value being the summed marker
        /// intensity of a protein.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">The event data (not used).</param>
        private void MenuItemExporToPLP_Click(object sender, RoutedEventArgs e)
        {
            SaveFileDialog sfd = new SaveFileDialog();

            //The default extension has to agree with the only file type offered by the filter
            sfd.DefaultExt = ".plp";
            sfd.Filter     = "PatternLab Project (*.plp)|*.plp";

            Nullable <bool> result = sfd.ShowDialog();

            //Nothing to do unless the user confirmed a file name
            if (result != true)
            {
                return;
            }

            //Parse the class labels up front so that malformed input is reported instead of
            //surfacing as an unhandled FormatException
            List <int> labels = new List <int>();
            foreach (string token in TextBoxClassLabel.Text.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                int parsedLabel;
                if (!int.TryParse(token, out parsedLabel))
                {
                    MessageBox.Show("Please make sure that the class labels are integers separated by spaces");
                    return;
                }

                labels.Add(parsedLabel);
            }

            //Build the index; orderedKeys remembers which protein sits at which index position
            SparseMatrixIndexParserV2 smi = new SparseMatrixIndexParserV2();

            int counter = 0;
            List <FastaItem> orderedKeys = new List <FastaItem>();
            foreach (KeyValuePair <FastaItem, List <PepQuant> > kvp in protPepDict)
            {
                if (kvp.Value.Count > IntegerUpDown.Value)
                {
                    counter++;
                    SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                    index.ID          = counter;
                    index.Name        = kvp.Key.SequenceIdentifier;
                    index.Description = kvp.Key.Description;

                    smi.Add(index);

                    orderedKeys.Add(kvp.Key);
                }
            }

            SparseMatrix sm = new SparseMatrix();
            sm.ClassDescriptionDictionary = new Dictionary <int, string>();


            //Generate one matrix row per marker
            for (int i = 0; i < labels.Count; i++)
            {
                //A negative label excludes the marker from the project
                if (labels[i] < 0)
                {
                    continue;
                }

                //Create the dictionary entry for the class
                //NOTE(review): the class written here (and passed to the row below) is the marker
                //position i, not labels[i]; the labels only decide which markers are skipped - confirm
                //that this is intended.
                sm.ClassDescriptionDictionary.Add(i, (i).ToString());


                List <int>    dims   = new List <int>();
                List <double> values = new List <double>();

                for (int j = 0; j < orderedKeys.Count; j++)
                {
                    FastaItem       fi           = orderedKeys[j];
                    List <PepQuant> thePepQuants = protPepDict[fi];

                    //Sum the intensity of marker i over every quantitation of every peptide of the protein
                    double theIntensitySum = 0;
                    foreach (PepQuant pq in thePepQuants)
                    {
                        theIntensitySum += pq.MyQuants.Sum(a => a.MarkerIntensities[i]);
                    }

                    if (theIntensitySum > 0)
                    {
                        //j + 1 matches the one-based ID given to the protein in the index above
                        dims.Add(j + 1);
                        values.Add(theIntensitySum);
                    }
                }

                sparseMatrixRow smr = new sparseMatrixRow(i, dims, values);
                sm.theMatrixInRows.Add(smr);
            }

            PatternLabProject plp = new PatternLabProject(sm, smi, "Isobaric Quant Project");
            plp.Save(sfd.FileName);

            MessageBox.Show("PLP file was saved");
            Console.WriteLine("PLP file was saved.");
        }
Exemple #3
0
        /// <summary>
        /// Handles a click on the "Go" button. Validates the form input (output directory, isobaric
        /// masses, class labels and input file) and then runs the isobaric quantitation, which writes
        /// either the peptide quantitation report or the peptide- and protein-level PatternLab projects
        /// into the output directory.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">The event data (not used).</param>
        private void buttonGo_Click(object sender, EventArgs e)
        {
            //Verify the output directory

            if (!Directory.Exists(textBoxOutputDirectory.Text))
            {
                MessageBox.Show("Please specify a valid output directory");
                return;
            }

            if (!Regex.IsMatch(textBoxIsobaricMasses.Text, "[0-9]+ [0-9]+"))
            {
                MessageBox.Show("Please fill out the masses of the isobaric tags.");
                return;
            }


            if (!PatternTools.pTools.HasWriteAccessToFolder(textBoxOutputDirectory.Text))
            {
                MessageBox.Show("Please specify a valid output directory");
                return;
            }

            //Obtain class labels (space separated, one per isobaric mass)
            if (textBoxClassLabels.Text.Length == 0)
            {
                MessageBox.Show("Please input the class labels (eg., for iTRAQ 1 2 3 4)");
                return;
            }

            List <int> labels = new List <int>();
            foreach (string labelToken in textBoxClassLabels.Text.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                int parsedLabel;
                if (!int.TryParse(labelToken, out parsedLabel))
                {
                    MessageBox.Show("Please make sure that the class labels are integers separated by spaces");
                    return;
                }

                labels.Add(parsedLabel);
            }

            //Obtain the isobaric masses
            List <double> isobaricMasses = new List <double>();
            foreach (string massToken in textBoxIsobaricMasses.Text.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                double parsedMass;
                if (!double.TryParse(massToken, out parsedMass))
                {
                    MessageBox.Show("Please make sure that the isobaric masses are numbers separated by spaces");
                    return;
                }

                isobaricMasses.Add(parsedMass);
            }

            if (labels.Count != isobaricMasses.Count)
            {
                MessageBox.Show("Please make sure that the class labels and isobaric masses match");
                return;
            }

            //The input file is checked before any work starts, so a missing file can neither break the
            //raw file lookup nor leave the button showing "Working..."
            if (!File.Exists(textBoxitraqSEPro.Text))
            {
                MessageBox.Show("Unable to find SEPro file");
                return;
            }

            buttonGo.Text = "Working...";
            this.Update();

            try
            {
                RunIsobaricQuantitation(labels, isobaricMasses);
            }
            finally
            {
                //Restore the caption even when the analysis fails
                buttonGo.Text = "Generate Report";
            }
        }

        /// <summary>
        /// Loads the scans of the input file, removes multiplexed spectra when a Yada directory is
        /// given, extracts (and optionally purity-corrects and normalizes) the isobaric signal of every
        /// scan, and writes the output selected on the form.
        /// </summary>
        /// <param name="labels">Class label of each isobaric channel; a negative label excludes the channel from the PatternLab projects.</param>
        /// <param name="isobaricMasses">Mass of each isobaric channel, in the same order as <paramref name="labels"/>.</param>
        /// <exception cref="NotSupportedException">The input file is neither a .sepr nor a .mpex file.</exception>
        private void RunIsobaricQuantitation(List <int> labels, List <double> isobaricMasses)
        {
            richTextBoxLog.Clear();


            //--------------------------------------------


            //Get signal from all
            signalAllNormalizationDictionary = new Dictionary <string, double[]>();
            FileInfo      fi            = new FileInfo(textBoxitraqSEPro.Text);
            bool          extractSignal = false;
            ResultPackage rp            = null;

            if (checkBoxNormalizationChannelSignal.Checked)
            {
                //We should get the MS info and merge it with the SEPro package
                if (fi.Extension.Equals(".sepr"))
                {
                    rp            = ResultPackage.Load(textBoxitraqSEPro.Text);
                    extractSignal = true;
                }

                //The raw files are looked up in the directory of the input file
                List <FileInfo> rawFiles = fi.Directory.GetFiles("*.RAW").ToList();

                foreach (FileInfo rawFile in rawFiles)
                {
                    Console.WriteLine("Extracting data for " + rawFile.Name);
                    PatternTools.RawReader.RawReaderParams rParams = new PatternTools.RawReader.RawReaderParams();
                    rParams.ExtractMS1 = false;
                    rParams.ExtractMS2 = true;
                    rParams.ExtractMS3 = false;

                    PatternTools.RawReader.Reader reader = new PatternTools.RawReader.Reader(rParams);

                    List <MSLight> theMS2 = reader.GetSpectra(rawFile.FullName, new List <int>(), false);

                    theMS2.RemoveAll(a => a.Ions == null);

                    //Per-channel signal summed over every MS2 spectrum of this raw file
                    double [] totalSignal = new double[isobaricMasses.Count];

                    List <SQTScan> theScans = null;
                    //Update the sepro result package with the signal
                    if (extractSignal)
                    {
                        //Get all the scans from this file (names are compared without their 4-character extension)
                        string rawName = rawFile.Name.Substring(0, rawFile.Name.Length - 4);
                        theScans = rp.MyProteins.AllSQTScans.FindAll(a => a.FileName.Substring(0, a.FileName.Length - 4).Equals(rawName));
                    }


                    foreach (MSLight ms in theMS2)
                    {
                        double[] thisQuantitation = GetIsobaricSignal(ms.Ions, isobaricMasses);

                        if (extractSignal)
                        {
                            SQTScan scn = theScans.Find(a => a.ScanNumber == ms.ScanNumber);
                            if (scn != null)
                            {
                                //Attach the spectrum to the identified scan, keeping only ions up to m/z 400
                                scn.MSLight = ms;
                                scn.MSLight.Ions.RemoveAll(a => a.MZ > 400);
                            }
                        }

                        for (int i = 0; i < thisQuantitation.Length; i++)
                        {
                            totalSignal[i] += thisQuantitation[i];
                        }
                    }

                    //The totals are keyed by the raw file name with its extension replaced by "sqt"
                    string theName = rawFile.Name.Substring(0, rawFile.Name.Length - 3);
                    theName += "sqt";

                    signalAllNormalizationDictionary.Add(theName, totalSignal);
                }
            }


            Console.WriteLine("Loading SEPro File");


            #region Load the sepro or pepexplorer file

            theScansToAnalyze = new List <SQTScan>();
            List <FastaItem> theFastaItems = new List <FastaItem>();

            if (fi.Extension.Equals(".sepr"))
            {
                Console.WriteLine("Loading SEPro file");

                //The package is already in memory when the signal was merged into it above
                if (!extractSignal)
                {
                    rp = ResultPackage.Load(textBoxitraqSEPro.Text);
                }
                rp.MyProteins.AllSQTScans.RemoveAll(a => a.MSLight == null);
                theScansToAnalyze = rp.MyProteins.AllSQTScans;
                Console.WriteLine("Done reading SEPro result");
                theFastaItems = rp.MyProteins.MyProteinList.Select(a => new FastaItem(a.Locus, a.Sequence, a.Description)).ToList();
            }
            else if (fi.Extension.Equals(".mpex"))
            {
                Console.WriteLine("Loading PepExplorer file....");
                PepExplorer2.Result2.ResultPckg2 result = PepExplorer2.Result2.ResultPckg2.DeserializeResultPackage(textBoxitraqSEPro.Text);
                theFastaItems = result.MyFasta;

                theScansToAnalyze = new List <SQTScan>();

                //Every de novo registry of every alignment becomes one scan to analyze
                foreach (PepExplorer2.Result2.AlignmentResult al in result.Alignments)
                {
                    foreach (var dnr in al.DeNovoRegistries)
                    {
                        SQTScan sqt = new SQTScan();
                        sqt.ScanNumber      = dnr.ScanNumber;
                        sqt.FileName        = dnr.FileName;
                        sqt.PeptideSequence = dnr.PtmSequence;
                        theScansToAnalyze.Add(sqt);
                    }
                }

                //And now we need to retrieve the mass spectra.  For this, the raw files should be inside the directory containing the mpex file
                List <string> rawFileNames = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

                //Swap the 3-character extension of each scan file name for "raw"
                for (int i = 0; i < rawFileNames.Count; i++)
                {
                    rawFileNames[i] = rawFileNames[i].Remove(rawFileNames[i].Length - 3, 3) + "raw";
                }

                foreach (string fn in rawFileNames)
                {
                    Console.WriteLine("Retrieving spectra for file: " + fn);
                    ParserUltraLightRAW parser = new ParserUltraLightRAW();

                    string tmpFile = fn.Substring(0, fn.Length - 3);

                    //The scan file name is escaped so that characters such as '.', '(' or '+' are matched
                    //literally instead of being interpreted as a regular expression
                    List <SQTScan> scansForThisFile = theScansToAnalyze.FindAll(a => Regex.IsMatch(tmpFile, Regex.Escape(a.FileName.Substring(0, a.FileName.Length - 3)), RegexOptions.IgnoreCase)).ToList();

                    List <int> scnNumbers = scansForThisFile.Select(a => a.ScanNumber).ToList();

                    FileInfo theInputFile = new FileInfo(textBoxitraqSEPro.Text);

                    List <MSUltraLight> theSpectra = parser.ParseFile(theInputFile.DirectoryName + "/" + fn, -1, 2, scnNumbers);

                    foreach (SQTScan sqt in scansForThisFile)
                    {
                        MSUltraLight spec = theSpectra.Find(a => a.ScanNumber == sqt.ScanNumber);

                        //A scan whose spectrum is missing from the raw file cannot be quantified
                        if (object.ReferenceEquals(spec, null))
                        {
                            Console.WriteLine("\tNo spectrum found for scan " + sqt.ScanNumber + " in file " + fn);
                            continue;
                        }

                        sqt.MSLight           = new MSLight();
                        sqt.MSLight.MZ        = spec.Ions.Select(a => (double)a.Item1).ToList();
                        sqt.MSLight.Intensity = spec.Ions.Select(a => (double)a.Item2).ToList();
                    }

                    Console.WriteLine("\tDone processing this file.");
                }

                //Same rule as for SEPro results: scans without a spectrum are left out of the analysis
                int scansWithoutSpectrum = theScansToAnalyze.RemoveAll(a => a.MSLight == null);
                if (scansWithoutSpectrum > 0)
                {
                    Console.WriteLine("Scans removed for not having a spectrum: " + scansWithoutSpectrum);
                }
            }
            else
            {
                throw new NotSupportedException("This file format is not supported.");
            }

            #endregion


            //Obtain the multiplexed spectra and remove them from the scans to analyze
            if (textBoxCorrectedYadaDirectory.Text.Length > 0)
            {
                Console.WriteLine("Reading Yada results");
                SEProQ.IsobaricQuant.YadaMultiplexCorrection.YMC ymc = new IsobaricQuant.YadaMultiplexCorrection.YMC(new DirectoryInfo(textBoxCorrectedYadaDirectory.Text));
                Console.WriteLine("Done loading Yada results");

                int removedCounter = 0;

                foreach (KeyValuePair <string, List <int> > kvp in ymc.fileNameScanNumberMultiplexDictionary)
                {
                    Console.WriteLine("Removing multiplexed spectra for file :: " + kvp.Key);
                    richTextBoxLog.AppendText("Removing multiplexed spectra for file :: " + kvp.Key + "\n");

                    //Scans are matched on the key with its 4-character extension replaced by ".sqt"
                    string cleanName = kvp.Key.Substring(0, kvp.Key.Length - 4);
                    cleanName += ".sqt";
                    foreach (int scnNo in kvp.Value)
                    {
                        int index = theScansToAnalyze.FindIndex(a => a.ScanNumber == scnNo && a.FileName.Equals(cleanName));
                        if (index >= 0)
                        {
                            Console.Write(theScansToAnalyze[index].ScanNumber + " ");
                            richTextBoxLog.AppendText(theScansToAnalyze[index].ScanNumber + " ");

                            removedCounter++;
                            theScansToAnalyze.RemoveAt(index);
                        }
                    }

                    Console.WriteLine("\n");
                    richTextBoxLog.AppendText("\n");
                }

                Console.WriteLine("Done removing multiplexed spectra :: " + removedCounter);
            }


            PatternTools.CSML.Matrix correctionMatrix = new PatternTools.CSML.Matrix();
            if (checkBoxApplyPurityCorrection.Checked)
            {
                List <List <double> > correctionData = GetPurityCorrectionsFromForm();
                correctionMatrix = IsobaricQuant.IsobaricImpurityCorrection.GenerateInverseCorrectionMatrix(correctionData);
            }


            //--------------------------------------------------------------------------------------------------------------------

            //Prepare normalization Dictionary: one per-channel total for every file that has scans
            signalIdentifiedNormalizationDictionary = new Dictionary <string, double[]>();

            List <string> fileNames = theScansToAnalyze.Select(a => a.FileName).Distinct().ToList();

            foreach (string fileName in fileNames)
            {
                signalIdentifiedNormalizationDictionary.Add(fileName, new double[isobaricMasses.Count]);
            }
            //-------------------------------------



            //If necessary, correct for impurity and feed global signal dictionary
            foreach (SQTScan scn in theScansToAnalyze)
            {
                double[] thisQuantitation = GetIsobaricSignal(scn.MSLight.Ions, isobaricMasses);

                double maxSignal = thisQuantitation.Max();

                //We can only correct for signal for those that have quantitation values in all places,
                //i.e., every channel is above the threshold fraction of the strongest channel
                if (checkBoxApplyPurityCorrection.Checked && (thisQuantitation.Count(a => a > maxSignal * (double)numericUpDownIonCountThreshold.Value) == isobaricMasses.Count))
                {
                    thisQuantitation = IsobaricQuant.IsobaricImpurityCorrection.CorrectForSignal(correctionMatrix, thisQuantitation).ToArray();
                }

                if (checkBoxNormalizationChannelSignal.Checked)
                {
                    for (int k = 0; k < thisQuantitation.Length; k++)
                    {
                        signalIdentifiedNormalizationDictionary[scn.FileName][k] += thisQuantitation[k];
                    }
                }

                scn.Quantitation = new List <List <double> >()
                {
                    thisQuantitation.ToList()
                };
            }

            //And now normalize -------------------

            if (checkBoxNormalizationChannelSignal.Checked)
            {
                Console.WriteLine("Performing channel signal normalization for " + theScansToAnalyze.Count + " scans.");

                foreach (SQTScan scn2 in theScansToAnalyze)
                {
                    //Each channel is divided by the total of that channel over the identified scans of the same file
                    for (int m = 0; m < isobaricMasses.Count; m++)
                    {
                        scn2.Quantitation[0][m] /= signalIdentifiedNormalizationDictionary[scn2.FileName][m];
                    }

                    //0 / 0 happens when a channel has no signal at all in a file
                    if (scn2.Quantitation[0].Contains(double.NaN))
                    {
                        Console.WriteLine("Problems on signal of scan " + scn2.FileNameWithScanNumberAndChargeState);
                    }
                }
            }

            comboBoxSelectFileForGraphs.Items.Clear();
            foreach (string file in signalIdentifiedNormalizationDictionary.Keys.ToList())
            {
                comboBoxSelectFileForGraphs.Items.Add(file);
            }


            tabControlMain.SelectedIndex = 1;


            if (radioButtonAnalysisPeptideReport.Checked)
            {
                //Peptide Analysis
                WritePeptideQuantitationReport(theFastaItems);
            }
            else
            {
                //NOTE(review): the package is loaded again here, so the Quantitation values assigned to
                //theScansToAnalyze above are only seen below if they are already stored in the file or
                //Load hands back the same scan objects; this branch also calls ResultPackage.Load on
                //.mpex input - confirm both.
                rp = ResultPackage.Load(textBoxitraqSEPro.Text);

                SavePeptideLevelProject(rp, labels, isobaricMasses);
                SaveProteinLevelProject(rp, labels, isobaricMasses);
            }

            comboBoxSelectFileForGraphs.Enabled = true;
            tabControlMain.SelectedIndex        = 2;
            Console.WriteLine("Done");
        }

        /// <summary>
        /// Writes "PeptideQuantitationReport.txt" into the output directory: the scans to analyze grouped
        /// by cleaned peptide sequence, each with its quantitation values, followed by the fasta items.
        /// </summary>
        /// <param name="theFastaItems">The fasta items appended at the end of the report.</param>
        private void WritePeptideQuantitationReport(List <FastaItem> theFastaItems)
        {
            //Eliminate problematic quants
            int removed = theScansToAnalyze.RemoveAll(a => Object.ReferenceEquals(a.Quantitation, null));
            Console.WriteLine("Problematic scans removed: " + removed);

            var pepDic = from scn in theScansToAnalyze
                         group scn by scn.PeptideSequenceCleaned
                         into groupedSequences
                         select new { PeptideSequence = groupedSequences.Key, TheScans = groupedSequences.ToList() };

            //The using block closes the file even when writing fails half way
            using (StreamWriter sw = new StreamWriter(textBoxOutputDirectory.Text + "/" + "PeptideQuantitationReport.txt"))
            {
                foreach (var pep in pepDic)
                {
                    sw.WriteLine("Peptide:" + pep.PeptideSequence + "\tSpecCounts:" + pep.TheScans.Count);

                    foreach (SQTScan sqt in pep.TheScans)
                    {
                        sw.WriteLine(sqt.FileNameWithScanNumberAndChargeState + "\t" + string.Join("\t", sqt.Quantitation[0]));
                    }
                }


                //And now write the Fasta
                sw.WriteLine("#Fasta Items");
                foreach (FastaItem fastaItem in theFastaItems)
                {
                    sw.WriteLine(">" + fastaItem.SequenceIdentifier + " " + fastaItem.Description);
                    sw.WriteLine(fastaItem.Sequence);
                }
            }
        }

        /// <summary>
        /// Saves "MyPatternLabProjectPeptides.plp" into the output directory. The index holds one entry
        /// per peptide of the package and the matrix one row per channel with a non-negative label; a
        /// value is the sum of the non-NaN quantitations of the peptide's scans for that channel.
        /// </summary>
        /// <param name="rp">The result package providing the peptides.</param>
        /// <param name="labels">Class label of each isobaric channel.</param>
        /// <param name="isobaricMasses">Mass of each isobaric channel; written as the file name of its row.</param>
        private void SavePeptideLevelProject(ResultPackage rp, List <int> labels, List <double> isobaricMasses)
        {
            PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();
            List <PeptideResult> peptides = rp.MyProteins.MyPeptideList;

            if (checkBoxOnlyUniquePeptides.Checked)
            {
                int removedPeptides = peptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                Console.WriteLine("Removing {0} peptides for not being unique.", removedPeptides);
            }

            //The index ID of a peptide is its position in the list, so IDs can be used to look peptides up below
            for (int i = 0; i < peptides.Count; i++)
            {
                SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                index.Name        = peptides[i].PeptideSequence;
                index.Description = string.Join(" ", peptides[i].MyMapableProteins);
                index.ID          = i;

                ip.Add(index, true);
            }

            //NOTE(review): unlike the protein-level project, no class description is registered in
            //sm.ClassDescriptionDictionary here - confirm whether that is intended.
            SparseMatrix sm = new SparseMatrix();

            List <int> dims = ip.allIDs();


            for (int l = 0; l < labels.Count; l++)
            {
                //A negative label excludes the channel
                if (labels[l] < 0)
                {
                    continue;
                }

                sparseMatrixRow smr    = new sparseMatrixRow(labels[l]);
                List <double>   values = new List <double>(dims.Count);

                List <int> dimsWithValues = new List <int>();

                foreach (int d in dims)
                {
                    List <SQTScan> scns = peptides[d].MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                    if (scns.Count > 0)
                    {
                        double signalSum = scns.FindAll(a => !double.IsNaN(a.Quantitation[0][l])).Sum(a => a.Quantitation[0][l]);
                        values.Add(signalSum);
                        dimsWithValues.Add(d);
                    }
                }


                smr.Dims     = dimsWithValues;
                smr.Values   = values;
                smr.FileName = isobaricMasses[l].ToString();

                sm.addRow(smr);
            }

            PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
            plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectPeptides.plp");
        }

        /// <summary>
        /// Saves "MyPatternLabProjectProteins.plp" into the output directory. The index holds one entry
        /// per protein of the package and the matrix one row per channel with a non-negative label; a
        /// value is the sum of the finite quantitations of all scans of the protein's peptides for that
        /// channel, and only proteins with a positive sum enter the row.
        /// </summary>
        /// <param name="rp">The result package providing the proteins.</param>
        /// <param name="labels">Class label of each isobaric channel.</param>
        /// <param name="isobaricMasses">Mass of each isobaric channel; written as the file name of its row.</param>
        private void SaveProteinLevelProject(ResultPackage rp, List <int> labels, List <double> isobaricMasses)
        {
            //Generate Index
            PatternTools.SparseMatrixIndexParserV2 ip = new SparseMatrixIndexParserV2();

            List <MyProtein> theProteins = rp.MyProteins.MyProteinList;

            if (checkBoxOnlyUniquePeptides.Checked)
            {
                int removedProteins = theProteins.RemoveAll(a => !a.PeptideResults.Exists(b => b.NoMyMapableProteins == 1));
                Console.WriteLine("{0} removed proteins for not having unique peptides", removedProteins);
            }

            //The index ID of a protein is its position in the list, so IDs can be used to look proteins up below
            for (int i = 0; i < theProteins.Count; i++)
            {
                SparseMatrixIndexParserV2.Index index = new SparseMatrixIndexParserV2.Index();
                index.ID          = i;
                index.Name        = theProteins[i].Locus;
                index.Description = theProteins[i].Description;

                ip.Add(index, false);
            }

            //SparseMatrix
            SparseMatrix sm = new SparseMatrix();

            List <int> dims = ip.allIDs();

            for (int l = 0; l < labels.Count; l++)
            {
                //A negative label excludes the channel
                if (labels[l] < 0)
                {
                    continue;
                }

                if (!sm.ClassDescriptionDictionary.ContainsKey(labels[l]))
                {
                    sm.ClassDescriptionDictionary.Add(labels[l], labels[l].ToString());
                }

                sparseMatrixRow smr    = new sparseMatrixRow(labels[l]);
                List <double>   values = new List <double>(dims.Count);

                List <int> dimsToInclude = new List <int>();

                foreach (int d in dims)
                {
                    double signalSum = 0;

                    List <PeptideResult> thePeptides = theProteins[d].PeptideResults;

                    if (checkBoxOnlyUniquePeptides.Checked)
                    {
                        thePeptides.RemoveAll(a => a.MyMapableProteins.Count > 1);
                    }

                    foreach (PeptideResult pr in thePeptides)
                    {
                        List <SQTScan> scns = pr.MyScans.FindAll(a => !object.ReferenceEquals(a.Quantitation, null));

                        foreach (SQTScan sqt in scns)
                        {
                            if (!double.IsNaN(sqt.Quantitation[0][l]) && !double.IsInfinity(sqt.Quantitation[0][l]))
                            {
                                signalSum += sqt.Quantitation[0][l];
                            }
                        }
                    }

                    if (signalSum > 0)
                    {
                        dimsToInclude.Add(d);
                        values.Add(signalSum);
                    }
                    else
                    {
                        Console.WriteLine("No signal found for " + theProteins[d].Locus + " on marker " + l);
                    }
                }

                //Dims has to list exactly the proteins that contributed a value; handing over every
                //index ID would leave Dims and Values with different lengths
                smr.Dims     = dimsToInclude;
                smr.Values   = values;
                smr.FileName = isobaricMasses[l].ToString();

                sm.addRow(smr);
            }


            PatternLabProject plp = new PatternLabProject(sm, ip, "IsobaricQuant");
            plp.Save(textBoxOutputDirectory.Text + "/MyPatternLabProjectProteins.plp");
        }