예제 #1
0
        /// <summary>
        /// Translates every sequence of the input FASTA file into three or six reading frames
        /// and writes the translated frames to the output file.
        /// </summary>
        /// <remarks>
        /// When the quality filters are enabled:
        /// sequences whose length is less than or equal to the configured minimum are removed
        /// before translation, and frames containing at least the configured number of stop
        /// codons ('*') are counted and skipped. Every remaining frame is written as two lines:
        /// "identifier_frameIndex" followed by the translated sequence.
        /// Exceptions (missing file, unwritable output, ...) are not caught here; they propagate
        /// to the caller after the button caption has been restored.
        /// </remarks>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void buttonWork_Click(object sender, EventArgs e)
        {
            buttonWork.Text = "Busy!";

            try
            {
                // Snapshot the options once so the whole run uses a consistent configuration.
                bool applyQualityFilters = checkBoxQualityFilters.Checked;
                bool threeFramesOnly     = radioButton3Frame.Checked;
                int  minNoNT             = (int)numericUpDownMinNoNT.Value;
                int  maxNoStopCodons     = (int)numericUpDownMaxNoStopCodons.Value;

                // First, load the input DB into RAM; the reader is released as soon as parsing ends.
                PatternTools.FastaParser.FastaFileParser fp = new PatternTools.FastaParser.FastaFileParser();
                using (StreamReader inputReader = new StreamReader(textBoxInputFile.Text))
                {
                    fp.ParseFile(inputReader, false, PatternTools.FastaParser.DBTypes.IDSpaceDescription);
                }

                if (applyQualityFilters)
                {
                    int itemsRemovedMinNo = fp.MyItems.RemoveAll(a => a.Sequence.Length <= minNoNT);
                    labelNoNt.Text = itemsRemovedMinNo.ToString();
                }
                else
                {
                    labelNoNt.Text = "0";
                }

                // Reset the counter so a previous run's value is not left on screen.
                int itemsRemovedStopCoddons = 0;
                labelNoStopCodons.Text = "0";

                this.Update();

                PatternTools.DNA2Protein translator = new PatternTools.DNA2Protein();

                // The using block guarantees the output is flushed and closed even on failure.
                using (StreamWriter sw = new StreamWriter(textBoxOutput.Text))
                {
                    foreach (FastaItem fi in fp.MyItems)
                    {
                        List<string> translatedFrames;

                        if (threeFramesOnly)
                        {
                            translatedFrames = translator.Translate3(fi.Sequence);
                        }
                        else
                        {
                            translatedFrames = translator.Translate6(fi.Sequence);
                        }

                        for (int i = 0; i < translatedFrames.Count; i++)
                        {
                            if (applyQualityFilters)
                            {
                                int noStopCodons = 0;
                                foreach (char residue in translatedFrames[i])
                                {
                                    if (residue == '*')
                                    {
                                        noStopCodons++;
                                    }
                                }

                                // The identifier is passed as an argument so braces inside it
                                // cannot be misread as format placeholders.
                                Console.WriteLine("{0} Frame{1} StopCodons: {2}", fi.SequenceIdentifier, i, noStopCodons);

                                if (noStopCodons >= maxNoStopCodons)
                                {
                                    itemsRemovedStopCoddons++;
                                    labelNoStopCodons.Text = itemsRemovedStopCoddons.ToString();
                                    this.Update();

                                    // Rejected frame: do not write it.
                                    continue;
                                }
                            }

                            // Frames that pass the filter (or all frames when filtering is off)
                            // are written to the output.
                            sw.WriteLine(fi.SequenceIdentifier + "_" + i);
                            sw.WriteLine(translatedFrames[i]);
                        }
                    }
                }

                Console.WriteLine("Done");
            }
            finally
            {
                // Always restore the caption, otherwise a failure leaves the button reading "Busy!".
                buttonWork.Text = "Go!";
            }
        }
예제 #2
0
        /// <summary>
        /// Runs a Comet search of a hard-coded spectra file against a hard-coded sequence
        /// database, discards results whose top match scores below the XCorr threshold, and
        /// collects the database entries identified by at least <c>minNoPeptides</c> results.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int    minNoPeptides      = 2;
            double minAcceptableXCorr = 1.7;
            string sequenceDB         = @"C:\Users\pcarvalho\Desktop\XIC\BSA\PFUsmall.T-R";
            string massSpectraFile    = @"C:\Users\pcarvalho\Desktop\XIC\BSA\1\20111024_BSA_40fmol_01.RAW";


            CometWrapper.cometParams cp = new CometWrapper.cometParams();

            cp.ClearMZRangeMax = 0;
            cp.ClearMZRangeMin = 0;

            cp.Enzyme               = 1; //Trypsin
            cp.EnzymeSpecificity    = 2;
            cp.FragmentBinOffset    = 0.4;
            cp.FragmentBinTolerance = 1.0005;
            cp.IonsA  = false;
            cp.IonsB  = true;
            cp.IonsC  = false;
            cp.IonsNL = true;
            cp.IonsX  = false;
            cp.IonsY  = true;
            cp.IonsZ  = false;

            cp.MaxVariableModsPerPeptide = 3;
            cp.MissedCleavages           = 4;


            Modification m1 = new Modification("Carb", (decimal)57.02146, "C");

            //Modification m2 = new Modification("M", (decimal)15.9949, "M");
            //m2.isDiff = true;

            //For cross-linker dead end, include as a variable mod, and then as a variable mod for n-terminal

            cp.MyModificationItems = new List <CometWrapper.Modification>()
            {
                m1
            };

            cp.PrecursorMassTolerance = 40;
            cp.SearchMassRangeMax     = 5500;
            cp.SearchMassRangeMin     = 550;
            cp.SequenceDatabase       = sequenceDB;
            cp.TheoreticalFragIons    = 1;

            List <SQTScan2> myResults = CometWrapper.RemoteCall.CallComet(cp, massSpectraFile);

            // Drop results that have no match at all or whose top match is below the threshold.
            // The emptiness guard keeps Matches[0] from throwing here and in the queries below.
            int removedPrimary = myResults.RemoveAll(
                a => !a.Matches.Any() || a.Matches[0].PrimaryScore < minAcceptableXCorr);

            // Number of retained results per locus, based on the top match of each result.
            var proteinIDs = (from result in myResults
                              from ID id in result.Matches[0].IDs
                              group id by id.Locus into myLocusGroups
                              select new { ID = myLocusGroups.Key, Count = myLocusGroups.Count() });

            // Distinct peptide sequences of the top matches; not used further in this method.
            List <string> peptides = (from result in myResults
                                      select result.Matches[0].PeptideSequence).Distinct().ToList();

            PatternTools.FastaParser.FastaFileParser ffp = new PatternTools.FastaParser.FastaFileParser();

            // Release the database file handle as soon as parsing is finished.
            using (StreamReader dbReader = new StreamReader(cp.SequenceDatabase))
            {
                ffp.ParseFile(dbReader, true, PatternTools.FastaParser.DBTypes.IDSpaceDescription);
            }

            List <FastaItem> myFasta = new List <FastaItem>();

            foreach (var prot in proteinIDs)
            {
                if (prot.Count < minNoPeptides)
                {
                    continue;
                }

                // Find returns null when the locus is absent from the parsed database;
                // skip it instead of storing a null entry.
                FastaItem entry = ffp.MyItems.Find(a => a.SequenceIdentifier.Equals(prot.ID));
                if (entry != null)
                {
                    myFasta.Add(entry);
                }
            }

            Console.WriteLine("Done");
        }
예제 #3
0
        /// <summary>
        /// Builds the search database: parses the optional contaminant resource and every input
        /// database listed in the grid, optionally removes subset / near-identical sequences,
        /// and writes the result through <c>GenerateSearchDB</c>.
        /// </summary>
        /// <remarks>
        /// Any exception is caught and its message is shown in a message box.
        /// NOTE(review): progress is reported through backgroundWorker1, which suggests this runs
        /// on a worker thread, yet control state is read and a MessageBox is shown directly here.
        /// Confirm the threading model with the caller.
        /// </remarks>
        private void DoJob()
        {
            try
            {
                PatternTools.FastaParser.FastaFileParser fp = new PatternTools.FastaParser.FastaFileParser();
                buttonGoText  = "Working!";
                statusBarText = "Parsing DB...";
                backgroundWorker1.ReportProgress(1);

                // Built once and shared by all input databases. An invalid pattern still fails
                // here, before any file is opened.
                Regex nameRegex = new Regex(textBoxNameRegex.Text, RegexOptions.IgnoreCase);

                PatternTools.FastaParser.DBTypes dbType;
                if (radioButtonDBNCBI.Checked)
                {
                    dbType = DBTypes.NCBInr;
                }
                else if (radioButtonDBUniprot.Checked)
                {
                    dbType = DBTypes.UniProt;
                }
                else if (radioButtonDBGeneric.Checked)
                {
                    dbType = DBTypes.IDSpaceDescription;
                }
                else if (radioButtonNextProt.Checked)
                {
                    dbType = DBTypes.NeXtProt;
                }
                else
                {
                    dbType = PatternTools.FastaParser.DBTypes.IPI;
                }

                if (includeContaminants)
                {
                    byte[] byteArray = Encoding.ASCII.GetBytes(NCBIExtractor.Properties.Resources.ContaminantDB);

                    // Disposing the reader also disposes the underlying memory stream.
                    using (StreamReader sContaminantsr = new StreamReader(new MemoryStream(byteArray)))
                    {
                        fp.ParseFile(
                            sContaminantsr,
                            false,
                            PatternTools.FastaParser.DBTypes.Contaminant);
                    }
                }

                foreach (DataGridViewRow r in dataGridViewInputDBs.Rows)
                {
                    // The grid's "new row" placeholder and rows with an empty first cell carry
                    // no file name; skip them instead of failing on a null value.
                    object cellValue = r.IsNewRow ? null : r.Cells[0].Value;
                    if (cellValue == null)
                    {
                        continue;
                    }

                    string fileName = cellValue.ToString();
                    if (string.IsNullOrEmpty(fileName) || fileName.Trim().Length == 0)
                    {
                        continue;
                    }

                    PatternTools.FastaParser.FastaFileParser fpTmp = new PatternTools.FastaParser.FastaFileParser();

                    // The using block closes the file even when parsing throws.
                    using (StreamReader srDB = new StreamReader(fileName))
                    {
                        fpTmp.ParseFile(
                            srDB,
                            false,
                            dbType,
                            true,
                            nameRegex);
                    }

                    fpTmp.MyItems.RemoveAll(a => string.IsNullOrEmpty(a.Sequence));

                    fp.MyItems.AddRange(fpTmp.MyItems);
                }

                if (removeSubSequences)
                {
                    if (numericUpDownIdentity.Value == 100)
                    {
                        Console.WriteLine("Removing subset sequences");

                        List <FastaItem> toEliminate = new List <FastaItem>();
                        Console.WriteLine("Searching for subset sequences...");

                        // Order by increasing length so that only later (equal or longer)
                        // entries need to be checked as potential containers.
                        fp.MyItems.Sort((a, b) => a.Sequence.Length.CompareTo(b.Sequence.Length));

                        for (int i = 0; i < fp.MyItems.Count; i++)
                        {
                            Console.WriteLine("Analyzing for sequence " + i);

                            FastaItem candidate = fp.MyItems[i];

                            for (int j = i + 1; j < fp.MyItems.Count; j++)
                            {
                                FastaItem container = fp.MyItems[j];

                                if (container.Sequence.Contains(candidate.Sequence))
                                {
                                    toEliminate.Add(candidate);
                                    Console.WriteLine("subset sequence found");

                                    // Record the eliminated entry in the description of the
                                    // sequence that contains it.
                                    container.Description = container.Description + " : " + candidate.SequenceIdentifier + " " + candidate.Description;
                                    break;
                                }
                            }
                        }

                        Console.WriteLine("Sequences eliminated: " + toEliminate.Count);

                        fp.MyItems = fp.MyItems.Except(toEliminate).ToList();
                    }
                    else
                    {
                        MAligner.MCompress.Compress c = new MAligner.MCompress.Compress();
                        c.CompressByIdentity(fp, (double)numericUpDownIdentity.Value / 100.0, null);
                    }
                }

                statusBarText = "Writing DB...";
                backgroundWorker1.ReportProgress(1);

                fp.GenerateSearchDB(outputFile, includeReversed, includeScrambled0, includeScrambled1, radioButtonTPRMR.Checked);

                myFileParserItems = fp.MyItems.Count;
            }
            catch (Exception ee)
            {
                MessageBox.Show(ee.Message);
            }
        }