예제 #1
0
        public void TestTransferTo()
        {
            var table = new Aminoacids();
            int steps;

            // 'I' -> 'L': a count of 1 is expected, together with the codon transitions listed below.
            var description = SnpCode.TransferTo(table['I'], table['L'], out steps);
            Assert.AreEqual(1, steps);
            Assert.AreEqual("AUU->CUU ! AUC->CUC ! AUA->UUA ! AUA->CUA", description);

            // '.' as the target: the count is int.MaxValue and the description is empty.
            description = SnpCode.TransferTo(table['I'], table['.'], out steps);
            Assert.AreEqual(int.MaxValue, steps);
            Assert.AreEqual(string.Empty, description);

            // '.' as the source: same expectations as above.
            description = SnpCode.TransferTo(table['.'], table['I'], out steps);
            Assert.AreEqual(int.MaxValue, steps);
            Assert.AreEqual(string.Empty, description);
        }
예제 #2
0
        /// <summary>
        /// Writes one tab-delimited line per (mutated peptide, original peptide) pairing to
        /// <paramref name="pairedFile"/>, then saves the paired peptides to a companion
        /// "<paramref name="pairedFile"/>.peptides" file.
        /// </summary>
        /// <remarks>
        /// For every entry of <paramref name="pairedMut"/>, element 0 is treated as the mutated peptide
        /// group and elements 1..n as candidate original peptide groups. A candidate is reported only when
        /// it shares at least one charge state with the mutated group. Output lines are grouped by original
        /// sequence; groups and the lines inside them are ordered by descending peptide count.
        /// The report file is written exactly once after all pairings are collected, so a header-only file
        /// is produced when <paramref name="pairedMut"/> is empty.
        /// </remarks>
        /// <param name="aas">Amino acid table, indexed by residue character.</param>
        /// <param name="format">Peptide text format handed to <c>SavePeptidesFile</c>.</param>
        /// <param name="proMap">Sequence database keyed by the accession that <c>acParser</c> extracts from a protein name.</param>
        /// <param name="classification">Classification passed to <c>GetPepCount</c> to obtain the per-class peptide counts.</param>
        /// <param name="mutHeader">Header text placed right after the "Index" column.</param>
        /// <param name="mutPepFormat">Format whose <c>PeptideFormat</c> renders the best mutated spectrum.</param>
        /// <param name="pairedMut">Paired peptide groups (mutated group first, original candidates after it).</param>
        /// <param name="pairedFile">Path of the report file to write.</param>
        /// <returns>The path of the companion peptides file.</returns>
        /// <exception cref="Exception">A protein of an original peptide has no entry in <paramref name="proMap"/>.</exception>
        private string OutputPairedResult(Aminoacids aas, MascotPeptideTextFormat format, Dictionary <string, Sequence> proMap, IClassification <IIdentifiedPeptide> classification, string mutHeader, MascotPeptideTextFormat mutPepFormat, List <List <IGrouping <string, IIdentifiedPeptide> > > pairedMut, string pairedFile)
        {
            bool dbAnnotation = File.Exists(_uniprotXmlFile);

            List <TempResult> tr = new List <TempResult>();

            // NOTE(review): resIndex starts at 1 and is incremented before its first use, so the first
            // reported group is labelled 2 - confirm whether that is intended.
            int resIndex = 1;

            foreach (var res in pairedMut)
            {
                var mutCharges = (from r in res[0]
                                  orderby r.Spectrum.Charge
                                  select r.Spectrum.Charge).Distinct().ToList();

                var pepCounts = GetPepCount(classification, res[0]);

                int  peplabel = 0;
                bool bFound   = false;
                for (int pepIndex = 1; pepIndex < res.Count; pepIndex++)
                {
                    var charges = (from r in res[pepIndex]
                                   orderby r.Spectrum.Charge
                                   select r.Spectrum.Charge).Distinct().ToList();

                    // Only pair peptides that were observed with at least one common charge state.
                    var commonCharges = mutCharges.Intersect(charges).ToList();
                    if (commonCharges.Count == 0)
                    {
                        continue;
                    }

                    // Advance the group index once per entry, on its first reportable candidate.
                    if (!bFound)
                    {
                        bFound = true;
                        resIndex++;
                    }

                    peplabel++;

                    // Best-scoring mutated spectrum among the shared charge states.
                    var curMutSpectrum = (from r in res[0]
                                          where commonCharges.Contains(r.Spectrum.Charge)
                                          orderby r.Spectrum.Score descending
                                          select r).First();

                    var mutText = mutPepFormat.PeptideFormat.GetString(curMutSpectrum.Spectrum);

                    // Best-scoring original spectrum with the same charge as the chosen mutated spectrum.
                    var curOriginalSpectrum = (from r in res[pepIndex]
                                               where r.Spectrum.Charge == curMutSpectrum.Spectrum.Charge
                                               orderby r.Spectrum.Score descending
                                               select r).First();

                    var oriPureSeq = curOriginalSpectrum.PureSequence;

                    var mutFixSeq     = MutationUtils.ReplaceLToI(curMutSpectrum.Sequence, oriPureSeq);
                    var mutFixPureSeq = PeptideUtils.GetPureSequence(mutFixSeq);

                    int    mutationSite         = -1;
                    string equalsToModification = string.Empty;
                    string rnaediting           = string.Empty;
                    string databaseannotation   = string.Empty;

                    // "Type 1" here means both pure sequences have the same length.
                    bool isType1 = mutFixPureSeq.Length == oriPureSeq.Length;
                    if (isType1)
                    {
                        // NOTE(review): the return value is ignored; presumably the call sets mutationSite.
                        // If it leaves mutationSite at -1 the indexers below throw - confirm.
                        MutationUtils.IsMutationOneIL(mutFixPureSeq, oriPureSeq, ref mutationSite);
                        equalsToModification = spTable.GetModification(oriPureSeq[mutationSite], mutFixPureSeq[mutationSite]);
                        SnpCode.IsRnaEditing(aas[oriPureSeq[mutationSite]], aas[mutFixPureSeq[mutationSite]], out rnaediting);
                    }
                    else
                    {
                        // Lengths differ: use the last residue of the shorter sequence as the site.
                        mutationSite = Math.Min(mutFixPureSeq.Length, oriPureSeq.Length) - 1;
                    }

                    var pepMutation = MyConvert.Format("{0}{1}{2}", oriPureSeq[mutationSite], mutationSite + 1, mutFixPureSeq[mutationSite]);

                    // Resolve every protein of the original peptide in the sequence database (single lookup each).
                    List <Sequence> seqs = new List <Sequence>();
                    foreach (var p in curOriginalSpectrum.Proteins)
                    {
                        Sequence proteinSeq;
                        if (!proMap.TryGetValue(acParser.GetValue(p), out proteinSeq))
                        {
                            throw new Exception("Cannot find protein " + p + " in sequence database!");
                        }
                        seqs.Add(proteinSeq);
                    }

                    // Translate the peptide-level site into a protein-level position for each protein.
                    // Ordinal search: residue sequences are not linguistic text.
                    var proMutations = (from p in curOriginalSpectrum.Proteins
                                        let ac = acParser.GetValue(p)
                                        let seq = proMap[ac]
                                        let pos = seq.SeqString.IndexOf(oriPureSeq, StringComparison.Ordinal)
                                        let pmu = MyConvert.Format("{0}{1}{2}", oriPureSeq[mutationSite], pos + mutationSite + 1, mutFixPureSeq[mutationSite])
                                        select new { ProteinName = p, Mutation = pmu }).ToList();
                    var proMutation = (from pro in proMutations select pro.Mutation).Merge("/");

                    if (isType1 && dbAnnotation)
                    {
                        //sequence variants
                        foreach (var pro in proMutations)
                        {
                            var entry = GetUniprotEntry(pro.ProteinName);
                            if (entry == null)
                            {
                                continue;
                            }

                            foreach (var sv in entry.SequenceVariants)
                            {
                                var mut = string.Format("{0}{1}{2}", sv.Original, sv.Position, sv.Variation);
                                if (pro.Mutation.Equals(mut))
                                {
                                    databaseannotation = string.Format("{0}=SequenceVariant {1}", pro.ProteinName, sv.Description);
                                    break;
                                }
                            }

                            // The first matching annotation wins.
                            if (databaseannotation != string.Empty)
                            {
                                break;
                            }
                        }

                        //sequence conflicts (consulted only when no sequence variant matched)
                        if (databaseannotation == string.Empty)
                        {
                            foreach (var pro in proMutations)
                            {
                                var entry = GetUniprotEntry(pro.ProteinName);
                                if (entry == null)
                                {
                                    continue;
                                }

                                foreach (var sv in entry.SequenceConflicts)
                                {
                                    // Only single-position, single-character conflicts are comparable.
                                    if ((sv.BeginPosition != sv.EndPosition) || sv.Original.Length != 1)
                                    {
                                        continue;
                                    }

                                    var mut = string.Format("{0}{1}{2}", sv.Original, sv.BeginPosition, sv.Variation);
                                    if (pro.Mutation.Equals(mut))
                                    {
                                        databaseannotation = string.Format("{0}=SequenceConflict {1}", pro.ProteinName, sv.Description);
                                        break;
                                    }
                                }

                                if (databaseannotation != string.Empty)
                                {
                                    break;
                                }
                            }
                        }
                    }

                    List <string> proRefs = seqs.ConvertAll(m => m.Description);

                    int mutationCount;
                    var dnaMutation = aas[oriPureSeq[mutationSite]].TransferTo(aas[mutFixPureSeq[mutationSite]], out mutationCount);

                    var oriPepCounts = GetPepCount(classification, res[pepIndex]);

                    var line = string.Format("${0}-{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}",
                                             resIndex,
                                             peplabel,
                                             mutText,
                                             mutFixSeq,
                                             mutFixPureSeq,
                                             (from p in pepCounts
                                              select p.ToString()).Merge("\t"),
                                             pepMutation,
                                             mutationCount,
                                             dnaMutation,
                                             curOriginalSpectrum.Spectrum.Query.FileScan.ShortFileName,
                                             oriPureSeq,
                                             curOriginalSpectrum.Proteins.Merge("/"),
                                             proRefs.Merge("/"),
                                             (from p in oriPepCounts
                                              select p.ToString()).Merge("\t"),
                                             proMutation,
                                             equalsToModification,
                                             rnaediting,
                                             databaseannotation
                                             );

                    tr.Add(new TempResult()
                    {
                        Line             = line,
                        PepCount         = pepCounts.Sum(),
                        OriginalSequence = oriPureSeq
                    });
                }
            }

            // Order and write the report once, after every pairing has been collected. Doing this inside
            // the loop above would re-sort and rewrite the complete file for each entry of pairedMut.
            var groups = tr.GroupBy(m => m.OriginalSequence).ToList();
            groups.Sort((m1, m2) => - m1.Max(n => n.PepCount).CompareTo(m2.Max(n => n.PepCount)));

            List <TempResult> lines = new List <TempResult>();
            groups.ForEach(g => lines.AddRange(from l in g
                                               orderby l.PepCount descending
                                               select l));

            using (StreamWriter sw = new StreamWriter(pairedFile))
            {
                // One count column per classification key, for the mutated and the original peptide.
                string pepCountHeader         = "";
                string originalPepCountHeader = "";
                foreach (var key in ClassificationSet.Keys)
                {
                    pepCountHeader         = pepCountHeader + "\t" + key + "_PepCount";
                    originalPepCountHeader = originalPepCountHeader + "\t" + key + "_OriginalCount";
                }

                sw.WriteLine("Index\t" + mutHeader + "\tSequence\tPureSequence" + pepCountHeader + "\tPepMutation\tDNAMutationCount\tDNAMutation\tOriginalFileScan\tOriginalSequence\tOriginalProteins\tOriginalReferences"
                             + originalPepCountHeader + "\tProMutation\tEqualsToModification\tRNA-Editing\tDatabaseAnnotation");

                lines.ForEach(m => sw.WriteLine(m.Line));
            }

            var pairedPeptideFile = pairedFile + ".peptides";

            SavePeptidesFile(pairedMut, format, pairedPeptideFile);

            return(pairedPeptideFile);
        }