/// <summary>
/// Builds a <c>SingleNucleotidePolymorphism</c> from the first match of the <c>mismatch</c>
/// pattern in <c>MismatchPositions</c>.
/// </summary>
/// <param name="querySequence">Read sequence; it is passed through
/// <c>SequenceUtils.GetReversedSequence</c> first when the strand is not '+'.</param>
/// <returns>
/// <c>null</c> when <c>NumberOfMismatch</c> is zero or the pattern does not match; otherwise an
/// instance built from the parsed position, the allele taken from the match (run through
/// <c>SequenceUtils.GetComplementAllele</c> when the strand is not '+') and the character found
/// at that position in the oriented query sequence.
/// </returns>
public SingleNucleotidePolymorphism GetNotGsnapMismatch(string querySequence)
        {
            // No mismatch recorded: nothing to extract.
            if (this.NumberOfMismatch == 0)
            {
                return null;
            }

            var onForwardStrand = this.Strand == '+';

            // NOTE(review): `mismatch` is declared outside this block; it is used like a Regex
            // whose group 1 is a position and group 2 starts with the allele - confirm.
            var hit = mismatch.Match(this.MismatchPositions);
            if (!hit.Success)
            {
                return null;
            }

            // Orient the query so the parsed position can index into it directly.
            var orientedQuery = onForwardStrand
                ? querySequence
                : SequenceUtils.GetReversedSequence(querySequence);

            var position       = int.Parse(hit.Groups[1].Value);
            var observedAllele = orientedQuery[position];

            var reportedAllele = hit.Groups[2].Value.First();
            if (!onForwardStrand)
            {
                reportedAllele = SequenceUtils.GetComplementAllele(reportedAllele);
            }

            return new SingleNucleotidePolymorphism(position, reportedAllele, observedAllele);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates the reversed (decoy) counterpart of <paramref name="seq"/> according to
 /// <c>options.DecoyType</c>.
 /// </summary>
 /// <param name="index">Forwarded to <c>SequenceUtils.GetReversedSequence</c> when the decoy
 /// type is <c>DecoyType.Index</c>; unused otherwise.</param>
 /// <param name="seq">Sequence whose <c>SeqString</c> is reversed and whose name and
 /// description are used to build the decoy header.</param>
 /// <returns>
 /// For <c>DecoyType.Index</c>, whatever the indexed helper overload returns. Otherwise a new
 /// <c>Sequence</c> whose header is "[prefix]DecoyKey_name DecoyKey description" and whose
 /// residues are the reversed <c>SeqString</c>.
 /// </returns>
 public Sequence GetReversedSequence(int index, Sequence seq)
 {
     // Index-style decoys are produced entirely by the helper.
     if (options.DecoyType == DecoyType.Index)
     {
         return SequenceUtils.GetReversedSequence(seq.SeqString, index);
     }

     var decoyDescription = options.DecoyKey + " " + seq.Description;
     var reversedResidues = SequenceUtils.GetReversedSequence(seq.SeqString);

     string leadingPart = string.Empty;
     string baseName;
     if (options.DecoyType != DecoyType.Middle)
     {
         // The decoy key goes in front of the complete name.
         baseName = seq.Name;
     }
     else
     {
         // The decoy key is inserted after the first "|" section, or failing that after the
         // first ":" section. NOTE(review): this relies on StringAfter returning the input
         // unchanged when the delimiter is absent - confirm against the extension method.
         baseName = seq.Name.StringAfter("|");
         if (!baseName.Equals(seq.Name))
         {
             leadingPart = seq.Name.StringBefore("|") + "|";
         }
         else
         {
             baseName = seq.Name.StringAfter(":");
             if (!baseName.Equals(seq.Name))
             {
                 leadingPart = seq.Name.StringBefore(":") + ":";
             }
         }
     }

     return new Sequence(leadingPart + options.DecoyKey + "_" + baseName + " " + decoyDescription, reversedResidues);
 }
        /// <summary>
        /// Writes a FASTA file containing the original database followed by every single
        /// amino-acid insertion variant of each distinct peptide sequence read from
        /// <c>options.PeptideFile</c>.
        /// </summary>
        /// <remarks>
        /// For each peptide, each insertion position from 0 up to (but excluding) the peptide
        /// length, and each visible amino acid except 'I', one "INS_..." entry is written. When
        /// <c>options.GenerateReversedPeptide</c> is set, each variant is followed by a
        /// "REVERSED_n" entry, numbered from 1000000.
        /// </remarks>
        /// <returns>An array holding only <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var peptideReader = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + options.PeptideFile + " ...");
            var matches = peptideReader.ReadFromFile(options.PeptideFile);

            // One peptide per pure sequence; a later spectrum overwrites an earlier one.
            var peptideBySequence = new Dictionary <string, IIdentifiedPeptide>();
            foreach (var match in matches)
            {
                peptideBySequence[match.Peptide.PureSequence] = match.Peptide;
            }

            // Candidate residues to insert. 'I' is skipped - presumably because it cannot be
            // told apart from 'L' by mass; confirm.
            var aas = new Aminoacids().GetVisibleAminoacids()
                                      .Where(c => c != 'I')
                                      .Select(c => c.ToString())
                                      .Merge("");

            var fasta = new FastaFormat();

            Progress.SetMessage("inserting amino acid ...");
            using (var writer = new StreamWriter(options.OutputFile))
            {
                // Start with a verbatim copy of the source database (plus a line terminator).
                writer.WriteLine(File.ReadAllText(options.DatabaseFile));

                var decoyCounter = 1000000;
                foreach (var peptide in peptideBySequence.Keys.OrderBy(m => m).ToArray())
                {
                    for (var insertAt = 0; insertAt < peptide.Length; insertAt++)
                    {
                        for (var k = 0; k < aas.Length; k++)
                        {
                            var residue = aas[k];

                            var variantSequence = peptide.Insert(insertAt, residue.ToString());
                            var variantHeader   = string.Format("INS_{0}_{1}{2} Insertion of {3}", peptide, insertAt, residue, peptideBySequence[peptide].Proteins.Merge("/"));
                            fasta.WriteSequence(writer, new Sequence(variantHeader, variantSequence));

                            if (!options.GenerateReversedPeptide)
                            {
                                continue;
                            }

                            var reversedSequence = SequenceUtils.GetReversedSequence(variantSequence);
                            var reversedHeader   = string.Format("REVERSED_{0}", decoyCounter++);
                            fasta.WriteSequence(writer, new Sequence(reversedHeader, reversedSequence));
                        }
                    }
                }
            }

            return new[] { options.OutputFile };
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Streams every sequence of <paramref name="fastaFile"/> and writes entries to
        /// <paramref name="sw"/>, always including an indexed reversed entry per sequence.
        /// </summary>
        /// <param name="index">Running counter; incremented once per sequence read and passed to
        /// <c>SequenceUtils.GetReversedSequence</c>.</param>
        /// <param name="sw">Destination writer.</param>
        /// <param name="fastaFile">Path of the FASTA file to read.</param>
        /// <param name="isContaminant">When true, references not already starting with "CON_"
        /// are prefixed with it.</param>
        private void ProcessFile(ref int index, StreamWriter sw, string fastaFile, bool isContaminant)
        {
            var fasta = new FastaFormat();

            using (var reader = new StreamReader(fastaFile))
            {
                // Progress is reported in bytes of the underlying stream.
                Progress.SetRange(0, reader.BaseStream.Length);

                for (Sequence entry = fasta.ReadSequence(reader); entry != null; entry = fasta.ReadSequence(reader))
                {
                    Progress.SetPosition(reader.BaseStream.Position);

                    if (isContaminant && !entry.Reference.StartsWith("CON_"))
                    {
                        entry.Reference = "CON_" + entry.Reference;
                    }

                    // NOTE(review): the forward entry is written only when ReversedOnly is true,
                    // which reads as inverted relative to the option name - confirm against the
                    // options class before changing.
                    if (options.ReversedOnly)
                    {
                        fasta.WriteSequence(sw, entry);
                    }

                    // Runs after the forward write and before the reversal below, so it can only
                    // affect the reversed entry.
                    if (options.IsPseudoAminoacid)
                    {
                        options.PseudoAminoacidBuilder.Build(entry);
                    }

                    index += 1;
                    Sequence decoy = SequenceUtils.GetReversedSequence(entry.SeqString, index);

                    fasta.WriteSequence(sw, decoy);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads proteins (description, sequence, score, coverage) from the SQLite file
        /// <paramref name="fileName"/> into a map keyed by ProteinID.
        /// </summary>
        /// <param name="fileName">Path handed to <c>SQLiteDBHelper</c>.</param>
        /// <param name="isDecoy">When true, scores come from the <c>ProteinScores_decoy</c> table
        /// and every protein's sequence and reference are replaced by their reversed forms.</param>
        /// <returns>Map from ProteinID to the populated protein; a repeated ProteinID keeps the
        /// last row read.</returns>
        public Dictionary <int, IIdentifiedProtein> ParseProteinMap(string fileName, bool isDecoy)
        {
            var            suffix = isDecoy ? "_decoy" : "";
            SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);

            var result = new Dictionary <int, IIdentifiedProtein>();

            // The suffix is one of two fixed literals, so formatting it into the statement does
            // not expose the query to external input.
            string sqlProtein = string.Format("select ps.ProteinID, pa.Description, pro.Sequence, ps.ProteinScore, ps.Coverage from ProteinAnnotations as pa, Proteins as pro, ProteinScores{0} as ps where pro.ProteinID=pa.ProteinID and pro.ProteinID=ps.ProteinID", suffix);

            // Dispose the reader as soon as all rows are consumed so it does not linger until
            // finalization (previously it was never closed).
            using (var proteinReader = sqlite.ExecuteReader(sqlProtein, null))
            {
                Progress.SetMessage("Parsing proteins ...");
                while (proteinReader.Read())
                {
                    var protein = new IdentifiedProtein();
                    var proid   = proteinReader.GetInt32(0);
                    var des     = proteinReader.GetString(1);

                    // Strip a leading FASTA '>' marker from the description, if present.
                    if (des.Length > 0 && des[0] == '>')
                    {
                        des = des.Substring(1);
                    }

                    protein.Reference = des;
                    protein.Sequence  = proteinReader.GetString(2);
                    protein.Score     = proteinReader.GetDouble(3);
                    protein.Coverage  = proteinReader.GetDouble(4);
                    result[proid]     = protein;
                }
            }

            if (isDecoy)
            {
                // Decoy entries get the reversed sequence and the matching reversed reference.
                foreach (var v in result.Values)
                {
                    v.Sequence  = SequenceUtils.GetReversedSequence(v.Sequence);
                    v.Reference = GetReversedReference(v.Reference);
                }
            }

            return result;
        }
        /// <summary>
        /// Filters the MaxQuant identifications in <c>options.SiteFile</c>, keeps one spectrum
        /// per short file name, optionally marks SILAC-labelled residues in the peptide
        /// sequences, and writes the result to "<c>SiteFile</c>.peptides".
        /// </summary>
        /// <remarks>
        /// When <c>options.IsSILAC</c> is set, every row of <c>options.MSMSFile</c> (up to the
        /// first blank line) is matched by raw file and scan number to a kept spectrum, and each
        /// residue listed in <c>options.SILACAminoacids</c> in that spectrum's peptides receives
        /// a marker character taken from <c>ModificationConsts.MODIFICATION_CHAR</c>.
        /// </remarks>
        /// <returns>An array holding only the path of the written peptides file.</returns>
        public override IEnumerable <string> Process()
        {
            var spectra = new MaxQuantPeptideTextReader().ReadFromFile(options.SiteFile);

            // Discard identifications that fail either threshold.
            spectra.RemoveAll(m => m.DeltaScore < options.MinDeltaScore || m.Probability < options.MinProbability);

            // Per short file name keep the entry that sorts last by ascending score.
            spectra = spectra.GroupBy(m => m.Query.FileScan.ShortFileName)
                             .Select(g => g.OrderBy(l => l.Score).Last())
                             .ToList();

            if (options.IsSILAC)
            {
                var spectrumByName = spectra.ToDictionary(m => m.Query.FileScan.ShortFileName);

                // Number of distinct non-letter characters already present in the sequences;
                // marker characters for the labels are taken from the positions after it.
                var usedMarkerCount = spectra.SelectMany(s => s.Sequence)
                                             .Where(c => !char.IsLetter(c))
                                             .Distinct()
                                             .Count();

                var heavyMarkers = new Dictionary <char, char>();
                foreach (var aminoacid in options.SILACAminoacids)
                {
                    usedMarkerCount++;
                    heavyMarkers[aminoacid] = ModificationConsts.MODIFICATION_CHAR[usedMarkerCount];
                }

                using (var msms = new StreamReader(options.MSMSFile))
                {
                    var columns    = msms.ReadLine().Split('\t');
                    var rawColumn  = Array.IndexOf(columns, "Raw file");
                    var scanColumn = Array.IndexOf(columns, "Scan number");

                    // Stops at end of file or at the first blank line, whichever comes first.
                    for (var row = msms.ReadLine(); !string.IsNullOrWhiteSpace(row); row = msms.ReadLine())
                    {
                        var fields     = row.Split('\t');
                        var rawFile    = fields[rawColumn];
                        var scanNumber = int.Parse(fields[scanColumn]);
                        var shortName  = new SequestFilename(rawFile, scanNumber, scanNumber, 0, "").ShortFileName;

                        IIdentifiedSpectrum matched;
                        if (!spectrumByName.TryGetValue(shortName, out matched))
                        {
                            continue;
                        }

                        foreach (var pep in matched.Peptides)
                        {
                            var original = pep.Sequence;

                            // Built back-to-front with the marker appended before its residue,
                            // then handed to GetReversedSequence. NOTE(review): presumably a
                            // plain string reversal, which would leave each marker directly
                            // after its residue - confirm.
                            var backwards = new StringBuilder();
                            for (var pos = original.Length - 1; pos >= 0; pos--)
                            {
                                var residue = original[pos];

                                char marker;
                                if (heavyMarkers.TryGetValue(residue, out marker))
                                {
                                    backwards.Append(marker);
                                }
                                backwards.Append(residue);
                            }

                            pep.Sequence = SequenceUtils.GetReversedSequence(backwards.ToString());
                        }
                    }
                }
            }

            var resultFilename = options.SiteFile + ".peptides";

            var writer = new MascotPeptideTextFormat("\t\"File, Scan(s)\"\tSequence\tCharge\tScore\tDeltaScore\tExpectValue\tPValue\tModification");
            writer.WriteToFile(resultFilename, spectra);

            return new[] { resultFilename };
        }
        /// <summary>
        /// Exports the protein sequences stored in the MSF file <paramref name="fileName"/> to
        /// "<paramref name="fileName"/>.fasta".
        /// </summary>
        /// <remarks>
        /// When the <c>peptides_decoy</c> table holds at least one row, every protein is followed
        /// by a reversed entry (reversed reference and reversed sequence); otherwise only the
        /// parsed sequences are written.
        /// </remarks>
        /// <param name="fileName">Path of the MSF (SQLite) file to read.</param>
        /// <returns>An array holding only the path of the written FASTA file.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var parser = new MsfDatabaseParser(SearchEngineType.SEQUEST);
            var seqs   = parser.ParseProteinSequences(fileName);

            SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);

            // Dispose the reader right after the single count is read so it does not linger
            // until finalization (previously it was never closed).
            bool hasDecoy;
            using (var aaReader = sqlite.ExecuteReader("select count(*) from peptides_decoy", null))
            {
                hasDecoy = aaReader.Read() && aaReader.GetInt32(0) > 0;
            }

            var result = new List <Sequence>();
            if (hasDecoy) // a decoy search was performed, so emit reversed entries as well
            {
                foreach (var seq in seqs)
                {
                    result.Add(seq);
                    var revseq = new Sequence(MsfDatabaseParser.GetReversedReference(seq.Reference), SequenceUtils.GetReversedSequence(seq.SeqString));
                    result.Add(revseq);
                }
            }

            // Without decoys (or without any sequences) fall back to the parsed list as is.
            if (result.Count == 0)
            {
                result = seqs;
            }

            var fastafile = fileName + ".fasta";

            using (var sw = new StreamWriter(fastafile))
            {
                var ff = new FastaFormat();
                foreach (var seq in result)
                {
                    ff.WriteSequence(sw, seq);
                }
            }

            return new[] { fastafile };
        }