/// <summary>
        /// Builds a FASTA file containing the original database followed by every
        /// single-residue insertion variant of the peptides read from
        /// <c>options.PeptideFile</c>.
        /// </summary>
        /// <remarks>
        /// For each distinct pure sequence, one residue is inserted before each existing
        /// position (indices 0 .. Length-1; nothing is appended after the last residue).
        /// When <c>options.GenerateReversedPeptide</c> is set, each variant is followed by
        /// its reversed sequence named <c>REVERSED_n</c>, with n counting up from 1000000.
        /// </remarks>
        /// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + options.PeptideFile + " ...");
            var spectra = format.ReadFromFile(options.PeptideFile);

            // Keyed by pure sequence; when several spectra share a sequence the last peptide read wins.
            var seqMap = new Dictionary <string, IIdentifiedPeptide>();
            foreach (var spec in spectra)
            {
                seqMap[spec.Peptide.PureSequence] = spec.Peptide;
            }

            // Residues to insert: every visible amino acid except 'I'.
            var aas = (from c in new Aminoacids().GetVisibleAminoacids()
                       where c != 'I'
                       select c.ToString()).Merge("");

            var ff = new FastaFormat();

            Progress.SetMessage("inserting amino acid ...");
            using (var sw = new StreamWriter(options.OutputFile))
            {
                // Copy the source database in chunks instead of loading the whole file into
                // memory; the trailing WriteLine keeps the output identical to the previous
                // "WriteLine(ReadAllText(...))" form.
                using (var database = new StreamReader(options.DatabaseFile))
                {
                    var buffer = new char[64 * 1024];
                    int count;
                    while ((count = database.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        sw.Write(buffer, 0, count);
                    }
                }
                sw.WriteLine();

                var seqs           = seqMap.Keys.OrderBy(m => m).ToArray();
                var reversed_index = 1000000;
                foreach (var seq in seqs)
                {
                    if (seq.Length == 0)
                    {
                        // No insertion positions; skip before touching the peptide's protein list.
                        continue;
                    }

                    // The protein list does not depend on position or residue, so build it once per peptide.
                    var proteins = seqMap[seq].Proteins.Merge("/");

                    for (int i = 0; i < seq.Length; i++)
                    {
                        for (int j = 0; j < aas.Length; j++)
                        {
                            var newsequence = seq.Insert(i, aas[j].ToString());
                            var newref      = string.Format("INS_{0}_{1}{2} Insertion of {3}", seq, i, aas[j], proteins);
                            var newseq      = new Sequence(newref, newsequence);
                            ff.WriteSequence(sw, newseq);

                            if (options.GenerateReversedPeptide)
                            {
                                var revsequence = SequenceUtils.GetReversedSequence(newsequence);
                                var revref      = string.Format("REVERSED_{0}", reversed_index++);
                                var revseq      = new Sequence(revref, revsequence);
                                ff.WriteSequence(sw, revseq);
                            }
                        }
                    }
                }
            }

            return(new[] { options.OutputFile });
        }
示例#2
0
        /// <summary>
        /// Writes every protein of the groups accepted by <paramref name="validateGroup"/>
        /// to <paramref name="fastaFilename"/> in FASTA format.
        /// </summary>
        /// <remarks>
        /// Returns without creating the file when any accepted, non-empty group has a
        /// first protein whose sequence is null or empty.
        /// </remarks>
        /// <param name="fastaFilename">Path of the FASTA file to create.</param>
        /// <param name="t">Protein groups to export.</param>
        /// <param name="validateGroup">Predicate selecting the groups to export.</param>
        public static void WriteFastaFile(string fastaFilename, IList <IIdentifiedProteinGroup> t, Func <IIdentifiedProteinGroup, bool> validateGroup)
        {
            // Pre-flight pass: bail out if an accepted group carries no sequence data.
            foreach (var candidate in t)
            {
                if (!validateGroup(candidate) || candidate.Count <= 0)
                {
                    continue;
                }

                var firstSequence = candidate[0].Sequence;
                if (firstSequence == null || firstSequence.Length == 0)
                {
                    return;
                }
            }

            var fasta = new FastaFormat();

            using (var writer = new StreamWriter(fastaFilename))
            {
                foreach (IIdentifiedProteinGroup group in t)
                {
                    if (!validateGroup(group))
                    {
                        continue;
                    }

                    foreach (IIdentifiedProtein member in group)
                    {
                        fasta.WriteSequence(writer, member.Reference, member.Sequence);
                    }
                }
            }
        }
        /// <summary>
        /// Converts the sequences read from <paramref name="fileName"/> with
        /// <c>DatFormat</c> into a FASTA file that has the same name and a
        /// <c>.fasta</c> extension.
        /// </summary>
        /// <param name="fileName">Path of the input file.</param>
        /// <returns>A one-element array holding the path of the FASTA file.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
        public override IEnumerable <string> Process(string fileName)
        {
            var datFormat   = new DatFormat();
            var fastaFormat = new FastaFormat();

            string fastaFile = FileUtils.ChangeExtension(fileName, ".fasta");

            // Progress is reported against the size of the input file.
            long totalLength = new FileInfo(fileName).Length;

            using (var input = new StreamReader(fileName))
                using (var output = new StreamWriter(fastaFile))
                {
                    Progress.SetRange(0, totalLength);

                    while (true)
                    {
                        Sequence entry = datFormat.ReadSequence(input);
                        if (entry == null)
                        {
                            break;
                        }

                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }

                        Progress.SetPosition(input.GetCharpos());

                        fastaFormat.WriteSequence(output, entry);
                    }
                }

            return(new string[] { fastaFile });
        }
示例#4
0
        /// <summary>
        /// Copies the sequences of <paramref name="fileName"/> whose name matches
        /// <c>nameRegex</c> into a sibling file named
        /// <c>&lt;original name&gt;_&lt;name&gt;&lt;original extension&gt;</c>.
        /// </summary>
        /// <param name="fileName">Path of the FASTA file to filter.</param>
        /// <returns>A one-element array holding the path of the filtered file.</returns>
        /// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
        public override IEnumerable <string> Process(string fileName)
        {
            string stem       = FileUtils.ChangeExtension(fileName, "");
            string targetFile = stem + "_" + name + new FileInfo(fileName).Extension;
            var    fasta      = new FastaFormat();

            Progress.SetMessage("Processing " + fileName);
            using (var input = new StreamReader(fileName))
            {
                Progress.SetRange(0, input.BaseStream.Length);
                using (var output = new StreamWriter(targetFile))
                {
                    while (true)
                    {
                        Sequence entry = fasta.ReadSequence(input);
                        if (entry == null)
                        {
                            break;
                        }

                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }

                        Progress.SetPosition(input.BaseStream.Position);

                        // Only entries whose name matches the configured pattern are kept.
                        if (nameRegex.IsMatch(entry.Name))
                        {
                            fasta.WriteSequence(output, entry);
                        }
                    }
                }
            }

            return(new string[] { targetFile });
        }
 /// <summary>
 /// Writes the sequence referenced by <paramref name="protein"/> to
 /// <paramref name="swFasta"/>; does nothing when no writer is supplied.
 /// </summary>
 /// <param name="swFasta">Destination writer, or null to skip writing.</param>
 /// <param name="seqMap">Sequences keyed by the identifier found in <paramref name="protein"/>.</param>
 /// <param name="protein">Whitespace-delimited text whose second token is the lookup key.</param>
 private void WriteFasta(StreamWriter swFasta, Dictionary <string, Sequence> seqMap, string protein)
 {
     if (swFasta == null)
     {
         return;
     }

     // The key is the second whitespace-separated token of the protein text.
     var tokens = Regex.Split(protein, @"\s+");
     var key    = tokens[1].Trim();

     ff.WriteSequence(swFasta, seqMap[key]);
 }
示例#6
0
        /// <summary>
        /// Reads every sequence of <paramref name="fastaFile"/> and writes it, followed by
        /// its reversed counterpart, to <paramref name="sw"/>.
        /// </summary>
        /// <remarks>
        /// The forward entry is skipped when <c>options.ReversedOnly</c> is set. When
        /// <c>options.IsPseudoAminoacid</c> is set, the pseudo amino acid builder is applied
        /// after the forward entry is written and before the reversed entry is generated.
        /// </remarks>
        /// <param name="index">Running counter, incremented once per sequence and passed to <c>GetReversedSequence</c>.</param>
        /// <param name="sw">Destination writer.</param>
        /// <param name="fastaFile">Path of the FASTA file to read.</param>
        /// <param name="isContaminant">When true, references lacking the <c>CON_</c> prefix receive it.</param>
        private void ProcessFile(ref int index, StreamWriter sw, string fastaFile, bool isContaminant)
        {
            var fasta = new FastaFormat();

            using (var reader = new StreamReader(fastaFile))
            {
                Progress.SetRange(0, reader.BaseStream.Length);

                while (true)
                {
                    Sequence entry = fasta.ReadSequence(reader);
                    if (entry == null)
                    {
                        break;
                    }

                    Progress.SetPosition(reader.BaseStream.Position);

                    if (isContaminant && !entry.Reference.StartsWith("CON_"))
                    {
                        entry.Reference = "CON_" + entry.Reference;
                    }

                    if (!options.ReversedOnly)
                    {
                        fasta.WriteSequence(sw, entry);
                    }

                    if (options.IsPseudoAminoacid)
                    {
                        options.PseudoAminoacidBuilder.Build(entry);
                    }

                    index++;
                    Sequence decoy = GetReversedSequence(index, entry);

                    fasta.WriteSequence(sw, decoy);
                }
            }
        }
        /// <summary>
        /// Exports the protein sequences stored in the MSF file <paramref name="fileName"/>
        /// to <c>&lt;fileName&gt;.fasta</c>.
        /// </summary>
        /// <remarks>
        /// When the <c>peptides_decoy</c> table contains rows, each sequence is followed by a
        /// reversed copy whose reference comes from
        /// <c>MsfDatabaseParser.GetReversedReference</c>.
        /// </remarks>
        /// <param name="fileName">Path of the MSF file.</param>
        /// <returns>A one-element array holding the path of the FASTA file.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var parser = new MsfDatabaseParser(SearchEngineType.SEQUEST);
            var seqs   = parser.ParseProteinSequences(fileName);

            SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);

            // Read the decoy count and release the reader immediately; it was previously never
            // disposed. NOTE(review): whether disposing the reader also closes the underlying
            // connection depends on SQLiteDBHelper - confirm.
            bool hasDecoy;
            using (var aaReader = sqlite.ExecuteReader("select count(*) from peptides_decoy", null))
            {
                hasDecoy = aaReader.Read() && aaReader.GetInt32(0) > 0;
            }

            var result = new List <Sequence>();
            if (hasDecoy) // there are decoy database
            {
                foreach (var seq in seqs)
                {
                    result.Add(seq);
                    var revseq = new Sequence(MsfDatabaseParser.GetReversedReference(seq.Reference), SequenceUtils.GetReversedSequence(seq.SeqString));
                    result.Add(revseq);
                }
            }

            // No decoy entries were generated: export the parsed sequences as they are.
            if (result.Count == 0)
            {
                result = seqs;
            }

            var fastafile = fileName + ".fasta";

            using (var sw = new StreamWriter(fastafile))
            {
                var ff = new FastaFormat();
                foreach (var seq in result)
                {
                    ff.WriteSequence(sw, seq);
                }
            }

            return(new[] { fastafile });
        }
示例#8
0
        /// <summary>
        /// Copies <paramref name="fileName"/> to a file with the <c>.dM.fasta</c> extension,
        /// removing the leading <c>M</c> from every sequence that starts with one.
        /// </summary>
        /// <remarks>
        /// Entries that are trimmed get the reference
        /// <c>&lt;name&gt; N-terminal-M-Removed &lt;description&gt;</c>; all other entries are
        /// written unchanged.
        /// </remarks>
        /// <param name="fileName">Path of the FASTA file to process.</param>
        /// <returns>A one-element array holding the path of the written file.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            FastaFormat ff     = new FastaFormat();
            var         result = Path.ChangeExtension(fileName, ".dM.fasta");

            using (StreamReader sr = new StreamReader(fileName))
                using (StreamWriter sw = new StreamWriter(result))
                {
                    Sequence seq;
                    Progress.SetRange(1, sr.BaseStream.Length);
                    while ((seq = ff.ReadSequence(sr)) != null)
                    {
                        Progress.SetPosition(StreamUtils.GetCharpos(sr));

                        // Compare the first character directly: StartsWith("M") is culture-sensitive
                        // and can report a match when the string begins with an ignorable character,
                        // in which case Substring(1) would strip that character instead of the M.
                        if (seq.SeqString.Length > 0 && seq.SeqString[0] == 'M')
                        {
                            seq.SeqString = seq.SeqString.Substring(1);
                            seq.Reference = seq.Name + " N-terminal-M-Removed " + seq.Description;
                        }
                        ff.WriteSequence(sw, seq);
                    }
                }

            return(new string[] { result });
        }
示例#9
0
        /// <summary>
        /// Extracts from <c>database</c> the sequences whose accession is listed in
        /// <paramref name="fileName"/> and writes them to <c>&lt;fileName&gt;.fasta</c>.
        /// </summary>
        /// <remarks>
        /// Each line of the input and each sequence name is passed through
        /// <c>parser.TryParse</c>; the raw text is used when parsing fails. Accessions that
        /// were not found are listed in <c>&lt;fileName&gt;.miss</c>; when everything was found,
        /// an existing <c>.miss</c> file is deleted.
        /// </remarks>
        /// <param name="fileName">Path of the text file listing one accession per line.</param>
        /// <returns>The FASTA file path, followed by the <c>.miss</c> file path when one was written.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var outputs = new List <string>();

            // Collect the requested accessions.
            var wanted = new HashSet <string>();
            foreach (var line in File.ReadAllLines(fileName))
            {
                string parsed;
                wanted.Add(parser.TryParse(line, out parsed) ? parsed : line);
            }

            var fastaFile = fileName + ".fasta";
            outputs.Add(fastaFile);

            var located = new HashSet <string>();
            var fasta   = new FastaFormat();

            using (StreamWriter writer = new StreamWriter(fastaFile))
                using (StreamReader reader = new StreamReader(database))
                {
                    Progress.SetRange(0, reader.BaseStream.Length);

                    while (true)
                    {
                        Sequence entry = fasta.ReadSequence(reader);
                        if (entry == null)
                        {
                            break;
                        }

                        Progress.SetPosition(reader.BaseStream.Position);

                        string accession;
                        if (!parser.TryParse(entry.Name, out accession))
                        {
                            accession = entry.Name;
                        }

                        if (!wanted.Contains(accession))
                        {
                            continue;
                        }

                        located.Add(accession);
                        if (this.replaceName)
                        {
                            entry.Reference = accession;
                        }
                        fasta.WriteSequence(writer, entry);
                    }
                }

            // Whatever remains was requested but never seen in the database.
            wanted.ExceptWith(located);

            var missFile = fileName + ".miss";

            if (wanted.Count == 0)
            {
                if (File.Exists(missFile))
                {
                    File.Delete(missFile);
                }
            }
            else
            {
                using (StreamWriter writer = new StreamWriter(missFile))
                {
                    foreach (var accession in wanted)
                    {
                        writer.WriteLine(accession);
                    }
                }
                outputs.Add(missFile);
            }

            return(outputs);
        }
示例#10
0
        /// <summary>
        /// Extracts the genomic sequence of every accepted region in
        /// <c>options.InputFile</c> from <c>options.GenomeFastaFile</c> and writes the
        /// results to <c>options.OutputFile</c> in FASTA format.
        /// </summary>
        /// <remarks>
        /// Regions are de-duplicated by name (the first occurrence is kept). Regions on the
        /// '-' strand are reverse-complemented, and every extracted sequence is upper-cased.
        /// A chromosome with no regions under its own name is retried under its
        /// mitochondrial alias (M / MT, chrM / chrMT).
        /// </remarks>
        /// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">A region extends beyond the end of its chromosome.</exception>
        public override IEnumerable <string> Process()
        {
            var srItems = SequenceRegionUtils.GetSequenceRegions(options.InputFile).Where(m => options.AcceptName(m.Name)).ToList();

            // Keep only the first region for each name.
            srItems = (from sr in srItems.GroupBy(m => m.Name)
                       select sr.First()).ToList();

            // NOTE(review): this tests the region Name, while the prefix is stripped from
            // Seqname below - confirm that Name (not Seqname) is the intended field here.
            var keepChrInName = options.KeepChrInName && srItems.Any(m => m.Name.StartsWith("chr"));

            if (!keepChrInName)
            {
                srItems.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));
            }

            var srMap = srItems.ToGroupDictionary(m => m.Seqname);

            // NOTE(review): int.MaxValue is presumably the output line width (no wrapping) - confirm.
            var ff = new FastaFormat(int.MaxValue);

            using (StreamWriter sw = new StreamWriter(options.OutputFile))
            {
                using (StreamReader sr = new StreamReader(options.GenomeFastaFile))
                {
                    Sequence seq;
                    while ((seq = ff.ReadSequence(sr)) != null)
                    {
                        Progress.SetMessage("processing " + seq.Name + " ...");
                        var name = seq.Name;
                        if (!keepChrInName)
                        {
                            name = name.StringAfter("chr");
                        }

                        List <GtfItem> items;

                        if (!srMap.TryGetValue(name, out items))
                        {
                            // Retry under the alternative mitochondrial chromosome name.
                            var alias = GetMitochondrialAlias(name);
                            if (alias != null)
                            {
                                name = alias;
                                srMap.TryGetValue(name, out items);
                            }
                        }

                        if (items != null)
                        {
                            Progress.SetMessage("  there are {0} entries in {1} ...", items.Count, name);
                            foreach (var item in items)
                            {
                                // A region may end exactly on the last base: Substring only requires
                                // start + length <= Length, so only a strictly larger end is an error.
                                if (item.Start - 1 + item.Length > seq.SeqString.Length)
                                {
                                    throw new Exception(string.Format("{0} exceed chromosome {1} length {2}", item, name, seq.SeqString.Length));
                                }
                                var newseq = seq.SeqString.Substring((int)item.Start - 1, (int)item.Length);
                                if (item.Strand == '-')
                                {
                                    newseq = SequenceUtils.GetReverseComplementedSequence(newseq);
                                }

                                // Invariant casing so the output does not depend on the current culture.
                                newseq = newseq.ToUpperInvariant();

                                var newname = string.Format("{0} {1} {2}", item.Name, item.GetLocationWithoutStrand(), item.Strand);
                                var entry   = new Sequence(newname, newseq);

                                ff.WriteSequence(sw, entry);
                            }
                        }
                    }
                }
            }
            return(new string[] { options.OutputFile });
        }

        /// <summary>
        /// Returns the alternative spelling of a mitochondrial chromosome name
        /// (M / MT, chrM / chrMT), or null when <paramref name="name"/> is not one of them.
        /// </summary>
        private static string GetMitochondrialAlias(string name)
        {
            switch (name)
            {
                case "M":
                    return "MT";
                case "chrM":
                    return "chrMT";
                case "MT":
                    return "M";
                case "chrMT":
                    return "chrM";
                default:
                    return null;
            }
        }