/// <summary>
/// Debug helper: loads the Anthesis FASTA database, finds the entry whose
/// description equals a hard-coded ID, translates its base sequence in the
/// three reading-frame shifts (forward and reverse) and prints to the console
/// every translated sequence that contains a hard-coded peptide.
/// </summary>
public static void MatchPeptideToProtein()
{
    string pepSeq = "AAIKEESEGKLK";
    // Other IDs left over from earlier runs: comp4598_c0_seq1, comp1011_c0_seq1
    string protID = "comp320_c0_seq1";

    NucleicAcid.InitHash();

    // The reader output is materialized into a list, so the stream is only
    // needed inside this scope; dispose it instead of leaking the file handle.
    List<Protein> proteins1;
    using (FileStream protein_fasta_database1 = new FileStream(@"G:\Thibault\Olivier\Databases\DMatton\Matton_Illumina_Anthesis_WithReverse.fasta", FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        proteins1 = new List<Protein>(ProteinFastaReader.ReadProteins(protein_fasta_database1, false));
    }

    foreach (Protein prot in proteins1)
    {
        // IDs are machine identifiers: compare ordinally rather than with the
        // culture-sensitive string.CompareTo.
        if (!string.Equals(prot.Description, protID, StringComparison.Ordinal))
        {
            continue;
        }

        for (int shift = 0; shift < 3; shift++)
        {
            ReportPeptideInFrame(prot, pepSeq, protID, shift, false, "FORWARD");
            ReportPeptideInFrame(prot, pepSeq, protID, shift, true, "REVERSE");
        }
    }
}

/// <summary>
/// Translates <paramref name="prot"/> for one shift/direction and prints a line
/// for each translated sequence that contains <paramref name="pepSeq"/>.
/// </summary>
/// <param name="prot">Entry whose BaseSequence is translated.</param>
/// <param name="pepSeq">Peptide sequence to search for.</param>
/// <param name="protID">Identifier echoed in the console message.</param>
/// <param name="shift">Reading-frame shift passed to NucleicAcid.ConvertNA3ToAAs.</param>
/// <param name="reverse">Direction flag passed to NucleicAcid.ConvertNA3ToAAs.</param>
/// <param name="label">Prefix of the console message ("FORWARD" or "REVERSE").</param>
private static void ReportPeptideInFrame(Protein prot, string pepSeq, string protID, int shift, bool reverse, string label)
{
    foreach (string seq in NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, reverse))
    {
        // Sanity check: flags translated fragments that still contain '*' or 'X'
        // (presumably stop / unknown residues that should have been split out -- TODO confirm).
        if (seq.Contains("*") || seq.Contains("X"))
        {
            Console.WriteLine("AWRA");
        }

        if (seq.Contains(pepSeq))
        {
            Console.WriteLine(label + " (" + shift + ") : Found " + pepSeq + " in " + protID + " [" + seq + "]");
        }
    }
}
/// <summary>
/// For every row of the peptide CSV, lists which translated reading frames
/// (3 shifts x forward/backward) of the entries in two FASTA databases contain
/// the row's peptide sequence, and writes the rows with that list appended
/// to a new CSV file.
/// </summary>
/// <remarks>
/// The header row (index 0) is copied unchanged. Each following row gets
/// "," plus the ';'-terminated matching frame keys appended. "Zut" is printed
/// to the console for rows with no match at all.
/// </remarks>
public static void YangLiuPeptidesWithAllProteins()
{
    // An earlier variant read ...\Clustering_186716\Identifications.csv (peptide in column 13).
    vsCSV csvPeptides = new vsCSV(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\Cluster_Intensity_peptides_NormP.csv");
    vsCSVWriter writer = new vsCSVWriter(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\ProteinsPerPeptidesFromDatabases_AllReadingFrames.csv");

    NucleicAcid.InitHash();

    Dictionary<string, List<string>> protein1AAs = TranslateFastaInAllReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\Matton_Illumina_Anthesis_WithReverse.fasta");
    Dictionary<string, List<string>> protein2AAs = TranslateFastaInAllReadingFrames(@"G:\Thibault\Olivier\Databases\DMatton\mattond_20110418_WithReverse_EditedJuly2013.fasta");

    writer.AddLine(csvPeptides.LINES_LIST[0]);

    for (int i = 1; i < csvPeptides.LINES_LIST.Count; i++)
    {
        string[] splits = csvPeptides.LINES_LIST[i].Split(vsCSV._Generic_Separator);
        // Column index 4 is read as the peptide sequence to search for.
        string seq = splits[4];

        StringBuilder sb = new StringBuilder();
        AppendMatchingFrameKeys(protein1AAs, seq, sb);
        AppendMatchingFrameKeys(protein2AAs, seq, sb);

        if (sb.Length == 0)
        {
            Console.WriteLine("Zut");
        }

        writer.AddLine(csvPeptides.LINES_LIST[i] + "," + sb.ToString().Trim());
    }

    writer.WriteToFile();
}

/// <summary>
/// Reads a FASTA file and translates every entry's base sequence in the three
/// reading-frame shifts, forward and backward.
/// </summary>
/// <param name="fastaPath">Path of the FASTA database to read.</param>
/// <returns>
/// Map from "description | Reading Frame n | Forward/Backward" to the list of
/// sequences returned by NucleicAcid.ConvertNA3ToAAs for that frame.
/// </returns>
private static Dictionary<string, List<string>> TranslateFastaInAllReadingFrames(string fastaPath)
{
    // The reader output is materialized into a list, so the stream can be
    // disposed right after; the original code never closed it.
    List<Protein> proteins;
    using (FileStream fastaStream = new FileStream(fastaPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        proteins = new List<Protein>(ProteinFastaReader.ReadProteins(fastaStream, false));
    }

    Dictionary<string, List<string>> translations = new Dictionary<string, List<string>>();
    foreach (Protein prot in proteins)
    {
        for (int shift = 0; shift < 3; shift++)
        {
            // NOTE: Dictionary.Add throws ArgumentException if two entries share a description.
            translations.Add(prot.Description + " | Reading Frame " + shift + " | Forward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, false));
            translations.Add(prot.Description + " | Reading Frame " + shift + " | Backward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, true));
        }
    }

    return translations;
}

/// <summary>
/// Appends "key;" to <paramref name="sb"/> for every frame in
/// <paramref name="translations"/> that has at least one sequence
/// containing <paramref name="peptide"/>.
/// </summary>
/// <param name="translations">Frame key to translated sequences map.</param>
/// <param name="peptide">Peptide sequence to search for.</param>
/// <param name="sb">Accumulator receiving the matching keys.</param>
private static void AppendMatchingFrameKeys(Dictionary<string, List<string>> translations, string peptide, StringBuilder sb)
{
    // Enumerate key/value pairs directly to avoid a second lookup per key.
    foreach (KeyValuePair<string, List<string>> frame in translations)
    {
        foreach (string protSeq in frame.Value)
        {
            if (protSeq.Contains(peptide))
            {
                sb.Append(frame.Key + ";");
                break; // one hit is enough to report this frame
            }
        }
    }
}