// Builds a list pairing each protein in the given FASTA stream with the peptides
// (from ListPeptidesForMatching) whose target sequence occurs inside the protein's
// sequence, then hands that list to SetCheckBoxListItems. Proteins with no matching
// peptide are left out. The StreamReader is disposed when parsing finishes, which
// also closes the supplied stream.
private void FindProteinMatchesWithFasta(Stream fastaFile)
{
    var associations = new List<KeyValuePair<FastaSequence, List<PeptideDocNode>>>();
    var candidatePeptides = ListPeptidesForMatching();

    using (var fastaReader = new StreamReader(fastaFile))
    {
        foreach (var record in FastaData.ParseFastaFile(fastaReader))
        {
            var protein = new FastaSequence(record.Name, null, null, record.Sequence);
            var matchedPeptides = new List<PeptideDocNode>();

            foreach (var candidate in candidatePeptides)
            {
                // TODO(yuval): does digest matter?
                // Ordinal substring test: the peptide matches when its target
                // sequence appears anywhere in the protein sequence.
                int position = protein.Sequence.IndexOf(candidate.Peptide.Target.Sequence, StringComparison.Ordinal);
                if (position >= 0)
                {
                    matchedPeptides.Add(candidate);
                }
            }

            // Only proteins with at least one matching peptide are reported.
            if (matchedPeptides.Count > 0)
            {
                var association = new KeyValuePair<FastaSequence, List<PeptideDocNode>>(protein, matchedPeptides);
                associations.Add(association);
            }
        }
    }

    SetCheckBoxListItems(associations,
        Resources.AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches_were_found_using_the_imported_fasta_file_);
}
/// <summary>
/// Obtain new names from a FASTA file. Each FASTA record whose sequence equals the
/// sequence of one or more groups in the document proposes the record's name as the
/// new name for those groups; the resulting current-name/new-name pairs are loaded
/// into the grid. A message is shown when <c>NameCount</c> is zero afterwards.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
/// <remarks>
/// Any <see cref="IOException"/> raised while reading is caught and shown in a
/// message dialog. The naming-conflict error raised by this method is itself an
/// <see cref="IOException"/>, so it is reported through that same dialog.
/// </remarks>
public void UseFastaFile(string fastaFile)
{
    try
    {
        var renameMap = new Dictionary<string, string>();
        using (var fastaReader = new StreamReader(fastaFile))
        {
            // Index the document: sequence -> distinct names of the groups having it.
            var namesBySequence = new Dictionary<string, List<string>>();
            foreach (var group in _document.MoleculeGroups)
            {
                string groupSequence = group.PeptideGroup.Sequence;
                if (!string.IsNullOrEmpty(groupSequence))
                {
                    List<string> groupNames;
                    if (!namesBySequence.TryGetValue(groupSequence, out groupNames))
                    {
                        groupNames = new List<string>();
                        namesBySequence.Add(groupSequence, groupNames);
                    }
                    if (!groupNames.Contains(group.Name))
                    {
                        groupNames.Add(group.Name);
                    }
                }
            }

            foreach (var record in FastaData.ParseFastaFile(fastaReader))
            {
                List<string> matchedNames;
                if (!namesBySequence.TryGetValue(record.Sequence, out matchedNames))
                {
                    continue;
                }

                // Removing the entry means only the first FASTA record with a given
                // sequence is used; later duplicates of that sequence are ignored.
                namesBySequence.Remove(record.Sequence);

                foreach (var currentName in matchedNames)
                {
                    // Nothing to rename when the document already uses the FASTA name.
                    if (!Equals(currentName, record.Name))
                    {
                        // The same existing name attached to a second sequence
                        // cannot be mapped to a single new name.
                        if (renameMap.ContainsKey(currentName))
                        {
                            throw new IOException(string.Format(Resources.RenameProteinsDlg_UseFastaFile_The_document_contains_a_naming_conflict_The_name__0__is_currently_used_by_multiple_protein_sequences, currentName));
                        }
                        renameMap.Add(currentName, record.Name);
                    }
                }
            }
        }

        // Deferred query, in document order, over the groups that received a new name.
        var renames =
            from nodePepGroup in _document.MoleculeGroups
            where renameMap.ContainsKey(nodePepGroup.Name)
            select new RenameProteins
            {
                CurrentName = nodePepGroup.Name,
                NewName = renameMap[nodePepGroup.Name]
            };
        _gridViewDriver.Populate(renames);

        if (NameCount == 0)
        {
            MessageDlg.Show(this, string.Format(Resources.RenameProteinsDlg_UseFastaFile_No_protein_sequence_matches_found_between_the_current_document_and_the_FASTA_file__0_, fastaFile));
        }
    }
    catch (IOException x)
    {
        MessageDlg.Show(this, string.Format(Resources.RenameProteinsDlg_UseFastaFile_Failed_reading_the_file__0__1__, fastaFile, x.Message));
    }
}