/// <summary>
/// Counts, for every distinct motif string produced by the given records, how many
/// records with direction "Fwd", "Rev" and "Mix" contain that motif.
/// </summary>
/// <param name="proproteinInterfaceSpreadsheetRecordList">
/// Records to scan. Each record contributes at most once per motif, because the
/// motifs returned by <c>record.Motifs()</c> are de-duplicated before counting.
/// Entries are dereferenced without a null check.
/// </param>
/// <returns>
/// A dictionary keyed by motif string. A motif gets an entry as soon as it is seen,
/// even when the record's direction is none of "Fwd", "Rev" or "Mix" (all three
/// totals then stay unchanged for that record).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="proproteinInterfaceSpreadsheetRecordList"/> is null.
/// </exception>
public static Dictionary<string, MotifCounter> MotifDistinctWithCount(List<ProproteinInterfaceSpreadsheetRecord> proproteinInterfaceSpreadsheetRecordList)
{
    if (proproteinInterfaceSpreadsheetRecordList == null)
    {
        throw new ArgumentNullException(nameof(proproteinInterfaceSpreadsheetRecordList));
    }

    var result = new Dictionary<string, MotifCounter>();

    foreach (var record in proproteinInterfaceSpreadsheetRecordList)
    {
        foreach (var motif in record.Motifs().Distinct())
        {
            // Single hash lookup per motif instead of ContainsKey + Add + indexer.
            MotifCounter motifCounter;
            if (!result.TryGetValue(motif, out motifCounter))
            {
                motifCounter = new MotifCounter()
                {
                    Motif = motif,
                    MotifTooGeneral = ProproteinInterfaceMotif.IsMotifTooGeneral(motif),
                    TotalFwd = 0,
                    TotalRev = 0,
                    TotalMix = 0,
                };
                result.Add(motif, motifCounter);
            }

            // Any other direction value (including null) leaves the totals untouched.
            switch (record.Direction)
            {
                case "Fwd":
                    motifCounter.TotalFwd++;
                    break;
                case "Rev":
                    motifCounter.TotalRev++;
                    break;
                case "Mix":
                    motifCounter.TotalMix++;
                    break;
            }
        }
    }

    return result;
}
/// <summary>
/// Builds a single spreadsheet record summarising a group of protein interfaces:
/// how many there are, their direction, their distinct lengths, and the motifs
/// derived from the full, interaction-only and non-interaction-only sequences.
/// </summary>
/// <param name="vectorProteinInterfaceWholeList">The protein interfaces to summarise.</param>
/// <returns>
/// The populated record, or null when <paramref name="vectorProteinInterfaceWholeList"/>
/// is null or empty.
/// </returns>
public static ProproteinInterfaceSpreadsheetRecord Record(List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
{
    if (vectorProteinInterfaceWholeList == null || vectorProteinInterfaceWholeList.Count == 0)
    {
        return null;
    }

    // Three parallel views of every interface: the full sequence, the sequence with
    // non-interacting positions blanked, and the sequence with interacting positions blanked.
    var proteinInterfaceSequenceList = vectorProteinInterfaceWholeList.Select(a => a.ProteinInterfaceAminoAcids1L()).ToList();
    var proteinInterfaceInteractionSequenceList = vectorProteinInterfaceWholeList.Select(a => MaskProteinInterfaceSequence(a, true)).ToList();
    var proteinInterfaceNonInteractionSequenceList = vectorProteinInterfaceWholeList.Select(a => MaskProteinInterfaceSequence(a, false)).ToList();

    var distinctProteinInterfaceLengths = vectorProteinInterfaceWholeList.Select(a => a.ProteinInterfaceLength).Distinct().ToArray();

    var directionFwd = vectorProteinInterfaceWholeList.Count(a => !a.ReversedSequence);
    var directionRev = vectorProteinInterfaceWholeList.Count - directionFwd;

    // The list is non-empty here, so "neither all forward nor all reversed" means mixed.
    string direction;
    if (directionFwd > 0 && directionRev == 0)
    {
        direction = "Fwd";
    }
    else if (directionFwd == 0 && directionRev > 0)
    {
        direction = "Rev";
    }
    else
    {
        direction = "Mix";
    }

    var record = new ProproteinInterfaceSpreadsheetRecord
    {
        MotifName = "",
        MotifSource = "",
        TotalFound = vectorProteinInterfaceWholeList.Count.ToString(),
        Direction = direction,
        ProteinInterfaceLength = string.Join(", ", distinctProteinInterfaceLengths),

        MotifAminoAcids = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidMotif(proteinInterfaceSequenceList),
        MotifInteractionAminoAcids = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidMotif(proteinInterfaceInteractionSequenceList),
        MotifNonInteractionAminoAcids = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidMotif(proteinInterfaceNonInteractionSequenceList),

        MotifPhysicochemical = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Physicochemical, proteinInterfaceSequenceList),
        MotifInteractionPhysicochemical = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Physicochemical, proteinInterfaceInteractionSequenceList),
        MotifNonInteractionPhysicochemical = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Physicochemical, proteinInterfaceNonInteractionSequenceList),

        MotifHydrophobicity = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Hydrophobicity, proteinInterfaceSequenceList),
        MotifInteractionHydrophobicity = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Hydrophobicity, proteinInterfaceInteractionSequenceList),
        MotifNonInteractionHydrophobicity = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.Hydrophobicity, proteinInterfaceNonInteractionSequenceList),

        MotifPdbSum = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.PdbSum, proteinInterfaceSequenceList),
        MotifInteractionPdbSum = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.PdbSum, proteinInterfaceInteractionSequenceList),
        MotifNonInteractionPdbSum = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.PdbSum, proteinInterfaceNonInteractionSequenceList),

        MotifUniProtKb = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.UniProtKb, proteinInterfaceSequenceList),
        MotifInteractionUniProtKb = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.UniProtKb, proteinInterfaceInteractionSequenceList),
        MotifNonInteractionUniProtKb = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidGroupMotif(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.UniProtKb, proteinInterfaceNonInteractionSequenceList),
    };

    // Only slots 6 and above of the common-properties arrays are filled; lower slots keep
    // whatever the record was constructed with. The threshold passed on is index + 1.
    // NOTE(review): the reason for the lower bound of 6 is not visible here - confirm.
    // All three arrays are indexed using MotifCommonProperties.Length, so they are
    // assumed to be at least that long.
    for (var index = record.MotifCommonProperties.Length - 1; index >= 6; index--)
    {
        record.MotifCommonProperties[index] = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidCommonPropertiesMotif(proteinInterfaceSequenceList, AminoAcidPropertyMatchType.MininumMatch, index + 1);
        record.MotifInteractionsCommonProperties[index] = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidCommonPropertiesMotif(proteinInterfaceInteractionSequenceList, AminoAcidPropertyMatchType.MininumMatch, index + 1);
        record.MotifNonInteractionsCommonProperties[index] = ProproteinInterfaceMotif.FindProteinInterfaceAminoAcidCommonPropertiesMotif(proteinInterfaceNonInteractionSequenceList, AminoAcidPropertyMatchType.MininumMatch, index + 1);
    }

    return record;
}

/// <summary>
/// Returns the interface's one-letter amino acid sequence with every position whose
/// interaction flag differs from <paramref name="keepInteracting"/> replaced by a space.
/// </summary>
/// <param name="vectorProteinInterfaceWhole">The interface whose sequence is masked.</param>
/// <param name="keepInteracting">
/// True to keep positions flagged as interacting; false to keep the positions that are not.
/// </param>
/// <returns>The masked sequence, the same length as the unmasked one.</returns>
private static string MaskProteinInterfaceSequence(VectorProteinInterfaceWhole vectorProteinInterfaceWhole, bool keepInteracting)
{
    // Fetch both once per interface rather than once per character.
    // NOTE(review): assumes both accessors are side-effect free - confirm.
    var aminoAcids = vectorProteinInterfaceWhole.ProteinInterfaceAminoAcids1L();
    var interactionBools = vectorProteinInterfaceWhole.InteractionBools();

    return string.Join("", aminoAcids.Select((aminoAcid, position) => interactionBools[position] == keepInteracting ? aminoAcid : ' '));
}