/// <summary>
/// Builds the tree-header label for a protein interface vector and passes it through
/// <c>Newick.TreeHeaderSafeName</c>.
/// </summary>
/// <remarks>
/// The raw label is the concatenation of:
/// the interface id (<c>FullProteinInterfaceId.ToString()</c>), <c>"_"</c>,
/// <c>"R_"</c> or <c>"F_"</c> depending on <c>ReversedSequence</c>,
/// <c>"RI_"</c> when <c>ReversedInteractions</c> is set (otherwise nothing),
/// one digit (1/0) per entry of <c>InteractionBools()</c>, <c>"_"</c>,
/// and a final 1/0 telling whether any part has <c>InteractionToNonProteinInterface</c> set.
/// </remarks>
/// <param name="a">The protein interface vector to describe.</param>
/// <returns>The label as returned by <c>Newick.TreeHeaderSafeName</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="a"/> is null.</exception>
public static string VectorProteinInterfaceWholeTreeHeader(VectorProteinInterfaceWhole a)
{
    if (a == null)
    {
        throw new ArgumentNullException(nameof(a));
    }

    var interfaceId = a.FullProteinInterfaceId.ToString();
    var sequenceDirection = a.ReversedSequence ? "R_" : "F_";
    var interactionDirection = a.ReversedInteractions ? "RI_" : "";
    var interactionBits = string.Join("", a.InteractionBools().Select(Convert.ToInt32));

    // Any() stops at the first matching part; Count(...) > 0 always walked the whole list.
    var nonInterfaceFlag = a.VectorProteinInterfacePartList.Any(b => b.InteractionToNonProteinInterface) ? 1 : 0;

    return Newick.TreeHeaderSafeName(interfaceId + "_" + sequenceDirection + interactionDirection + interactionBits + "_" + nonInterfaceFlag);
}
/// <summary>
/// Produces motif spreadsheet records for the given protein interface vectors, grouped three ways:
/// by interface length, by secondary structure string, and by each of the interaction vector
/// strings (vector types 0 to 3).
/// </summary>
/// <remarks>
/// For every distinct group value up to three records are emitted, in this order: all members,
/// members with <c>ReversedSequence == false</c>, members with <c>ReversedSequence == true</c>.
/// A subset is skipped when <c>Record</c> returns null for it.
/// </remarks>
/// <param name="vectorProteinInterfaceWholeList">The protein interface vectors to summarise.</param>
/// <returns>The records in group order (length, secondary structure, vector types 0..3).</returns>
/// <exception cref="ArgumentNullException"><paramref name="vectorProteinInterfaceWholeList"/> is null.</exception>
public static List<ProproteinInterfaceSpreadsheetRecord> MotifSpreadsheetData(List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
{
    if (vectorProteinInterfaceWholeList == null)
    {
        throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
    }

    var result = new List<ProproteinInterfaceSpreadsheetRecord>();

    // motif by proteinInterface length
    var distinctProteinInterfaceLengths = vectorProteinInterfaceWholeList.Select(a => a.ProteinInterfaceLength).Distinct().ToArray();

    foreach (var proteinInterfaceLength in distinctProteinInterfaceLengths)
    {
        AppendMotifRecords(
            result,
            vectorProteinInterfaceWholeList,
            b => b.ProteinInterfaceLength == proteinInterfaceLength,
            "ProteinInterface Length " + proteinInterfaceLength,
            "Length");
    }

    // motif by common secondary structure pattern
    var distinctSecondaryStructures = vectorProteinInterfaceWholeList.Select(a => a.SecondaryStructure).Distinct().ToList();

    foreach (var secondaryStructure in distinctSecondaryStructures)
    {
        AppendMotifRecords(
            result,
            vectorProteinInterfaceWholeList,
            b => b.SecondaryStructure == secondaryStructure,
            secondaryStructure,
            "Secondary Structure");
    }

    // motif by common interaction vector pattern
    for (var vectorType = 0; vectorType < 4; vectorType++)
    {
        var distinctVectors = vectorProteinInterfaceWholeList.Select(a => a.VectorString(vectorType)).Distinct().ToList();

        foreach (var vector in distinctVectors)
        {
            AppendMotifRecords(
                result,
                vectorProteinInterfaceWholeList,
                b => b.VectorString(vectorType) == vector,
                vector,
                VectorProteinInterfaceWhole.VectorStringDescription(vectorType));
        }
    }

    return result;
}

/// <summary>
/// Appends up to three records for one motif group: all members, forward-sequence members,
/// then reversed-sequence members. Subsets for which <c>Record</c> returns null are skipped.
/// </summary>
private static void AppendMotifRecords(
    List<ProproteinInterfaceSpreadsheetRecord> result,
    List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList,
    Func<VectorProteinInterfaceWhole, bool> isMember,
    string motifName,
    string motifSource)
{
    // null = no direction filter, false = forward sequences only, true = reversed sequences only.
    var sequenceDirections = new bool?[] { null, false, true };

    foreach (var reversed in sequenceDirections)
    {
        // Each subset is filtered from the full list into a fresh list, exactly as before,
        // so Record never sees a list shared with another subset.
        var members = vectorProteinInterfaceWholeList
            .Where(b => isMember(b) && (!reversed.HasValue || b.ReversedSequence == reversed.Value))
            .ToList();

        var record = Record(members);

        if (record == null)
        {
            continue;
        }

        record.MotifName = motifName;
        record.MotifSource = motifSource;
        result.Add(record);
    }
}
/// <summary>
/// Builds amino acid distribution records per pattern. Every protein interface vector is
/// assigned a pattern string selected by <paramref name="vectorType"/>, and four compositions
/// are accumulated per pattern: whole protein chain, interface residues, interacting interface
/// residues and non-interacting interface residues.
/// </summary>
/// <param name="vectorProteinInterfaceWholeList">The protein interface vectors to analyse.</param>
/// <param name="pdbIdList">Must not be null; it is otherwise not read by this method.</param>
/// <param name="seqList">
/// Sequences searched for the chain of each interface: a sequence is used when its parsed
/// PDB id equals the interface's <c>ProteinId</c> and its parsed chain id equals
/// <c>SpreadsheetFileHandler.AlphabetLetterRollOver</c> of the interface's <c>ChainId</c>.
/// </param>
/// <param name="vectorType">
/// 0 to 3: pattern is <c>VectorString(vectorType)</c>; 4: pattern is <c>SecondaryStructure</c>;
/// 5: pattern is <c>"ProteinInterface Length " + ProteinInterfaceLength</c>.
/// </param>
/// <returns>
/// Four records per pattern, in the order "Protein", "ProteinInterface",
/// "ProteinInterface interactions", "ProteinInterface non interactions".
/// </returns>
/// <exception cref="ArgumentNullException">Any of the list arguments is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="vectorType"/> is outside 0..5. The check runs per list item, so it is not
/// raised when <paramref name="vectorProteinInterfaceWholeList"/> is empty.
/// </exception>
public static List<AminoAcidDistributionSpreadsheetRecord> PatternDistributionSpreadsheetRecords(List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, List<string> pdbIdList, List<ISequence> seqList, int vectorType)
{
    if (vectorProteinInterfaceWholeList == null)
    {
        throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
    }

    if (pdbIdList == null)
    {
        throw new ArgumentNullException(nameof(pdbIdList));
    }

    if (seqList == null)
    {
        throw new ArgumentNullException(nameof(seqList));
    }

    var result = new List<AminoAcidDistributionSpreadsheetRecord>();

    // patterns
    var patternProteinDictionary = new Dictionary<string, AminoAcidChainComposition>();
    var patternProteinInterfaceDictionary = new Dictionary<string, AminoAcidChainComposition>();
    var patternProteinInterfaceInteractionDictionary = new Dictionary<string, AminoAcidChainComposition>();
    var patternProteinInterfaceNonInteractionDictionary = new Dictionary<string, AminoAcidChainComposition>();

    foreach (var vectorProteinInterfaceWhole in vectorProteinInterfaceWholeList)
    {
        var interactionBools = vectorProteinInterfaceWhole.InteractionBools();

        string pattern;

        if (vectorType >= 0 && vectorType <= 3)
        {
            pattern = vectorProteinInterfaceWhole.VectorString(vectorType);
        }
        else if (vectorType == 4)
        {
            pattern = vectorProteinInterfaceWhole.SecondaryStructure;
        }
        else if (vectorType == 5)
        {
            pattern = "ProteinInterface Length " + vectorProteinInterfaceWhole.ProteinInterfaceLength;
        }
        else
        {
            throw new ArgumentOutOfRangeException(nameof(vectorType));
        }

        // Look each composition up once and keep the reference, instead of re-indexing the
        // dictionaries for every residue.
        var proteinComposition = GetOrAddComposition(patternProteinDictionary, pattern);
        var interfaceComposition = GetOrAddComposition(patternProteinInterfaceDictionary, pattern);
        var interactionComposition = GetOrAddComposition(patternProteinInterfaceInteractionDictionary, pattern);
        var nonInteractionComposition = GetOrAddComposition(patternProteinInterfaceNonInteractionDictionary, pattern);

        var aminoAcids1L = vectorProteinInterfaceWhole.ProteinInterfaceAminoAcids1L();

        for (int index = 0; index < aminoAcids1L.Length; index++)
        {
            var c = aminoAcids1L[index];

            interfaceComposition.IncrementAminoAcidCount(c);

            if (interactionBools[index])
            {
                interactionComposition.IncrementAminoAcidCount(c);
            }
            else
            {
                nonInteractionComposition.IncrementAminoAcidCount(c);
            }
        }

        proteinComposition.NumberSamples++;
        interfaceComposition.NumberSamples++;
        interactionComposition.NumberSamples++;
        nonInteractionComposition.NumberSamples++;

        var fullProteinInterfaceId = vectorProteinInterfaceWhole.FullProteinInterfaceId;

        var bsSeqList = seqList.Where(a =>
        {
            // Parse the sequence id once per sequence rather than once per compared field.
            var sequenceId = SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID);

            return sequenceId.PdbId == fullProteinInterfaceId.ProteinId &&
                   sequenceId.ChainId == SpreadsheetFileHandler.AlphabetLetterRollOver(fullProteinInterfaceId.ChainId);
        }).ToList();

        foreach (var chain in bsSeqList)
        {
            var seq = chain.ConvertToString();

            foreach (var c in seq)
            {
                proteinComposition.IncrementAminoAcidCount(c);
            }
        }
    }

    foreach (var kvp in patternProteinDictionary)
    {
        var vectorTypeStr = VectorProteinInterfaceWhole.VectorStringDescription(vectorType);

        // All four dictionaries receive the same keys in the loop above, so these lookups cannot miss.
        result.Add(BuildDistributionRecord(kvp.Key, vectorTypeStr, "Protein", kvp.Value));
        result.Add(BuildDistributionRecord(kvp.Key, vectorTypeStr, "ProteinInterface", patternProteinInterfaceDictionary[kvp.Key]));
        result.Add(BuildDistributionRecord(kvp.Key, vectorTypeStr, "ProteinInterface interactions", patternProteinInterfaceInteractionDictionary[kvp.Key]));
        result.Add(BuildDistributionRecord(kvp.Key, vectorTypeStr, "ProteinInterface non interactions", patternProteinInterfaceNonInteractionDictionary[kvp.Key]));
    }

    return result;
}

/// <summary>
/// Returns the composition stored for <paramref name="pattern"/>, creating and storing an empty one on first use.
/// </summary>
private static AminoAcidChainComposition GetOrAddComposition(Dictionary<string, AminoAcidChainComposition> compositions, string pattern)
{
    AminoAcidChainComposition composition;

    if (!compositions.TryGetValue(pattern, out composition))
    {
        composition = new AminoAcidChainComposition();
        compositions.Add(pattern, composition);
    }

    return composition;
}

/// <summary>
/// Creates one distribution record for a pattern/metric pair from an accumulated composition:
/// sample count, total amino acid count and the per-group values of the percentage conversion.
/// </summary>
private static AminoAcidDistributionSpreadsheetRecord BuildDistributionRecord(string pattern, string type, string metric, AminoAcidChainComposition composition)
{
    var record = new AminoAcidDistributionSpreadsheetRecord();

    record.Pattern = pattern;
    record.Type = type;
    record.Metric = metric;

    // number of samples
    record.NumberOfSamples = composition.NumberSamples;

    // number of amino acids
    record.TotalAminoAcids = composition.AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();

    var percentage = AminoAcidChainComposition.ConvertToPercentage(composition);

    foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups)))
    {
        var groupItemsTotal = AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups);

        for (var groupItemIndex = 0; groupItemIndex < groupItemsTotal; groupItemIndex++)
        {
            record.Distribution[(int)enumAminoAcidGroups][groupItemIndex] = percentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][groupItemIndex];
        }
    }

    return record;
}
/*
 * Disabled helper, kept for reference only (its single call site below is commented out as well):
 *
 * private static int[] LastPdbChainResidueIndexes(string pdbFilename)
 * {
 *     //var result = new Dictionary<string,int>();
 *     var pdbFile = new ProteinDataBankFile(pdbFilename, new []{ ATOM_Record.ATOM_Field.FieldName });
 *
 *     //var x = ProteinDataBankFileOperations.PdbAtomAcidList();
 *
 *     var atomList = pdbFile.ProteinDataBankFileRecordList.Where(a => a.GetType() == typeof (ATOM_Record)).Select(a=>(ATOM_Record)a).ToList();
 *
 *     var chainIdList = atomList.Select(a=>a.chainID.FieldValue.ToUpperInvariant()).Distinct().ToList();
 *
 *     var result = new int[chainIdList.Count];
 *
 *     for (int index = 0; index < chainIdList.Count; index++)
 *     {
 *         var chainId = chainIdList[index];
 *         var maxResidueIndex = atomList.Where(a => a.chainID.FieldValue.ToUpperInvariant() == chainId).Select(a => int.Parse(a.resSeq.FieldValue)).Max();
 *
 *         result[index] = maxResidueIndex;
 *     }
 *
 *     return result;
 * }
 */

/// <summary>
/// Creates a <c>VectorProteinInterfaceWhole</c> from interface sequence/position data, with one
/// <c>VectorProteinInterfacePart</c> per entry of <c>AminoAcidSequenceAllResidueSequenceIndexes</c>.
/// </summary>
/// <param name="pdbFilename">PDB file name handed to the secondary structure loader.</param>
/// <param name="proteinInterfaceSequenceAndPositionData">Source data for the interface and its residues.</param>
/// <param name="reversedSequence">When true, the finished part list is reversed.</param>
/// <param name="reversedInteractions">When true, each part's copied interaction flag array is reversed.</param>
/// <returns>The populated vector.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="pdbFilename"/> or <paramref name="proteinInterfaceSequenceAndPositionData"/> is null.
/// </exception>
private static VectorProteinInterfaceWhole MakeVectorProteinInterfaceWhole(string pdbFilename, ProteinInterfaceSequenceAndPositionData proteinInterfaceSequenceAndPositionData, bool reversedSequence, bool reversedInteractions)
{
    if (pdbFilename == null)
    {
        throw new ArgumentNullException(nameof(pdbFilename));
    }

    if (proteinInterfaceSequenceAndPositionData == null)
    {
        throw new ArgumentNullException(nameof(proteinInterfaceSequenceAndPositionData));
    }

    ProteinInterfaceAminoAcidMetaData[] residues = proteinInterfaceSequenceAndPositionData.AminoAcidSequenceAllResidueSequenceIndexes;

    var whole = new VectorProteinInterfaceWhole();
    whole.FullProteinInterfaceId = new FullProteinInterfaceId(proteinInterfaceSequenceAndPositionData.FullProteinInterfaceId);
    whole.ProteinInterfaceLength = proteinInterfaceSequenceAndPositionData.ProteinInterfaceLength;
    whole.FirstResidueSequenceIndex = proteinInterfaceSequenceAndPositionData.StartPosition;
    whole.LastResidueSequenceIndex = proteinInterfaceSequenceAndPositionData.EndPosition;
    whole.ReversedInteractions = reversedInteractions;
    whole.ReversedSequence = reversedSequence;

    //whole.FullSequenceLength = LastPdbChainResidueIndexes(pdbFilename)[whole.FullProteinInterfaceId.ChainId];

    // The secondary structure is looked up with the values just stored on the new object.
    whole.SecondaryStructure = ProteinInterfaceSecondaryStructureLoader.ProteinInterfaceSecondaryStructure(
        pdbFilename,
        SpreadsheetFileHandler.AlphabetLetterRollOver(whole.FullProteinInterfaceId.ChainId),
        whole.FirstResidueSequenceIndex,
        whole.LastResidueSequenceIndex,
        whole.ReversedSequence);

    var residueIndex = 0;

    foreach (var residue in residues)
    {
        var sourceFlags = residue.OppoproteinInterfaceInteractions;

        var part = new VectorProteinInterfacePart(sourceFlags.Length);
        part.FullProteinInterfaceId = new FullProteinInterfaceId(proteinInterfaceSequenceAndPositionData.FullProteinInterfaceId);
        part.ResidueId = residueIndex;
        part.SourceAminoAcid1L = residue.ResidueName1L;
        part.SourceAminoAcid3L = residue.ResidueName3L;
        part.InteractionAminoAcids1L = residue.ProteinInterfaceInteractionResidueNames1L;
        part.InteractionNonProteinInterfaceAminoAcids1L = residue.NonProteinInterfaceInteractionResidueNames1L;
        part.InteractionFlagBools = new bool[sourceFlags.Length];

        whole.VectorProteinInterfacePartList.Add(part);

        // The flags are copied so that reversing them leaves the source meta data untouched.
        Array.Copy(sourceFlags, part.InteractionFlagBools, sourceFlags.Length);

        if (reversedInteractions)
        {
            Array.Reverse(part.InteractionFlagBools);
        }

        part.InteractionToNonProteinInterface = residue.ProteinInterfaceInteractionType.HasFlag(ProteinInterfaceInteractionType.InteractionWithNonProteinInterface);

        residueIndex++;
    }

    if (whole.ReversedSequence)
    {
        whole.VectorProteinInterfacePartList.Reverse();
    }

    return whole;
}
/// <summary>
/// Slides the shorter protein interface along the longer one and reports the smallest summed
/// part distance over all offsets.
/// </summary>
/// <remarks>
/// With A the longer interface (length 5) and B the shorter one (length 3) there are
/// LEN(A) - LEN(B) + 1 = 3 offsets to try:
/// <code>
///          part:  0    1    2    3    4
///   A:            0000 0000 0000 0000 0000
///   B, offset 0:  0000 0000 0000 ____ ____
///   B, offset 1:  ____ 0000 0000 0000 ____
///   B, offset 2:  ____ ____ 0000 0000 0000
/// </code>
/// A part of A that is covered by B is scored with <c>CustomDistance</c> on copies of both flag
/// arrays plus <c>VectorDistanceValue</c> on the two non-interface interaction flags. A part of A
/// outside B (the "____" cells) is scored with <c>VectorDistanceValue</c> against an all-false
/// array of the same length, plus <c>VectorDistanceValue</c> of its non-interface flag against
/// false. When both lengths are equal, the second argument is treated as the longer one.
/// Only one direction is evaluated (<c>directionCount</c> is 1), so the reversal branch for
/// direction 1 does not run; the pessimistic distance output is disabled.
/// </remarks>
/// <param name="vectorProteinInterfaceWhole1">First interface vector.</param>
/// <param name="vectorProteinInterfaceWhole2">Second interface vector.</param>
/// <param name="vectorDistanceMeasurementValues">Settings forwarded to the distance functions.</param>
/// <param name="optimisticDistance">Receives the minimum total distance found over all offsets.</param>
/// <exception cref="ArgumentNullException">Any of the three input arguments is null.</exception>
public static void BestDistanceWithPartsAlignment(VectorProteinInterfaceWhole vectorProteinInterfaceWhole1, VectorProteinInterfaceWhole vectorProteinInterfaceWhole2, VectorDistanceMeasurementValues vectorDistanceMeasurementValues, out double optimisticDistance /*, out double pessimisticDistance*/)
{
    if (vectorProteinInterfaceWhole1 == null)
    {
        throw new ArgumentNullException(nameof(vectorProteinInterfaceWhole1));
    }

    if (vectorProteinInterfaceWhole2 == null)
    {
        throw new ArgumentNullException(nameof(vectorProteinInterfaceWhole2));
    }

    if (vectorDistanceMeasurementValues == null)
    {
        throw new ArgumentNullException(nameof(vectorDistanceMeasurementValues));
    }

    var length1 = vectorProteinInterfaceWhole1.ProteinInterfaceLength;
    var length2 = vectorProteinInterfaceWhole2.ProteinInterfaceLength;

    var maxOffset = Math.Abs(length1 - length2);
    var longerLength = Math.Max(length1, length2);
    var shorterLength = Math.Min(length1, length2);

    var longer = length1 > length2 ? vectorProteinInterfaceWhole1 : vectorProteinInterfaceWhole2;
    var shorter = longer == vectorProteinInterfaceWhole1 ? vectorProteinInterfaceWhole2 : vectorProteinInterfaceWhole1;

    optimisticDistance = double.MaxValue;
    //pessimisticDistance = double.MinValue;

    const int directionCount = 1;

    for (var direction = 0; direction < directionCount; direction++)
    {
        for (var offset = 0; offset <= maxOffset; offset++)
        {
            double offsetTotal = 0;

            for (var partIndex = 0; partIndex < longerLength; partIndex++)
            {
                var longerPart = longer.VectorProteinInterfacePartList[partIndex];
                var coveredByShorter = partIndex >= offset && partIndex < shorterLength + offset;

                double partTotal;

                if (coveredByShorter)
                {
                    var shorterPart = shorter.VectorProteinInterfacePartList[partIndex - offset];

                    // Work on copies so neither the optional reversal nor the distance routine
                    // can touch the stored flag arrays.
                    var longerFlags = (bool[])longerPart.InteractionFlagBools.Clone();
                    var shorterFlags = (bool[])shorterPart.InteractionFlagBools.Clone();

                    if (direction == 1)
                    {
                        Array.Reverse(longerFlags);
                    }

                    CustomDistance(longerFlags, shorterFlags, vectorDistanceMeasurementValues, out partTotal /*, out pessimisticPartDistance*/);

                    partTotal += VectorDistanceValue(vectorDistanceMeasurementValues, longerPart.InteractionToNonProteinInterface, shorterPart.InteractionToNonProteinInterface);
                }
                else
                {
                    // No counterpart in the shorter interface: compare against "no interactions".
                    var longerFlags = longerPart.InteractionFlagBools;

                    partTotal = VectorDistanceValue(vectorDistanceMeasurementValues, longerFlags, new bool[longerFlags.Length]);
                    partTotal += VectorDistanceValue(vectorDistanceMeasurementValues, longerPart.InteractionToNonProteinInterface, false);
                }

                offsetTotal += partTotal;
            }

            // Plain '<' rather than Math.Min so a NaN total never replaces the current best.
            if (offsetTotal < optimisticDistance)
            {
                optimisticDistance = offsetTotal;
            }
        }
    }
}