/// <summary>
/// Reports whether a chain container holds no chains at all.
/// </summary>
/// <param name="pdbFileChains">The container to inspect; may be null.</param>
/// <returns>
/// True when the container is null, its ChainList is null, or its ChainList has zero entries;
/// otherwise false.
/// </returns>
public static bool IsProteinChainListContainerNullOrEmpty(ProteinChainListContainer pdbFileChains)
{
    // Express the positive case once and negate it, instead of branching to literal true/false.
    bool hasAtLeastOneChain = pdbFileChains != null
                              && pdbFileChains.ChainList != null
                              && pdbFileChains.ChainList.Count != 0;

    return !hasAtLeastOneChain;
}
/// <summary>
/// Splits a list of atom pairs into two separate atom lists, one per side of the pair.
/// </summary>
/// <param name="atomPairList">The atom pairs to split.</param>
/// <param name="distinct">Forwarded unchanged to GetListOfAtomFromAtomPair for both sides.</param>
/// <returns>
/// A container whose ChainList holds exactly two entries: the atoms obtained for side 1,
/// followed by the atoms obtained for side 2.
/// </returns>
public static ProteinChainListContainer AtomPairListToUnpairedAtomLists(List<AtomPair> atomPairList, bool distinct = false)
{
    var unpairedChains = new ProteinChainListContainer();
    unpairedChains.ChainList = new List<ProteinAtomListContainer>();

    // Side 1 first, then side 2, so the resulting list order matches the pair order.
    unpairedChains.ChainList.Add(GetListOfAtomFromAtomPair(atomPairList, 1, distinct));
    unpairedChains.ChainList.Add(GetListOfAtomFromAtomPair(atomPairList, 2, distinct));

    return unpairedChains;
}
/// <summary>
/// Finds the position of the first chain whose first atom carries the given chain identifier.
/// </summary>
/// <param name="pdb">The chains to search. A null container or a null ChainList yields -1.</param>
/// <param name="chainIdLetter">
/// The identifier compared against the chainID field value of each chain's first atom.
/// </param>
/// <returns>
/// The zero-based index into ChainList of the first matching chain, or -1 when no chain matches.
/// </returns>
public static int FindChainByLetter(ProteinChainListContainer pdb, string chainIdLetter)
{
    const int notFound = -1;

    // Nothing to search: report "not found" instead of failing with a NullReferenceException.
    if (pdb == null || pdb.ChainList == null)
    {
        return notFound;
    }

    for (int index = 0; index < pdb.ChainList.Count; index++)
    {
        var chain = pdb.ChainList[index];

        // A missing chain or a chain without atoms has no identifier to compare against.
        if (chain == null || chain.AtomList == null || chain.AtomList.Count == 0)
        {
            continue;
        }

        // Only the first atom is inspected to decide which chain this is.
        if (chain.AtomList[0].chainID.FieldValue == chainIdLetter)
        {
            return index;
        }
    }

    return notFound;
}
/// <summary>
/// Calculates, per protein, the average interaction match percentage of the interactions found
/// between its two chains. PDB files that do not yield exactly two non-empty atom chains are skipped.
/// The files are divided between worker tasks and the per-task results are merged at the end.
/// </summary>
/// <param name="cancellationToken">
/// Checked before each file is processed; once cancellation is requested a worker stops taking
/// further files. Also passed to Task.Run and to SearchInteractions.FindInteractions.
/// </param>
/// <param name="maxAtomInterationDistance">Passed to SearchInteractions.FindInteractions as the distance limit.</param>
/// <param name="pdbFolders">Folders that are searched for PDB files. Must not be null or empty.</param>
/// <param name="pdbIdList">White list of PDB IDs to process. Must not be null or empty.</param>
/// <param name="pdbIdChainIdList">
/// Optional map from PDB ID to the chain IDs requested for that structure. When null, or when it has
/// no entry for a protein, null is passed as the chain ID list to PdbAtomicChains.
/// </param>
/// <param name="progressActionSet">Receives the start, progress and estimated-time-remaining notifications. Must not be null.</param>
/// <param name="totalThreads">Requested thread count, handed unchanged to WorkDivision.</param>
/// <returns>
/// A dictionary mapping each processed protein ID to the InteractionMatchPercentageAverage value
/// produced by InteractionMatchPercentage.CalculatePercentage.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="pdbFolders"/> or <paramref name="pdbIdList"/> is null or empty, or when
/// <paramref name="progressActionSet"/> is null.
/// </exception>
public static Dictionary<string, decimal> CalculateStructureSymmetry(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFolders, List<string> pdbIdList = null, Dictionary<string, List<string>> pdbIdChainIdList = null, ProgressActionSet progressActionSet = null, int totalThreads = -1)
{
    if (pdbFolders == null || pdbFolders.Length == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFolders));
    }

    if (pdbIdList == null || pdbIdList.Count == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pdbIdList));
    }

    if (progressActionSet == null)
    {
        // Exception type intentionally left unchanged so existing catch blocks in callers keep working.
        throw new ArgumentOutOfRangeException(nameof(progressActionSet));
    }

    const int requiredNumberOfChains = 2;

    string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFolders));

    var workDivision = new WorkDivision<Dictionary<string, decimal>>(pdbFilesArray.Length, totalThreads);

    ProteinDataBankFileOperations.ShowMissingPdbFiles(pdbFilesArray, pdbIdList, progressActionSet);

    ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

    for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
    {
        // Copy the loop variable so each lambda captures its own index.
        int localThreadIndex = threadIndex;

        Task<Dictionary<string, decimal>> task = Task.Run(() =>
        {
            var taskResult = new Dictionary<string, decimal>();

            for (int pdbFileNumber = workDivision.ThreadFirstIndex[localThreadIndex]; pdbFileNumber <= workDivision.ThreadLastIndex[localThreadIndex]; pdbFileNumber++)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                try
                {
                    string pdbFilename = pdbFilesArray[pdbFileNumber];
                    string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                    // Check if the file found is included in the white list
                    // (pdbIdList was validated as non-null above).
                    if (!pdbIdList.Contains(proteinId))
                    {
                        continue;
                    }

                    // Look up the requested chain IDs for this protein, if any were supplied.
                    string[] chainIdList = null;
                    List<string> requestedChainIds;
                    if (pdbIdChainIdList != null && proteinId != null && pdbIdChainIdList.TryGetValue(proteinId, out requestedChainIds) && requestedChainIds != null)
                    {
                        chainIdList = requestedChainIds.ToArray();
                    }

                    // Get atom chains.
                    ProteinChainListContainer proteinFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, requiredNumberOfChains, requiredNumberOfChains, true);

                    // Skip anything that is not exactly two chains, each with at least one atom.
                    if (proteinFileChains == null ||
                        proteinFileChains.ChainList == null ||
                        proteinFileChains.ChainList.Count != requiredNumberOfChains ||
                        proteinFileChains.ChainList[StaticValues.ChainA] == null ||
                        proteinFileChains.ChainList[StaticValues.ChainA].AtomList == null ||
                        proteinFileChains.ChainList[StaticValues.ChainA].AtomList.Count == 0 ||
                        proteinFileChains.ChainList[StaticValues.ChainB] == null ||
                        proteinFileChains.ChainList[StaticValues.ChainB].AtomList == null ||
                        proteinFileChains.ChainList[StaticValues.ChainB].AtomList.Count == 0)
                    {
                        continue;
                    }

                    // Make a list to save interactions found.
                    var interactionMatchPercentage = new InteractionMatchPercentage(proteinId);

                    List<AtomPair> interactions = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList);

                    // A null result is treated as "no interactions" rather than faulting the whole task.
                    int interactionCount = interactions != null ? interactions.Count : 0;

                    interactionMatchPercentage.IncrementTotalInteractions(interactionCount);

                    for (int interactionsIndex = 0; interactionsIndex < interactionCount; interactionsIndex++)
                    {
                        interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainA, interactions[interactionsIndex].Atom1.resSeq.FieldValue);
                        interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainB, interactions[interactionsIndex].Atom2.resSeq.FieldValue);
                    }

                    InteractionMatchPercentage.CalculatePercentageResult calculatedPercentage = interactionMatchPercentage.CalculatePercentage();

                    taskResult.Add(interactionMatchPercentage.ProteinId, calculatedPercentage.InteractionMatchPercentageAverage);
                }
                finally
                {
                    // Progress is reported for every file taken, including skipped ones.
                    workDivision.IncrementItemsCompleted(1);

                    ProgressActionSet.ProgressAction(1, progressActionSet);
                    ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);
                }
            }

            return taskResult;
        }, cancellationToken);

        workDivision.TaskList.Add(task);
    }

    workDivision.WaitAllTasks();

    // NOTE(review): MakeHomodimerStatisticsSpreadsheetsList calls ProgressActionSet.FinishAction after
    // waiting; no such call is made here - confirm that is intended.

    var result = new Dictionary<string, decimal>();

    // Task state must be checked BEFORE touching Result: reading Result on a canceled or faulted
    // task throws AggregateException, which would make the state checks pointless.
    foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null))
    {
        foreach (var kvp in task.Result)
        {
            // Dictionary.Add throws ArgumentException if two tasks produced the same protein ID.
            result.Add(kvp.Key, kvp.Value);
        }
    }

    return result;
}
/// <summary> /// Makes spreadsheets with scientific data outputs about given proteins. /// </summary> /// <param name="cancellationToken"></param> /// <param name="pdbFolders">The location of the PDB files</param> /// <param name="pdbIdList">The PDB files which should be used.</param> /// <param name="consoleTextBox"></param> /// <param name="progressBar">User proteinInterface progress bar for user feedback.</param> /// <param name="estimatedTimeRemainingLabel">User proteinInterface estimated time remaining label for user feedback.</param> /// <param name="requestedTotalThreads"></param> /// <returns>Returns the generated spreadsheets with scientific data.</returns> public static List <List <SpreadsheetCell[]> > MakeHomodimerStatisticsSpreadsheetsList(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFolders, List <string> pdbIdList = null, Dictionary <string, List <string> > pdbIdChainIdList = null, ProgressActionSet progressActionSet = null, int requestedTotalThreads = -1) { if (pdbFolders == null || pdbFolders.Length == 0) { throw new ArgumentOutOfRangeException(nameof(pdbFolders)); } if (pdbIdList == null || pdbIdList.Count == 0) { throw new ArgumentOutOfRangeException(nameof(pdbIdList)); } if (progressActionSet == null) { throw new ArgumentNullException(nameof(progressActionSet)); } // this method creates // 1. a list of interactions // 2. a list of symmetry percentage // 3. an "expected" heatmap by combining every possible a/b amino acid combination // 4. an actual heatmap for the proteinInterfaces // 5. 
normalised versions of both of the heatmaps string[] pdbFilesArray = ProteinDataBankFileOperations.RemoveNonWhiteListedPdbIdFromPdbFilesArray(pdbIdList, ProteinDataBankFileOperations.GetPdbFilesArray(pdbFolders)); //var interactionRecordList = new List<ProteinInteractionRecord>(); //var interactionMatchPercentageList = new List<InteractionMatchPercentage>(); //var wholeProteinChainsAminoAcidCounter = new List<AminoAcidChainComposition>(); //var interactionChainsAminoAcidCounter = new List<AminoAcidChainComposition>(); //var interactionsAminoAcidToAminoAcidCounter = new AminoAcidPairCompositionMatrix(); ////var wholeProteinAminoAcidToAminoAcidCounter2x2 = new AminoAcidPairCompositionMatrix(); // composition of every amino acid paired in every possible combination var workDivision = new WorkDivision <HomodimersStatisticsMinerTaskResult>(pdbFilesArray.Length, requestedTotalThreads); ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet); int checkAllFilesProcessed = 0; var lockCheckAllFilesProcessed = new object(); var pdbFilesProcessed = new bool[pdbFilesArray.Length]; Array.Clear(pdbFilesProcessed, 0, pdbFilesProcessed.Length); for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++) { int localThreadIndex = threadIndex; Task <HomodimersStatisticsMinerTaskResult> task = Task.Run(() => { var result = new HomodimersStatisticsMinerTaskResult(); for (int pdbFileNumber = workDivision.ThreadFirstIndex[localThreadIndex]; pdbFileNumber <= workDivision.ThreadLastIndex[localThreadIndex]; pdbFileNumber++) { if (cancellationToken.IsCancellationRequested) { break; } lock (lockCheckAllFilesProcessed) { checkAllFilesProcessed++; pdbFilesProcessed[pdbFileNumber] = true; } try { string pdbFilename = pdbFilesArray[pdbFileNumber]; string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename); // Check if the file found is included in the white list. 
if (/*pdbIdList != null && */ !pdbIdList.Contains(proteinId)) { ProgressActionSet.Report("Error: " + proteinId + " was not in the PDB ID white list.", progressActionSet); continue; } List <AtomPair> interactions = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList); // Make a list to save interactions found. var interactionMatchPercentage = new InteractionMatchPercentage(proteinId); var chainAminoAcidCounterA1X1 = new AminoAcidChainComposition(proteinId, "A"); var chainAminoAcidCounterB1X1 = new AminoAcidChainComposition(proteinId, "B"); var chainInteractionAminoAcidCounterA = new AminoAcidChainComposition(proteinId, "A"); var chainInteractionAminoAcidCounterB = new AminoAcidChainComposition(proteinId, "B"); if (interactions != null && interactions.Count > 0) { interactionMatchPercentage.IncrementTotalInteractions(interactions.Count); for (int interactionsIndex = 0; interactionsIndex < interactions.Count; interactionsIndex++) { chainInteractionAminoAcidCounterA.IncrementAminoAcidCount(interactions[interactionsIndex].Atom1.resName.FieldValue); chainInteractionAminoAcidCounterB.IncrementAminoAcidCount(interactions[interactionsIndex].Atom2.resName.FieldValue); result.InteractionRecordList.Add(new ProteinInteractionRecord(proteinId, interactionsIndex + 1, interactions[interactionsIndex])); interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainA, interactions[interactionsIndex].Atom1.resSeq.FieldValue); interactionMatchPercentage.AddResidueSequenceIndex(StaticValues.ChainB, interactions[interactionsIndex].Atom2.resSeq.FieldValue); result.InteractionsAminoAcidToAminoAcidCounter.IncrementAminoAcidCount(interactions[interactionsIndex].Atom1.resName.FieldValue, interactions[interactionsIndex].Atom2.resName.FieldValue); } } var chainIdList = pdbIdChainIdList != null ? (pdbIdChainIdList.ContainsKey(proteinId) ? 
pdbIdChainIdList[proteinId].ToArray() : null) : null; ProteinChainListContainer proteinFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, 2, 2, true); if (proteinFileChains == null || proteinFileChains.ChainList == null || proteinFileChains.ChainList.Count != 2 || proteinFileChains.ChainList[StaticValues.ChainA] == null || proteinFileChains.ChainList[StaticValues.ChainA].AtomList == null || proteinFileChains.ChainList[StaticValues.ChainA].AtomList.Count == 0 || proteinFileChains.ChainList[StaticValues.ChainB] == null || proteinFileChains.ChainList[StaticValues.ChainB].AtomList == null || proteinFileChains.ChainList[StaticValues.ChainB].AtomList.Count == 0) { if (!File.Exists(pdbFilename)) { ProgressActionSet.Report("Error: " + pdbFilename + " (" + proteinId + ") file not found", progressActionSet); } else { int proteinFileChainCount = ProteinDataBankFileOperations.PdbAtomicChainsCount(pdbFilename); ProgressActionSet.Report("Error: " + proteinId + " did not have exactly 2 chains (" + proteinFileChainCount + " chains found) - skipping.", progressActionSet); } continue; } // count total of how many of each type of amino acids are in Chain A. for (int atomIndexA = 0; atomIndexA < proteinFileChains.ChainList[StaticValues.ChainA].AtomList.Count; atomIndexA++) { chainAminoAcidCounterA1X1.IncrementAminoAcidCount(proteinFileChains.ChainList[StaticValues.ChainA].AtomList[atomIndexA].resName.FieldValue); } // count total of how many of each type of amino acids are in Chain B. 
for (int atomIndexB = 0; atomIndexB < proteinFileChains.ChainList[StaticValues.ChainB].AtomList.Count; atomIndexB++) { chainAminoAcidCounterB1X1.IncrementAminoAcidCount(proteinFileChains.ChainList[StaticValues.ChainB].AtomList[atomIndexB].resName.FieldValue); } interactionMatchPercentage.CalculatePercentage(); result.InteractionMatchPercentageList.Add(interactionMatchPercentage); result.WholeProteinChainsAminoAcidCounter.Add(chainAminoAcidCounterA1X1); result.WholeProteinChainsAminoAcidCounter.Add(chainAminoAcidCounterB1X1); result.InteractionChainsAminoAcidCounter.Add(chainInteractionAminoAcidCounterA); result.InteractionChainsAminoAcidCounter.Add(chainInteractionAminoAcidCounterB); } finally { workDivision.IncrementItemsCompleted(1); ProgressActionSet.ProgressAction(1, progressActionSet); ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet); } } return(result); }, cancellationToken); workDivision.TaskList.Add(task); } workDivision.WaitAllTasks(); ProgressActionSet.FinishAction(true, progressActionSet); // merge all instances of the results var spreadsheetTaskResult = new HomodimersStatisticsMinerTaskResult(); foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null)) { if (task.Result.InteractionChainsAminoAcidCounter != null && task.Result.InteractionChainsAminoAcidCounter.Count > 0) { spreadsheetTaskResult.InteractionChainsAminoAcidCounter.AddRange(task.Result.InteractionChainsAminoAcidCounter); } if (task.Result.InteractionMatchPercentageList != null && task.Result.InteractionMatchPercentageList.Count > 0) { spreadsheetTaskResult.InteractionMatchPercentageList.AddRange(task.Result.InteractionMatchPercentageList); } if (task.Result.InteractionRecordList != null && task.Result.InteractionRecordList.Count > 0) { 
spreadsheetTaskResult.InteractionRecordList.AddRange(task.Result.InteractionRecordList); } if (task.Result.InteractionsAminoAcidToAminoAcidCounter != null) { if (task.Result.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid != null) { foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { var totalGroups = AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups); for (int x = 0; x < totalGroups; x++) { for (int y = 0; y < totalGroups; y++) { spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid[(int)enumAminoAcidGroups][x, y] += task.Result.InteractionsAminoAcidToAminoAcidCounter.AminoAcidToAminoAcid[(int)enumAminoAcidGroups][x, y]; } } } } } if (task.Result.WholeProteinChainsAminoAcidCounter != null && task.Result.WholeProteinChainsAminoAcidCounter.Count > 0) { spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.AddRange(task.Result.WholeProteinChainsAminoAcidCounter); } } if (pdbFilesProcessed.Count(file => file == false) > 0) { ProgressActionSet.Report("ERROR: " + pdbFilesProcessed.Count(file => file == false) + " PDB FILES WERE SKIPPED! 0x01", progressActionSet); } else { ProgressActionSet.Report("CHECK: NO PDB FILES WERE SKIPPED! 0x01", progressActionSet); } if (checkAllFilesProcessed != pdbFilesArray.Length) { ProgressActionSet.Report("ERROR: " + (pdbFilesArray.Length - checkAllFilesProcessed) + " PDB FILES WERE SKIPPED! 0x02", progressActionSet); } else { ProgressActionSet.Report("CHECK: NO PDB FILES WERE SKIPPED! 
0x02", progressActionSet); } spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.OrderBy(a => a.ProteinId).ThenBy(b => b.ChainId).ToList(); spreadsheetTaskResult.InteractionChainsAminoAcidCounter = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.OrderBy(a => a.ProteinId).ThenBy(b => b.ChainId).ToList(); AminoAcidChainComposition wholeProteinChainsTotals = AminoAcidChainComposition.TotalFromAminoAcidChainCompositionList(spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter); AminoAcidChainComposition interactionChainsTotals = AminoAcidChainComposition.TotalFromAminoAcidChainCompositionList(spreadsheetTaskResult.InteractionChainsAminoAcidCounter); AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounter1X1 = AminoAcidChainComposition.ConvertToMatrix(wholeProteinChainsTotals); var results = new List <List <SpreadsheetCell[]> >(); { /* start test */ var spreadsheet1 = new List <SpreadsheetCell[]>(); spreadsheet1.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% TEST SHEET 0"), }); spreadsheet1.Add(new[] { new SpreadsheetCell("TEST SHEET 0"), }); foreach (AminoAcidChainComposition item in spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter) { //spreadsheet1.Add(item.ProteinId); //spreadsheet1.Add(item.ChainId); spreadsheet1.Add(item.SpreadsheetDataRow()); } results.Add(spreadsheet1); spreadsheet1 = null; /* end test */ } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// { var spreadsheet2 = new List <SpreadsheetCell[]>(); spreadsheet2.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Symmetry"), }); spreadsheet2.Add(new[] { new SpreadsheetCell("Homodimers - List - Interaction Count And Interaction Match Percentage (Symmetry Measurement)") }); spreadsheet2.Add(InteractionMatchPercentage.SpreadsheetColumnHeadersRow()); var range2 = 
spreadsheetTaskResult.InteractionMatchPercentageList.Select(record => record.SpreadsheetDataRow()).ToList(); //range2.Sort(); range2 = range2 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ThenBy(a => a[2].CellData) .ThenBy(a => a[3].CellData) .ThenBy(a => a[4].CellData) .ThenBy(a => a[5].CellData) .ThenBy(a => a[6].CellData) .ThenBy(a => a[7].CellData) .ThenBy(a => a[8].CellData) .ToList(); spreadsheet2.AddRange(range2); range2 = null; results.Add(spreadsheet2); var spreadsheetHistogram2 = new List <SpreadsheetCell[]>(); spreadsheetHistogram2.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HG Interaction Symmetry"), }); spreadsheetHistogram2.Add(new[] { new SpreadsheetCell("Homodimers - List - Interaction Count And Interaction Match Percentage (Symmetry Measurement) Histogram") }); spreadsheetHistogram2.AddRange(Histogram.MatrixToHistogram(spreadsheet2.ToArray(), Histogram.MakeBinDecimals(0, 100, 9, 10), new[] { 6, 7, 8 }, 2, -1, true)); results.Add(spreadsheetHistogram2); spreadsheet2 = null; spreadsheetHistogram2 = null; } // { var spreadsheet3 = new List <SpreadsheetCell[]>(); spreadsheet3.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Records"), }); spreadsheet3.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Interaction Record"), }); spreadsheet3.Add(ProteinInteractionRecord.TsvColumnHeadersRow()); var range3 = spreadsheetTaskResult.InteractionRecordList.Select(record => record.SpreadsheetDataRow()).ToList(); //range3.Sort(); range3 = range3 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ThenBy(a => a[3].CellData) .ThenBy(a => a[5].CellData) .ThenBy(a => a[13].CellData) .ThenBy(a => a[15].CellData) .ToList(); spreadsheet3.AddRange(range3); range3 = null; results.Add(spreadsheet3); var spreadsheetHistogram3 = new List <SpreadsheetCell[]>(); spreadsheetHistogram3.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Records Histogram"), }); 
spreadsheetHistogram3.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Interaction Record - Histogram"), }); spreadsheetHistogram3.AddRange(Histogram.MatrixToHistogram(spreadsheet3.ToArray(), Histogram.MakeBinDecimals(0m, 5m, 0m, 0.05m), new[] { 1 }, 2, -1, true)); results.Add(spreadsheetHistogram3); //spreadsheet3 = Histogram.InsertMatrixOverwrite(spreadsheet3.ToArray(), histogram3, 2, Histogram.MaxColumns(spreadsheet3.ToArray()) + 1).ToList(); spreadsheet3 = null; spreadsheetHistogram3 = null; } // { var spreadsheet4 = new List <SpreadsheetCell[]>(); spreadsheet4.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Count - A-Z"), }); spreadsheet4.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - Interactions - A to Z"), }); spreadsheet4.Add(AminoAcidChainComposition.SpreadsheetTitleRow()); var range4 = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.Select(record => record.SpreadsheetDataRow()).ToList(); //range4.Sort(); range4 = range4 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ToList(); spreadsheet4.AddRange(range4); range4 = null; spreadsheet4.Add(interactionChainsTotals.SpreadsheetDataRow()); results.Add(spreadsheet4); spreadsheet4 = null; } // { foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { var spreadsheet5 = new List <SpreadsheetCell[]>(); spreadsheet5.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Interaction Count - Groups " + enumAminoAcidGroups), }); spreadsheet5.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - Interactions - Acid Groups " + enumAminoAcidGroups), }); spreadsheet5.Add(AminoAcidChainComposition.SpreadsheetGroupsTitleRow(enumAminoAcidGroups)); var range5 = spreadsheetTaskResult.InteractionChainsAminoAcidCounter.Select(record => record.SpreadsheetGroupsDataRow(enumAminoAcidGroups)).ToList(); 
//range4.Sort(); range5 = range5 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ToList(); spreadsheet5.AddRange(range5); range5 = null; spreadsheet5.Add(interactionChainsTotals.SpreadsheetGroupsDataRow(enumAminoAcidGroups)); results.Add(spreadsheet5); spreadsheet5 = null; } } // { var spreadsheet6 = new List <SpreadsheetCell[]>(); spreadsheet6.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - A-Z"), }); spreadsheet6.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - A to Z"), }); spreadsheet6.Add(AminoAcidChainComposition.SpreadsheetTitleRow()); var range6 = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Select(record => record.SpreadsheetDataRow()).ToList(); //range6.Sort(); range6 = range6 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ToList(); spreadsheet6.AddRange(range6); range6 = null; spreadsheet6.Add(wholeProteinChainsTotals.SpreadsheetDataRow()); results.Add(spreadsheet6); var spreadsheetHistogram6 = new List <SpreadsheetCell[]>(); spreadsheetHistogram6.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - A-Z - Historgram"), }); spreadsheetHistogram6.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - A to Z - Histogram"), }); spreadsheetHistogram6.AddRange(Histogram.MatrixToHistogram(spreadsheet6.ToArray(), Histogram.MakeBinDecimals(0, 10500, 0, 500), new[] { 28 }, 2, -1, true)); spreadsheetHistogram6.Add(new [] { new SpreadsheetCell(""), }); spreadsheetHistogram6.AddRange(Histogram.MatrixToHistogram(spreadsheet6.ToArray(), Histogram.MakeBinDecimals(0, 1000, 0, 100), new[] { 28 }, 2, -1, true)); results.Add(spreadsheetHistogram6); spreadsheet6 = null; spreadsheetHistogram6 = null; } // { foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { var spreadsheet7 = new List 
<SpreadsheetCell[]>(); spreadsheet7.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% L Entire Count - Groups " + enumAminoAcidGroups), }); spreadsheet7.Add(new[] { new SpreadsheetCell("Homodimers - List - Protein Amino Acid Count - All Atoms - Acid Groups " + enumAminoAcidGroups), }); spreadsheet7.Add(AminoAcidChainComposition.SpreadsheetGroupsTitleRow(enumAminoAcidGroups)); var range7 = spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Select(record => record.SpreadsheetGroupsDataRow(enumAminoAcidGroups)).ToList(); //range7.Sort(); range7 = range7 .OrderBy(a => a[0].CellData) .ThenBy(a => a[1].CellData) .ToList(); spreadsheet7.AddRange(range7); range7 = null; spreadsheet7.Add(wholeProteinChainsTotals.SpreadsheetGroupsDataRow(enumAminoAcidGroups)); results.Add(spreadsheet7); spreadsheet7 = null; } } // convert to percentage for creating mean average protein composition var meanProteinComposition = new AminoAcidChainComposition("Mean Composition", "-"); foreach (AminoAcidChainComposition aminoAcidChainComposition in spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter) { // get percentage for row AminoAcidChainComposition percentage = AminoAcidChainComposition.ConvertToPercentage(aminoAcidChainComposition); // add percentage to overall tally foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { for (int x = 0; x < AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups); x++) { meanProteinComposition.AminoAcidGroupsCount[(int)enumAminoAcidGroups][x] += (percentage.AminoAcidGroupsCount[(int)enumAminoAcidGroups][x] / spreadsheetTaskResult.WholeProteinChainsAminoAcidCounter.Count); } } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// { /* start test */ var spreadsheet8 = new List <SpreadsheetCell[]>(); spreadsheet8.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% TEST SHEET 1"), }); // Worksheet name. spreadsheet8.Add(new[] { new SpreadsheetCell("TEST SHEET 1"), }); // Spreadsheet title spreadsheet8.Add(new[] { new SpreadsheetCell(string.Empty), }); spreadsheet8.Add(meanProteinComposition.SpreadsheetDataRow()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet8.Add(meanProteinComposition.SpreadsheetGroupsDataRow(enumAminoAcidGroups)); } results.Add(spreadsheet8); spreadsheet8 = null; /* end test */ } AminoAcidPairCompositionMatrix meanProteinMatrix = AminoAcidChainComposition.ConvertToMatrix(meanProteinComposition); { var spreadsheet9 = new List <SpreadsheetCell[]>(); spreadsheet9.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM All Atoms 3x3"), }); // Worksheet name. spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition"), }); // Spreadsheet title. //spreadsheet9.Add(new[] { new SpreadsheetCell(string.Empty), }); //spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition - Percentage Composition - A to Z"), }); // Section title. 
//spreadsheet9.AddRange(meanProteinMatrix.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet9.Add(new[] { new SpreadsheetCell(string.Empty), }); spreadsheet9.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Average Chain Composition - Percentage Composition - Acid Groups " + enumAminoAcidGroups), }); // Section title. spreadsheet9.AddRange(meanProteinMatrix.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } results.Add(spreadsheet9); spreadsheet9 = null; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //if (outputAllAtoms1x1) //{ AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterPercentage1X1 = AminoAcidPairCompositionMatrix.CalculatePercentageMatrix(wholeProteinAminoAcidToAminoAcidCounter1X1); { var spreadsheet10 = new List <SpreadsheetCell[]>(); spreadsheet10.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM All Atoms 1x1") }); // Worksheet name. spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Composition") }); // Spreadsheet title. //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Percentage Composition - A to Z")}); // Section title. 
//spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Percentage Composition - Acid Groups " + enumAminoAcidGroups) }); // Section title. spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterNormalised1X1 = AminoAcidPairCompositionMatrix.NormalizeWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix()); //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall UniProt Normalised - A to Z ")}); // Section title. //spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterNormalised1X1.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall UniProt Normalised - Acid Groups " + enumAminoAcidGroups) }); // Section title. 
spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterNormalised1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } AminoAcidPairCompositionMatrix wholeProteinAminoAcidToAminoAcidCounterDifference1X1 = AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix()); //spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall A to Z - UniProt Difference")}); // Section title. //spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterDifference1X1.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet10.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet10.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - All Atoms - Overall Acid Groups " + enumAminoAcidGroups + " - UniProt Difference") }); // Section title. spreadsheet10.AddRange(wholeProteinAminoAcidToAminoAcidCounterDifference1X1.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } results.Add(spreadsheet10); spreadsheet10 = null; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// { AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterPercentage = AminoAcidPairCompositionMatrix.CalculatePercentageMatrix(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter); var spreadsheet11 = new List <SpreadsheetCell[]>(); spreadsheet11.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM Interactions Only") }); // Worksheet name. spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only") }); // Spreadsheet title. 
//spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z")}); // Section title. //spreadsheet11.AddRange(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups) }); // Section title. spreadsheet11.AddRange(spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterNormalised = AminoAcidPairCompositionMatrix.NormalizeWithCompositionMatrix(interactionsAminoAcidToAminoAcidCounterPercentage, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix()); //spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z - UniProt Normalised")}); // Section title. //spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterNormalised.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups + " - UniProt Normalised") }); // Section title. 
spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterNormalised.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } AminoAcidPairCompositionMatrix interactionsAminoAcidToAminoAcidCounterDifference = AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(interactionsAminoAcidToAminoAcidCounterPercentage, UniProtProteinDatabaseComposition.AminoAcidCompositionAsMatrix()); //spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty)}); //spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - A to Z - UniProt Difference")}); // Section title. //spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterDifference.SpreadsheetAminoAcidColorGroupsHeatMap()); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet11.Add(new[] { new SpreadsheetCell(string.Empty) }); spreadsheet11.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Interactions Only - Acid Groups " + enumAminoAcidGroups + " - UniProt Difference") }); // Section title. spreadsheet11.AddRange(interactionsAminoAcidToAminoAcidCounterDifference.SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); } results.Add(spreadsheet11); spreadsheet11 = null; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// { var spreadsheet12 = new List <SpreadsheetCell[]>(); spreadsheet12.Add(new[] { new SpreadsheetCell("%batch_number%%batch_letter% HM Interactions v Homodimers") }); // Worksheet name. 
spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions") }); // Spreadsheet title spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty) }); //spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions - A to Z")}); // Section title //spreadsheet12.AddRange(AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter).SpreadsheetAminoAcidColorGroupsHeatMap()); //spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty)}); foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups))) { spreadsheet12.Add(new[] { new SpreadsheetCell("Homodimers - Amino Acid Heat Map - Difference between homodimer composition and homodimer interactions - Acid Groups " + enumAminoAcidGroups) }); // Section title. spreadsheet12.AddRange(AminoAcidPairCompositionMatrix.DifferenceWithCompositionMatrix(wholeProteinAminoAcidToAminoAcidCounterPercentage1X1, spreadsheetTaskResult.InteractionsAminoAcidToAminoAcidCounter).SpreadsheetAminoAcidColorGroupsHeatMap(enumAminoAcidGroups)); spreadsheet12.Add(new[] { new SpreadsheetCell(string.Empty) }); } results.Add(spreadsheet12); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// return(results); }
/// <summary>
///     Loads one PDB file and returns the atoms it contains, grouped by chain.
/// </summary>
/// <remarks>
///     Only ATOM, HETATM, TER, MODEL and ENDMDL records are requested from the file. Reading stops at the first
///     ENDMDL record, or as soon as <paramref name="maximumChains" /> TER records (of white-listed chains) have been read.
///     ATOM and HETATM records whose residue name fails ParameterValidation.IsAminoAcidCodeValid are dropped.
///     A HETATM record is converted to an ATOM record and appended to its chain only when that chain has no ATOM
///     record with the same residue sequence number.
///     The returned chain list holds the chains seen in ATOM records first (ordered by chain ID), followed by the
///     chains seen only in HETATM records (ordered by chain ID). Entries may have an empty atom list.
/// </remarks>
/// <param name="pdbFilename">Path of the PDB file to load.</param>
/// <param name="chainIdWhiteList">
///     Chain IDs to keep, or null to keep every chain. Chain IDs read from the file are trimmed and upper-cased
///     before being looked up here, so entries must be supplied in that form.
/// </param>
/// <param name="minimumChains">Minimum number of non-empty chains the file must yield.</param>
/// <param name="maximumChains">Maximum number of non-empty chains the file may yield.</param>
/// <param name="onlyCarbonAlphas">When true, only atoms whose name equals StaticValues.CarbonAlpha are kept.</param>
/// <returns>
///     The chains found, or null when the number of non-empty chains is outside the range
///     [<paramref name="minimumChains" />, <paramref name="maximumChains" />].
/// </returns>
/// <exception cref="FileNotFoundException">The file <paramref name="pdbFilename" /> does not exist.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="minimumChains" /> is greater than <paramref name="maximumChains" />.</exception>
public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
{
    // Check file exists.
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // Check min chains not more than max chains.
    if (minimumChains > maximumChains)
    {
        throw new ArgumentOutOfRangeException(nameof(minimumChains));
    }

    // Load pdb/protein file, excluding all records but ATOM, HETATM, TER, MODEL and ENDMDL.
    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName,
        TER_Record.TER_Field.FieldName,
        MODEL_Record.MODEL_Field.FieldName,
        ENDMDL_Record.ENDMDL_Field.FieldName
    });

    // Records grouped by normalised (trimmed, upper-cased) chain ID.
    var atomRecordListDictionary = new Dictionary<string, List<ProteinDataBankFileRecord>>();
    var hetAtomRecordListDictionary = new Dictionary<string, List<ProteinDataBankFileRecord>>();

    int terCount = 0;

    for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

        if (currentRecord == null)
        {
            continue;
        }

        // Exact type comparison is kept deliberately so that a record type derived from another one is not misclassified.
        Type recordType = currentRecord.GetType();

        if (recordType == typeof(ATOM_Record))
        {
            var atom = (ATOM_Record)currentRecord;

            if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // The chain is registered even if none of its records turn out to be valid amino acids.
            List<ProteinDataBankFileRecord> chainAtomRecords;
            if (!atomRecordListDictionary.TryGetValue(chainIdKey, out chainAtomRecords))
            {
                chainAtomRecords = new List<ProteinDataBankFileRecord>();
                atomRecordListDictionary.Add(chainIdKey, chainAtomRecords);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
            {
                chainAtomRecords.Add(atom);
            }
        }
        else if (recordType == typeof(HETATM_Record))
        {
            var hetatm = (HETATM_Record)currentRecord;

            if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // The chain is registered even if none of its records turn out to be valid amino acids.
            List<ProteinDataBankFileRecord> chainHetAtomRecords;
            if (!hetAtomRecordListDictionary.TryGetValue(chainIdKey, out chainHetAtomRecords))
            {
                chainHetAtomRecords = new List<ProteinDataBankFileRecord>();
                hetAtomRecordListDictionary.Add(chainIdKey, chainHetAtomRecords);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
            {
                chainHetAtomRecords.Add(hetatm);
            }
        }
        else if (recordType == typeof(TER_Record))
        {
            var ter = (TER_Record)currentRecord;

            string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // Stop reading once the maximum number of (white-listed) chain terminators has been seen.
            terCount++;

            if (terCount >= maximumChains)
            {
                break;
            }
        }
        else if (recordType == typeof(ENDMDL_Record))
        {
            // Stop at the first ENDMDL record.
            break;
        }
    }

    // File has been parsed so clear used file data from memory as soon as possible.
    proteinDataBankFile.UnloadFile();

    // Give every chain ID exactly one output slot, keyed by the chain ID itself rather than by its position in
    // either dictionary. ATOM chains come first in chain ID order (the same slots they have always occupied);
    // chains that only occur in HETATM records follow, also in chain ID order. Previously HETATM chains were
    // indexed by their position among the HETATM chains only, so their atoms could be appended to a different
    // chain whenever the ATOM and HETATM chain sets differed.
    List<string> chainIds = atomRecordListDictionary.Keys.OrderBy(chainId => chainId).ToList();
    chainIds.AddRange(hetAtomRecordListDictionary.Keys.Where(chainId => !atomRecordListDictionary.ContainsKey(chainId)).OrderBy(chainId => chainId));

    var pdbFileChains = new ProteinChainListContainer();
    var chainIndexByChainId = new Dictionary<string, int>();

    for (int chainIndex = 0; chainIndex < chainIds.Count; chainIndex++)
    {
        chainIndexByChainId.Add(chainIds[chainIndex], chainIndex);
        pdbFileChains.ChainList.Add(new ProteinAtomListContainer());
    }

    // ATOM records: each chain's atoms ordered by serial number.
    foreach (var atomRecordListKvp in atomRecordListDictionary)
    {
        List<ProteinDataBankFileRecord> chainRecords = atomRecordListKvp.Value;

        if (chainRecords == null || chainRecords.Count == 0)
        {
            continue;
        }

        pdbFileChains.ChainList[chainIndexByChainId[atomRecordListKvp.Key]].AtomList = chainRecords
            .Select(a => (ATOM_Record)a)
            .OrderBy(a => NullableTryParseInt32(a.serial.FieldValue))
            .ToList();
    }

    // HETATM records: appended (in serial number order) after the chain's ATOM records, but only for residues
    // that have no ATOM record in the same chain.
    foreach (var hetAtomRecordListKvp in hetAtomRecordListDictionary)
    {
        List<ProteinDataBankFileRecord> chainRecords = hetAtomRecordListKvp.Value;

        if (chainRecords == null || chainRecords.Count == 0)
        {
            continue;
        }

        string chainId = hetAtomRecordListKvp.Key;

        // Residue sequence numbers already covered by this chain's ATOM records, collected once per chain.
        var atomResidueSequences = new HashSet<string>();
        List<ProteinDataBankFileRecord> chainAtomRecords;
        if (atomRecordListDictionary.TryGetValue(chainId, out chainAtomRecords))
        {
            foreach (ProteinDataBankFileRecord chainAtomRecord in chainAtomRecords)
            {
                atomResidueSequences.Add(((ATOM_Record)chainAtomRecord).resSeq.FieldValue);
            }
        }

        ProteinAtomListContainer chain = pdbFileChains.ChainList[chainIndexByChainId[chainId]];

        foreach (HETATM_Record hetatm in chainRecords.Select(a => (HETATM_Record)a).OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)))
        {
            if (!atomResidueSequences.Contains(hetatm.resSeq.FieldValue))
            {
                chain.AtomList.Add(ConvertHetatmRecordToAtomRecord(hetatm));
            }
        }
    }

    int nonEmptyChainCount = pdbFileChains.ChainList.Count(a => a != null && a.AtomList != null && a.AtomList.Count > 0);

    if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
    {
        return pdbFileChains;
    }

    // Too few or too many non-empty chains for the caller's requirements.
    return null;
}