/// <summary>
/// Groups the supplied sequences by the dimer type of the protein they belong to.
/// The PDB ids for each dimer type come from <c>SplitDimersHomoHetero</c>; each returned list is the
/// result of passing a fresh copy of <paramref name="sequences"/> through <c>RemoveSequences</c>
/// with <c>RemoveSequencesOptions.RemoveSequencesNotInList</c> and the matching PDB id list.
/// </summary>
/// <param name="cancellationToken">Forwarded to <c>SplitDimersHomoHetero</c> and to every <c>RemoveSequences</c> call.</param>
/// <param name="sequences">The sequences to categorise. Must be non-null and non-empty.</param>
/// <param name="minimumHeterodimerSimilarityRequired">Forwarded unchanged to <c>SplitDimersHomoHetero</c>.</param>
/// <param name="minimumHomodimerSimiliarityRequired">Forwarded unchanged to <c>SplitDimersHomoHetero</c>.</param>
/// <param name="progressActionSet">Progress callbacks, forwarded to <c>SplitDimersHomoHetero</c>. Although the declared default is <c>null</c>, a null value is rejected.</param>
/// <param name="totalThreads">Forwarded unchanged to <c>SplitDimersHomoHetero</c>.</param>
/// <returns>
/// A <c>DimerSequenceTypeCategories&lt;ISequence&gt;</c> whose hetero-, homo- and homology-dimer lists hold
/// the sequences returned by the corresponding <c>RemoveSequences</c> call.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sequences"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="progressActionSet"/> is null.</exception>
public static DimerSequenceTypeCategories<ISequence> SplitDimerTypes(CancellationToken cancellationToken, List<ISequence> sequences, decimal minimumHeterodimerSimilarityRequired = 30.0m, decimal minimumHomodimerSimiliarityRequired = 90.0m, ProgressActionSet progressActionSet = null, int totalThreads = -1)
{
    // Guard clauses: same checks, same order and same exception types as callers already expect.
    if (sequences == null || sequences.Count < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(sequences));
    }

    if (progressActionSet == null)
    {
        throw new ArgumentNullException(nameof(progressActionSet));
    }

    // Classify by PDB id first; the id lists drive the filtering below.
    DimerSequenceTypeCategories<string> pdbIdsByDimerType = SplitDimersHomoHetero(
        cancellationToken,
        sequences,
        minimumHeterodimerSimilarityRequired,
        minimumHomodimerSimiliarityRequired,
        progressActionSet,
        totalThreads);

    // Each filter works on its own copy so the caller's list instance is never handed to RemoveSequences.
    List<ISequence> heteroSequences = RemoveSequences(
        cancellationToken,
        new List<ISequence>(sequences),
        pdbIdsByDimerType.HeteroDimerPdbIdList,
        RemoveSequencesOptions.RemoveSequencesNotInList);

    List<ISequence> homoSequences = RemoveSequences(
        cancellationToken,
        new List<ISequence>(sequences),
        pdbIdsByDimerType.HomoDimerPdbIdList,
        RemoveSequencesOptions.RemoveSequencesNotInList);

    List<ISequence> homologySequences = RemoveSequences(
        cancellationToken,
        new List<ISequence>(sequences),
        pdbIdsByDimerType.HomologyDimerPdbIdList,
        RemoveSequencesOptions.RemoveSequencesNotInList);

    // NOTE: the property names say "PdbIdList", but in this ISequence instantiation they carry sequences.
    return new DimerSequenceTypeCategories<ISequence>
    {
        HomoDimerPdbIdList = homoSequences,
        HeteroDimerPdbIdList = heteroSequences,
        HomologyDimerPdbIdList = homologySequences,
    };
}
/// <summary>
/// Classifies proteins as heterodimer, homodimer or homology dimer by running <c>FindDimerType</c>
/// on every pair of sequences that share the same PDB id.
/// </summary>
/// <remarks>
/// The outer index range is divided between worker tasks by <c>WorkDivision</c>. Each task collects
/// PDB ids into its own result object; the per-task results are merged afterwards and each list is
/// de-duplicated. Results of tasks that faulted or were cancelled are ignored. A PDB id can appear in
/// more than one list when different pairs of its sequences are classified differently.
/// </remarks>
/// <param name="cancellationToken">Checked at the start of every outer and inner iteration; also passed to <c>Task.Run</c>.</param>
/// <param name="sequences">The sequences to compare. Must be non-null and non-empty.</param>
/// <param name="minimumHeterodimerSimilarityRequired">Passed unchanged to <c>FindDimerType</c> (presumably a percentage similarity threshold — confirm against <c>FindDimerType</c>).</param>
/// <param name="minimumHomodimerSimiliarityRequired">Passed unchanged to <c>FindDimerType</c> (presumably a percentage similarity threshold — confirm against <c>FindDimerType</c>).</param>
/// <param name="progressActionSet">Progress callbacks, invoked once per processed outer item. Although the declared default is <c>null</c>, a null value is rejected.</param>
/// <param name="totalThreads">Passed unchanged to the <c>WorkDivision</c> constructor.</param>
/// <returns>The distinct PDB ids found for each dimer type.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sequences"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="progressActionSet"/> is null.</exception>
public static DimerSequenceTypeCategories<string> SplitDimersHomoHetero(CancellationToken cancellationToken, List<ISequence> sequences, decimal minimumHeterodimerSimilarityRequired = 30.0m, decimal minimumHomodimerSimiliarityRequired = 90.0m, ProgressActionSet progressActionSet = null, int totalThreads = -1)
{
    if (sequences == null || sequences.Count == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sequences));
    }

    if (progressActionSet == null)
    {
        throw new ArgumentNullException(nameof(progressActionSet));
    }

    var workDivision = new WorkDivision<DimerSequenceTypeCategories<string>>(sequences.Count, totalThreads);

    ProgressActionSet.StartAction(sequences.Count, progressActionSet);

    for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
    {
        // Copy the loop variable so the lambda captures this iteration's value.
        int localThreadIndex = threadIndex;

        Task<DimerSequenceTypeCategories<string>> task = Task.Run(() =>
        {
            var taskResult = new DimerSequenceTypeCategories<string>();

            for (int sequencesIndexA = workDivision.ThreadFirstIndex[localThreadIndex]; sequencesIndexA <= workDivision.ThreadLastIndex[localThreadIndex]; sequencesIndexA++)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                string proteinIdA = SequenceIdSplit.SequenceIdToPdbIdAndChainId(sequences[sequencesIndexA].ID).PdbId;

                // Each unordered pair is compared once, so only partners with a higher index are visited.
                // Starting at A + 1 (instead of 0 with a skip test) avoids parsing the ids of sequences
                // that would be skipped anyway.
                for (int sequencesIndexB = sequencesIndexA + 1; sequencesIndexB < sequences.Count; sequencesIndexB++)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        break;
                    }

                    // Only sequences belonging to the same protein (same PDB id) are aligned.
                    var proteinIdB = SequenceIdSplit.SequenceIdToPdbIdAndChainId(sequences[sequencesIndexB].ID).PdbId;

                    if (proteinIdA != proteinIdB)
                    {
                        continue;
                    }

                    var dimerType = FindDimerType(sequences[sequencesIndexA], sequences[sequencesIndexB], minimumHeterodimerSimilarityRequired, minimumHomodimerSimiliarityRequired);

                    // Duplicates are expected here; they are removed after the merge below.
                    if (dimerType == DimerType.HeteroDimer)
                    {
                        taskResult.HeteroDimerPdbIdList.Add(proteinIdA);
                    }
                    else if (dimerType == DimerType.HomoDimer)
                    {
                        taskResult.HomoDimerPdbIdList.Add(proteinIdA);
                    }
                    else if (dimerType == DimerType.HomologyDimer)
                    {
                        taskResult.HomologyDimerPdbIdList.Add(proteinIdA);
                    }
                }

                workDivision.IncrementItemsCompleted(1);

                ProgressActionSet.ProgressAction(1, progressActionSet);
                ProgressActionSet.EstimatedTimeRemainingAction(workDivision.StartTicks, workDivision.ItemsCompleted, workDivision.ItemsToProcess, progressActionSet);
            }

            return taskResult;
        }, cancellationToken);

        workDivision.TaskList.Add(task);
    }

    workDivision.WaitAllTasks();

    var dimerSequenceTypeCategories = new DimerSequenceTypeCategories<string>();

    // Merge only the tasks that ran to completion and produced a result.
    foreach (var task in workDivision.TaskList.Where(t => t != null && t.IsCompleted && !t.IsFaulted && !t.IsCanceled && t.Result != null))
    {
        dimerSequenceTypeCategories.HeteroDimerPdbIdList.AddRange(task.Result.HeteroDimerPdbIdList);
        dimerSequenceTypeCategories.HomoDimerPdbIdList.AddRange(task.Result.HomoDimerPdbIdList);
        dimerSequenceTypeCategories.HomologyDimerPdbIdList.AddRange(task.Result.HomologyDimerPdbIdList);
    }

    // The same PDB id is added once per matching pair and possibly by several tasks; keep one copy per list.
    dimerSequenceTypeCategories.HeteroDimerPdbIdList = dimerSequenceTypeCategories.HeteroDimerPdbIdList.Distinct().ToList();
    dimerSequenceTypeCategories.HomoDimerPdbIdList = dimerSequenceTypeCategories.HomoDimerPdbIdList.Distinct().ToList();
    dimerSequenceTypeCategories.HomologyDimerPdbIdList = dimerSequenceTypeCategories.HomologyDimerPdbIdList.Distinct().ToList();

    return dimerSequenceTypeCategories;
}