/// <summary>
/// Creates a new <c>PairwiseIBDRegion</c> carrying the same individual IDs,
/// SNP range and block IDs as this instance, together with a copy of the
/// per-block haplotype list.
/// </summary>
/// <returns>
/// The newly constructed region. Its <c>haplotype</c> list is produced by
/// <c>ArrayList.Clone()</c>, which is a shallow copy: the outer list is
/// independent, but the elements it holds are the same object references.
/// </returns>
public PairwiseIBDRegion GetCopy()
{
    // NOTE(review): lastBlockID is passed before firstBlockID. The constructor
    // signature is not visible from here - confirm that this matches its
    // parameter order.
    PairwiseIBDRegion duplicate = new PairwiseIBDRegion(
        firstIndividualID,
        secondIndividualID,
        start,
        end,
        lastBlockID,
        firstBlockID);

    duplicate.haplotype = (ArrayList)haplotype.Clone();
    return duplicate;
}
/// <summary>
/// Tries to merge <paramref name="secondRegion"/> into
/// <paramref name="firstRegion"/> when at most one block separates them and
/// the SNPs lying between them contain at most one genotype pair whose
/// absolute difference is 2.
/// </summary>
/// <param name="firstRegion">Earlier region; extended in place on success.</param>
/// <param name="secondRegion">Later region; only read.</param>
/// <param name="thatIndividual">The other individual of the pair (this instance is the first).</param>
/// <returns>
/// <c>true</c> if <paramref name="firstRegion"/> was extended to cover
/// <paramref name="secondRegion"/>; <c>false</c> if nothing was changed.
/// </returns>
private bool CombineAdjacentPairwiseRegion(PairwiseIBDRegion firstRegion, PairwiseIBDRegion secondRegion, Individual thatIndividual)
{
    // ID of the single block that may sit between the two regions.
    int gapBlockID = firstRegion.GetLastBlockID() + 1;

    // More than one block between the regions: never merged.
    if (gapBlockID + 1 < secondRegion.GetFirstBlockID())
    {
        return false;
    }

    // Count SNPs strictly between the regions where the two genotypes differ by 2.
    // (presumably opposite homozygotes under a 0/1/2 coding - confirm against the genotype loader)
    int mismatchCount = 0;
    int gapEndSnp = secondRegion.GetStart();
    for (int snp = firstRegion.GetEnd() + 1; snp < gapEndSnp; snp++)
    {
        if (Math.Abs(this.genotype[snp] - thatIndividual.genotype[snp]) == 2)
        {
            mismatchCount++;
        }
    }

    if (mismatchCount > 1)
    {
        return false;
    }

    // Exactly one block was skipped: give it the sorted, distinct set of every
    // haplotype ID either individual lists for that block.
    if (gapBlockID + 1 == secondRegion.GetFirstBlockID())
    {
        ArrayList[] gapPairLists = new ArrayList[]
        {
            (ArrayList)this.haplotype[gapBlockID],
            (ArrayList)thatIndividual.haplotype[gapBlockID]
        };

        ArrayList candidateIDs = new ArrayList();
        foreach (ArrayList pairList in gapPairLists)
        {
            foreach (HaplotypePair pair in pairList)
            {
                candidateIDs.Add(pair.firstHaplotypeID);
                candidateIDs.Add(pair.secondHaplotypeID);
            }
        }

        candidateIDs.Sort();

        // After sorting, duplicates are adjacent; keep the first of each run.
        ArrayList distinctIDs = new ArrayList();
        foreach (int id in candidateIDs)
        {
            if (distinctIDs.Count == 0 || (int)distinctIDs[distinctIDs.Count - 1] != id)
            {
                distinctIDs.Add(id);
            }
        }

        firstRegion.AddHaplotype(distinctIDs);
    }

    // Append the second region's per-block haplotype lists and extend the bounds.
    for (int index = 0; index < secondRegion.GetHaplotypeLength(); index++)
    {
        firstRegion.AddHaplotype(secondRegion.GetHaplotype(index));
    }

    firstRegion.SetLastBlockID(secondRegion.GetLastBlockID());
    firstRegion.SetEnd(secondRegion.GetEnd());
    return true; // merged
}
/// <summary>
/// Walks the blocks <paramref name="startBlockID"/> (inclusive) to
/// <paramref name="endBlockID"/> (exclusive) and grows a candidate region over
/// consecutive blocks in which this individual and
/// <paramref name="thatIndividual"/> have at least one haplotype ID in common.
/// When a run ends, the candidate is either merged into the last entry of
/// <paramref name="pairwiseIBDRegionList"/> or appended to it as a copy, subject
/// to the genetic-length thresholds derived from <paramref name="lengthCut"/>.
/// </summary>
/// <param name="pairwiseIBDRegionList">Output list of <c>PairwiseIBDRegion</c>; modified in place.</param>
/// <param name="thatIndividual">The individual compared against this one.</param>
/// <param name="blockList">All blocks; indexed by block ID.</param>
/// <param name="newMapData">Supplies the genetic distance of a SNP index.</param>
/// <param name="lengthCut">
/// Minimum genetic length for a region to be stored. The original comments
/// mention "2cm"; the actual unit depends on <c>MapData</c> - confirm.
/// </param>
/// <param name="startBlockID">First block to examine.</param>
/// <param name="endBlockID">One past the last block to examine.</param>
public void PairwiseComparison(ArrayList pairwiseIBDRegionList, Individual thatIndividual, Block[] blockList, MapData newMapData, double lengthCut, int startBlockID, int endBlockID)
{
    // A last-block ID of -1 marks the working region as "needs restarting".
    PairwiseIBDRegion currentRegion = new PairwiseIBDRegion(this.GetIndividualID(), thatIndividual.GetIndividualID(), 0, 0, -1, -1);

    for (int blockID = startBlockID; blockID < endBlockID; blockID++)
    {
        Block block = blockList[blockID];

        if (currentRegion.GetLastBlockID() == -1)
        {
            // The previous run was stored or dropped: restart at this block.
            currentRegion.SetStart(block.GetStartSnpID());
            currentRegion.SetEnd(block.GetEndSnpID());
            currentRegion.SetLastBlockID(blockID);
            currentRegion.SetFirstBlockID(blockID);
            currentRegion.ClearHaplotype();
        }

        // Candidate haplotype IDs of each individual in this block.
        ArrayList thisHaplotypeList = new ArrayList();
        ArrayList thatHaplotypeList = new ArrayList();
        bool thisUnknownHaplotype = CollectBlockHaplotypeIDs((ArrayList)this.haplotype[blockID], thisHaplotypeList);
        bool thatUnknownHaplotype = CollectBlockHaplotypeIDs((ArrayList)thatIndividual.haplotype[blockID], thatHaplotypeList);

        ArrayList sharedHaplotypeID = FindSharedHaplotypeIDs(block, thisHaplotypeList, thisUnknownHaplotype, thatHaplotypeList, thatUnknownHaplotype);

        if (sharedHaplotypeID.Count == 0)
        {
            // Nothing shared in this block: the run ended at the previous block.
            // NOTE(review): this branch attempts the merge from lengthCut / 10,
            // whereas the two branches below require the full lengthCut first.
            // Behaviour is kept as found - confirm that the difference is intended.
            if (RegionGeneticLength(newMapData, currentRegion) >= lengthCut / 10
                && currentRegion.GetLastBlockID() != currentRegion.GetFirstBlockID())
            {
                MergeOrStoreRegion(pairwiseIBDRegionList, currentRegion, thatIndividual, newMapData, lengthCut);
            }

            currentRegion.SetLastBlockID(-1);
            continue;
        }

        // Extend the running region over this block.
        currentRegion.SetEnd(block.GetEndSnpID());
        currentRegion.AddHaplotype(sharedHaplotypeID);
        currentRegion.SetLastBlockID(blockID);

        if (blockID == blockList.Length - 1)
        {
            // Very last block of the whole block list: flush and stop.
            // NOTE(review): when endBlockID < blockList.Length the loop simply
            // ends and a still-open region is not flushed - confirm that callers
            // rely on this.
            if (RegionGeneticLength(newMapData, currentRegion) >= lengthCut)
            {
                MergeOrStoreRegion(pairwiseIBDRegionList, currentRegion, thatIndividual, newMapData, lengthCut);
            }

            return;
        }

        // Inspect the SNPs lying between this block and the next one. The first
        // SNP whose genotypes differ by 2 terminates the running region.
        int nextBlockStart = blockList[blockID + 1].GetStartSnpID();
        for (int snp = block.GetEndSnpID() + 1; snp < nextBlockStart; snp++)
        {
            if (Math.Abs(this.genotype[snp] - thatIndividual.genotype[snp]) != 2)
            {
                continue;
            }

            if (RegionGeneticLength(newMapData, currentRegion) >= lengthCut)
            {
                MergeOrStoreRegion(pairwiseIBDRegionList, currentRegion, thatIndividual, newMapData, lengthCut);
            }

            currentRegion.SetLastBlockID(-1);
            currentRegion.SetEnd(snp - 1);
            break;
        }
    }
}

// Adds the haplotype IDs found in one block's HaplotypePair list to haplotypeIDs.
// Returns true as soon as an ID of -1 (treated as "unknown") is met; stops and
// returns false at the first pair whose two IDs are equal, or at the end of the list.
private static bool CollectBlockHaplotypeIDs(ArrayList pairList, ArrayList haplotypeIDs)
{
    for (int i = 0; i < pairList.Count; i++)
    {
        HaplotypePair pair = (HaplotypePair)pairList[i];

        if (pair.firstHaplotypeID == -1)
        {
            return true;
        }

        haplotypeIDs.Add(pair.firstHaplotypeID);

        if (pair.secondHaplotypeID == -1)
        {
            return true;
        }

        if (pair.secondHaplotypeID == pair.firstHaplotypeID)
        {
            // Same ID twice in the pair: the original scan stops here without
            // marking the block as unknown.
            return false;
        }

        haplotypeIDs.Add(pair.secondHaplotypeID);
    }

    return false;
}

// Builds the list of haplotype IDs regarded as shared in one block, given each
// individual's candidate IDs and whether an unknown (-1) ID was seen for them.
private static ArrayList FindSharedHaplotypeIDs(Block block, ArrayList thisHaplotypeList, bool thisUnknownHaplotype, ArrayList thatHaplotypeList, bool thatUnknownHaplotype)
{
    ArrayList shared = new ArrayList();

    if (thisUnknownHaplotype && thatUnknownHaplotype)
    {
        // Unknown on both sides: every haplotype index of the block is accepted.
        for (int s = 0; s < block.GetHaplotypeCount(); s++)
        {
            shared.Add(s);
        }
    }
    else if (thisUnknownHaplotype)
    {
        // Unknown only for this individual: accept the other one's candidates.
        shared.AddRange(thatHaplotypeList);
    }
    else if (thatUnknownHaplotype)
    {
        // Unknown only for the other individual: accept this one's candidates.
        shared.AddRange(thisHaplotypeList);
    }
    else
    {
        // Both known: intersect the two lists with a sorted two-pointer walk.
        thisHaplotypeList.Sort();
        thatHaplotypeList.Sort();

        int thisIndex = 0;
        int thatIndex = 0;
        while (thisIndex < thisHaplotypeList.Count && thatIndex < thatHaplotypeList.Count)
        {
            int thisID = (int)thisHaplotypeList[thisIndex];
            int thatID = (int)thatHaplotypeList[thatIndex];

            if (thisID == thatID)
            {
                shared.Add(thisHaplotypeList[thisIndex]);
                thisIndex++;
                thatIndex++;
            }
            else if (thisID < thatID)
            {
                thisIndex++;
            }
            else
            {
                thatIndex++;
            }
        }
    }

    return shared;
}

// Genetic distance at the region's end SNP minus the distance at its start SNP.
private static double RegionGeneticLength(MapData newMapData, PairwiseIBDRegion region)
{
    return newMapData.GetGeneticDistance(region.GetEnd()) - newMapData.GetGeneticDistance(region.GetStart());
}

// Tries to merge region into the last stored region; if the list is empty or the
// merge is refused, appends a copy of region provided its genetic length is at
// least lengthCut.
private void MergeOrStoreRegion(ArrayList pairwiseIBDRegionList, PairwiseIBDRegion region, Individual thatIndividual, MapData newMapData, double lengthCut)
{
    bool merged = pairwiseIBDRegionList.Count != 0
        && this.CombineAdjacentPairwiseRegion((PairwiseIBDRegion)pairwiseIBDRegionList[pairwiseIBDRegionList.Count - 1], region, thatIndividual);

    if (!merged && RegionGeneticLength(newMapData, region) >= lengthCut)
    {
        pairwiseIBDRegionList.Add(region.GetCopy());
    }
}
/// <summary>
/// Builds a per-block table of which individual pairs share which haplotype
/// (from the <c>pairwiseRegionList</c> field), then scans the blocks
/// <paramref name="windowStart"/> (inclusive) to <paramref name="windowEnd"/>
/// (exclusive), extending the group-sharing candidates held in
/// <paramref name="resultList"/> and moving finished ones into
/// <paramref name="result"/>. Finally <paramref name="result"/> is sorted,
/// pruned and annotated.
/// </summary>
/// <param name="newIndividualList">All individuals; its Count bounds the accepted group size.</param>
/// <param name="newBlockDictionary">Block dictionary; its <c>blockList.Count</c> sizes the table.</param>
/// <param name="linkageData">Passed through to <c>GroupShare.SetAValue</c>.</param>
/// <param name="result">Output list of finished groups; modified in place.</param>
/// <param name="resultList">Groups still open for extension on entry.</param>
/// <param name="windowStart">First block index to scan.</param>
/// <param name="windowEnd">One past the last block index to scan.</param>
/// <remarks>
/// Side effects visible in this method: every slot of <c>pairwiseRegionList</c>
/// is set to null and the field itself is then set to null; <c>GC.Collect()</c>
/// is called; progress is written to the console.
/// </remarks>
public void findGroupIBD(ArrayList newIndividualList, BlockDictionary newBlockDictionary, double[,] linkageData, List<GroupShare> result, List<GroupShare> resultList, int windowStart, int windowEnd)
{
    // ---- 1. Index every pairwise region by block and haplotype ID. ----
    ArrayList[] sharingByBlock = new ArrayList[newBlockDictionary.blockList.Count];

    for (int regionIndex = 0; regionIndex < pairwiseRegionList.Count; regionIndex++)
    {
        PairwiseIBDRegion region = (PairwiseIBDRegion)pairwiseRegionList[regionIndex];
        int regionFirstBlock = region.GetFirstBlockID();

        for (int blockID = regionFirstBlock; blockID <= region.GetLastBlockID(); blockID++)
        {
            foreach (int haplotypeID in region.GetHaplotype(blockID - regionFirstBlock))
            {
                if (sharingByBlock[blockID] == null)
                {
                    sharingByBlock[blockID] = new ArrayList();
                }

                ArrayList blockSharings = sharingByBlock[blockID];
                bool added = false;

                // Attach the pair to every existing entry for this haplotype.
                for (int k = 0; k < blockSharings.Count; k++)
                {
                    HaplotypeSharing existing = (HaplotypeSharing)blockSharings[k];
                    if (existing.haplotpeID == haplotypeID)
                    {
                        existing.individualList.Add(new int[] { region.GetFirstIndividual(), region.GetSecondIndividual() });
                        added = true;
                    }
                }

                // No entry for this haplotype yet: create one seeded with the pair.
                if (!added)
                {
                    int[] pair = new int[] { region.GetFirstIndividual(), region.GetSecondIndividual() };
                    blockSharings.Add(new HaplotypeSharing(haplotypeID, pair));
                }
            }
        }

        // Release the region as soon as it has been indexed.
        pairwiseRegionList[regionIndex] = null;
    }

    pairwiseRegionList = null;
    GC.Collect(); // NOTE(review): explicit collection kept as found; consider whether it is still needed.

    MyResultComparer resultComparer = new MyResultComparer();

    int lastTableIndex = sharingByBlock.GetUpperBound(0);
    if (sharingByBlock[lastTableIndex] == null)
    {
        sharingByBlock[lastTableIndex] = new ArrayList();
    }

    // ---- 2. Sweep the window block by block. ----
    for (int blockIndex = windowStart; blockIndex < windowEnd; blockIndex++)
    {
        if (sharingByBlock[blockIndex] == null)
        {
            // No sharing at all in this block: every open group is finished.
            foreach (GroupShare finished in resultList)
            {
                result.Add(finished);
            }

            // NOTE(review): this rebinds the parameter only; the list object the
            // caller passed in is left untouched.
            resultList = new List<GroupShare>();
            sharingByBlock[blockIndex] = new ArrayList();
            continue;
        }

        // Groups that start in this block: haplotypes shared by more than two
        // pairs (an earlier comment said "at least 2", but the test is Count > 2)
        // and by fewer than 20% of the individual count.
        ArrayList newlyComingSharingList = new ArrayList();
        foreach (HaplotypeSharing sharing in sharingByBlock[blockIndex])
        {
            int pairCount = sharing.individualList.Count;
            if (pairCount > 2 && pairCount < 0.2 * newIndividualList.Count)
            {
                GroupShare starting = new GroupShare();
                starting.sharePairList = (ArrayList)sharing.individualList.Clone();
                starting.allSharedFirstBlockID = blockIndex;
                starting.allSharedLastBlockID = blockIndex;
                starting.haplotypeList = new ArrayList();
                starting.haplotypeList.Add(sharing.haplotpeID);
                starting.SetIndividualList();
                newlyComingSharingList.Add(starting);
            }
        }

        // Nothing starts here: record every open group before trying to extend it.
        if (newlyComingSharingList.Count == 0)
        {
            foreach (GroupShare open in resultList)
            {
                result.Add(open);
            }
        }

        // Extend each open group; drop the ones whose extension reports false.
        ArrayList newlyAddedList = new ArrayList();
        int openIndex = 0;
        while (openIndex < resultList.Count)
        {
            GroupShare openGroup = resultList[openIndex];
            if (openGroup.GroupShareExtention(sharingByBlock, newIndividualList, newBlockDictionary, newlyComingSharingList, result, newlyAddedList) == false)
            {
                resultList.RemoveAt(openIndex);
            }
            else
            {
                openIndex++;
            }
        }

        // Add extension by-products first, then the fresh groups, skipping any
        // candidate that an already-open group reports as a sub-group.
        ArrayList[] candidateLists = new ArrayList[] { newlyAddedList, newlyComingSharingList };
        foreach (ArrayList candidates in candidateLists)
        {
            foreach (GroupShare candidate in candidates)
            {
                bool found = false;
                foreach (GroupShare openGroup in resultList)
                {
                    if (openGroup.ChekcSubGroup(candidate) == true)
                    {
                        found = true;
                        break;
                    }
                }

                if (found == false)
                {
                    resultList.Add(candidate);
                }
            }
        }

        resultList.Sort(resultComparer);
    }

    // ---- 3. Prune the finished groups. ----
    foreach (GroupShare share in result)
    {
        share.SetIndividualList();
    }

    result.Sort(resultComparer);

    // Of two neighbours where one contains the other, keep the containing one.
    int cursor = 0;
    while (cursor < result.Count - 1)
    {
        if (result[cursor].CheckContain(result[cursor + 1]) == true)
        {
            result.RemoveAt(cursor + 1);
        }
        else if (result[cursor + 1].CheckContain(result[cursor]) == true)
        {
            result.RemoveAt(cursor);
        }
        else
        {
            cursor++;
        }
    }

    // Of two overlapping neighbours, keep the earlier one.
    cursor = 0;
    while (cursor < result.Count - 1)
    {
        if (result[cursor].CheckOverlap(result[cursor + 1]))
        {
            result.RemoveAt(cursor + 1);
        }
        else
        {
            cursor++;
        }
    }

    foreach (GroupShare share in result)
    {
        share.SetIndividualList();
    }

    // ---- 4. Annotate the surviving groups, one pass per step. ----
    for (int i = 0; i < result.Count; i++)
    {
        result[i].SetPartiallySharedRegion(sharingByBlock, newBlockDictionary, newIndividualList, windowStart, windowEnd);
        Console.SetCursorPosition(16, Console.CursorTop);
        Console.Write("{0} / {1}", i + 1, result.Count);
    }

    foreach (GroupShare share in result)
    {
        share.SetAllSharedSnpHaplotype(newIndividualList, newBlockDictionary);
    }

    foreach (GroupShare share in result)
    {
        share.SetPartiallySharedSnpHaplotype(newIndividualList, newBlockDictionary);
    }

    foreach (GroupShare share in result)
    {
        share.SetAValue(newBlockDictionary, newIndividualList, linkageData);
    }

    Console.WriteLine("\t\t\t\t\tFinished.");

    // ---- 5. Final sort and prune after annotation. ----
    result.Sort(resultComparer);

    cursor = 0;
    while (cursor < result.Count - 1)
    {
        if (result[cursor].CheckContain(result[cursor + 1]))
        {
            result.RemoveAt(cursor + 1);
        }
        else
        {
            cursor++;
        }
    }

    cursor = 0;
    while (cursor < result.Count - 1)
    {
        if (result[cursor].CheckOverlap(result[cursor + 1]))
        {
            result.RemoveAt(cursor + 1);
        }
        else
        {
            cursor++;
        }
    }
}