/// <summary>
/// Scans every window of three consecutive SNPs. For each window start <c>s</c> where at
/// least three individuals have a positive product of their values at <c>s</c>,
/// <c>s + 1</c> and <c>s + 2</c>, a <see cref="RareBlock"/> is appended to
/// <paramref name="rareBlockList"/>. The block starts at <c>s</c> and is extended to the
/// right for as long as no member of the group has a zero value at the next SNP.
/// </summary>
/// <param name="rareBlockList">List that receives the newly created blocks.</param>
public void SearchRare(ArrayList rareBlockList)
{
    for (int s = 0; s < snpCount - 2; s++)
    {
        // Collect the individuals that are non-zero at all three SNPs of the window.
        ArrayList group = new ArrayList();
        foreach (int i in this.individual)
        {
            if (this.snp[i, s] * this.snp[i, s + 1] * this.snp[i, s + 2] > 0)
            {
                group.Add(i);
            }
        }

        if (group.Count < 3)
        {
            continue;
        }

        // Invariant: 'end' is the last SNP index at which every group member is known
        // to be non-zero. Keeping it that way in both exit paths fixes the original
        // off-by-one, where reaching the final SNP dropped that SNP from the block.
        int end = s + 2;
        while (end < this.snpCount - 1)
        {
            bool everyoneCarriesNext = true;
            foreach (int j in group)
            {
                if (this.snp[j, end + 1] == 0)
                {
                    everyoneCarriesNext = false;
                    break;
                }
            }

            if (!everyoneCarriesNext)
            {
                break;
            }

            end++;
        }

        RareBlock newRareBlock = new RareBlock();
        newRareBlock.SetStart(s);
        newRareBlock.SetEnd(end);
        newRareBlock.InitialSequence(newRareBlock.end - newRareBlock.start + 1);

        // Every position of a freshly found block is given the sequence value 1.
        for (int k = 0; k < newRareBlock.sequence.Length; k++)
        {
            newRareBlock.SetSequence(k, 1);
        }

        newRareBlock.group = (int[])group.ToArray(typeof(int));
        rareBlockList.Add(newRareBlock);
    }
}
/// <summary>
/// Computes a per-SNP transition value for this block and splits the block wherever that
/// value is at or below 0.001.
/// </summary>
/// <remarks>
/// Haplotype codes (from the original comments): 0 = shared common allele, 2 = shared rare
/// allele, 1 = uncertain. Columns 2..5 of <paramref name="linkageData"/> are read as the
/// transitions major-to-major, major-to-minor, minor-to-major and minor-to-minor
/// (per the original inline comment; the table layout itself is not visible here).
/// The row used for position <c>i</c> is <c>GetStart() + i - 1</c>.
/// </remarks>
/// <param name="linkageData">Transition table indexed by absolute SNP index.</param>
/// <param name="pairwiseIBDList">List holding this block at <paramref name="index"/>;
/// modified when break points are found.</param>
/// <param name="index">Position of this block inside <paramref name="pairwiseIBDList"/>.</param>
/// <returns>
/// <c>true</c> when no break point was found (the list is untouched); <c>false</c> when at
/// least one was found, in which case the entry at <paramref name="index"/> is removed and
/// every segment of at least 100 SNPs between break points is inserted at
/// <c>index + 1</c>.
/// </returns>
public bool Pvalue(double[,] linkageData, ArrayList pairwiseIBDList, int index)
{
    const double BreakThreshold = 0.001;
    const int MinSegmentLength = 100;

    this.InitialP();
    this.InitialHaplotype(this.GetSequence().Length);
    this.SetHaplotype(this.GetSequence());
    this.SetP(0, 0); // kept from the original; overwritten with 1 at i == 0 below

    int origin = this.GetStart();

    for (int i = 0; i < this.GetP().Length; i++)
    {
        if (i == 0)
        {
            // First SNP of the region: anything above 1 is treated as rare (2), else common (0).
            this.SetHaplotype(i, this.GetSequence(i) > 1 ? 2 : 0);
            this.SetP(0, 1);
            continue;
        }

        if (this.haplotype[i] == 1)
        {
            // Uncertain position: this variant does not try to resolve it.
            this.SetP(i, 1);
            continue;
        }

        int currentHap = this.GetHaplotype(i);
        int lastHap = this.GetHaplotype(i - 1);
        if (lastHap == 1)
        {
            // No transition can be looked up from an uncertain predecessor.
            this.SetP(i, 1);
            continue;
        }

        // Column 2/3 = from major to major/minor, column 4/5 = from minor to major/minor.
        int column = (lastHap == 0 ? 2 : 4) + (currentHap == 0 ? 0 : 1);
        this.SetP(i, linkageData[origin + i - 1, column]);
    }

    // Look for break points: positions whose transition value is (almost) zero.
    int lastBreak = 0;   // offset (relative to this block) of the most recent break point
    bool flag = false;   // true once at least one break point has been seen

    for (int i = 0; i < this.GetP().Length; i++)
    {
        if (this.GetP(i) <= BreakThreshold)
        {
            if (i - lastBreak >= MinSegmentLength)
            {
                RareBlock newBlock = new RareBlock();
                newBlock.InitialSequence(i - lastBreak);
                for (int k = 0; k < newBlock.GetSequence().Length; k++)
                {
                    newBlock.SetSequence(k, this.GetSequence(k + lastBreak));
                }

                // lastBreak and i are offsets inside this block; start/end are absolute
                // SNP indices (they are used to index linkageData), so add the origin.
                // The original stored the bare offsets.
                newBlock.SetStart(origin + lastBreak);
                newBlock.SetEnd(origin + i - 1);
                newBlock.SetGroup(this.GetGroup());
                pairwiseIBDList.Insert(index + 1, newBlock);
            }

            lastBreak = i;
            flag = true;
        }
    }

    // NOTE(review): the segment after the final break point is never emitted, even when
    // it is longer than MinSegmentLength - confirm that this is intended.
    if (flag)
    {
        pairwiseIBDList.RemoveAt(index);
        return false;
    }

    return true;
}
/// <summary>
/// Same as <c>Pvalue</c>, but uncertain haplotype positions (code 1) are resolved from the
/// linkage data before the block is checked for break points.
/// </summary>
/// <remarks>
/// Haplotype codes (from the original comments): 0 = shared common allele, 2 = shared rare
/// allele, 1 = uncertain. Columns 2..5 of <paramref name="linkageData"/> are read as the
/// transitions major-to-major, major-to-minor, minor-to-major and minor-to-minor
/// (per the original inline comment; the table layout itself is not visible here).
/// An uncertain position is resolved in one of three ways:
/// (1) if one of the two transitions out of the previous SNP equals exactly 1, that allele
/// is forced and also written into the sequence;
/// (2) if it is the last position of the block, the allele whose "to major" transition is
/// at least 0.5 (else the minor one) is taken;
/// (3) otherwise the whole run of uncertain positions is resolved with a table that is
/// filled backwards from the first determined position after the run, and the loop then
/// revisits the run to compute its transition values.
/// </remarks>
/// <param name="linkageData">Transition table indexed by absolute SNP index.</param>
/// <param name="pairwiseIBDList">List holding this block at <paramref name="index"/>;
/// modified when break points are found.</param>
/// <param name="index">Position of this block inside <paramref name="pairwiseIBDList"/>.</param>
/// <returns>
/// <c>true</c> when no break point was found (the list is untouched); <c>false</c> when at
/// least one was found, in which case the entry at <paramref name="index"/> is removed and
/// every segment of at least 100 SNPs between break points is inserted at
/// <c>index + 1</c>.
/// </returns>
public bool PvalueWithMM(double[,] linkageData, ArrayList pairwiseIBDList, int index)
{
    const double BreakThreshold = 0.001;
    const int MinSegmentLength = 100;

    this.InitialP();
    this.InitialHaplotype(this.GetSequence().Length);
    this.SetHaplotype(this.GetSequence());
    this.SetP(0, 0); // kept from the original; overwritten with 1 at i == 0 below

    int origin = this.GetStart();

    for (int i = 0; i < this.GetP().Length; i++)
    {
        if (i == 0)
        {
            // First SNP of the region: anything above 1 is treated as rare (2), else common (0).
            this.SetHaplotype(i, this.GetSequence(i) > 1 ? 2 : 0);
            this.SetP(0, 1);
            continue;
        }

        // Row of linkageData describing the transition from SNP i-1 into SNP i.
        int prevRow = origin + i - 1;

        if (this.haplotype[i] != 1)
        {
            // Determined position: look the transition up directly.
            int currentHap = this.GetHaplotype(i);
            int lastHap = this.GetHaplotype(i - 1);
            if (lastHap == 1)
            {
                this.SetP(i, 1);
                continue;
            }

            // Column 2/3 = from major to major/minor, column 4/5 = from minor to major/minor.
            int column = (lastHap == 0 ? 2 : 4) + (currentHap == 0 ? 0 : 1);
            this.SetP(i, linkageData[prevRow, column]);
            continue;
        }

        // ---- Uncertain position -------------------------------------------------
        // Columns to consult depend on the previous haplotype; anything other than 0
        // is treated as "minor", exactly as in the original.
        int toMajor = (this.haplotype[i - 1] == 0) ? 2 : 4;
        int toMinor = toMajor + 1;

        // (1) A transition of exactly 1 forces the allele.
        if (linkageData[prevRow, toMajor] == 1.0)
        {
            this.SetSequence(i, 0);
            this.SetHaplotype(i, 0);
            this.SetP(i, 1);
            continue;
        }

        if (linkageData[prevRow, toMinor] == 1.0)
        {
            this.SetSequence(i, 2);
            this.SetHaplotype(i, 2);
            this.SetP(i, 1);
            continue;
        }

        // (2) Last position of the block: take the more likely allele.
        if (i == this.GetP().Length - 1)
        {
            if (linkageData[prevRow, toMajor] >= 0.5)
            {
                this.SetHaplotype(i, 0);
                this.SetP(i, linkageData[prevRow, toMajor]);
            }
            else
            {
                this.SetHaplotype(i, 2);
                this.SetP(i, linkageData[prevRow, toMinor]);
            }

            continue;
        }

        // (3) Resolve the whole run of uncertain positions at once.
        int regionStart = i; // first uncertain position of the run

        // Advance i to the first determined position after the run, or to the last
        // index of the block if the run reaches it.
        while (i < this.p.Length - 1)
        {
            i++;
            if (this.haplotype[i] != 1)
            {
                break;
            }
        }

        int length = i - regionStart; // number of positions resolved by the table

        // Row r describes SNP regionStart + r:
        //   [r, 0] allele chosen when the previous SNP is major, [r, 1] its value
        //   [r, 2] allele chosen when the previous SNP is minor, [r, 3] its value
        // The final row (r == length) is the fixed position that follows the run.
        double[,] markovP = new double[length + 1, 4];
        markovP[length, 0] = this.haplotype[i];
        markovP[length, 2] = this.haplotype[i];

        if (i < this.GetP().Length - 1)
        {
            // Transition into the position that follows the run. The original indexed
            // linkageData with the block-relative offset only; every other lookup in
            // this method adds the block's absolute start, so it is added here too.
            int boundaryRow = origin + regionStart + length - 1;
            if (markovP[length, 0] == 0)
            {
                markovP[length, 1] = linkageData[boundaryRow, 2];
                markovP[length, 3] = linkageData[boundaryRow, 4];
            }
            else
            {
                markovP[length, 1] = linkageData[boundaryRow, 3];
                markovP[length, 3] = linkageData[boundaryRow, 5];
            }
        }
        else
        {
            // The run touches the end of the block: no preference for either allele.
            markovP[length, 1] = 0.5;
            markovP[length, 3] = 0.5;
        }

        // Fill the table from the bottom row upwards.
        for (int m = length - 1; m >= 0; m--)
        {
            int row = origin + regionStart + m - 1;

            double majorToMajor = linkageData[row, 2] * markovP[m + 1, 1];
            double majorToMinor = linkageData[row, 3] * markovP[m + 1, 3];
            if (majorToMajor >= majorToMinor)
            {
                markovP[m, 0] = 0;
                markovP[m, 1] = majorToMajor;
            }
            else
            {
                markovP[m, 0] = 2;
                markovP[m, 1] = majorToMinor;
            }

            double minorToMajor = linkageData[row, 4] * markovP[m + 1, 1];
            double minorToMinor = linkageData[row, 5] * markovP[m + 1, 3];
            if (minorToMajor >= minorToMinor)
            {
                markovP[m, 2] = 0;
                markovP[m, 3] = minorToMajor;
            }
            else
            {
                markovP[m, 2] = 2;
                markovP[m, 3] = minorToMinor;
            }
        }

        // Read the table top-down: each choice depends on the (now fixed) previous allele.
        for (int n = 0; n < length; n++)
        {
            if (this.haplotype[regionStart - 1 + n] == 0)
            {
                this.SetHaplotype(regionStart + n, (int)markovP[n, 0]);
            }
            else
            {
                this.SetHaplotype(regionStart + n, (int)markovP[n, 2]);
            }
        }

        // Step back so the next iteration revisits the run, which is now determined,
        // and computes its transition values through the branch above.
        i = regionStart - 1;
    }

    // Look for break points: positions whose transition value is (almost) zero.
    int lastBreak = 0;   // offset (relative to this block) of the most recent break point
    bool flag = false;   // true once at least one break point has been seen

    for (int i = 0; i < this.GetP().Length; i++)
    {
        if (this.GetP(i) <= BreakThreshold)
        {
            if (i - lastBreak >= MinSegmentLength)
            {
                RareBlock newBlock = new RareBlock();
                newBlock.InitialSequence(i - lastBreak);
                for (int k = 0; k < newBlock.GetSequence().Length; k++)
                {
                    newBlock.SetSequence(k, this.GetSequence(k + lastBreak));
                }

                // lastBreak and i are offsets inside this block; start/end are absolute
                // SNP indices (they are used to index linkageData), so add the origin.
                // The original stored the bare offsets.
                newBlock.SetStart(origin + lastBreak);
                newBlock.SetEnd(origin + i - 1);
                newBlock.SetGroup(this.GetGroup());
                pairwiseIBDList.Insert(index + 1, newBlock);
            }

            lastBreak = i;
            flag = true;
        }
    }

    // NOTE(review): the segment after the final break point is never emitted, even when
    // it is longer than MinSegmentLength - confirm that this is intended.
    if (flag)
    {
        pairwiseIBDList.RemoveAt(index);
        return false;
    }

    return true;
}
/// <summary>
/// Tries to merge this block with later blocks of <paramref name="rareBlockList"/> that
/// share at least three individuals, recursing on each merged block, and records
/// sufficiently long results in <paramref name="combinedBlockList"/>.
/// </summary>
/// <param name="i">Index in <paramref name="rareBlockList"/> after which to search; only
/// indices <c>i + 1</c> to <c>i + 49</c> are considered.</param>
/// <param name="rareBlockList">Candidate blocks. The two-pointer intersection below
/// assumes every block's group array is sorted ascending - TODO confirm.</param>
/// <param name="combinedBlockList">Receives copies of the blocks that are kept.</param>
/// <param name="dataSet">SNP data handed to <c>CheckCommon</c> and <c>Extend</c>.</param>
public void Combine(int i, ArrayList rareBlockList, ArrayList combinedBlockList, SnpDataSet dataSet)
{
    const int SearchWindow = 50;        // how many list positions ahead are examined
    const int MinExtendedSpan = 170;    // span required after Extend() on a 3-member block
    const int MinSpan = 70;             // span required to record this block itself

    if (i == rareBlockList.Count - 1)
    {
        return; // reached the end of rareBlockList
    }

    ArrayList newGroup = new ArrayList();
    bool flag = true; // cleared once a merged block keeps this block's entire group

    for (int j = i + 1; j < i + SearchWindow && j < rareBlockList.Count; j++)
    {
        // Only getters are called on the candidate, so no copy is needed.
        RareBlock block = (RareBlock)rareBlockList[j];
        if (block.GetStart() < this.GetEnd())
        {
            continue; // candidate starts before this block ends
        }

        // Intersect the two groups of individuals with a two-pointer walk.
        newGroup.Clear();
        int[] otherGroup = block.GetGroup();
        int m = 0;
        int n = 0;
        while (m < this.group.Length && n < otherGroup.Length)
        {
            if (this.group[m] == otherGroup[n])
            {
                newGroup.Add(this.group[m]);
                m++;
                n++;
            }
            else if (this.group[m] > otherGroup[n])
            {
                n++;
            }
            else
            {
                m++;
            }
        }

        if (newGroup.Count <= 2)
        {
            continue; // fewer than three shared individuals
        }

        RareBlock newRareBlock = new RareBlock();
        newRareBlock.SetStart(this.GetStart());
        newRareBlock.SetEnd(block.GetEnd());
        newRareBlock.SetGroup((int[])newGroup.ToArray(typeof(int)));

        // Check the SNP data between the two rare blocks.
        if (!newRareBlock.CheckCommon(dataSet))
        {
            continue;
        }

        if (newRareBlock.GetGroup().Length == 3)
        {
            // Smallest allowed group: extend instead of combining any further.
            newRareBlock.Extend(dataSet);
            if (newRareBlock.GetEnd() - newRareBlock.GetStart() > MinExtendedSpan)
            {
                // NOTE(review): the original records 'this' here, not the extended
                // newRareBlock; kept as is - confirm that this is intended.
                this.RecordInCombinedList(combinedBlockList);
            }
        }
        else
        {
            // Passed the check: keep combining from the merged block.
            newRareBlock.Combine(j, rareBlockList, combinedBlockList, dataSet);
        }

        if (this.group.Length - newRareBlock.group.Length == 0)
        {
            // The merged block kept every individual of this block; stop searching.
            flag = false;
            break;
        }
    }

    // The original compared GetEnd() with itself, which is always 0 and made this
    // branch unreachable; the span of the block is end minus start.
    if (flag && (this.GetEnd() - this.GetStart()) > MinSpan)
    {
        this.RecordInCombinedList(combinedBlockList);
    }
}

/// <summary>
/// Records a copy of this block in <paramref name="combinedBlockList"/>: skipped when
/// <c>CheckSame</c> reports the last entry as the same block, written over the last entry
/// when <c>CheckContain</c> reports that this block contains it, appended otherwise.
/// </summary>
private void RecordInCombinedList(ArrayList combinedBlockList)
{
    if (combinedBlockList.Count == 0)
    {
        combinedBlockList.Add(this.GetCopy());
        return;
    }

    int lastIndex = combinedBlockList.Count - 1;
    RareBlock lastRareBlock = (RareBlock)combinedBlockList[lastIndex];
    if (lastRareBlock.CheckSame(this))
    {
        return;
    }

    if (this.CheckContain(lastRareBlock))
    {
        combinedBlockList[lastIndex] = this.GetCopy();
    }
    else
    {
        combinedBlockList.Add(this.GetCopy());
    }
}