/// <summary>
/// Walks the kMers of a read from left to right and reports the first one that looks "poor":
/// either its combined (plus + reverse-complement) depth is below the read's minimum depth,
/// or the kMer pair starting at it has a pair depth below that same minimum.
/// </summary>
/// <param name="kMersTable">kMer table queried for depth counts; also supplies merSize and averageDepthLoaded.</param>
/// <param name="pairsTable">Pair table queried for pair depths. May be null, in which case the pair check is skipped.</param>
/// <param name="read">The read being examined; only its Length is used here.</param>
/// <param name="mers">The kMers of the read, indexed by their starting position in the read.</param>
/// <param name="depths">Depths passed to HarmonicMean to derive the minimum acceptable depth for this read.</param>
/// <returns>Index of the first poor kMer, or -1 if no kMer failed the checks.</returns>
private static int FindPoorMer(kMerTable kMersTable, PairTable pairsTable, Sequence read, ulong[] mers, int[] depths)
{
    int merSize = kMersTable.merSize;
    // pairsTable is optional (the loop below tests it for null), so only read its gap when it exists.
    // pairGap is only ever used inside that null-guarded branch, so the fallback value is never consumed.
    int pairGap = (pairsTable != null) ? pairsTable.pairGap : 0;
    int firstMerIdx = 0;
    int mersInRead = read.Length - merSize + 1;
    int firstPoorMer = -1;

    // minimum depth is a third of the harmonic mean depth, falling back to a tenth of the loaded average
    int minDepthForRead = HarmonicMean(depths, 0, mersInRead) / 3;
    if (minDepthForRead == 0)
    {
        minDepthForRead = kMersTable.averageDepthLoaded / 10;
    }

    // check every kMer in the trimmed/extended read
    while (firstMerIdx < mersInRead)
    {
        int plusCount = 0;
        int rcCount = 0;
        kMersTable.GetDepthCounts(mers[firstMerIdx], out plusCount, out rcCount);

        if (plusCount + rcCount < minDepthForRead)
        {
            firstPoorMer = firstMerIdx;
            break;
        }

        // finally check that we have a pair (if we can)
        if (pairsTable != null)
        {
            // index of the kMer whose tail supplies the second fragment of the pair
            int lastMerIdx = firstMerIdx + pairGap + 2 * kMerPairs.pairFragmentSize - merSize;

            // pairs that would run off the end of the read are simply not checked
            if (lastMerIdx < mersInRead)
            {
                ulong firstMerInPair = mers[firstMerIdx];
                ulong lastMerInPair = mers[lastMerIdx];

                // combine the masked first kMer with the right-shifted, masked last kMer
                ulong pair = (firstMerInPair & kMerPairs.firstFragmentMask) |
                             ((lastMerInPair >> (64 - merSize * 2)) & kMerPairs.lastFragmentMask);

                // look up whichever of the pair and its kMers.ReverseComplement value is numerically smaller
                ulong rcPair = kMers.ReverseComplement(pair, kMerPairs.pairSize);
                if (rcPair < pair)
                {
                    pair = rcPair;
                }

                int pairDepth = pairsTable.GetPairDepth(pair);
                if (pairDepth < minDepthForRead)
                {
                    firstPoorMer = firstMerIdx;
                    break;
                }
            }
        }

        firstMerIdx++;
    }

    return firstPoorMer;
}
/// <summary>
/// Tries to extend a read to the right, one base at a time. For each position all four bases are
/// tried; a base is accepted only when it is the single candidate whose kMer passes the depth,
/// strand-balance and (when pairs are in use) pair-depth checks. Extension stops at the first
/// position with zero or several acceptable candidates, when wantedExtension bases have been
/// added, or when the mers/depths/pairDepths arrays have no room for another entry.
/// </summary>
/// <param name="read">The read to extend; accepted bases are appended to it.</param>
/// <param name="quals">Quality values for the read; if non-empty, the last qual is duplicated for each appended base.</param>
/// <param name="mersInRead">Number of kMers currently in the read; the last one seeds the extension.</param>
/// <param name="pairsInRead">Number of pairs in the read; pair checking is only done when this is positive.</param>
/// <param name="mers">kMers of the read; the kMer for each appended base is written here.</param>
/// <param name="depths">Depths parallel to <paramref name="mers"/>; updated for each appended base.</param>
/// <param name="minDepth">Minimum combined (plus + rc) depth for a candidate kMer.</param>
/// <param name="pairDepths">Pair depths parallel to <paramref name="mers"/>; updated for each appended base.</param>
/// <param name="minPairDepth">Minimum pair depth for a candidate.</param>
/// <param name="kMersTable">kMer table queried for depth counts; also supplies merSize.</param>
/// <param name="pairsTable">Pair table; only dereferenced when <paramref name="pairsInRead"/> is positive.</param>
/// <param name="wantedExtension">Maximum number of bases to append.</param>
/// <returns>The number of bases actually appended.</returns>
public static int ExtendRead(Sequence read, Sequence quals, int mersInRead, int pairsInRead, ulong[] mers, int[] depths, int minDepth, int[] pairDepths, int minPairDepth, kMerTable kMersTable, PairTable pairsTable, int wantedExtension)
{
    int merSize = kMersTable.merSize;
    int pairGap = 0;
    if (pairsInRead > 0)
    {
        pairGap = pairsTable.pairGap;
    }

    // nothing requested, or no valid starting kMer to extend from (avoids indexing outside mers)
    if (wantedExtension <= 0 || mersInRead <= 0 || mersInRead > mers.Length)
    {
        return 0;
    }

    int lastMerIdx = mersInRead - 1;
    int merFill = 64 - merSize * 2;                 // unused bits at RHS of ulong kMer
    int basesAdded = 0;
    char[] bases = new char[] { 'A', 'C', 'G', 'T' };

    while (basesAdded < wantedExtension)
    {
        ulong lastMer = mers[lastMerIdx];
        ulong rshiftedMer = (lastMer >> merFill) << 2;  // right-shifted kMer with a hole at the last base

        int bestBase = -1;                          // assume no good base found
        int bestBasesFound = 0;                     // how many acceptable extensions were found - only allowed answer is '1'
        ulong bestMer = 0;                          // the kMer that was accepted
        int bestDepth = 0;                          // and its depth
        int bestPairDepth = 0;                      // and its pair depth

        int plusCount = 0;
        int rcCount = 0;
        int pairDepth = 0;

        // if the current last kMer is itself deep but (almost) single-stranded,
        // unbalanced candidates are tolerated below
        kMersTable.GetDepthCounts(lastMer, out plusCount, out rcCount);
        bool lastGoodMerUnbalanced = (plusCount + rcCount > minDepth) && (plusCount <= 1 || rcCount <= 1);

        // try each of the possible bases
        for (ulong b = 0; b < 4; b++)
        {
            // fill the hole with the candidate base and shift back into place (drops the first base)
            ulong possibleMer = (rshiftedMer | b) << merFill;
            kMersTable.GetDepthCounts(possibleMer, out plusCount, out rcCount);

            // skip alternative if not deep enough
            if (plusCount + rcCount < minDepth)
            {
                continue;
            }

            // and be very wary of kMers with 0/1 on one strand
            if (!lastGoodMerUnbalanced && (plusCount <= 1 || rcCount <= 1))
            {
                continue;
            }

            // and also skip if it doesn't have a pair match
            if (pairsInRead > 0)
            {
                int firstMerIdx = lastMerIdx + 1 + merSize - pairGap - 2 * kMerPairs.pairFragmentSize;
                if (firstMerIdx >= 0)
                {
                    // NOTE(review): none of the arguments to ConstructPair depend on the candidate base b,
                    // and the boolean it returns is ignored (as it always was) - confirm both are intended.
                    ulong pair;
                    kMerPairs.ConstructPair(read, firstMerIdx, pairGap, out pair);
                    pairDepth = pairsTable.GetPairDepth(pair);
                    if (pairDepth < minPairDepth)
                    {
                        continue;
                    }
                }
            }

            // have an alternative that is both deep enough and has a pair
            bestBase = (int)b;
            bestMer = possibleMer;
            bestDepth = plusCount + rcCount;
            bestPairDepth = pairDepth;
            bestBasesFound++;
        }

        // only an unambiguous (single) acceptable extension is taken; anything else ends the extension
        if (bestBasesFound != 1)
        {
            break;
        }

        // make sure there is room to record the new kMer *before* touching the read, so that
        // the read and the parallel arrays can never get out of step
        int nextMerIdx = lastMerIdx + 1;
        if (nextMerIdx >= mers.Length || nextMerIdx >= depths.Length || nextMerIdx >= pairDepths.Length)
        {
            break;
        }

        read.Append(bases[bestBase]);
        if (quals.Length > 0)
        {
            quals.Append(quals.Bases[quals.Length - 1]);    // copy the qual from the previous base
        }

        lastMerIdx = nextMerIdx;
        mers[lastMerIdx] = bestMer;
        depths[lastMerIdx] = bestDepth;
        pairDepths[lastMerIdx] = bestPairDepth;
        basesAdded++;
    }

    return basesAdded;
}