// Streams (kMer, as-read count, RC count) tuples from a .cbt file into kMersTable and,
// when hdubMerList is non-null, collects the high-depth, unbalanced (HDUB) kMers into it,
// ordered by summed count (highest first).
//
//   cbtFile      - open reader positioned at the first tuple; always closed by this method
//   minLoadDepth - minimum summed (as-read + RC) count for a kMer to be added to kMersTable
//   minDeepDepth - initial summed-count threshold for admission to the HDUB candidate list
//
// If kMersTable.LoadFinished() reports failure, the loaded counters are zeroed and
// kMersTable is set to null.
private void LoadCBTTiles(BinaryReader cbtFile, int minLoadDepth, int minDeepDepth)
{
    // candidate list is only trimmed once it grows past this many entries
    const int hdubTrimTrigger = 100;

    List<ulong> hdubMers = new List<ulong>();   // accumulating set of high-depth, unbalanced kMers
    List<int> hdubCounts = new List<int>();     // and their summed counts (parallel to hdubMers)
    int deepestDepthSoFar = 0;                  // the deepest depth seen among the candidates
    int deepThreshold = minDeepDepth;           // current threshold for admission to the candidate list
    bool thresholdChanged = false;              // has deepThreshold moved since the last trim?

    try
    {
        while (true)
        {
            ulong kMer;         // packed form of the next kMer
            int asReadCount;    // as-read count for this kMer
            int rcCount;        // reverse-complement count for this kMer

            try
            {
                kMer = cbtFile.ReadUInt64();
                asReadCount = cbtFile.ReadInt32();
                rcCount = cbtFile.ReadInt32();
            }
            catch (EndOfStreamException)
            {
                // normal termination (a truncated trailing tuple is discarded);
                // any other I/O failure is allowed to propagate rather than being mistaken for EOF
                break;
            }

            ulong countPair = ((ulong)asReadCount << 32) + (ulong)rcCount;   // both counts packed into one value
            int sumCount = asReadCount + rcCount;

            // add all sufficiently deep mers tiled from the reads into our hash table
            if (sumCount >= minLoadDepth)
            {
                kMersTable.Add(kMer, countPair);
                distinctMersLoaded++;
                totalMersLoaded += sumCount;
            }

            // nothing more to do unless we're accumulating HDUBs
            if (hdubMerList == null)
            {
                continue;
            }

            int lowCount = Math.Min(asReadCount, rcCount);
            bool rcAsRead = rcCount > asReadCount;      // the RC form is the dominant strand

            // does this mer look like a candidate for the HDUB list?
            // (deep, and the minor strand holds at most 1% of the total)
            if (sumCount <= deepThreshold || lowCount > sumCount / 100)
            {
                continue;
            }

            // if the new 'deep' kMer is deeper than anything in the list already, adjust the threshold
            if (sumCount > deepestDepthSoFar)
            {
                deepestDepthSoFar = sumCount;
                deepThreshold = sumCount / 4;
                thresholdChanged = true;
            }

            // add the dominant-strand form of the kMer to the lists
            hdubMers.Add(rcAsRead ? kMers.ReverseComplement(kMer, merSize) : kMer);
            hdubCounts.Add(sumCount);

            // Trim the list if it's getting a bit big (just want the top few HDUBs).
            // Every entry below the current threshold is dropped by compacting both lists in
            // place; removing by index while advancing the index would skip entries.
            // Re-scanning is only useful after the threshold has moved.
            if (hdubMers.Count > hdubTrimTrigger && thresholdChanged)
            {
                int kept = 0;
                for (int i = 0; i < hdubCounts.Count; i++)
                {
                    if (hdubCounts[i] >= deepThreshold)
                    {
                        hdubMers[kept] = hdubMers[i];
                        hdubCounts[kept] = hdubCounts[i];
                        kept++;
                    }
                }
                hdubMers.RemoveRange(kept, hdubMers.Count - kept);
                hdubCounts.RemoveRange(kept, hdubCounts.Count - kept);
                thresholdChanged = false;
            }
        } // all cbt tuples in the file
    }
    finally
    {
        // release the file even if loading fails part-way through
        cbtFile.Close();
    }

    // flush final buffer
    bool mersLoaded = kMersTable.LoadFinished();
    GC.Collect();

    if (!mersLoaded)
    {
        Console.WriteLine(".cbt load failed - check file is sorted");
        distinctMersLoaded = 0;
        totalMersLoaded = 0;
        kMersTable = null;
    }

    if (hdubMerList != null)
    {
        // return the HDUBs in sorted order (highest count first):
        // sort ascending by count, carrying the mers along, then reverse the mers
        ulong[] sortedMers = hdubMers.ToArray();
        int[] sortedCounts = hdubCounts.ToArray();
        Array.Sort<int, ulong>(sortedCounts, sortedMers);
        Array.Reverse(sortedMers);

        foreach (ulong mer in sortedMers)
        {
            hdubMerList.Add(mer);
        }
    }
}
/// <summary>
/// Loads a kMer-pairs file into <c>pairsTable</c>. The file starts with a 4-byte pair gap,
/// followed by (8-byte packed pair, 4-byte depth) records read until end of stream.
/// </summary>
/// <param name="pairsFN">Path of the pairs file to load.</param>
/// <param name="minLoadDepth">Depth filter; only pairs with a depth strictly greater than this are loaded.</param>
/// <remarks>
/// If <c>pairsTable.LoadFinished()</c> reports failure, <c>pairsTable</c> is set to null and the
/// loaded-pair statistics are zeroed.
/// </remarks>
public PairTable(string pairsFN, int minLoadDepth)
{
    Console.WriteLine("Loading kMer pairs from " + pairsFN + " (min=" + minLoadDepth + ")");
    DateTime loadStart = DateTime.Now;

    // size the table from the file length (less the 4-byte gap header)
    FileInfo pairsFI = new FileInfo(pairsFN);
    long pairsFileLength = pairsFI.Length - 4;
    long pairsArrayLength = pairsFileLength / bytesPerPair + 1;

    // allocate the pairs table (32-mers - 2x16)
    pairsTable = new MerTable<int>(pairsArrayLength, 32);

    // 'using' guarantees the file is released even if loading throws part-way through
    using (BinaryReader pairsFile = new BinaryReader(new FileStream(pairsFN, FileMode.Open, FileAccess.Read, FileShare.Read, 1000000, FileOptions.SequentialScan)))
    {
        pairGap = pairsFile.ReadInt32();
        pairFullLength = 2 * kMerPairs.pairFragmentSize + pairGap;

        while (true)
        {
            ulong pair;
            int pairDepth;

            try
            {
                pair = pairsFile.ReadUInt64();
                pairDepth = pairsFile.ReadInt32();
            }
            catch (EndOfStreamException)
            {
                // normal termination (a truncated trailing record is discarded)
                break;
            }

            // NOTE(review): strict '>' although the log message above prints the value as "min=" - confirm intent
            if (pairDepth > minLoadDepth)
            {
                pairsTable.Add(pair, pairDepth);
                distinctPairsLoaded++;
                totalPairsLoaded += pairDepth;
            }
        }
    }

    bool prsLoaded = pairsTable.LoadFinished();

    // an empty file, or a filter that rejects every pair, must not divide by zero
    averageDepthLoaded = distinctPairsLoaded > 0 ? (int)(totalPairsLoaded / distinctPairsLoaded) : 0;

    if (prsLoaded)
    {
        Console.WriteLine("Finished loading kMer pairs table. " +
                          distinctPairsLoaded + "/" + totalPairsLoaded + " " + "pairs loaded, average depth " + averageDepthLoaded +
                          " in " + (DateTime.Now - loadStart).TotalSeconds.ToString("#.0") + "s");
    }
    else
    {
        Console.WriteLine(".prs load failed - check file is sorted");
        // leave no stale statistics behind for a table that does not exist
        distinctPairsLoaded = 0;
        totalPairsLoaded = 0;
        averageDepthLoaded = 0;
        pairsTable = null;
    }

    GC.Collect();
}