Esempio n. 1
0
        /// <summary>
        /// Loads a kMer table from a .cbt file: reads the kMer size from the file header,
        /// allocates the table, loads the tuples via <c>LoadCBTTiles</c> and, if requested,
        /// builds the HDUB filter from the high-depth, unbalanced kMers found during the load.
        /// </summary>
        /// <param name="cbtFN">Path of the .cbt file to load.</param>
        /// <param name="minLoadDepth">Minimum summed count passed on to <c>LoadCBTTiles</c>.</param>
        /// <param name="deepThreshold">
        /// When greater than zero, HDUB kMers are collected during the load (this value is the
        /// initial depth threshold); otherwise no HDUB list or filter is built.
        /// </param>
        public kMerTable(string cbtFN, int minLoadDepth, int deepThreshold)
        {
            Console.WriteLine("Loading kMers from " + cbtFN + " (min=" + minLoadDepth + ")");
            DateTime loadStart = DateTime.Now;

            // file size in kmers - used to size the table
            long merArraySize = new FileInfo(cbtFN).Length / bytesPerCBTMer;

            // only accumulate HDUB kMers when a positive threshold was requested
            hdubMerList = deepThreshold > 0 ? new List<ulong>(100) : null;

            // 'using' guarantees the file is released even if the header read, the table
            // allocation or the load throws (LoadCBTTiles also closes the reader itself;
            // closing it again is harmless)
            using (BinaryReader cbtFile = new BinaryReader(new FileStream(cbtFN, FileMode.Open, FileAccess.Read, FileShare.Read, 1000000, FileOptions.SequentialScan)))
            {
                merSize = cbtFile.ReadInt32();
                // allocate the kMer table
                kMersTable = new MerTable<ulong>(merArraySize, merSize);

                LoadCBTTiles(cbtFile, minLoadDepth, deepThreshold);
            }

            // NOTE(review): the kMer size is only range-checked after the table has been
            // allocated and loaded - confirm whether this check should come first.
            if (merSize <= 0 || merSize > 32)
            {
                return;
            }

            // distinctMersLoaded is zero for an empty file, when nothing reached minLoadDepth,
            // or after a failed load (LoadCBTTiles resets the counters) - avoid dividing by zero
            averageDepthLoaded = distinctMersLoaded > 0 ? (int)(totalMersLoaded / distinctMersLoaded) : 0;

            if (hdubMerList != null)
            {
                hdubFilter = GenerateHDUBFilter(hdubMerList);
            }
            hdubMerList = null;

            Console.WriteLine("Finished loading kMer table. " +
                              distinctMersLoaded + "/" + totalMersLoaded + " " + merSize + "-mers loaded, average depth " + averageDepthLoaded +
                              " in " + (DateTime.Now - loadStart).TotalSeconds.ToString("#.0") + "s");
        }
Esempio n. 2
0
        /// <summary>
        /// Reads (kMer, as-read count, RC count) tuples from an open .cbt reader until the end
        /// of the stream, adding those whose summed count is at least
        /// <paramref name="minLoadDepth"/> to <c>kMersTable</c>. When <c>hdubMerList</c> is not
        /// null, high-depth, unbalanced (HDUB) kMers are also collected and appended to it,
        /// highest count first. The reader is closed before returning.
        /// </summary>
        /// <param name="cbtFile">Reader positioned at the first tuple (after the file header).</param>
        /// <param name="minLoadDepth">Minimum summed count for a kMer to be added to the table.</param>
        /// <param name="minDeepDepth">Initial depth threshold for admission to the HDUB list.</param>
        private void LoadCBTTiles(BinaryReader cbtFile, int minLoadDepth, int minDeepDepth)
        {
            ulong kMer        = 0;                                  // packed form of next read kMer
            ulong countPair   = 0;                                  // a packed count pair for this kMer
            int   asReadCount = 0;                                  // as-read count for the next read mer
            int   rcCount     = 0;                                  // RC count for the next read mer
            int   sumCount    = 0;                                  // sum of as-read and rc counts

            List<ulong> hdubMers          = new List<ulong>();      // accumulating set of high-depth, unbalanced kMers
            List<int>   hdubCounts        = new List<int>();        // and their summed counts
            int         deepestDepthSoFar = 0;                      // the deepest depth in the above lists
            int         deepThreshold     = minDeepDepth;           // and the threshold for admission to the 'hdub' list

            while (true)
            {
                try
                {
                    kMer        = cbtFile.ReadUInt64();
                    asReadCount = cbtFile.ReadInt32();
                    rcCount     = cbtFile.ReadInt32();
                }
                catch (EndOfStreamException)
                {
                    // normal end of file (a truncated final tuple is dropped); any other
                    // I/O failure propagates rather than being mistaken for end of file
                    break;
                }

                // as-read count in the high 32 bits, RC count added into the low bits
                countPair = ((ulong)asReadCount << 32) + (ulong)rcCount;
                sumCount  = asReadCount + rcCount;

                // add all mers tiled from the reads into our hash table
                if (sumCount >= minLoadDepth)
                {
                    kMersTable.Add(kMer, countPair);
                    distinctMersLoaded++;
                    totalMersLoaded += sumCount;
                }

                // nothing more to do for this tuple unless we're accumulating hdubs
                if (hdubMerList == null)
                {
                    continue;
                }

                // the smaller of the two strand counts, and whether the RC strand is the dominant one
                bool rcAsRead = rcCount > asReadCount;
                int  lowCount = rcAsRead ? asReadCount : rcCount;

                // does this mer look like a candidate for the HDUB list?
                if (sumCount > deepThreshold && lowCount <= sumCount / 100)
                {
                    // if the new 'deep' kMer is deeper than anything in the list already, adjust the threshold
                    if (sumCount > deepestDepthSoFar)
                    {
                        deepestDepthSoFar = sumCount;
                        deepThreshold     = sumCount / 4;
                    }

                    // add the dominant-strand form of the kMer to the lists
                    hdubMers.Add(rcAsRead ? kMers.ReverseComplement(kMer, merSize) : kMer);
                    hdubCounts.Add(sumCount);

                    // trim down the list if it's getting a bit big (just want the top few HDUBs):
                    // compact both parallel lists in one pass, keeping only entries that still
                    // meet the current threshold (removing by index while advancing the index
                    // would skip every second entry)
                    if (hdubMers.Count > 100)
                    {
                        int kept = 0;
                        for (int i = 0; i < hdubCounts.Count; i++)
                        {
                            if (hdubCounts[i] >= deepThreshold)
                            {
                                hdubMers[kept]   = hdubMers[i];
                                hdubCounts[kept] = hdubCounts[i];
                                kept++;
                            }
                        }
                        hdubMers.RemoveRange(kept, hdubMers.Count - kept);
                        hdubCounts.RemoveRange(kept, hdubCounts.Count - kept);
                    }
                }
            } // all cbt tuples in the file

            cbtFile.Close();

            // flush final buffer
            bool mersLoaded = kMersTable.LoadFinished();

            GC.Collect();

            if (!mersLoaded)
            {
                Console.WriteLine(".cbt load failed - check file is sorted");
                distinctMersLoaded = 0;
                totalMersLoaded    = 0;
                kMersTable         = null;
            }

            if (hdubMerList != null)
            {
                // return the HDUBs in sorted order (highest count first):
                // sort the mers by ascending count, then reverse the mers
                ulong[] sortedMers   = hdubMers.ToArray();
                int[]   sortedCounts = hdubCounts.ToArray();
                Array.Sort<int, ulong>(sortedCounts, sortedMers);
                Array.Reverse(sortedMers);
                hdubMers.Clear();
                foreach (ulong mer in sortedMers)
                {
                    hdubMerList.Add(mer);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Loads a kMer pairs table from a pairs file: reads the pair gap from the file header,
        /// then reads (pair, depth) records until the end of the stream, adding those whose
        /// depth exceeds <paramref name="minLoadDepth"/> to <c>pairsTable</c>.
        /// On a failed load <c>pairsTable</c> is set to null.
        /// </summary>
        /// <param name="pairsFN">Path of the pairs file to load.</param>
        /// <param name="minLoadDepth">Pairs are loaded only when their depth is greater than this value.</param>
        public PairTable(string pairsFN, int minLoadDepth)
        {
            Console.WriteLine("Loading kMer pairs from " + pairsFN + " (min=" + minLoadDepth + ")");
            DateTime loadStart = DateTime.Now;

            FileInfo pairsFI = new FileInfo(pairsFN);

            // file length without the 4-byte pairGap header read below
            long pairsFileLength  = pairsFI.Length - 4;
            long pairsArrayLength = pairsFileLength / bytesPerPair + 1;

            // allocate the pairs table (32-mers - 2x16)
            pairsTable = new MerTable<int>(pairsArrayLength, 32);

            // 'using' guarantees the file is released even if reading or adding to the table throws
            using (BinaryReader pairsFile = new BinaryReader(new FileStream(pairsFN, FileMode.Open, FileAccess.Read, FileShare.Read, 1000000, FileOptions.SequentialScan)))
            {
                pairGap = pairsFile.ReadInt32();

                pairFullLength = 2 * kMerPairs.pairFragmentSize + pairGap;

                while (true)
                {
                    try
                    {
                        ulong pair      = pairsFile.ReadUInt64();
                        int   pairDepth = pairsFile.ReadInt32();

                        // NOTE(review): strict '>' here, whereas LoadCBTTiles uses '>=' against
                        // its minLoadDepth - confirm which is intended.
                        if (pairDepth > minLoadDepth)
                        {
                            pairsTable.Add(pair, pairDepth);
                            distinctPairsLoaded++;
                            totalPairsLoaded += pairDepth;
                        }
                    }
                    catch (EndOfStreamException)
                    {
                        // end of file reached (a truncated final record is dropped)
                        break;
                    }
                }
            }

            bool prsLoaded = pairsTable.LoadFinished();

            // no pairs may have been loaded (empty file or nothing above minLoadDepth) - avoid dividing by zero
            averageDepthLoaded = distinctPairsLoaded > 0 ? (int)(totalPairsLoaded / distinctPairsLoaded) : 0;

            if (prsLoaded)
            {
                Console.WriteLine("Finished loading kMer pairs table. " +
                                  distinctPairsLoaded + "/" + totalPairsLoaded + " " + "pairs loaded, average depth " + averageDepthLoaded +
                                  " in " + (DateTime.Now - loadStart).TotalSeconds.ToString("#.0") + "s");
            }
            else
            {
                Console.WriteLine(".prs load failed - check file is sorted");
                totalPairsLoaded = 0;
                pairsTable       = null;
            }

            GC.Collect();
        }