Пример #1
0
 //----< hand the same delegate to both visitors >----
 public void setVisitorDelegate(WriteBufferDelegate value)
 {
     // interp_visitor and print_visitor each receive the identical delegate instance
     interp_visitor.setDelegate(value);
     print_visitor.setDelegate(value);
 }
        static long uniqueMersDropped = 0;                                              // number of unique mers dropped (too few reps); printed in Main's summary line next to mersDropped

        /// <summary>
        /// Entry point: merges a set of .cbt files into a single .cbt file and writes a
        /// histogram/statistics text file alongside it.
        /// </summary>
        /// <remarks>
        /// Expected arguments: <c>minReps</c>, one or more .cbt file names or patterns (resolved
        /// against the current directory), and finally the merged output name. Every input file
        /// starts with an Int32 k-mer length, and all inputs must agree on it.
        /// </remarks>
        /// <param name="args">Command-line arguments as described above.</param>
        static void Main(string[] args)
        {
            if (args.Length < 3)
            {
                Console.WriteLine("usage: MergeCBTFiles minReps <cbt file patterns or names> mergedFN");
                return;
            }

            int           minReps    = Convert.ToInt32(args[0]);
            List <string> fnPatterns = new List <string>();

            // everything between the first and the last argument is a file name or pattern
            for (int i = 1; i < args.Length - 1; i++)
            {
                fnPatterns.Add(args[i]);
            }
            string mergedFN = args[args.Length - 1];

            List <string> cbtFNs = new List <string>();

            foreach (string fnPattern in fnPatterns)
            {
                string[] fns = Directory.GetFiles(Directory.GetCurrentDirectory(), fnPattern);
                if (fns.Length == 0)
                {
                    Console.WriteLine(fnPattern + " did not match any files");
                    return;
                }
                cbtFNs.AddRange(fns);
            }

            int noCBTFiles = cbtFNs.Count;

            BinaryReader[] cbtFiles      = new BinaryReader[noCBTFiles];
            int[]          cbtMerLengths = new int[noCBTFiles];
            BinaryWriter   cbtFile       = null;
            StreamWriter   histo         = null;

            try
            {
                // open all the .cbt files and read the k-mer length stored at the start of each
                for (int i = 0; i < noCBTFiles; i++)
                {
                    cbtFiles[i]      = new BinaryReader(File.Open(cbtFNs[i], FileMode.Open, FileAccess.Read));
                    cbtMerLengths[i] = cbtFiles[i].ReadInt32();
                }

                // check that they all use the same k-mer length
                int merSize = cbtMerLengths[0];

                for (int i = 1; i < noCBTFiles; i++)
                {
                    if (cbtMerLengths[i] != merSize)
                    {
                        Console.WriteLine("inconsistent k-mer sizes in .cbt files - expected " + merSize + " but found a " + cbtMerLengths[i]);
                        return;
                    }
                }

                // open the merged .cbt file
                string normalEnding = "_" + merSize + ".cbt";

                // strip an already-present "_<k>.cbt" suffix so it is not appended twice;
                // Substring returns a new string, so the result has to be assigned back
                if (mergedFN.EndsWith(normalEnding, StringComparison.Ordinal))
                {
                    mergedFN = mergedFN.Substring(0, mergedFN.Length - normalEnding.Length);
                }
                string histoFN = mergedFN + "_" + merSize + "_histo.txt";

                mergedFN = mergedFN + normalEnding;
                cbtFile  = new BinaryWriter(File.Open(mergedFN, FileMode.Create, FileAccess.Write));
                histo    = new StreamWriter(File.Open(histoFN, FileMode.Create, FileAccess.Write));

                // write out the k-mer length
                cbtFile.Write(merSize);

                // now just merge and write until all mers have been written
                bool      mersLeft     = true;
                Stopwatch mergingTimer = new Stopwatch();

                mergingTimer.Start();

                CBTSource[] merSources = new CBTSource[noCBTFiles];
                for (int i = 0; i < noCBTFiles; i++)
                {
                    merSources[i] = new CBTSource(cbtFiles[i]);
                }

                WriteBufferDelegate wbd = new WriteBufferDelegate(WriteBuffer);

                // two sets of buffers: one is filled while the other is being written
                ulong[][] bufferMers = new ulong[2][];
                bufferMers[0] = new ulong[noCBTFiles * bufferSize];
                bufferMers[1] = new ulong[noCBTFiles * bufferSize];
                ulong[][] bufferCountPairs = new ulong[2][];
                bufferCountPairs[0] = new ulong[noCBTFiles * bufferSize];
                bufferCountPairs[1] = new ulong[noCBTFiles * bufferSize];
                int[]          bufferCount        = new int[2];
                IAsyncResult[] iarWriteBuffer     = new IAsyncResult[2];
                int            currentBuffer      = 0;
                int            previousBuffer     = 1;
                ulong          highestMerInBuffer = 0;

                // fill merged buffers from all of the .cbt files and write them out
                while (mersLeft)
                {
                    // the buffer about to be refilled must have finished writing; EndInvoke may
                    // only be called once per call, so the slot is cleared afterwards
                    if (iarWriteBuffer[currentBuffer] != null)
                    {
                        wbd.EndInvoke(iarWriteBuffer[currentBuffer]);
                        iarWriteBuffer[currentBuffer] = null;
                    }

                    mersLeft = FillBuffer(merSources, bufferSize, ref bufferMers[currentBuffer], ref bufferCountPairs[currentBuffer], out bufferCount[currentBuffer], highestMerInBuffer, out highestMerInBuffer);

                    if (!mersLeft)
                    {
                        break;
                    }

                    // keep the writes in order: wait for the previous buffer before starting this one
                    if (iarWriteBuffer[previousBuffer] != null && !iarWriteBuffer[previousBuffer].IsCompleted)
                    {
                        iarWriteBuffer[previousBuffer].AsyncWaitHandle.WaitOne();
                    }
                    iarWriteBuffer[currentBuffer] = wbd.BeginInvoke(cbtFile, minReps, bufferMers[currentBuffer], bufferCountPairs[currentBuffer], bufferCount[currentBuffer], null, null);

                    previousBuffer = currentBuffer;
                    currentBuffer  = 1 - currentBuffer;
                }

                // and flush the remaining buffered k-mers; EndInvoke is called even when the write
                // has already completed so that an exception raised by the writer is not lost
                for (int i = 0; i < 2; i++)
                {
                    if (iarWriteBuffer[i] != null)
                    {
                        wbd.EndInvoke(iarWriteBuffer[i]);
                        iarWriteBuffer[i] = null;
                    }
                }

                mergingTimer.Stop();

                Console.WriteLine("Merged " + uniqueMersWritten + "/" + mersWritten + " " + merSize + "-mers from " + noCBTFiles + " files. " +
                                  uniqueMersDropped + "/" + mersDropped + " " + merSize + "-mers dropped (depth < " + minReps + ") in " + mergingTimer.Elapsed.TotalSeconds.ToString("#.0") + "s");

                Console.WriteLine("Generating histogram and stats...");
                int[]  sums     = new int[sumReps.Count];
                long[] repsReps = new long[sumReps.Count];

                sumReps.Keys.CopyTo(sums, 0);
                sumReps.Values.CopyTo(repsReps, 0);

                // sort the counts by their keys so the histogram comes out in ascending order
                Array.Sort(sums, repsReps);

                Process myProcess            = Process.GetCurrentProcess();
                string  myProcessNameAndArgs = myProcess.ProcessName;

                foreach (string a in args)
                {
                    myProcessNameAndArgs = myProcessNameAndArgs + " " + a;
                }
                histo.WriteLine(">" + myProcessNameAndArgs);

                histo.WriteLine(">sums");
                long totalMers = mersWritten + mersDropped;

                histo.WriteLine(">copies\tcounts\t" + totalMers);
                for (int i = 0; i < sums.Length; i++)
                {
                    histo.Write(sums[i]);
                    histo.Write('\t');
                    histo.Write(repsReps[i]);
                    histo.Write('\t');
                    long mersInBucket = sums[i] * repsReps[i];
                    histo.Write(mersInBucket);
                    histo.Write('\t');
                    histo.Write((((float)mersInBucket / (float)totalMers) * 100.0).ToString("F2"));
                    histo.WriteLine();
                }

                histo.WriteLine();
                histo.WriteLine(uniqueMersWritten + "\tdistinct mers written to cbt file");
                histo.WriteLine(mersWritten + "\ttotal mers written to cbt file");
                histo.WriteLine(mersDropped + "\tmers dropped (too few reps)");
                histo.WriteLine(mergingTimer.Elapsed.TotalSeconds.ToString("#.0") + "\ts merging");
            }
            finally
            {
                // release every file handle on all exit paths; closing the writers also flushes them
                if (histo != null)
                {
                    histo.Close();
                }
                if (cbtFile != null)
                {
                    cbtFile.Close();
                }
                foreach (BinaryReader cbtReader in cbtFiles)
                {
                    if (cbtReader != null)
                    {
                        cbtReader.Close();
                    }
                }
            }
        }
Пример #3
0
 // Stores the supplied WriteBufferDelegate in 'dele', replacing whatever was held before.
 public void setDelegate(WriteBufferDelegate value)
 {
     dele = value;
 }
        /// <summary>
        /// Wraps every repeated-mer partition and every non-null overflow table in a
        /// <c>MerDictionarySource</c>, then repeatedly fills a buffer from those sources with
        /// <c>FillBuffer</c> and hands it to <c>WriteBuffer</c> for writing to
        /// <paramref name="pairsFile"/>. Two buffers alternate so that one can be filled while
        /// the other is being written asynchronously.
        /// </summary>
        /// <param name="pairsFile">Writer passed through to <c>WriteBuffer</c>.</param>
        /// <param name="repeatedMers">Shared repeated-mer partitions; every entry becomes a source.</param>
        /// <param name="overflowMers">Overflow tables; null entries are skipped.</param>
        private static void MergeAndWrite(BinaryWriter pairsFile, MerCollections.MerDictionary[] repeatedMers, MerCollections.MerDictionary[] overflowMers)
        {
            mergingPhase = true;

            // only the overflow tables that actually exist take part in the merge
            int noOfOverflows = 0;

            foreach (MerCollections.MerDictionary overflow in overflowMers)
            {
                if (overflow != null)
                {
                    noOfOverflows++;
                }
            }

            //                 shared mers           overflow
            int noMerSources = repeatedMers.Length + noOfOverflows;

            MerSource[] merSources = new MerSource[noMerSources];
            int         nextSource = 0;

            // shared repeated mers partitions
            foreach (MerCollections.MerDictionary partition in repeatedMers)
            {
                merSources[nextSource] = new MerDictionarySource(partition);
                nextSource++;
            }
            // all the overflow mer tables
            foreach (MerCollections.MerDictionary overflow in overflowMers)
            {
                if (overflow != null)
                {
                    merSources[nextSource] = new MerDictionarySource(overflow);
                    nextSource++;
                }
            }

            WriteBufferDelegate wbd = new WriteBufferDelegate(WriteBuffer);

            // two sets of buffers: one is filled while the other is being written
            ulong[][] bufferMers = new ulong[2][];
            bufferMers[0] = new ulong[noMerSources * bufferSize];
            bufferMers[1] = new ulong[noMerSources * bufferSize];
            ulong[][] bufferValues = new ulong[2][];
            bufferValues[0] = new ulong[noMerSources * bufferSize];
            bufferValues[1] = new ulong[noMerSources * bufferSize];
            int[]          bufferCount        = new int[2];
            IAsyncResult[] iarWriteBuffer     = new IAsyncResult[2];
            int            currentBuffer      = 0;
            int            previousBuffer     = 1;
            ulong          highestMerInBuffer = 0;

            merSources[0].Open();               // just being polite

            // now just merge and write until all mers have been written
            while (true)
            {
                // the buffer about to be refilled must have finished writing; EndInvoke may
                // only be called once per call, so the slot is cleared afterwards
                if (iarWriteBuffer[currentBuffer] != null)
                {
                    wbd.EndInvoke(iarWriteBuffer[currentBuffer]);
                    iarWriteBuffer[currentBuffer] = null;
                }

                bool mersLeft = FillBuffer(merSources, ref bufferMers[currentBuffer], ref bufferValues[currentBuffer], out bufferCount[currentBuffer], highestMerInBuffer, out highestMerInBuffer);

                if (!mersLeft)
                {
                    break;
                }

                // keep the writes in order: wait for the previous buffer before starting this one
                IAsyncResult previousWrite = iarWriteBuffer[previousBuffer];
                if (previousWrite != null && !previousWrite.IsCompleted)
                {
                    previousWrite.AsyncWaitHandle.WaitOne();
                }
                iarWriteBuffer[currentBuffer] = wbd.BeginInvoke(pairsFile, bufferMers[currentBuffer], bufferValues[currentBuffer], bufferCount[currentBuffer], null, null);

                previousBuffer = currentBuffer;
                currentBuffer  = 1 - currentBuffer;
            }

            // finish any outstanding write; EndInvoke is called even when the write has already
            // completed so that an exception raised by the writer is not lost
            for (int i = 0; i < 2; i++)
            {
                if (iarWriteBuffer[i] != null)
                {
                    wbd.EndInvoke(iarWriteBuffer[i]);
                    iarWriteBuffer[i] = null;
                }
            }
        }