//----< set Delegate >------------------------------

/// <summary>
/// Hands the same write-buffer delegate to both visitors held by this object.
/// </summary>
/// <param name="value">Delegate forwarded to interp_visitor and print_visitor.</param>
public void setVisitorDelegate(WriteBufferDelegate value)
{
    // both visitors receive the same delegate instance, interp_visitor first
    interp_visitor.setDelegate(value);
    print_visitor.setDelegate(value);
}
static long uniqueMersDropped = 0;                  // no. of unique mers dropped (too few reps)

/// <summary>
/// Command-line entry point: merges a set of .cbt files into a single .cbt file and
/// writes a companion histogram/statistics text file.
/// </summary>
/// <param name="args">
/// args[0] is minReps (forwarded to WriteBuffer; the summary message reports mers with
/// depth below it as dropped), args[1..n-2] are .cbt file names or patterns resolved
/// against the current directory, and args[n-1] is the merged file name (with or without
/// the "_k.cbt" suffix).
/// </param>
static void Main(string[] args)
{
    if (args.Length < 3)
    {
        Console.WriteLine("usage: MergeCBTFiles minReps <cbt file patterns or names> mergedFN");
        return;
    }

    // report a malformed minReps instead of dying with an unhandled FormatException
    int minReps;
    if (!int.TryParse(args[0], out minReps))
    {
        Console.WriteLine("minReps must be an integer but was '" + args[0] + "'");
        Console.WriteLine("usage: MergeCBTFiles minReps <cbt file patterns or names> mergedFN");
        return;
    }

    List<string> fnPatterns = new List<string>();
    for (int i = 1; i < args.Length - 1; i++)
    {
        fnPatterns.Add(args[i]);
    }
    string mergedFN = args[args.Length - 1];

    // expand every pattern; a pattern that matches nothing is treated as an error
    List<string> cbtFNs = new List<string>();
    foreach (string fnPattern in fnPatterns)
    {
        string[] fns = Directory.GetFiles(Directory.GetCurrentDirectory(), fnPattern);
        if (fns.Length == 0)
        {
            Console.WriteLine(fnPattern + " did not match any files");
            return;
        }
        cbtFNs.AddRange(fns);
    }

    int noCBTFiles = cbtFNs.Count;
    BinaryReader[] cbtFiles = new BinaryReader[noCBTFiles];
    int[] cbtMerLengths = new int[noCBTFiles];
    BinaryWriter cbtFile = null;
    StreamWriter histo = null;

    try
    {
        // open all the .cbt files (each starts with its k-mer length)
        for (int i = 0; i < noCBTFiles; i++)
        {
            cbtFiles[i] = new BinaryReader(File.Open(cbtFNs[i], FileMode.Open, FileAccess.Read));
            cbtMerLengths[i] = cbtFiles[i].ReadInt32();
        }

        // check that they all use the same k-mer length
        int merSize = cbtMerLengths[0];
        for (int i = 1; i < noCBTFiles; i++)
        {
            if (cbtMerLengths[i] != merSize)
            {
                Console.WriteLine("inconsistent k-mer sizes in .cbt files - expected " + merSize + " but found a " + cbtMerLengths[i]);
                return;
            }
        }

        // open the merged .cbt file
        string normalEnding = "_" + merSize + ".cbt";
        if (mergedFN.EndsWith(normalEnding, StringComparison.Ordinal))
        {
            // strings are immutable: the trimmed name must be assigned back, otherwise the
            // suffix ends up appended twice (and the histogram name is wrong as well)
            mergedFN = mergedFN.Substring(0, mergedFN.Length - normalEnding.Length);
        }
        string histoFN = mergedFN + "_" + merSize + "_histo.txt";
        mergedFN = mergedFN + normalEnding;

        cbtFile = new BinaryWriter(File.Open(mergedFN, FileMode.Create, FileAccess.Write));
        histo = new StreamWriter(File.Open(histoFN, FileMode.Create, FileAccess.Write));

        // write out the k-mer length
        cbtFile.Write(merSize);

        // now just merge and write until all mers have been written
        bool mersLeft = true;
        Stopwatch mergingTimer = new Stopwatch();
        mergingTimer.Start();

        CBTSource[] merSources = new CBTSource[noCBTFiles];
        for (int i = 0; i < noCBTFiles; i++)
        {
            merSources[i] = new CBTSource(cbtFiles[i]);
        }

        WriteBufferDelegate wbd = new WriteBufferDelegate(WriteBuffer);

        // two sets of buffers: one is filled while the other is being written
        ulong[][] bufferMers = new ulong[2][];
        bufferMers[0] = new ulong[noCBTFiles * bufferSize];
        bufferMers[1] = new ulong[noCBTFiles * bufferSize];
        ulong[][] bufferCountPairs = new ulong[2][];
        bufferCountPairs[0] = new ulong[noCBTFiles * bufferSize];
        bufferCountPairs[1] = new ulong[noCBTFiles * bufferSize];
        int[] bufferCount = new int[2];
        IAsyncResult[] iarWriteBuffer = new IAsyncResult[2];
        int currentBuffer = 0;
        int previousBuffer = 1;
        ulong highestMerInBuffer = 0;

        // fill merged buffers from all of the .cbt files and write them out
        while (mersLeft)
        {
            // the buffer about to be refilled must have been fully written first
            if (iarWriteBuffer[currentBuffer] != null)
            {
                wbd.EndInvoke(iarWriteBuffer[currentBuffer]);
                iarWriteBuffer[currentBuffer] = null;       // each async call is ended exactly once
            }

            mersLeft = FillBuffer(merSources, bufferSize, ref bufferMers[currentBuffer], ref bufferCountPairs[currentBuffer],
                                  out bufferCount[currentBuffer], highestMerInBuffer, out highestMerInBuffer);
            if (!mersLeft)
            {
                break;
            }

            // do not start a new write until the previous one has finished
            if (iarWriteBuffer[previousBuffer] != null && !iarWriteBuffer[previousBuffer].IsCompleted)
            {
                iarWriteBuffer[previousBuffer].AsyncWaitHandle.WaitOne();
            }

            iarWriteBuffer[currentBuffer] = wbd.BeginInvoke(cbtFile, minReps, bufferMers[currentBuffer], bufferCountPairs[currentBuffer],
                                                            bufferCount[currentBuffer], null, null);

            previousBuffer = currentBuffer;
            currentBuffer = (currentBuffer == 0) ? 1 : 0;
        }

        // and flush the remaining buffered k-mers. EndInvoke is called for every outstanding
        // write, completed or not, so that an exception raised inside WriteBuffer is not lost.
        for (int i = 0; i < 2; i++)
        {
            if (iarWriteBuffer[i] != null)
            {
                wbd.EndInvoke(iarWriteBuffer[i]);
                iarWriteBuffer[i] = null;
            }
        }

        mergingTimer.Stop();

        Console.WriteLine("Merged " + uniqueMersWritten + "/" + mersWritten + " " + merSize + "-mers from " + noCBTFiles + " files. " +
                          uniqueMersDropped + "/" + mersDropped + " " + merSize + "-mers dropped (depth < " + minReps + ") in " +
                          mergingTimer.Elapsed.TotalSeconds.ToString("#.0") + "s");

        Console.WriteLine("Generating histogram and stats...");

        // sort the (sum, number-of-mers-with-that-sum) pairs by sum
        int[] sums = new int[sumReps.Count];
        long[] repsReps = new long[sumReps.Count];
        sumReps.Keys.CopyTo(sums, 0);
        sumReps.Values.CopyTo(repsReps, 0);
        Array.Sort(sums, repsReps);

        // record the command line at the top of the histogram file
        Process myProcess = Process.GetCurrentProcess();
        string myProcessNameAndArgs = myProcess.ProcessName;
        foreach (string a in args)
        {
            myProcessNameAndArgs = myProcessNameAndArgs + " " + a;
        }
        histo.WriteLine(">" + myProcessNameAndArgs);

        histo.WriteLine(">sums");
        long totalMers = mersWritten + mersDropped;
        histo.WriteLine(">copies\tcounts\t" + totalMers);
        for (int i = 0; i < sums.Length; i++)
        {
            histo.Write(sums[i]);
            histo.Write('\t');
            histo.Write(repsReps[i]);
            histo.Write('\t');
            long mersInBucket = sums[i] * repsReps[i];
            histo.Write(mersInBucket);
            histo.Write('\t');
            histo.Write((((float)mersInBucket / (float)totalMers) * 100.0).ToString("F2"));
            histo.WriteLine();
        }

        histo.WriteLine();
        histo.WriteLine(uniqueMersWritten + "\tdistinct mers written to cbt file");
        histo.WriteLine(mersWritten + "\ttotal mers written to cbt file");
        histo.WriteLine(mersDropped + "\tmers dropped (too few reps)");
        histo.WriteLine(mergingTimer.Elapsed.TotalSeconds.ToString("#.0") + "\ts merging");
    }
    finally
    {
        // close everything that was opened, on every exit path; previously only the
        // histogram was closed and the merged .cbt writer was left open
        if (histo != null)
        {
            histo.Close();
        }
        if (cbtFile != null)
        {
            cbtFile.Close();
        }
        foreach (BinaryReader reader in cbtFiles)
        {
            if (reader != null)
            {
                reader.Close();
            }
        }
    }
}
/// <summary>
/// Stores the supplied write-buffer delegate in <c>dele</c>.
/// </summary>
/// <param name="value">Delegate to remember; it is stored as given, without any check.</param>
public void setDelegate(WriteBufferDelegate value)
{
    dele = value;
}
/// <summary>
/// Merges the mers from the shared repeated-mer partitions and from every allocated
/// overflow table, and writes them to <paramref name="pairsFile"/> through WriteBuffer.
/// Two buffer sets are alternated so that one can be filled while the other is written
/// asynchronously.
/// </summary>
/// <param name="pairsFile">Writer handed to WriteBuffer for each filled buffer.</param>
/// <param name="repeatedMers">Shared repeated-mer partitions; every entry becomes a merge source.</param>
/// <param name="overflowMers">Overflow tables; null entries are skipped.</param>
private static void MergeAndWrite(BinaryWriter pairsFile, MerCollections.MerDictionary[] repeatedMers, MerCollections.MerDictionary[] overflowMers)
{
    mergingPhase = true;

    // only overflow tables that were actually allocated become merge sources
    int noOfOverflows = 0;
    foreach (MerCollections.MerDictionary overflow in overflowMers)
    {
        if (overflow != null)
        {
            noOfOverflows++;
        }
    }

    int noMerSources = repeatedMers.Length + noOfOverflows;
    if (noMerSources == 0)
    {
        // nothing to merge; previously this case failed on merSources[0] below
        return;
    }

    MerSource[] merSources = new MerSource[noMerSources];
    int nextSource = 0;

    // shared repeated mers partitions
    for (int i = 0; i < repeatedMers.Length; i++)
    {
        merSources[nextSource] = new MerDictionarySource(repeatedMers[i]);
        nextSource++;
    }

    // all the overflow mer tables
    for (int i = 0; i < overflowMers.Length; i++)
    {
        if (overflowMers[i] != null)
        {
            merSources[nextSource] = new MerDictionarySource(overflowMers[i]);
            nextSource++;
        }
    }

    WriteBufferDelegate wbd = new WriteBufferDelegate(WriteBuffer);

    // now just merge and write until all mers have been written
    bool mersLeft = true;

    // two sets of buffers: one is filled while the other is being written
    ulong[][] bufferMers = new ulong[2][];
    bufferMers[0] = new ulong[noMerSources * bufferSize];
    bufferMers[1] = new ulong[noMerSources * bufferSize];
    ulong[][] bufferValues = new ulong[2][];
    bufferValues[0] = new ulong[noMerSources * bufferSize];
    bufferValues[1] = new ulong[noMerSources * bufferSize];
    int[] bufferCount = new int[2];
    IAsyncResult[] iarWriteBuffer = new IAsyncResult[2];
    int currentBuffer = 0;
    int previousBuffer = 1;
    ulong highestMerInBuffer = 0;

    merSources[0].Open();       // just being polite

    while (mersLeft)
    {
        // the buffer about to be refilled must have been fully written first
        if (iarWriteBuffer[currentBuffer] != null)
        {
            wbd.EndInvoke(iarWriteBuffer[currentBuffer]);
            iarWriteBuffer[currentBuffer] = null;       // each async call is ended exactly once
        }

        mersLeft = FillBuffer(merSources, ref bufferMers[currentBuffer], ref bufferValues[currentBuffer],
                              out bufferCount[currentBuffer], highestMerInBuffer, out highestMerInBuffer);
        if (!mersLeft)
        {
            break;
        }

        // do not start a new write until the previous one has finished
        if (iarWriteBuffer[previousBuffer] != null && !iarWriteBuffer[previousBuffer].IsCompleted)
        {
            iarWriteBuffer[previousBuffer].AsyncWaitHandle.WaitOne();
        }

        iarWriteBuffer[currentBuffer] = wbd.BeginInvoke(pairsFile, bufferMers[currentBuffer], bufferValues[currentBuffer],
                                                        bufferCount[currentBuffer], null, null);

        previousBuffer = currentBuffer;
        currentBuffer = (currentBuffer == 0) ? 1 : 0;
    }

    // finish every outstanding write. EndInvoke is called whether or not the call has
    // already completed, so that an exception raised inside WriteBuffer is not lost.
    for (int i = 0; i < 2; i++)
    {
        if (iarWriteBuffer[i] != null)
        {
            wbd.EndInvoke(iarWriteBuffer[i]);
            iarWriteBuffer[i] = null;
        }
    }
}