private static bool FindMerInUniqueMers(ulong mer, out int plusCount, out int rcCount)
        {
            bool  foundMer = true;
            ulong rcMer    = MerStrings.ReverseComplement(mer);
            ulong countPair;
            bool  rcMerWasCanonical = false;

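            // k-mers are stored under their canonical form: the numerically smaller of the
            // packed mer and its reverse-complement, so look up whichever of the two is smaller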
            rcMerWasCanonical = rcMer < mer;
            if (rcMerWasCanonical)
            {
                mer = rcMer;
            }

            if (!uniqueMers.TryGetValue(mer, out countPair))
            {
                //string missingMer = MerStrings.ExpandMer(mer);
                countPair = 0;                                  // not in the table
                foundMer  = false;
            }

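            // Count pairs pack two 32-bit counts into one ulong: the high 32 bits count sightings
            // in the same orientation as the stored (canonical) mer, the low 32 bits count sightings
            // of its reverse-complement. A sketch of how such a pair would be built (hypothetical
            // helper code, not part of this program):
            //   ulong packedPair = ((ulong)(uint)asReadCount << 32) | (uint)rcCount;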
            // extract the plus and RC counts from the packed ulong value
            if (rcMerWasCanonical)
            {
                rcCount   = (int)(countPair >> 32);
                plusCount = (int)(countPair & 0xFFFFFFFF);
            }
            else
            {
                plusCount = (int)(countPair >> 32);
                rcCount   = (int)(countPair & 0xFFFFFFFF);
            }

            return(foundMer);
        }
        // Generate a set of mers from a read and calculate their read depths.
        private static int GetMerDepths(string read, int[] merDepths)
        {
            int readLength = read.Length;
            int mersInRead = readLength - merSize + 1;

            bool  merIsValid = false;
            ulong lastMer    = 0;

            // read too short to tile for mers
            if (readLength < merSize)
            {
                mersInRead = 0;
                return(mersInRead);
            }

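            // tile the read with overlapping k-mers; after the first valid k-mer each subsequent
            // one is derived incrementally by shifting in the next base, with a full rebuild from
            // the substring whenever the previous window was invalid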
            for (int i = 0; i < mersInRead; i++)
            {
                if (merIsValid)
                {
                    merIsValid = MerStrings.CondenseMerIncremental(merSize, lastMer, read, i, out lastMer);
                }
                else
                {
                    merIsValid = MerStrings.CondenseMer(read.Substring(i, merSize), out lastMer);
                }
                if (merIsValid)
                {
                    int plusCount = 0;
                    int rcCount   = 0;
                    if (FindMerInUniqueMers(lastMer, out plusCount, out rcCount))
                    {
                        int sumCount = plusCount + rcCount;
                        // don't count huge unbalanced counts
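                        // (a k-mer seen more than 100 times as often in one orientation as in the
                        // other is likely an artefact, so its depth is treated as 0)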
                        if ((plusCount > 100 * rcCount || rcCount > 100 * plusCount))
                        {
                            merDepths[i] = 0;
                        }
                        else
                        {
                            merDepths[i] = sumCount;
                        }
                    }
                    else
                    {
                        merDepths[i] = 0;
                    }
                }
                else
                {
                    merDepths[i] = 0;
                }
            }

            return(mersInRead);
        }
        private static void SaveFilteredReadAndQual(StreamWriter filteredReads, string header, string filteredRead, string quals)
        {
            MerStrings.WriteRead(filteredReads, header, filteredRead, readsFormat);
            if (readsFormat == MerStrings.formatFASTQ)
            {
                if (fullQualHeaders)
                {
                    filteredReads.WriteLine("+" + header.Substring(1));
                }
                else
                {
                    filteredReads.WriteLine("+");
                }
                filteredReads.WriteLine(quals);
            }
        }
        static Dictionary <string, int> highRepSeqs; // long seqs + counts for high depth reads

        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("FilterReadsByDepth -min minDepth -max maxDepth [-reduce maxCopies] [-histoOnly] [-stats statsFN] [-f format] [-t #threads] cbtFN fileNames");
                return;
            }
            List <string> FNParams  = new List <string>(); // the .cbt name and the set of file names or patterns
            int           noThreads = 2;

            // find out who we are so we can track what program & args produced the result files
            Process myProcess = Process.GetCurrentProcess();

            myProcessNameAndArgs = myProcess.ProcessName;
            foreach (string a in args)
            {
                myProcessNameAndArgs = myProcessNameAndArgs + " " + a;
            }

            for (int p = 0; p < args.Length; p++)
            {
                if (args[p][0] == '-')
                {
                    args[p] = args[p].ToLower();

                    if (args[p] == "-s" || args[p] == "-stats")
                    {
                        if (!CheckForParamValue(p, args.Length, "stats file name string expected after -s|-stats"))
                        {
                            return;
                        }
                        statsFN = args[p + 1];
                        p++;
                        continue;
                    }

                    if (args[p] == "-min")
                    {
                        if (!CheckForParamValue(p, args.Length, "minDepth number expected after -min"))
                        {
                            return;
                        }
                        try
                        {
                            minDepth = Convert.ToInt32(args[p + 1]);
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -min parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-max")
                    {
                        if (!CheckForParamValue(p, args.Length, "maxDepth number expected after -max"))
                        {
                            return;
                        }
                        try
                        {
                            maxDepth = Convert.ToInt32(args[p + 1]);
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -max parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-r" || args[p] == "-reduce")
                    {
                        if (!CheckForParamValue(p, args.Length, "reduced depth number expected after -reduce"))
                        {
                            return;
                        }
                        try
                        {
                            reducedDepth  = Convert.ToInt32(args[p + 1]);
                            reducingReads = true;
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -reduce parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-histoonly" || args[p] == "-ho")
                    {
                        histoOnly = true;
                        continue;
                    }

                    if (args[p] == "-t" || args[p] == "-threads")
                    {
                        if (!CheckForParamValue(p, args.Length, "number expected after -t|-threads"))
                        {
                            return;
                        }
                        try
                        {
                            noThreads = Convert.ToInt32(args[p + 1]);
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -t|-threads parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-f" || args[p] == "-format")
                    {
                        if (!CheckForParamValue(p, args.Length, "reads format expected after -f|-format"))
                        {
                            return;
                        }
                        string readsFormatParam = args[p + 1].ToLower();
                        if (readsFormatParam == "fna")
                        {
                            readsFormat = MerStrings.formatFNA;
                        }
                        else if (readsFormatParam == "fasta")
                        {
                            readsFormat = MerStrings.formatFNA;
                        }
                        else if (readsFormatParam == "fa")
                        {
                            readsFormat = MerStrings.formatFNA;
                        }
                        else if (readsFormatParam == "fastq")
                        {
                            readsFormat = MerStrings.formatFASTQ;
                        }
                        else if (readsFormatParam == "fq")
                        {
                            readsFormat = MerStrings.formatFASTQ;
                        }
                        else
                        {
                            Console.WriteLine("reads format must be fasta or fastq: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-o" || args[p] == "-output")
                    {
                        if (!CheckForParamValue(p, args.Length, "directory name expected after -o|-output"))
                        {
                            return;
                        }
                        outputDir = args[p + 1];
                        p++;
                        continue;
                    }
                }

                FNParams.Add(args[p]);
            }

            if (FNParams.Count < 2)
            {
                Console.WriteLine("expected a cbt file name and at least one reads file name or pattern");
                return;
            }

            // validate the output directory & set the output prefix string
            string fnSeparator = Path.DirectorySeparatorChar.ToString();  // \ for Windows; / for Unix/Linux

            if (outputDir != null)
            {
                try
                {
                    // add a trailing separator if the output directory name doesn't already have one
                    if (!outputDir.EndsWith(fnSeparator))
                    {
                        outputDir += fnSeparator;
                    }
                    string       testOutputFN = outputDir + "43EDD23F-5F68-47f0-B7B9-66AE9EE3BF0B.txt";
                    StreamWriter testTemp     = new StreamWriter(testOutputFN);
                    testTemp.Close();
                    File.Delete(testOutputFN);
                }
                catch
                {
                    Console.WriteLine("Output directory: " + args[6] + " was invalid");
                    return;
                }
            }

            // take the cbt file name from the start of the non-option list
            string cbtFN = FNParams[0];

            FNParams.RemoveAt(0);

            if (FNParams.Count == 0)
            {
                Console.WriteLine("did not find any reads file names or patterns");
                return;
            }

            if (!File.Exists(cbtFN))
            {
                Console.WriteLine("k-mer consensus (.cbt) file not found: " + cbtFN);
                return;
            }

            List <string> readsFileNames = new List <string>(FNParams.Count);
            List <string> readsFilePaths = new List <string>(FNParams.Count);

            foreach (string readsFNP in FNParams)
            {
                string readsFileName;
                string readsFilePath;
                GetPathFN(readsFNP, out readsFilePath, out readsFileName);
                readsFilePaths.Add(readsFilePath);
                readsFileNames.Add(readsFileName);
            }

            List <string> expandedReadsFNs = new List <string>();

            for (int f = 0; f < FNParams.Count; f++)
            {
                string[] matchedReadsFNs = Directory.GetFiles(readsFilePaths[f], readsFileNames[f], SearchOption.TopDirectoryOnly);
                foreach (string matchedReadsFN in matchedReadsFNs)
                {
                    expandedReadsFNs.Add(matchedReadsFN);
                }
            }

            // make sure there aren't any duplicates in the file list (works around an apparent bug on the Cherax SGI HPC system that returns each file name twice)
            List <string> distinctReadsFNs = new List <string>();

            foreach (string fn in expandedReadsFNs)
            {
                if (!distinctReadsFNs.Contains(fn))
                {
                    distinctReadsFNs.Add(fn);
                }
            }

            // finally... the set of fully qualified, distinct reads files
            string[] readsFNs;
            readsFNs = distinctReadsFNs.ToArray();
            Array.Sort(readsFNs);

            int noOfReadsFiles = distinctReadsFNs.Count;

            if (noOfReadsFiles == 0)
            {
                Console.WriteLine("No matching reads files found");
                return;
            }

            StreamReader formatTester = new StreamReader(readsFNs[0]);
            string       firstLine    = formatTester.ReadLine();

            if (firstLine[0] == '>')
            {
                readsFormat = MerStrings.formatFNA;
            }
            if (firstLine[0] == '@')
            {
                readsFormat = MerStrings.formatFASTQ;
            }
            formatTester.Close();
            formatTester = null;

            if (statsFN == null)
            {
                // construct a stats file name from the first reads file name
                statsFN = readsFileNames[0].Substring(0, readsFileNames[0].LastIndexOf('.'));
                statsFN = statsFN.Replace('?', '_');
                statsFN = statsFN.Replace('*', '_');
                statsFN = statsFN.Replace('/', '_');
                statsFN = statsFN.Replace('\\', '_');
                statsFN = statsFN.Replace("__", "_");
                statsFN = statsFN.Replace("__", "_");
                statsFN = statsFN + "_fstats.txt";
                statsFN = statsFN.Replace("__", "_");
            }

            // calculate the min load depth from the min reps depth - don't need to load all of the singletons and other errors into memory
            //int minLoadDepth = minDepth / 2;
            //if (minLoadDepth <= 1)
            //    minLoadDepth = 2;
            int minLoadDepth = minDepth;

            long loadedUniqueMers = 0;
            long loadedTotalMers  = 0;

            // load the .cbt file into a merTable (either a hash table (small) or a sorted array (large))
            long mersLoaded = MerStrings.LoadCBTFile(cbtFN, minLoadDepth, 0, 0, minDepth,
                                                     out uniqueMers, out merSize, out averageDepth, out loadedUniqueMers, out loadedTotalMers);

            if (merSize < 1 || merSize > 32)
            {
                Console.WriteLine("bad k-mer size found at start of .cbt file");
                return;
            }

            MerStrings.Initialise(merSize);

            highRepSeqs = new Dictionary <string, int>(10000000);
            highRepSeqs.Add(new string('A', 40), 0);
            highRepSeqs.Add(new string('C', 40), 0);
            highRepSeqs.Add(new string('G', 40), 0);
            highRepSeqs.Add(new string('T', 40), 0);

            // resolve the FASTQ qual ambiguity by reading through quals until one is encountered that can only come from either of the alternative sets
            if (readsFormat == MerStrings.formatFASTQ)
            {
                qualBase = MerStrings.ResolveFastqQualAmbiguity(readsFNs[0], out fullQualHeaders);
            }
            // and check whether we've got Unix line endings so we can write the filtered files in the same format
            string lfConvention = MerStrings.LFConvention(readsFNs[0]);

            // start the monitor/synchronising thread
            Thread monitorProgress = new Thread(RateReporter);

            monitorProgress.Priority = ThreadPriority.AboveNormal;
            monitorProgress.Start();

            readsFiles    = new StreamReader[2];
            filteredReads = new StreamWriter[2];
            Dictionary <int, int> readDepths = new Dictionary <int, int>(1000);

            // filter a pair of files at a time (allowing us to filter many files in a single run while keeping pairedness)
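            // (readsFiles and filteredReads have two slots: slot 0 for a reads file and slot 1 for
            //  its paired mate, when one exists)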
            for (int f = 0; f < noOfReadsFiles; f += 2)
            {
                // for each file in the pair
                for (int p = 0; p < 2; p++)
                {
                    if (f + p < noOfReadsFiles)
                    {
                        string fullReadsFN = readsFNs[f + p];
                        string readsPath;
                        string readsFN;
                        GetPathFN(fullReadsFN, out readsPath, out readsFN);
                        string fileSuffix        = readsFN.Substring(readsFN.LastIndexOf('.'));
                        string fileWithoutSuffix = readsFN.Substring(0, readsFN.LastIndexOf('.'));

                        readsFiles[p] = new StreamReader(fullReadsFN, Encoding.ASCII, false, 1000000);
                        Console.WriteLine("filtering " + readsFN);

                        // check that the file appears to be in the expected format
                        char firstChar = (char)readsFiles[p].Peek();
                        if (readsFormat == MerStrings.formatFASTQ && firstChar != '@')
                        {
                            Console.WriteLine(readsFN + " does not appear to be in FASTQ format");
                            return;
                        }
                        if (readsFormat == MerStrings.formatFNA && firstChar != '>')
                        {
                            Console.WriteLine(readsFN + " does not appear to be in FASTA format");
                            return;
                        }

                        string outputPath = outputDir == null ? readsPath + fnSeparator : outputDir;
                        if (!histoOnly)
                        {
                            string maxDepthString = maxDepth.ToString();
                            if (maxDepth == int.MaxValue)
                            {
                                maxDepthString = "max";
                            }
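                            // the filtered output file is named <name>_<minDepth>_<maxDepth><suffix>,
                            // e.g. reads_5_max.fastq for -min 5 with no -max (illustrative name only)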
                            filteredReads[p] = new StreamWriter(outputPath + fileWithoutSuffix + "_" + minDepth + "_" + maxDepthString + fileSuffix,
                                                                false, readsFiles[p].CurrentEncoding, 1000000);
                            filteredReads[p].NewLine = lfConvention;
                        }
                    }
                    else
                    {
                        readsFiles[p]    = null;
                        filteredReads[p] = null;
                    }
                }

                filterThreadParams[] filterParams     = new filterThreadParams[noThreads];
                Thread[]             filteringThreads = new Thread[noThreads];

                // ready a new thread for each parallel filter
                for (int b = 0; b < noThreads; b++)
                {
                    filterParams[b] = new filterThreadParams();
                    filterParams[b].threadNumber  = b + 1;
                    filterParams[b].readsFiles    = readsFiles;
                    filterParams[b].filteredReads = filteredReads;
                    filteringThreads[b]           = new Thread(new ParameterizedThreadStart(Program.FilteringThread));
                    filteringThreads[b].Priority  = ThreadPriority.BelowNormal;
                    filteringThreads[b].Name      = b.ToString();
                    filteringThreads[b].Start(filterParams[b]);
                }

                // and wait for all threads to finish
                for (int b = 0; b < noThreads; b++)
                {
                    filteringThreads[b].Join();
                    filteringThreads[b] = null;
                    //Console.WriteLine("finished healing thread " + b);
                }

                foreach (StreamWriter r in filteredReads)
                {
                    if (r != null)
                    {
                        r.Close();
                    }
                }

                // merge the per-thread histograms
                for (int b = 0; b < noThreads; b++)
                {
                    Dictionary <int, int> threadReadDepths = filterParams[b].depthHisto;
                    foreach (KeyValuePair <int, int> kvp in threadReadDepths)
                    {
                        if (readDepths.ContainsKey(kvp.Key))
                        {
                            readDepths[kvp.Key] += kvp.Value;
                        }
                        else
                        {
                            readDepths.Add(kvp.Key, kvp.Value);
                        }
                    }
                }
            } // for a pair of files

            StreamWriter histo = new StreamWriter(statsFN);

            histo.WriteLine(myProcessNameAndArgs);
            histo.WriteLine();
            histo.WriteLine("depth\tcount");
            int[] depths = readDepths.Keys.ToArray <int>();
            int[] counts = readDepths.Values.ToArray <int>();
            Array.Sort <int, int>(depths, counts);
            for (int i = 0; i < readDepths.Count; i++)
            {
                histo.WriteLine(depths[i] + "\t" + counts[i]);
            }

            Console.WriteLine("discarded " + reducedReads + "/" + discardedReads + " of " + totalReads + " reads");
            histo.WriteLine("discarded " + reducedReads + "/" + discardedReads + " of " + totalReads + " reads");
            histo.Close();

            stopMonitor = true;
            monitorProgress.Join();
        }
        private static void FilteringThread(object threadParams)
        {
            filterThreadParams theseParams = (filterThreadParams)threadParams;
            int filterNumber = theseParams.threadNumber;                // which filtering thread is this one?

            StreamReader[] readsFiles    = theseParams.readsFiles;      // the (shared) read files to be processed
            StreamWriter[] filteredReads = theseParams.filteredReads;   // corresponding (shared) streams for filtered reads

            int noReadFNs = readsFiles.Length;

            bool[] fileActive = new bool[noReadFNs];                    // have not yet reached EOF on this reads file
            bool[,] readValid = new bool[batchSize, noReadFNs];         // did the last read attempt on this file return a read?
            int filesStillActive = 0;                                   // how many active reads files are still around

            string[,] readHeaderSet = new string[batchSize, noReadFNs]; // a batch of sets of read headers
            string[,] readSet       = new string[batchSize, noReadFNs]; // a batch of sets of reads, possibly one from each file
            string[,] qualHeaderSet = new string[batchSize, noReadFNs]; // the qual header ('+') lines (FASTQ only)
            string[,] qualsSet      = new string[batchSize, noReadFNs]; // text form of the quals
            int[] merDepths = new int[maxReadLength];

            int[]  depths          = new int[noReadFNs];                        // depths for each read in the set
            bool[] rightDepth      = new bool[noReadFNs];                       // are depths within the requested bounds?
            bool[] keepThisReadSet = new bool[batchSize];                       // every member of the set is within the desired depth range, so keep the lot

            Dictionary <int, int> readDepths = new Dictionary <int, int>(1000); // depth histogram for this thread

            for (int f = 0; f < noReadFNs; f++)
            {
                if (readsFiles[f] != null)
                {
                    fileActive[f] = true;                               // stays true until EOF
                    filesStillActive++;
                }
            }

            // get the next set of reads and check their depths
            while (filesStillActive > 0)
            {
                lock (readsFiles)
                {
                    // try getting the next batch of reads
                    for (int b = 0; b < batchSize; b++)
                    {
                        for (int f = 0; f < noReadFNs; f++)
                        {
                            if (fileActive[f])                              // only if we haven't already reached EOF on this file
                            {
                                readSet[b, f] = MerStrings.ReadRead(readsFiles[f], null, readsFormat, out readHeaderSet[b, f], out qualHeaderSet[b, f], out qualsSet[b, f]);
                                if (readSet[b, f] == null)                            // this read failed - now at EOF for the file
                                {
                                    fileActive[f]   = false;
                                    readValid[b, f] = false;
                                    filesStillActive--;
                                }
                                else
                                {
                                    readValid[b, f] = true;
                                    Interlocked.Increment(ref totalReads);
                                    progressReads++;
                                }
                            }
                            else
                            {
                                readValid[b, f] = false;
                            }
                        }
                    }
                } // lock to ensure synchronised reading from all reads files

                // now have a set of reads (the n'th read from each file, if it exists), so filter each one in turn


                for (int b = 0; b < batchSize; b++)
                {
                    keepThisReadSet[b] = true;
                    for (int f = 0; f < noReadFNs; f++)
                    {
                        if (readValid[b, f])
                        {
                            depths[f] = CalculateReadDepth(readSet[b, f], merDepths);

                            //if (depths[f] > 100000)
                            //    Debugger.Break();

                            if (reducingReads && !histoOnly)
                            {
                                if (depths[f] >= minDepth)                                  // possibly in the allowable range
                                {
                                    if (depths[f] >= maxDepth)                              // above the max level, so a candidate for thinning
                                    {
                                        // extract and test all the long read keys
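                                        // every 40-base window of the read is checked against highRepSeqs;
                                        // if any window has already been seen more than reducedDepth times
                                        // the read is dropped, otherwise its first 40 bases are remembered
                                        // so later copies of the same read can be counted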
                                        int keyReps = 0;
                                        for (int i = 0; i < readSet[b, f].Length - 40; i++)
                                        {
                                            string readKey = readSet[b, f].Substring(i, 40);

                                            // ignore them if they contain an N
                                            if (readKey.Contains('N'))
                                            {
                                                continue;
                                            }

                                            // look up this key in the table
                                            if (highRepSeqs.ContainsKey(readKey))
                                            {
                                                highRepSeqs[readKey]++;
                                                keyReps = highRepSeqs[readKey];
                                            }

                                            // and break if we found it
                                            if (keyReps > 0)
                                            {
                                                break;
                                            }
                                        }

                                        if (keyReps > reducedDepth)
                                        {
                                            rightDepth[f] = false;                      // we already have enough of these reads, so mark it to be discarded
                                            Interlocked.Increment(ref reducedReads);
                                        }

                                        if (keyReps == 0)                               // didn't find this read already, so remember it for the future
                                        {
                                            string readKey = readSet[b, f].Substring(0, 40);
                                            if (!readKey.Contains('N'))
                                            {
                                                lock (highRepSeqs)
                                                {
                                                    if (!highRepSeqs.ContainsKey(readKey))
                                                    {
                                                        highRepSeqs.Add(readKey, 1);
                                                    }
                                                }
                                            }
                                            rightDepth[f] = true;                       // and let the read through
                                        }
                                    }
                                    else
                                    {
                                        rightDepth[f] = true;                               // reducing but read between min and max so let it through
                                    }
                                }
                                else
                                {
                                    rightDepth[f] = false;                                  // reducing, but below the requested min depth
                                }
                            }
                            else
                            {
                                rightDepth[f] = depths[f] >= minDepth && depths[f] <= maxDepth; // not reducing, so must be between min and max
                            }
                        }
                        else
                        {
                            depths[f]     = 0;
                            rightDepth[f] = false;
                        }

                        // keep the read only if all members of the set should be kept (if paired)
                        keepThisReadSet[b] = keepThisReadSet[b] & rightDepth[f];

                        if (readDepths.ContainsKey(depths[f]))
                        {
                            readDepths[depths[f]]++;
                        }
                        else
                        {
                            readDepths.Add(depths[f], 1);
                        }
                    }
                } // end of checking a batch

                for (int b = 0; b < batchSize; b++)
                {
                    if (filesStillActive > 0 && !histoOnly)
                    {
                        lock (filteredReads)
                        {
                            for (int f = 0; f < noReadFNs; f++)
                            {
                                if (readValid[b, f])
                                {
                                    if (keepThisReadSet[b])
                                    {
                                        SaveFilteredReadAndQual(filteredReads[f], readHeaderSet[b, f], readSet[b, f], qualsSet[b, f]);
                                        progressWantedReads++;
                                    }
                                    else
                                    {
                                        Interlocked.Increment(ref discardedReads);
                                    }
                                }
                            }
                        } // writing out a set of healed reads
                    }
                }
            } // end of file reading/healing loop

            theseParams.depthHisto = readDepths;
        }
        public void AddOrIncrement(ulong mer, int threadNo)
        {
            long  addingIncrement = 0x0000000100000000;                 // assume the as-read form is the canonical form
            ulong rcFlagToBeSet   = 0x0;                                // and that we don't want to set the RC flag

            // generate canonical k-mer first
            ulong rcMer = MerStrings.ReverseComplement(mer);

            if (rcMer < mer)
            {
                mer             = rcMer;
                addingIncrement = 0x0000000000000001;                   // increment the low part of the count pair
                rcFlagToBeSet   = singletonRCFlagMask;                  // remember if the canonical k-mer was the RC form
            }

            int absMerHashCode       = mer.GetHashCode() & int31Mask;
            int partitionNo          = absMerHashCode % noOfPartitions;
            int singletonPartitionNo = singletonPrefixBits == 0 ? 0 : (int)(mer >> (64 - singletonPrefixBits));

            // this mer may have been seen before, so first try updating it in one of the repeated mer tables
            bool updatedRepeat = UpdateRepeatedMer(partitionNo, mer, threadNo, mer, addingIncrement);

            if (updatedRepeat)
            {
                return;
            }

            // handling a k-mer for the first time - try adding it to the singletons table
            // ----------------------------------------------------------------------------
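            // (first sightings go into a lock-free singletons filter; a second sighting of the same
            //  k-mer promotes it into a repeated-mers table, with counts kept for both orientations)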

            // get a stable pointer to the current singletons table (in case someone else fills it and initiates a flush while we're still busy with it)
            MerCollection thisSingletonPartition = singletonFilters[singletonPartitionNo];

            Interlocked.Increment(ref thisSingletonPartition.activeCount);

            // try to add this mer to this partition's singletons collection (and fetch the existing singleton+flag if it's already there)
            int   filterIdx;
            ulong fMer  = mer | rcFlagToBeSet | singletonActiveFlagMask;
            bool  added = thisSingletonPartition.TryInsertKey(fMer, out filterIdx);

            if (added)
            {
                // successfully added this mer so we must be seeing it for the first time

                // if singleton table is already full enough, flush it out and empty the table
                if (thisSingletonPartition.Count >= maxSingletonCapacity[singletonPartitionNo])
                {
                    bool flushNeeded      = true;
                    int  flushNumberToUse = 0;

                    // lock this section to avoid two threads trying to flush/replace the same singleton buffer concurrently
                    lock (lockSingletons)
                    {
                        // test entry condition now that we have the lock (filter may have been reset while we were waiting)
                        if (!thisSingletonPartition.flushed)
                        {
                            // allocate a replacement table for the other threads to use while we're flushing this one
                            int newSingletonLength = thisSingletonPartition.length + thisSingletonPartition.length / 4;
                            if (newSingletonLength > maxSingletonSize)
                            {
                                newSingletonLength = maxSingletonSize;
                            }
                            MerCollection emptySingletonFilter = new MerCollection(newSingletonLength, singletonMerMask); // allocate new local filter for the partition

                            singletonFilters[singletonPartitionNo]     = emptySingletonFilter;                            // make it visible to the concurrent threads (single point assignment)
                            maxSingletonCapacity[singletonPartitionNo] = newSingletonLength * 8 / 10;
                            thisSingletonPartition.flushed             = true;
                            flushNumberToUse = flushSingletonNumber[singletonPartitionNo];
                            flushSingletonNumber[singletonPartitionNo]++;
                        }
                        else
                        {
                            flushNeeded = false;
                        }
                    }

                    if (flushNeeded)
                    {
                        while (thisSingletonPartition.activeCount > 1)
                        {
                            // pause briefly to let any in-flight updates to this table complete
                            Thread.Sleep(100);
                        }
                        FlushSingletons(thisSingletonPartition, singletonPartitionNo, flushNumberToUse);
                    }
                    //flushes++;
                }
            }
            else
            {
                // Insert failed, so we must be seeing this k-mer for the second (or, rarely, a later) time. Mark it as inactive in the singletons table and add it to a repeats table with appropriate counts.
                // There can be a race here with two threads trying to concurrently promote the same singleton. This is resolved by atomically clearing the singleton
                // active flag - and only one of the threads will get the 'active' flag returned from the Exchange. This thread does the promotion - and then sets the
                // promotion-complete bit for the singleton. The other threads will spin until they find this bit has been set.

                if (tracing)
                {
                    lock (traceUpdates)
                    {
                        traceUpdates.Enqueue(new TraceEntry(threadNo, 1, singletonPartitionNo, filterIdx, (ulong)thisSingletonPartition.entries[filterIdx].key));
                        if (traceUpdates.Count > maxTrace)
                        {
                            traceUpdates.Dequeue();
                        }
                    }
                }

                // get the current value of this singleton entry (safe because the promotion changes are progressive)
                ulong merFromFilter = (ulong)thisSingletonPartition.entries[filterIdx].key;
                // and see if this singleton may have already been promoted
                bool activeSingleton = (merFromFilter & singletonActiveFlagMask) != 0;

                // if this singleton may be 'active', try to promote it
                if (activeSingleton)
                {
                    ulong inactiveMer = mer & singletonMerMask;                      // build what the inactive-but-being-promoted entry should look like
                    // if no-one else has altered the singleton entry, then set it to inactive-but-being-promoted
                    long currentMerFromFilter = Interlocked.CompareExchange(ref thisSingletonPartition.entries[filterIdx].key, (long)inactiveMer, (long)merFromFilter);

                    if (tracing)
                    {
                        lock (traceUpdates)
                        {
                            traceUpdates.Enqueue(new TraceEntry(threadNo, 2, singletonPartitionNo, filterIdx, (ulong)currentMerFromFilter));
                            if (traceUpdates.Count > maxTrace)
                            {
                                traceUpdates.Dequeue();
                            }
                        }
                    }

                    // if this thread successfully set the singleton to 'inactive', it will take care of the promotion
                    if (currentMerFromFilter == (long)merFromFilter)
                    {
                        ulong rcFlag = merFromFilter & singletonRCFlagMask;          // non-zero --> RC found in singletons

                        long initialCount = 0;
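                        // initialCount uses the same packed layout as the stored count pairs:
                        // high 32 bits for canonical-orientation sightings, low 32 bits for RC sightings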
                        if (rcFlag != 0)                                        // singleton was seen in RC form
                        {
                            initialCount = 0x0000000000000001;
                        }
                        else                                                // singleton was seen in as-is form
                        {
                            initialCount = 0x0000000100000000;
                        }

                        if (repeatedMersFull[partitionNo])
                        {
                            if (overflowMers[threadNo] == null)
                            {
                                overflowMers[threadNo] = new MerDictionary(repeatedMers[partitionNo].lengthEntries / 10, fullMerMask);
                                //Console.WriteLine("added overflow for thread " + threadNo + " for [" + partitionNo + "]");
                            }

                            bool full = overflowMers[threadNo].Add(mer, initialCount);
                            if (full)
                            {
                                overflowMers[threadNo].Resize();
                            }
                        }
                        else
                        {
                            bool full = repeatedMers[partitionNo].Add(mer, initialCount);
                            if (full)
                            {
                                repeatedMersFull[partitionNo] = true;
                            }
                        }

                        // now that the mer has been promoted, set the 'promoted' flag
                        inactiveMer = inactiveMer | singletonPromotedFlagMask;
                        thisSingletonPartition.entries[filterIdx].key = (long)inactiveMer;

                        if (tracing)
                        {
                            lock (traceUpdates)
                            {
                                traceUpdates.Enqueue(new TraceEntry(threadNo, 3, singletonPartitionNo, filterIdx, (ulong)thisSingletonPartition.entries[filterIdx].key));
                                if (traceUpdates.Count > maxTrace)
                                {
                                    traceUpdates.Dequeue();
                                }
                            }
                        }
                    }
                }

                // singleton is now known to be no longer active, so wait (if necessary) for the 'promoted' flag to be set and increment the repeat counter

                merFromFilter = (ulong)thisSingletonPartition.entries[filterIdx].key;

                if (tracing)
                {
                    lock (traceUpdates)
                    {
                        traceUpdates.Enqueue(new TraceEntry(threadNo, 4, singletonPartitionNo, filterIdx, merFromFilter));
                        if (traceUpdates.Count > maxTrace)
                        {
                            traceUpdates.Dequeue();
                        }
                    }
                }

                bool promotionComplete = (merFromFilter & singletonPromotedFlagMask) != 0;
                bool alreadySlept      = false;
                while (!promotionComplete)
                {
                    promotionComplete = (((ulong)thisSingletonPartition.entries[filterIdx].key & singletonPromotedFlagMask) != 0);
                    if (alreadySlept && !promotionComplete)
                    {
                        if (tracing)
                        {
                            lock (traceUpdates)
                            {
                                StreamWriter trace = new StreamWriter("trace.txt");
                                foreach (TraceEntry t in traceUpdates)
                                {
                                    trace.WriteLine(t.place + "\t" + t.thread + "\t" + t.partition + "\t" + t.index + "\t" + t.value.ToString("x16"));
                                }
                                trace.Close();
                            }
                            Console.WriteLine("promotion still not complete after sleep");
                        }
                    }
                    if (!promotionComplete)
                    {
                        Thread.Sleep(100);
                    }
                    alreadySlept = true;
                }

                UpdateRepeatedMerAfterPromotion(partitionNo, mer, threadNo, mer, addingIncrement);
                //if (!updateSucceeded)
                //{
                //    lock (traceUpdates)
                //    {
                //        StreamWriter trace = new StreamWriter("trace.txt");
                //        foreach (TraceEntry t in traceUpdates)
                //            trace.WriteLine(t.thread + "\t" + t.place + "\t" + t.partition + "\t" + t.index + "\t" + t.value.ToString("x16"));
                //        trace.Close();
                //    }
                //    Console.WriteLine("UpdateRepeatedMerRetry failed after waiting for promotion to complete");
                //}
            }

            Interlocked.Decrement(ref thisSingletonPartition.activeCount);
        }
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("usage: GenerateMerPairs [-m min] [-t threads] cbtFN readsPattern or file names (" + version + ")");
                return;
            }

            List <string> FNParams    = new List <string>(); // the .cbt name and the set of file names or patterns
            int           noThreads   = 1;                   // no. of pairing threads to run in parallel (1 thread is default)
            int           minLoadReps = 3;                   // min rep count needed before mer will be loaded into uniqueMers table or saved as a pair

            for (int p = 0; p < args.Length; p++)
            {
                if (args[p][0] == '-')
                {
                    args[p] = args[p].ToLower();

                    if (args[p] == "-m" || args[p] == "-min")
                    {
                        if (!CheckForParamValue(p, args.Length, "minReps number expected after -m|-min"))
                        {
                            return;
                        }
                        try
                        {
                            minLoadReps = Convert.ToInt32(args[p + 1]);
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -m|-min parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    if (args[p] == "-t" || args[p] == "-threads")
                    {
                        if (!CheckForParamValue(p, args.Length, "number expected after -t|-threads"))
                        {
                            return;
                        }
                        try
                        {
                            noThreads = Convert.ToInt32(args[p + 1]);
                        }
                        catch
                        {
                            Console.WriteLine("expected a number for the -t|-threads parameter: " + args[p + 1]);
                            return;
                        }
                        p++;
                        continue;
                    }

                    Console.WriteLine("unrecognised option: " + args[p]);
                    Console.WriteLine("usage: generateMerPairs [-m min] [-t threads] cbtFN readsPattern or file names (" + version + ")");
                    return;
                }

                FNParams.Add(args[p]);
            }

            if (FNParams.Count < 2)
            {
                Console.WriteLine("expected a cbt file name and at least one reads file name or pattern");
                return;
            }

            // take the cbt file name from the start of the non-option list
            string cbtFN = FNParams[0];

            FNParams.RemoveAt(0);

            if (FNParams.Count == 0)
            {
                Console.WriteLine("did not find any reads file names or patterns");
                return;
            }

            string pairsFN = cbtFN.Replace(".cbt", ".prs");

            List <string> readsFileNames = new List <string>(FNParams.Count);
            List <string> readsFilePaths = new List <string>(FNParams.Count);

            foreach (string readsFNP in FNParams)
            {
                string readsFileName;
                string readsFilePath;
                GetPathFN(readsFNP, out readsFilePath, out readsFileName);
                readsFilePaths.Add(readsFilePath);
                readsFileNames.Add(readsFileName);
            }

            List <string> expandedReadsFNs = new List <string>();

            for (int f = 0; f < FNParams.Count; f++)
            {
                string[] matchedReadsFNs = Directory.GetFiles(readsFilePaths[f], readsFileNames[f], SearchOption.TopDirectoryOnly);
                foreach (string matchedReadsFN in matchedReadsFNs)
                {
                    expandedReadsFNs.Add(matchedReadsFN);
                }
            }

            // make sure there aren't any duplicates in the file list (works around an apparent bug on the Cherax SGI HPC system that returns each file name twice)
            List <string> distinctReadsFNs = new List <string>();

            foreach (string fn in expandedReadsFNs)
            {
                if (!distinctReadsFNs.Contains(fn))
                {
                    distinctReadsFNs.Add(fn);
                }
            }

            // finally... the set of fully qualified, distinct reads files
            string[] readsFNs;
            readsFNs = distinctReadsFNs.ToArray();

            if (readsFNs.Length == 0)
            {
                Console.WriteLine("No matching read files found");
                return;
            }

            int noOfReadsFiles = readsFNs.Length;

            readsFiles = new StreamReader[noOfReadsFiles];
            for (int f = 0; f < noOfReadsFiles; f++)
            {
                string readsFN = readsFNs[f];
                readsFiles[f] = new StreamReader(readsFN);
            }

            // look at the first file to determine the file format and possible read length
            StreamReader testReader = new StreamReader(readsFNs[0]);
            char         headerChar = (char)testReader.Peek();

            if (headerChar == '>')
            {
                readsFormat = MerStrings.formatFNA;
            }
            if (headerChar == '@')
            {
                readsFormat = MerStrings.formatFASTQ;
            }
            int readLength = 0;
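            // sample the first 20 reads and take the longest as the representative read length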

            for (int i = 0; i < 20; i++)
            {
                string nextRead = MerStrings.ReadRead(testReader, readsFormat);
                if (nextRead == null)
                {
                    break;
                }
                int nextLength = nextRead.Length;
                if (nextLength > readLength)
                {
                    readLength = nextLength;
                }
            }
            testReader.Close();

            // have to be able to fit at least two full mers into the read (no overlap)
            if (readLength < 2 * merSize)
            {
                Console.WriteLine("reads too short to generate pairs: " + readLength);
                return;
            }

            if (!File.Exists(cbtFN))
            {
                Console.WriteLine(".cbt file not found: " + cbtFN);
                return;
            }

            //string knownPairsFN = "C.sporogenesRaw_25_Copy_1.prs";
            //BinaryReader knownPairs = new BinaryReader(File.Open(knownPairsFN, FileMode.Open, FileAccess.Read));

            //knownPairs.ReadInt32();

            //while (true)
            //{
            //    ulong mer = 0;
            //    int count = 0;

            //    try
            //    {
            //        mer = knownPairs.ReadUInt64();
            //        count = knownPairs.ReadInt32();

            //        goodPairs.Add(mer, count);
            //    }
            //    catch
            //    {
            //        break;
            //    }
            //}

            //knownPairs.Close();
            //Console.WriteLine("loaded " + goodPairs.Count + " good mers from " + knownPairsFN);


            long loadedUniqueMers = 0;
            long loadedTotalMers  = 0;

            // load the .cbt file into a merTable (either a hash table (small) or a sorted array (large))
            MerStrings.LoadCBTFile(cbtFN, minLoadReps, 0, 0, minLoadReps,
                                   out uniqueMers, out merSize, out averageDepth, out loadedUniqueMers, out loadedTotalMers);

            if (merSize < merStubSize)
            {
                Console.WriteLine("mers in .cbt file are shorter than merStub size: " + merSize + " < " + merStubSize);
                return;
            }

            uniquePairs = new MerCollections.MerTables(loadedUniqueMers, noThreads);

            // calculate a gap size based on the representative read length
            gap = (readLength - endGuard) / 2 - (merStubSize * 2);
            if (gap < minGap)
            {
                gap = minGap;
            }
            if (gap > maxGap)
            {
                gap = maxGap;
            }

            pairStride = merStubSize + gap + merStubSize;
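            // (pairStride is the total span a pair occupies within a read: leading stub + gap + trailing stub;
            //  the resulting pair value is handled as a single 32-base, 64-bit mer when it is canonicalised later)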

            // start the monitor/synchronising thread
            Thread monitorProgress = new Thread(RateReporter);

            monitorProgress.Priority = ThreadPriority.AboveNormal;
            monitorProgress.Start();

            DateTime pairingStart = DateTime.Now;

            foreach (string readsFN in readsFNs)
            {
                Console.WriteLine("Generating pairs from " + readsFN);
                StreamReader   reads         = new StreamReader(readsFN, Encoding.ASCII, false, 1000000);
                BufferedReader bufferedReads = new BufferedReader(readsFormat, reads, null);

                threadFinished = new EventWaitHandle[noThreads];
                int threadNo = 0;
                for (int i = 0; i < noThreads; i++)
                {
                    threadFinished[i] = new EventWaitHandle(false, EventResetMode.AutoReset);
                }

                for (int t = 0; t < noThreads; t++)
                {
                    threadParams workerParam = new threadParams();
                    workerParam.threadNo          = threadNo;
                    workerParam.bufferedReadsFile = bufferedReads;
                    ThreadPool.QueueUserWorkItem(new WaitCallback(PairWorker), workerParam);
                    threadNo++;
                }
                //  and wait for them all to finish
                for (int t = 0; t < noThreads; t++)
                {
                    threadFinished[t].WaitOne();
                }
            }

            BinaryWriter pairsFile = new BinaryWriter(File.Open(pairsFN, FileMode.Create, FileAccess.Write));

            pairsFile.Write(gap);

            for (int pi = 0; pi < uniquePairs.noOfPartitions; pi++)
            {
                totalPairsGenerated += uniquePairs.repeatedMers[pi].Sort();
            }

            for (int ti = 0; ti < noThreads; ti++)
            {
                if (uniquePairs.overflowMers[ti] != null)
                {
                    totalPairsGenerated += uniquePairs.overflowMers[ti].Sort();
                }
            }

            MergeAndWrite(pairsFile, uniquePairs.repeatedMers, uniquePairs.overflowMers);

            pairsFile.Close();

            StopMonitorThread(monitorProgress);

            //Console.WriteLine(totalDeepUnbalancedReads + " deep unbalanced reads");
            //Console.WriteLine(totalReadsProcessed + " reads processed");
            Console.WriteLine("wrote " + totalPairsWritten + " pairs from " + totalReadsRead + " reads in " + (DateTime.Now - pairingStart).TotalSeconds.ToString("#.0") + "s");
        }
        static void PairWorker(object param)
        {
            threadParams   threadParam = (threadParams)param;
            int            threadNo    = (int)threadParam.threadNo;
            BufferedReader readsFile   = threadParam.bufferedReadsFile;
            bool           EOF         = false;

            Sequence[] readHeaderBatch = new Sequence[batchSize];
            Sequence[] readBatch       = new Sequence[batchSize];
            for (int i = 0; i < batchSize; i++)
            {
                readHeaderBatch[i] = new Sequence(defaultHeaderLength);
                readBatch[i]       = new Sequence(defaultReadLength);
            }
            int  readsInBatch         = 0;
            long threadReadsRead      = 0;
            long threadReadsProcessed = 0;

            ulong[] mersFromRead              = new ulong[1000];
            bool[]  merValid                  = new bool[1000];
            ulong[] canonicalMersFromRead     = new ulong[1000];
            int[]   plusDepths                = new int[1000];
            int[]   rcDepths                  = new int[1000];
            bool    deepUnbalanced            = false;
            long    threadDeepUnbalancedCount = 0;

            int minDepth = averageDepth / 20;
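            // k-mers below 1/20th of the average depth are treated as too shallow to contribute to pairs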

            while (!EOF)
            {
                lock (readsFile)
                {
                    readsInBatch = readsFile.ReadReads(batchSize, readHeaderBatch, readBatch, null, null);

                    if (readsInBatch != batchSize)
                    {
                        EOF = true;
                    }

                    threadReadsRead += readsInBatch;
                }

                progressReadsProcessed += readsInBatch;

                for (int r = 0; r < readsInBatch; r++)
                {
                    threadReadsProcessed++;

                    Sequence read       = readBatch[r];
                    int      readLength = read.Length;

                    if (readLength < 2 * merSize)
                    {
                        continue;
                    }

                    if (readLength < 200)
                    {
                        stepSize = 1;
                    }
                    else
                    {
                        stepSize = 2;
                    }

                    //string target = "GTATATAATAAAGTTTTTTATAAAATTTTAAAAGATCATTATAAAAATATAATAACAATTAATATAATATTAATATACTTTAGTTATAGCTATAAATCTTT";
                    //if (read.ToString() == target)
                    //    Debugger.Break();

                    int merCount = MerStrings.GenerateMersFromRead(read, merSize, ref mersFromRead, ref merValid);

                    for (int i = 0; i < merCount; i++)
                    {
                        if (merValid[i])
                        {
                            ulong rcMer = MerStrings.ReverseComplement(mersFromRead[i], merSize);
                            if (rcMer < mersFromRead[i])
                            {
                                canonicalMersFromRead[i] = rcMer;
                            }
                            else
                            {
                                canonicalMersFromRead[i] = mersFromRead[i];
                            }
                        }
                    }

                    GetDepthsForRead(merCount, mersFromRead, canonicalMersFromRead, merValid, plusDepths, rcDepths, minDepth, out deepUnbalanced);

                    if (deepUnbalanced)
                    {
                        threadDeepUnbalancedCount++;
                        continue;
                    }

                    ulong pair;
                    int   pairDepth;
                    bool  gotPair;
                    int   startingM = 0;
                    int   lastM     = read.Length - pairStride; // generate pairs up to the end of the read (used to only generate from first part)

                    while (startingM < lastM)
                    {
                        if (merValid[startingM])
                        {
                            gotPair = GeneratePairFromRead(mersFromRead, merValid, plusDepths, rcDepths, startingM, merCount, minDepth, out pair, out pairDepth);

                            if (gotPair)
                            {
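                                // store the pair in canonical form too: keep the numerically smaller
                                // of the pair and its (32-base) reverse-complement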
                                ulong rcPair = MerStrings.ReverseComplement(pair, 32);
                                if (rcPair < pair)
                                {
                                    pair = rcPair;
                                }

                                //if (pair == 0x054A0985B90B34D1)
                                //    Debugger.Break();

                                uniquePairs.AddIfNotPresent(pair, pairDepth, threadNo);

                                //lock (pairDictionary)
                                //{
                                //    if (!pairDictionary.ContainsKey(pair))
                                //        pairDictionary.Add(pair, pairDepth);
                                //}

                                //Interlocked.Increment(ref GPTrue);
                                //gotPairFromRead = true;
                            }
                            //else
                            //Interlocked.Increment(ref GPfalse);
                        }

                        startingM += stepSize;
                    }

                    //if (!gotPairFromRead)
                    //    threadReadsWithNoPairs++;
                }
            }

            Interlocked.Add(ref totalReadsProcessed, threadReadsProcessed);
            Interlocked.Add(ref totalReadsRead, threadReadsRead);
            Interlocked.Add(ref totalDeepUnbalancedReads, threadDeepUnbalancedCount);

            threadFinished[threadNo].Set();
        }