Ejemplo n.º 1
0
        //ASCIIEncoding ae = new ASCIIEncoding();

        /* Top-level compression driver. Alternates between encoding a chunk of
         * sequence data and a chunk of quality data so that the two output
         * streams advance together, and closes the sequence bit writer when
         * neither stream has anything left.
         *
         * reader:         source of the sequences and qualities.
         * sequenceWriter: destination wrapped by the WriteBitShepherd for sequence codes.
         * qualityWriter:  destination handed to EncodeQual for quality data.
         */
        public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
        {
            long seqPos  = 0;
            long qualPos = 0;

            byte[]           qualityBuffer = new byte[BUFFER];
            WriteBitShepherd bitWriter     = new WriteBitShepherd(sequenceWriter);

            for (;;)
            {
                Spin("Encoding...");

                // Sequences go first whenever they are not ahead of the qualities.
                bool sequenceTurn = seqPos <= qualPos && reader.HasSeqLeft(seqPos, 1);

                if (sequenceTurn)
                {
                    EncodeSeq(bitWriter, ref seqPos, reader);
                }
                else if (reader.HasQLeft(qualPos, 1))
                {
                    EncodeQual(qualityBuffer, ref qualPos, qualityWriter, reader);
                }
                else
                {
                    // Nothing left to encode on either side.
                    break;
                }
            }

            bitWriter.Close();
        }
Ejemplo n.º 2
0
        /* Encodes sequence bases starting at the given index (i) and writes the
         * result to the given WriteBitShepherd. Encoding stops once at least
         * BIT_BUFFER bits have been written or fewer than four bases remain.
         * Updates i according to its progression.
         *
         * Every code is a 7-bit value followed by one bit from GetRandomBit():
         *   triplet   : Encode(b0, b1, b2)
         *   short run : 0,   then a 3-bit index into Bases and a 5-bit  (length - 4)
         *   long run  : 127, then a 3-bit index into Bases and a 13-bit (length - 4)
         *   end mark  : 126, written once the final bases have been consumed
         *
         * bits:   destination bit writer.
         * i:      index of the next base to encode; advanced past everything encoded.
         * reader: source of the bases (GetSeq / HasSeqLeft).
         */
        void EncodeSeq(WriteBitShepherd bits, ref long i, FastqReader reader)
        {
            // A run needs at least four equal bases; run lengths are stored minus this offset.
            const int MinRunLength = 4;
            const int ShortRunBits = 5;
            const int LongRunBits  = 13;
            // Longest run whose (length - MinRunLength) still fits the respective field.
            const int MaxShortRun  = MinRunLength + (1 << ShortRunBits) - 1; // 35
            const int MaxLongRun   = MinRunLength + (1 << LongRunBits) - 1;  // 8195

            int writtenBits = 0;

            // Four bases are required up front so the run-length check can look at i..i+3.
            while (reader.HasSeqLeft(i, 4) && writtenBits < BIT_BUFFER)
            {
                //Check for run-length run.
                if (reader.GetSeq(i) == reader.GetSeq(i + 3) && reader.GetSeq(i) == reader.GetSeq(i + 2) &&
                    reader.GetSeq(i) == reader.GetSeq(i + 1))
                {
                    long j     = i + MinRunLength;
                    // Cap the run so that (length - MinRunLength) never exceeds the 13-bit
                    // field; whatever remains of a longer run is handled by the next iteration.
                    long limit = i + MaxLongRun;
                    while (j < limit && reader.HasSeqLeft(j, 1) && reader.GetSeq(j) == reader.GetSeq(j - 1))
                    {
                        j++;
                    }

                    int  length = (int)(j - i);
                    bool isLong = length > MaxShortRun;

                    bits.Write(isLong ? 127 : 0, 7); //flag for long / short run length
                    bits.Write(GetRandomBit(), 1);
                    bits.Write(Array.IndexOf(Bases, reader.GetSeq(j - 1)), 3);
                    bits.Write(length - MinRunLength, isLong ? LongRunBits : ShortRunBits);
                    writtenBits += isLong ? 24 : 16;

                    i = j;
                }
                else
                {
                    bits.Write(Encode(reader.GetSeq(i), reader.GetSeq(i + 1), reader.GetSeq(i + 2)), 7);
                    bits.Write(GetRandomBit(), 1);
                    i           += 3;
                    writtenBits += 8;
                }
            }

            if (reader.HasSeqLeft(i, 4))
            {
                // Stopped because of the bit budget; the remaining bases are left for a later call.
                return;
            }

            // Fewer than four bases remain: could still have 1, 2 or 3 of them.
            string last = "";
            while (reader.HasSeqLeft(i, 1))
            {
                last += reader.GetSeq(i++);
            }

            if (last != "")
            {
                // Pad the final partial triplet with 'N' so it can be encoded like any other.
                last = last.PadRight(3, 'N');
                bits.Write(Encode(last[0], last[1], last[2]), 7);
                bits.Write(GetRandomBit(), 1);
            }

            bits.Write(126, 7); // mark end of sequences blocks
            bits.Write(GetRandomBit(), 1);
        }