Пример #1
0
        //ASCIIEncoding ae = new ASCIIEncoding();

        /* Top-level compression loop. Alternates between encoding a block of
         * sequences and a block of qualities so that the two output streams
         * advance together: sequences are encoded whenever their position has
         * not passed the quality position and bases remain; otherwise qualities
         * are encoded. Stops when neither side has anything left, then closes
         * the bit writer wrapped around sequenceWriter.
         */
        public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
        {
            long seqPos  = 0;
            long qualPos = 0;

            byte[]           qualityBuffer = new byte[BUFFER];
            WriteBitShepherd sequenceBits  = new WriteBitShepherd(sequenceWriter);

            bool working = true;
            while (working)
            {
                Spin("Encoding...");

                if (seqPos <= qualPos && reader.HasSeqLeft(seqPos, 1))
                {
                    // Sequences are behind (or level) and still have data.
                    EncodeSeq(sequenceBits, ref seqPos, reader);
                }
                else if (reader.HasQLeft(qualPos, 1))
                {
                    EncodeQual(qualityBuffer, ref qualPos, qualityWriter, reader);
                }
                else
                {
                    // Nothing left on either side.
                    working = false;
                }
            }

            sequenceBits.Close();
        }
Пример #2
0
        /* Top-level compression loop writing ids, sequences and qualities to a
         * single writer. On every pass exactly one kind of block is encoded,
         * chosen in this priority order:
         *   1. ids       - when id encoding is enabled, the id counter has not
         *                  passed seqI / length, and ids remain;
         *   2. sequences - when the sequence position has not passed the quality
         *                  position (or qualities are disabled) and bases remain;
         *   3. qualities - when quality encoding is enabled and qualities remain.
         * The loop ends when none of the three applies; a newline is then written
         * to standard error.
         */
        public void Compress(FastqReader reader, BinaryWriter writer)
        {
            int  nextId  = 0;
            long seqPos  = 0;
            long qualPos = 0;

            byte[] block = new byte[BUFFER];
            ChooseIddc(reader, writer);

            bool encodedSomething;
            do
            {
                Spin("Encoding...");
                encodedSomething = true;

                if (encodeIds && nextId <= seqPos / length && reader.HasIDLeft(nextId, 1))
                {
                    iddc.EncodeId(ref nextId);
                }
                else if ((!encodeQualities || seqPos <= qualPos) && reader.HasSeqLeft(seqPos, 1))
                {
                    EncodeSeq(block, ref seqPos, writer, reader);
                }
                else if (encodeQualities && reader.HasQLeft(qualPos, 1))
                {
                    EncodeQual(block, ref qualPos, writer, reader);
                }
                else
                {
                    // No stream had anything left to encode on this pass.
                    encodedSomething = false;
                }
            }
            while (encodedSomething);

            Console.Error.WriteLine();
        }
Пример #3
0
        /* Top-level compression loop with separate writers for sequences and
         * qualities. Each pass encodes one block: a sequence block while the
         * sequence position has not passed the quality position and bases remain,
         * otherwise a quality block while qualities remain. When neither applies
         * the loop ends and a newline is written to standard error.
         */
        public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
        {
            byte[] block = new byte[BUFFER];

            long seqPos  = 0;
            long qualPos = 0;

            for (;;)
            {
                Spin("Encoding...");

                bool sequencesTurn = seqPos <= qualPos && reader.HasSeqLeft(seqPos, 1);
                if (sequencesTurn)
                {
                    EncodeSeq(block, ref seqPos, sequenceWriter, reader);
                }
                else if (reader.HasQLeft(qualPos, 1))
                {
                    EncodeQual(block, ref qualPos, qualityWriter, reader);
                }
                else
                {
                    break;
                }
            }

            Console.Error.WriteLine();
        }
Пример #4
0
        /* Encodes bases starting at index i into the given WriteBitShepherd and
         * advances i past every base consumed. Encoding stops once at least
         * BIT_BUFFER bits have been written by the main loop or fewer than four
         * bases remain.
         *
         * Every record starts with a 7-bit code followed by one bit taken from
         * GetRandomBit():
         *   - triplet:   code = Encode(b0, b1, b2) for the next three bases;
         *   - short run: code 0, then a 3-bit index into Bases and a 5-bit
         *                (length - 4), used for runs of 4 to 35 equal bases;
         *   - long run:  code 127, then a 3-bit index into Bases and a 13-bit
         *                (length - 4), used for runs longer than 35 bases;
         *   - end mark:  code 126, written once fewer than four bases remain.
         * One to three leftover bases are padded with 'N' to a final triplet,
         * which is written before the end mark.
         */
        void EncodeSeq(WriteBitShepherd bits, ref long i, FastqReader reader)
        {
            const int MinRunLength      = 4;
            const int MaxShortRunLength = 35;
            // The long-run length field is 13 bits wide and stores (length - 4),
            // so the longest run that fits is 8191 + 4 bases. Anything beyond
            // that is left for the next record instead of overflowing the field.
            const int MaxLongRunLength  = ((1 << 13) - 1) + MinRunLength;

            int writtenBits = 0;

            while (reader.HasSeqLeft(i, 4) && writtenBits < BIT_BUFFER)
            {
                var current = reader.GetSeq(i);

                //Check for run-length run: four identical bases in a row.
                bool startsRun = current == reader.GetSeq(i + 3) && current == reader.GetSeq(i + 2) &&
                                 current == reader.GetSeq(i + 1);
                if (startsRun)
                {
                    long j     = i + MinRunLength;
                    long limit = i + MaxLongRunLength;
                    while (j < limit && reader.HasSeqLeft(j, 1) && reader.GetSeq(j) == current)
                    {
                        j++;
                    }

                    int length = (int)(j - i);
                    if (length > MaxShortRunLength)
                    {
                        bits.Write(127, 7); //flag for long run length
                        bits.Write(GetRandomBit(), 1);
                        bits.Write(Array.IndexOf(Bases, current), 3);
                        bits.Write(length - MinRunLength, 13);
                        writtenBits += 24;
                    }
                    else
                    {
                        bits.Write(0, 7); //flag for short run length
                        bits.Write(GetRandomBit(), 1);
                        bits.Write(Array.IndexOf(Bases, current), 3);
                        bits.Write(length - MinRunLength, 5);
                        writtenBits += 16;
                    }
                    i = j;
                }
                else
                {
                    bits.Write(Encode(current, reader.GetSeq(i + 1), reader.GetSeq(i + 2)), 7);
                    bits.Write(GetRandomBit(), 1);
                    i           += 3;
                    writtenBits += 8;
                }
            }

            // Fewer than four bases left means the sequences end in this call.
            bool   end  = !reader.HasSeqLeft(i, 4);
            string last = "";

            if (end)
            {
                while (reader.HasSeqLeft(i, 1)) //could still have 1, 2 or 3 bases
                {
                    last += reader.GetSeq(i++);
                }
            }
            if (last != "")
            {
                // Pad the leftover bases with 'N' so they form a full triplet.
                last = last.PadRight(3, 'N');
                bits.Write(Encode(last[0], last[1], last[2]), 7);
                bits.Write(GetRandomBit(), 1);
            }
            if (end)
            {
                bits.Write(126, 7); // mark end of sequences blocks
                bits.Write(GetRandomBit(), 1);
            }
        }
Пример #5
0
        /* Packs bases starting at index i into buffer and writes one block to the
         * given BinaryWriter, advancing i past every base consumed.
         *
         * Payload bytes:
         *   - a run of 4 to 19 equal bases becomes one byte:
         *     128 + ((runLength - 4) << 3) + index of the base in Bases;
         *   - otherwise the next three bases become Encode(b0, b1, b2).
         * When fewer than four bases remain, the last one to three are padded
         * with 'N' and encoded as one extra trailing byte.
         *
         * Block layout:
         *   - header byte 128 followed by the whole buffer, when the buffer was
         *     filled completely and there is no trailing byte;
         *   - otherwise header byte 160, the payload length as an int, then the
         *     used part of the buffer and the trailing byte if there is one.
         */
        void EncodeSeq(byte[] buffer, ref long i, BinaryWriter writer, FastqReader reader)
        {
            //the first byte starts with 1 if we are encoding a seq
            const byte FullBlockHeader  = 128;
            //an extra 32 tells the decoder that the block carries an explicit length
            const byte SizedBlockHeader = 128 + 32;

            int filled = 0;

            while (reader.HasSeqLeft(i, 4) && filled < BUFFER)
            {
                var head = reader.GetSeq(i);

                bool isRun = head == reader.GetSeq(i + 3)
                          && head == reader.GetSeq(i + 2)
                          && head == reader.GetSeq(i + 1);

                if (!isRun)
                {
                    // Plain triplet: three bases packed into one byte.
                    buffer[filled] = Encode(head, reader.GetSeq(i + 1), reader.GetSeq(i + 2));
                    filled++;
                    i += 3;
                    continue;
                }

                // Run-length case: extend the run up to 19 bases in total.
                long runEnd   = i + 4;
                long runLimit = i + 19;
                while (runEnd < runLimit && reader.HasSeqLeft(runEnd, 1) && reader.GetSeq(runEnd) == head)
                {
                    runEnd++;
                }

                long extra    = runEnd - i - 4;
                int  baseCode = Array.IndexOf(Bases, head);

                buffer[filled] = (byte)(128 + (extra << 3) + baseCode);
                filled++;
                i = runEnd;
            }

            // Collect the 1, 2 or 3 bases that cannot form a full group of four.
            string tail = "";
            if (!reader.HasSeqLeft(i, 4))
            {
                while (reader.HasSeqLeft(i, 1))
                {
                    tail += reader.GetSeq(i);
                    i++;
                }
            }

            bool hasTail  = tail != "";
            byte tailByte = 0;
            if (hasTail)
            {
                tail     = tail.PadRight(3, 'N');
                tailByte = Encode(tail[0], tail[1], tail[2]);
            }

            if (filled == BUFFER && !hasTail)
            {
                writer.Write(FullBlockHeader);
                writer.Write(buffer);
                return;
            }

            writer.Write(SizedBlockHeader);
            writer.Write(filled + (hasTail ? 1 : 0));
            writer.Write(buffer, 0, filled);
            if (hasTail)
            {
                writer.Write(tailByte);
            }
        }