예제 #1
0
        /* Top-level compression loop. On every pass it picks at most one kind
         * of block to encode, in this priority order, and stops as soon as none
         * of them applies:
         *   1. IDs        - when encodeIds is set, idCounter <= seqI / length and
         *                   the reader still has an ID at idCounter;
         *   2. sequences  - when the sequence position is not ahead of the
         *                   quality position (or qualities are not encoded) and
         *                   the reader still has sequence data;
         *   3. qualities  - when encodeQualities is set and the reader still has
         *                   quality data.
         * This ordering is what keeps the sequence and quality streams aligned.
         *
         * reader: FASTQ source that is asked what is left to encode.
         * writer: destination passed on to ChooseIddc, EncodeSeq and EncodeQual.
         */
        public void Compress(FastqReader reader, BinaryWriter writer)
        {
            long seqPos  = 0;   // position handed to EncodeSeq
            long qualPos = 0;   // position handed to EncodeQual
            int  nextId  = 0;   // index of the next ID handed to iddc.EncodeId

            byte[] chunk = new byte[BUFFER];
            ChooseIddc(reader, writer);

            bool progressed;
            do
            {
                Spin("Encoding...");
                progressed = true;

                if (encodeIds && nextId <= seqPos / length && reader.HasIDLeft(nextId, 1))
                {
                    iddc.EncodeId(ref nextId);
                }
                else if ((!encodeQualities || seqPos <= qualPos) && reader.HasSeqLeft(seqPos, 1))
                {
                    EncodeSeq(chunk, ref seqPos, writer, reader);
                }
                else if (encodeQualities && reader.HasQLeft(qualPos, 1))
                {
                    EncodeQual(chunk, ref qualPos, writer, reader);
                }
                else
                {
                    // nothing left that may be encoded: leave the loop
                    progressed = false;
                }
            }
            while (progressed);

            // terminate the progress line printed on stderr
            Console.Error.WriteLine();
        }
예제 #2
0
        /* Encodes IDs starting at the given index (id) until the block holds
         * ID_BUFFER characters or the reader has no IDs left, then writes the
         * block to the writer. Updates id to the first ID not yet consumed.
         *
         * Block layout, as written below:
         *   full block    : marker byte 64, then the ID_BUFFER payload bytes;
         *   shorter block : marker byte 64 + 32, the length b (written with
         *                   writer.Write(b), b being an int), then b payload bytes.
         *
         * An ID that does not fit entirely in the block is split: the part that
         * fits is emitted now, the rest is kept in idContinuation and emitted at
         * the start of the next block. A pending continuation longer than
         * ID_BUFFER is emitted one full block per call (previously it made
         * b exceed ID_BUFFER, so nothing was written and the data was lost).
         */
        public void EncodeId(ref int id)
        {
            // should check if "mode" is right (ie. reader && writer != null)
            // but we avoid doing so for efficiency

            // marker byte identifying an ID block
            // NOTE(review): the original comment said the byte "starts with 11",
            // but 64 is 0b0100_0000 - confirm the intended bit layout with the decoder
            byte          first = (byte)64;
            int           b     = 0;    // characters collected for this block
            StringBuilder ids   = new StringBuilder();

            if (idContinuation.Length > ID_BUFFER)
            {
                // the leftover alone fills the block: emit ID_BUFFER characters
                // and keep the remainder pending for the following call
                string pending = idContinuation.ToString();
                ids.Append(pending, 0, ID_BUFFER);
                idContinuation = new StringBuilder(pending.Substring(ID_BUFFER));
                b = ID_BUFFER;
            }
            else if (idContinuation.Length != 0)
            {
                ids.Append(idContinuation);
                b += idContinuation.Length;
                idContinuation = new StringBuilder();
            }

            while (reader.HasIDLeft(id, 1) && b < ID_BUFFER)
            {
                string currentId = reader.GetID(id);
                int    room      = ID_BUFFER - b;   // > 0, guaranteed by the loop condition

                if (currentId.Length > room)
                {
                    // continuation: only the first "room" characters fit,
                    // the tail goes to the next block
                    ids.Append(currentId, 0, room);
                    idContinuation.Append(currentId, room, currentId.Length - room);
                    b = ID_BUFFER;
                }
                else
                {
                    ids.Append(currentId);
                    b += currentId.Length;
                }
                id++;
                //here method to deal with known ID's structure
            }

            // per the original note ae is an ASCII encoding, so 1 char = 1 byte
            // and b is also the payload size in bytes
            if (b < ID_BUFFER)
            {
                // mark smaller buffer: tell the decoder that this block has a
                // length different than ID_BUFFER, and write that length
                first += (byte)32;
                writer.Write(first);
                writer.Write(b);
            }
            else
            {
                writer.Write(first);
            }
            writer.Write(ae.GetBytes(ids.ToString()));
        }
예제 #3
0
        /* Encodes IDs starting at the given index (id) until the block holds
         * ID_BUFFER bytes or the reader has no IDs left, then writes the block
         * to the writer. Updates id to the first ID not yet consumed.
         *
         * Each ID is split on "separators"; the first field is skipped and the
         * next four fields are written as UInt16 values. A split that yields
         * fewer than 5 fields raises FormatException ("invalid ID format").
         *
         * Block layout, as written below:
         *   full block    : marker byte 64, then ID_BUFFER payload bytes;
         *   shorter block : marker byte 64 + 32, the length b (written with
         *                   writer.Write(b), b being an int), then b payload bytes.
         *
         * An encoded ID that crosses the end of the block is split: the bytes
         * that fit are emitted now, the remaining ones are saved in
         * idContinuation / continuationLength and emitted first in the next block.
         * We assume that a continuation is never longer than ID_BUFFER.
         */
        public void EncodeId(ref int id)
        {
            idBuffer.Seek(0, SeekOrigin.Begin);
            encodedId.Seek(0, SeekOrigin.Begin);

            // should check if "mode" is right (ie. reader && writer != null)
            // but we avoid doing so for efficiency

            // marker byte identifying an ID block
            // NOTE(review): the original comment said the byte "starts with 11",
            // but 64 is 0b0100_0000 - confirm the intended bit layout with the decoder
            byte first = (byte)64;
            int  b     = 0;     // bytes collected for this block

            // The pending continuation now goes straight into idBuffer, so no
            // offset bookkeeping is needed any more; the field is kept at 0,
            // the value it had after every completed loop iteration before.
            writtenContinuation = 0;

            if (continuationLength != 0)
            {
                // Staging these bytes in encodedId (as before) lost them when no
                // further ID followed, and shifted the offsets used by the
                // overflow branch below.
                idBuffer.Write(idContinuation, 0, continuationLength);
                b += continuationLength;
                continuationLength = 0;
            }

            while (reader.HasIDLeft(id, 1) && b < ID_BUFFER)
            {
                string[] currentId = reader.GetID(id).Split(separators);
                if (currentId.Length < 5)
                {
                    throw new FormatException("invalid ID format");
                }

                // fields 1..4 are encoded, field 0 is skipped
                for (int field = 1; field <= 4; field++)
                {
                    encodedId.Write(Convert.ToUInt16(currentId[field]));
                }
                // assumes ENCODED_ID_LENGTH equals the 4 x 2 bytes written above - TODO confirm
                b += ENCODED_ID_LENGTH;

                byte[] buffer = ((MemoryStream)encodedId.BaseStream).GetBuffer();
                if (b > ID_BUFFER)
                {
                    // continuation: the last continuationLength bytes of this
                    // encoded ID do not fit and are saved for the next block
                    continuationLength = b - ID_BUFFER;
                    int firstExceedingByte = ENCODED_ID_LENGTH - continuationLength;
                    Array.Copy(buffer, firstExceedingByte, idContinuation, 0, continuationLength);

                    // firstExceedingByte is also the number of bytes that still fit
                    idBuffer.Write(buffer, 0, firstExceedingByte);
                    b = ID_BUFFER;
                }
                else
                {
                    idBuffer.Write(buffer, 0, ENCODED_ID_LENGTH);
                }

                id++;
                // rewind the scratch stream for the next ID
                encodedId.Seek(0, SeekOrigin.Begin);
            }

            if (b == ID_BUFFER)
            {
                writer.Write(first);
                writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
            }
            else if (b < ID_BUFFER)
            {
                // mark smaller buffer: tell the decoder that this block has a
                // length different than ID_BUFFER, and write that length
                first += (byte)32;
                writer.Write(first);
                writer.Write(b);
                writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
            }
        }