Пример #1
0
        /* Picks the ID (de)compresser stored in iddc by inspecting the first ID
         * returned by the reader, and writes the chosen type tag (0, 1 or 2) to the
         * writer as an Int32 before the compresser is constructed.
         * When encodeIds is false a PlaceholderIdGenerator is installed instead and
         * nothing is written.
         */
        void ChooseIddc(FastqReader reader, BinaryWriter writer)
        {
            if (!encodeIds)
            {
                iddc = new PlaceholderIdGenerator();
                return;
            }

            // Type 0 layout, e.g.
            //   @SRX000571_SRR002321.54856271 080226_CMLIVERKIDNEY_0007:8:330:23:135 length=36
            Regex sraPattern    = new Regex(@"^(@[^.]+\.)\d+\s([\S]+)(?:\d+:){3}\d+.*$", RegexOptions.Singleline);
            Regex lengthPattern = new Regex(@"^.+length=\d+$", RegexOptions.Singleline);

            // Type 2 layout, e.g.
            //   @HWUSI-EAS627_1:3:1:0:370/1 (or /2)
            //   @BILLIEHOLIDAY_3_FC30G08AAXX:1:1:0:1966
            Regex encodePattern = new Regex(@"^(@[\S]+)(?:\d+:){3}\d+(\/[12])*$", RegexOptions.Singleline);

            string firstId      = reader.GetID(0);
            Match  sraResult    = sraPattern.Match(firstId);
            Match  encodeResult = encodePattern.Match(firstId);

            // Neither known layout matched: fall back to the plain compresser (type 1).
            if (!sraResult.Success && !encodeResult.Success)
            {
                writer.Write(1);
                iddc = new PlainIdDeCompresser(reader, writer);
                return;
            }

            // Only the second layout matched (type 2); the first one takes precedence
            // whenever both match.
            if (!sraResult.Success)
            {
                writer.Write(2);
                iddc = new EncodeIdDeCompresser(reader, writer, encodeResult);
                return;
            }

            // First layout (type 0); the compresser is also told whether the ID ends
            // with a "length=<n>" suffix.
            bool hasLengthSuffix = lengthPattern.IsMatch(firstId);
            writer.Write(0);
            iddc = new SraIdDeCompresser(reader, writer, sraResult, hasLengthSuffix);
        }
Пример #2
0
        /* Packs raw ID strings, starting at index id, into one output block and
         * writes that block to the writer. Reading stops once the block holds
         * ID_BUFFER characters or the reader reports no ID left.
         *
         * id is advanced by one for every ID consumed. The tail of an ID that does
         * not fit is parked in idContinuation and becomes the start of the next block.
         *
         * Block layout as written here:
         *   full block    : marker byte 64, then the encoded characters
         *   shorter block : marker byte 96 (64 + 32), the length as Int32, then the
         *                   encoded characters
         * Nothing is written when the pending continuation alone is longer than
         * ID_BUFFER (the code assumes this never happens).
         */
        public void EncodeId(ref int id)
        {
            // No check that reader/writer are set: skipped on purpose for speed.

            byte          marker = (byte)64;
            int           used   = 0;
            StringBuilder block  = new StringBuilder();

            // Start the block with whatever was left over from the previous call.
            if (idContinuation.Length != 0)
            {
                block.Append(idContinuation);
                used += idContinuation.Length;
                idContinuation = new StringBuilder();
            }

            while (reader.HasIDLeft(id, 1) && used < ID_BUFFER)
            {
                string current = reader.GetID(id);
                used += current.Length;

                int overshoot = used - ID_BUFFER;
                if (overshoot > 0)
                {
                    // Split the ID: the head fills the block, the tail waits for the next one.
                    int head = current.Length - overshoot;
                    block.Append(current.Substring(0, head));
                    idContinuation.Append(current.Substring(head));
                    used = ID_BUFFER;
                }
                else
                {
                    block.Append(current);
                }

                id++;
                // hook for handling IDs with a known structure
            }

            if (used > ID_BUFFER)
            {
                // Oversized continuation: nothing is emitted for this call.
                return;
            }

            if (used < ID_BUFFER)
            {
                // Shorter block: flag it and record its length so it can be told
                // apart from a full ID_BUFFER-sized block.
                marker += (byte)32;
                writer.Write(marker);
                writer.Write(used);
            }
            else
            {
                writer.Write(marker);
            }

            // The character count doubles as the byte count; this relies on ae
            // producing one byte per char (presumably an ASCII encoding - confirm).
            writer.Write(ae.GetBytes(block.ToString()));
        }
Пример #3
0
        /* Encodes IDs starting at the given index (id) until the block holds
         * ID_BUFFER bytes or the reader has no ID left, and writes the resulting
         * block to the writer. Updates id according to its advancements (one per
         * ID consumed).
         *
         * Each ID is split on separators; fields 1..4 are converted to UInt16 and
         * written back to back (field 0 is skipped). ENCODED_ID_LENGTH is assumed to
         * be the byte size of those four values - confirm against its declaration.
         *
         * Bytes of a record that do not fit in the block are saved in idContinuation
         * (continuationLength of them) and emitted at the start of the next block.
         *
         * Block layout as written here:
         *   full block    : marker byte 64, then ID_BUFFER payload bytes
         *   shorter block : marker byte 96 (64 + 32), the payload length as Int32,
         *                   then that many payload bytes
         * Nothing is written if the payload would exceed ID_BUFFER.
         *
         * Throws FormatException when an ID splits into fewer than five fields.
         */
        public void EncodeId(ref int id)
        {
            idBuffer.Seek(0, SeekOrigin.Begin);
            encodedId.Seek(0, SeekOrigin.Begin);

            // should check if "mode" is right (ie. reader && writer != null)
            // but we avoid doing so for efficiency

            byte first = (byte)64;
            int  b     = 0;

            if (continuationLength != 0)
            {
                // Stage the leftover bytes in front of the next record; they reach
                // idBuffer together with that record (or via the flush after the loop).
                encodedId.Write(idContinuation, 0, continuationLength);
                b += continuationLength;
                writtenContinuation = continuationLength;
                continuationLength  = 0;
            }
            //we assume that a continuation will never be longer
            //than BUFFER

            while (reader.HasIDLeft(id, 1) && b < ID_BUFFER)
            {
                string[] currentId = reader.GetID(id).Split(separators);
                if (currentId.Length < 5)
                {
                    throw new FormatException("invalid ID format");
                }

                //XXX TODO check if you can use only idBuffer and not also encodedID

                // Fields 1..4 are the numbers to encode; field 0 is skipped.
                for (int i = 0; i < 4; i++)
                {
                    encodedId.Write(Convert.ToUInt16(currentId[i + 1]));
                }
                b += ENCODED_ID_LENGTH;

                byte[] buffer = ((MemoryStream)encodedId.BaseStream).GetBuffer();

                // Bytes currently staged in encodedId: this record plus any
                // continuation written ahead of it in this call.
                int staged = ENCODED_ID_LENGTH + writtenContinuation;

                if (b > ID_BUFFER)
                {
                    //continuation
                    continuationLength = b - ID_BUFFER;

                    // Offset of the first staged byte that no longer fits. It is
                    // measured from the start of the staged data, so it must include
                    // a continuation staged ahead of the record.
                    int firstExceedingByte = staged - continuationLength;
                    Array.Copy(buffer, firstExceedingByte, idContinuation, 0, continuationLength);

                    // Everything before that offset still belongs to this block.
                    idBuffer.Write(buffer, 0, firstExceedingByte);
                    b = ID_BUFFER;
                }
                else
                {
                    idBuffer.Write(buffer, 0, staged);
                }
                id++;
                encodedId.Seek(0, SeekOrigin.Begin);
                writtenContinuation = 0;
            }

            if (writtenContinuation != 0)
            {
                // The loop never ran, so the staged continuation was not copied to
                // idBuffer; without this flush the block below would carry b stale bytes.
                idBuffer.Write(((MemoryStream)encodedId.BaseStream).GetBuffer(), 0, writtenContinuation);
                writtenContinuation = 0;
            }

            if (b == ID_BUFFER)
            {
                writer.Write(first);
                writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
            }
            else if (b < ID_BUFFER)
            {
                //mark smaller buffer
                first += (byte)32;  //we have to tell the decoder that we have a block with a length
                                    //different than BUFFER
                writer.Write(first);
                writer.Write(b);
                writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
            }
        }