/* Drives the whole compression: repeatedly picks which stream (IDs,
 * sequences or qualities) should emit its next block so that the streams
 * advance together, and delegates the actual encoding to the matching
 * encoder. Stops when none of the streams has anything left to encode.
 *
 * reader: source of the FASTQ records.
 * writer: destination of the compressed blocks.
 */
public void Compress(FastqReader reader, BinaryWriter writer)
{
    long seqPos = 0;   // progress through the sequence stream
    long qualPos = 0;  // progress through the quality stream
    int nextId = 0;    // index of the next ID to encode
    byte[] chunk = new byte[BUFFER];

    ChooseIddc(reader, writer);

    bool working = true;
    while (working)
    {
        Spin("Encoding...");

        if (encodeIds && nextId <= seqPos / length && reader.HasIDLeft(nextId, 1))
        {
            // IDs go first as long as they have not run ahead of the sequences.
            iddc.EncodeId(ref nextId);
        }
        else if ((!encodeQualities || seqPos <= qualPos) && reader.HasSeqLeft(seqPos, 1))
        {
            // Sequences are encoded while they are not ahead of the qualities
            // (or unconditionally when qualities are not being encoded).
            EncodeSeq(chunk, ref seqPos, writer, reader);
        }
        else if (encodeQualities && reader.HasQLeft(qualPos, 1))
        {
            EncodeQual(chunk, ref qualPos, writer, reader);
        }
        else
        {
            // No stream had work to do in this pass: we are done.
            working = false;
        }
    }

    // Terminate the progress line on stderr.
    Console.Error.WriteLine();
}
/* Encodes IDs starting at the given index (id) until the ID block is full
 * or the reader has no IDs left, and writes the resulting block through
 * the writer field. Updates id according to its advancements.
 *
 * Block layout, as written below:
 *   - full block : header byte 64, then exactly ID_BUFFER ASCII bytes;
 *   - short block: header byte 96 (64 + 32), then the payload length as an
 *                  int, then that many ASCII bytes.
 *
 * An ID that does not fit in the current block is split: its head closes
 * the block and its tail is kept in idContinuation for the next block.
 * A pending tail longer than ID_BUFFER is itself spread over several
 * blocks, and once the reader has no IDs left any pending tail is flushed
 * immediately, because the caller only invokes this method while IDs
 * remain and the tail would otherwise never be written.
 * NOTE(review): the end-of-input flush can emit more than one ID block in
 * a single call; this assumes the decoder dispatches purely on the block
 * header byte -- confirm against the decoder.
 */
public void EncodeId(ref int id)
{
    // should check if "mode" is right (ie. reader && writer != null)
    // but we avoid doing so for efficiency
    do
    {
        // header byte of an ID block; 32 is added below for a short block
        byte first = (byte)64;
        // number of characters (== bytes, ASCII) placed in this block
        int b = 0;
        StringBuilder ids = new StringBuilder();

        if (idContinuation.Length != 0)
        {
            // Take at most one block's worth of the pending tail and keep
            // whatever is left pending, so nothing is dropped when the
            // tail is longer than ID_BUFFER.
            int carried = idContinuation.Length < ID_BUFFER ? idContinuation.Length : ID_BUFFER;
            ids.Append(idContinuation.ToString(0, carried));
            idContinuation = new StringBuilder(
                idContinuation.ToString(carried, idContinuation.Length - carried));
            b = carried;
        }

        // When part of the tail is still pending, b == ID_BUFFER here and
        // the loop is skipped, so the Append below never interleaves with it.
        while (reader.HasIDLeft(id, 1) && b < ID_BUFFER)
        {
            string currentId = reader.GetID(id);
            int room = ID_BUFFER - b;

            if (currentId.Length > room)
            {
                // continuation: the head fills the block, the tail waits
                ids.Append(currentId.Substring(0, room));
                idContinuation.Append(currentId.Substring(room));
                b = ID_BUFFER;
            }
            else
            {
                ids.Append(currentId);
                b += currentId.Length;
            }

            id++;
            //here method to deal with known ID's structure
        }

        // we use ascii encoding, so 1 char = 1 byte
        byte[] payload = ae.GetBytes(ids.ToString());
        if (b == ID_BUFFER)
        {
            writer.Write(first);
            writer.Write(payload);
        }
        else
        {
            // mark smaller buffer: we have to tell the decoder that this
            // block has a length different than ID_BUFFER
            first += (byte)32;
            writer.Write(first);
            writer.Write(b);
            writer.Write(payload);
        }
    }
    while (idContinuation.Length != 0 && !reader.HasIDLeft(id, 1));
}
/* Encodes IDs starting at the given index (id) until the ID block is full
 * or the reader has no IDs left, and writes the resulting block through
 * the writer field. Updates id according to its advancements.
 *
 * Each ID is split on the separators field; items 1..4 (the first item is
 * skipped) are parsed as UInt16 and written in binary form, which is
 * expected to take ENCODED_ID_LENGTH bytes per ID.
 *
 * Block layout, as written below:
 *   - full block : header byte 64, then exactly ID_BUFFER bytes;
 *   - short block: header byte 96 (64 + 32), then the payload length as an
 *                  int, then that many bytes.
 *
 * A record that does not fit in the current block is split: the bytes that
 * fit close the block and the rest is saved in idContinuation (with its
 * size in continuationLength) for the next block. Once the reader has no
 * IDs left the saved bytes are flushed immediately, because the caller
 * only invokes this method while IDs remain.
 * NOTE(review): that flush can emit a second ID block in the same call;
 * this assumes the decoder dispatches purely on the block header byte --
 * confirm against the decoder.
 *
 * Throws FormatException when an ID has fewer than five items after
 * splitting (Convert.ToUInt16 may also throw on non-numeric items).
 */
public void EncodeId(ref int id)
{
    // should check if "mode" is right (ie. reader && writer != null)
    // but we avoid doing so for efficiency
    do
    {
        // Both scratch writers are reused between blocks: rewind them.
        idBuffer.Seek(0, SeekOrigin.Begin);
        encodedId.Seek(0, SeekOrigin.Begin);
        writtenContinuation = 0;

        // header byte of an ID block; 32 is added below for a short block
        byte first = (byte)64;
        // number of payload bytes accounted for in this block
        int b = 0;

        if (continuationLength != 0)
        {
            // Stage the bytes left over from the previous block in front of
            // the next record; they reach idBuffer together with it.
            encodedId.Write(idContinuation, 0, continuationLength);
            b += continuationLength;
            writtenContinuation = continuationLength;
            continuationLength = 0;
        }

        // we assume that a continuation will never be longer than ID_BUFFER
        while (reader.HasIDLeft(id, 1) && b < ID_BUFFER)
        {
            string[] currentId = reader.GetID(id).Split(separators);
            if (currentId.Length < 5)
            {
                throw new FormatException("invalid ID format");
            }

            //XXX TODO check if you can use only idBuffer and not also encodedID
            for (int i = 0; i < 4; i++)
            {
                // we skip the first item
                encodedId.Write(Convert.ToUInt16(currentId[i + 1]));
            }
            b += ENCODED_ID_LENGTH;

            // Bytes currently staged in encodedId: the optional continuation
            // followed by the record just written.
            int staged = writtenContinuation + ENCODED_ID_LENGTH;
            byte[] buffer = ((MemoryStream)encodedId.BaseStream).GetBuffer();

            if (b > ID_BUFFER)
            {
                // continuation: only part of the staged bytes fits
                continuationLength = b - ID_BUFFER;
                int firstExceedingByte = staged - continuationLength;
                for (int i = 0; i < continuationLength; i++)
                {
                    idContinuation[i] = buffer[firstExceedingByte + i];
                }
                // firstExceedingByte is also the count of bytes that fit
                idBuffer.Write(buffer, 0, firstExceedingByte);
                b = ID_BUFFER;
            }
            else
            {
                //XXX (translated) after a continuation, are the first bytes
                //of the uint not written? 248.1
                idBuffer.Write(buffer, 0, staged);
            }

            id++;
            encodedId.Seek(0, SeekOrigin.Begin);
            writtenContinuation = 0;
        }

        if (writtenContinuation != 0)
        {
            // The loop never ran, so the staged continuation has not been
            // copied yet; without this the block below would contain stale
            // idBuffer bytes instead of the continuation.
            idBuffer.Write(((MemoryStream)encodedId.BaseStream).GetBuffer(), 0, writtenContinuation);
            writtenContinuation = 0;
        }

        if (b == ID_BUFFER)
        {
            writer.Write(first);
            writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
        }
        else if (b < ID_BUFFER)
        {
            // mark smaller buffer: we have to tell the decoder that this
            // block has a length different than ID_BUFFER
            first += (byte)32;
            writer.Write(first);
            writer.Write(b);
            writer.Write(((MemoryStream)idBuffer.BaseStream).GetBuffer(), 0, b);
        }
    }
    while (continuationLength != 0 && !reader.HasIDLeft(id, 1));
}