/* Encodes sequence bases starting at the given index (i) until the bit
 * buffer is full or the sequence ends, and writes the result to the given
 * WriteBitShepherd. Updates i according to its progression.
 *
 * Every token starts with a 7-bit code followed by one bit taken from
 * GetRandomBit() (the purpose of that bit is not visible in this method):
 *   - Encode(a, b, c) : three consecutive bases packed into the 7-bit code.
 *   - 0               : short run; followed by a 3-bit index into Bases and
 *                       a 5-bit (length - 4) field, i.e. runs of 4..35.
 *   - 127             : long run; followed by a 3-bit index into Bases and
 *                       a 13-bit (length - 4) field, i.e. runs of 36..8195.
 *   - 126             : end-of-sequence marker.
 *
 * When fewer than 4 bases remain, the leftover 1..3 bases are padded with
 * 'N' to a full triplet, written as a normal token, and followed by the
 * end marker. The buffer limit is checked before each token is written, so
 * the last token of a call may go past BIT_BUFFER.
 *
 * bits   : destination bit writer.
 * i      : index of the next base to encode; advanced past everything that
 *          was written.
 * reader : source of the sequence (HasSeqLeft / GetSeq).
 */
void EncodeSeq(WriteBitShepherd bits, ref long i, FastqReader reader)
{
    const int MinRunLength = 4;
    const int ShortRunLengthBits = 5;
    const int LongRunLengthBits = 13;
    // Longest run whose (length - MinRunLength) still fits the 5-bit field: 35.
    const int MaxShortRunLength = MinRunLength + (1 << ShortRunLengthBits) - 1;
    // Longest run whose (length - MinRunLength) still fits the 13-bit field: 8195.
    // The scan used to be capped at 8199, which let (length - 4) reach 8195
    // and overflow the 13-bit field for very long runs.
    const int MaxLongRunLength = MinRunLength + (1 << LongRunLengthBits) - 1;

    int writtenBits = 0;
    while (reader.HasSeqLeft(i, 4) && writtenBits < BIT_BUFFER)
    {
        // Check for a run of at least four identical bases.
        if (reader.GetSeq(i) == reader.GetSeq(i + 3)
            && reader.GetSeq(i) == reader.GetSeq(i + 2)
            && reader.GetSeq(i) == reader.GetSeq(i + 1))
        {
            long j = i + MinRunLength;
            long l = i + MaxLongRunLength;
            // Extend the run until it breaks, the sequence ends, or the
            // longest encodable run is reached.
            while (j < l && reader.HasSeqLeft(j, 1) && reader.GetSeq(j) == reader.GetSeq(j - 1))
            {
                j++;
            }

            int length = (int)(j - i);
            if (length > MaxShortRunLength)
            {
                bits.Write(127, 7); // flag for long run length
                bits.Write(GetRandomBit(), 1);
                bits.Write(Array.IndexOf(Bases, reader.GetSeq(j - 1)), 3);
                bits.Write(length - MinRunLength, LongRunLengthBits);
                writtenBits += 8 + 3 + LongRunLengthBits;
            }
            else
            {
                bits.Write(0, 7); // flag for short run length
                bits.Write(GetRandomBit(), 1);
                bits.Write(Array.IndexOf(Bases, reader.GetSeq(j - 1)), 3);
                bits.Write(length - MinRunLength, ShortRunLengthBits);
                writtenBits += 8 + 3 + ShortRunLengthBits;
            }

            i = j;
        }
        else
        {
            // Plain triplet: three bases packed into one 7-bit code.
            bits.Write(Encode(reader.GetSeq(i), reader.GetSeq(i + 1), reader.GetSeq(i + 2)), 7);
            bits.Write(GetRandomBit(), 1);
            i += 3;
            writtenBits += 8;
        }
    }

    bool end = false;
    string last = "";
    if (!reader.HasSeqLeft(i, 4))
    {
        // Fewer than four bases remain: collect the final 0..3 bases.
        while (reader.HasSeqLeft(i, 1))
        {
            last += reader.GetSeq(i++);
        }

        end = true;
    }

    if (last != "")
    {
        // Pad the partial triplet with 'N' so it can be encoded like any other.
        last = last.PadRight(3, 'N');
        bits.Write(Encode(last[0], last[1], last[2]), 7);
        bits.Write(GetRandomBit(), 1);
    }

    if (end)
    {
        bits.Write(126, 7); // mark end of sequences blocks
        bits.Write(GetRandomBit(), 1);
    }
}
/* Encodes sequence bases starting at the given index (i) into the given
 * byte buffer until BUFFER bytes have been produced or the sequence ends,
 * then emits the block through the given BinaryWriter. Updates i according
 * to its progression.
 *
 * Payload bytes:
 *   - a run of 4..19 identical bases becomes
 *       128 + ((runLength - 4) << 3) + index of the base in Bases
 *   - any other three bases become the byte returned by Encode(a, b, c)
 *   - 1..3 leftover bases at the end of the sequence are padded with 'N'
 *     to a triplet and encoded with Encode as one extra trailing byte.
 *
 * Block layout:
 *   - exactly BUFFER bytes and no trailing byte: header byte 128, then the
 *     whole buffer
 *   - otherwise: header byte 160 (128 + 32), the payload length as an int,
 *     then the payload bytes.
 */
void EncodeSeq(byte[] buffer, ref long i, BinaryWriter writer, FastqReader reader)
{
    // Header flags: the first byte starts with 1 when a sequence block
    // follows; the extra flag tells the decoder that the block length
    // differs from BUFFER and is written explicitly.
    const byte SequenceBlockFlag = 128;
    const byte ExplicitLengthFlag = 32;

    int count = 0;
    while (reader.HasSeqLeft(i, 4) && count < BUFFER)
    {
        bool startsRun = reader.GetSeq(i) == reader.GetSeq(i + 3)
            && reader.GetSeq(i) == reader.GetSeq(i + 2)
            && reader.GetSeq(i) == reader.GetSeq(i + 1);

        if (!startsRun)
        {
            // Plain triplet: consume three bases, one at a time.
            var firstBase = reader.GetSeq(i++);
            var secondBase = reader.GetSeq(i++);
            var thirdBase = reader.GetSeq(i++);
            buffer[count++] = Encode(firstBase, secondBase, thirdBase);
            continue;
        }

        // Run-length token: extend the run up to 19 bases in total.
        long runEnd = i + 4;
        long runLimit = i + 19;
        while (runEnd < runLimit
            && reader.HasSeqLeft(runEnd, 1)
            && reader.GetSeq(runEnd) == reader.GetSeq(i))
        {
            runEnd++;
        }

        long extraLength = runEnd - i - 4;
        int baseIndex = Array.IndexOf(Bases, reader.GetSeq(i));
        buffer[count++] = (byte)(128 + (extraLength << 3) + baseIndex);
        i = runEnd;
    }

    // Collect the 0..3 bases left once fewer than four remain.
    string tail = "";
    if (!reader.HasSeqLeft(i, 4))
    {
        while (reader.HasSeqLeft(i, 1))
        {
            tail += reader.GetSeq(i++);
        }
    }

    bool hasTailByte = tail.Length > 0;
    byte tailByte = 0;
    if (hasTailByte)
    {
        tail = tail.PadRight(3, 'N');
        tailByte = Encode(tail[0], tail[1], tail[2]);
    }

    if (count == BUFFER && !hasTailByte)
    {
        // Full block: the decoder already knows its length.
        writer.Write(SequenceBlockFlag);
        writer.Write(buffer);
        return;
    }

    // Partial block (or full block plus a trailing byte): write the length.
    int payloadLength = hasTailByte ? count + 1 : count;
    writer.Write((byte)(SequenceBlockFlag + ExplicitLengthFlag));
    writer.Write(payloadLength);
    writer.Write(buffer, 0, count);
    if (hasTailByte)
    {
        writer.Write(tailByte);
    }
}