//ASCIIEncoding ae = new ASCIIEncoding();

/* Main compression driver.
 * Alternates between EncodeSeq and EncodeQual so that the sequence position
 * never runs ahead of the quality position by more than one encoder call:
 * sequences are encoded while their position is not ahead of the quality
 * position, otherwise qualities are encoded. Stops when neither branch has
 * anything left to do, then closes the bit writer wrapping sequenceWriter.
 *
 * reader         - source of sequence and quality data.
 * sequenceWriter - destination for the bit-packed sequence stream.
 * qualityWriter  - destination handed to EncodeQual.
 */
public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
{
    long sequencePos = 0;
    long qualityPos = 0;
    byte[] qualityBuffer = new byte[BUFFER];
    WriteBitShepherd bitWriter = new WriteBitShepherd(sequenceWriter);

    bool workRemaining = true;
    while (workRemaining)
    {
        // Progress indicator; invoked once per pass, including the final one.
        Spin("Encoding...");

        if (sequencePos <= qualityPos && reader.HasSeqLeft(sequencePos, 1))
        {
            // Sequences are level with or behind the qualities: advance them.
            EncodeSeq(bitWriter, ref sequencePos, reader);
        }
        else if (reader.HasQLeft(qualityPos, 1))
        {
            // Otherwise let the qualities catch up.
            EncodeQual(qualityBuffer, ref qualityPos, qualityWriter, reader);
        }
        else
        {
            workRemaining = false;
        }
    }

    bitWriter.Close();
}
/* Encodes sequence bases starting at index i into the given WriteBitShepherd
 * until roughly BIT_BUFFER bits have been written or fewer than four bases
 * remain. Updates i to the first base that has not been encoded.
 *
 * Every token starts with a 7-bit code followed by one bit from GetRandomBit():
 *   - triplet:   Encode(b0, b1, b2) in 7 bits + 1 bit                  (8 bits)
 *   - short run: 0   + 1 bit + base index (3 bits) + length-4 (5 bits)  (16 bits)
 *   - long run:  127 + 1 bit + base index (3 bits) + length-4 (13 bits) (24 bits)
 *   - end mark:  126 + 1 bit                                           (8 bits)
 * NOTE(review): this presumes Encode never returns 0, 126 or 127 for a real
 * triplet - confirm against Encode and the decoder.
 *
 * A run is four or more identical consecutive bases. Runs are capped at 8195
 * bases so that length-4 always fits the 13-bit field (max 8191); a longer run
 * is simply emitted as several consecutive tokens.
 *
 * When fewer than four bases remain, the 1-3 leftover bases (if any) are padded
 * with 'N' to a full triplet and the end mark is written.
 *
 * The bit budget is only checked before each token, so the total written may
 * exceed BIT_BUFFER by up to one token plus the tail.
 */
void EncodeSeq(WriteBitShepherd bits, ref long i, FastqReader reader)
{
    const int MinRun = 4;                  // shortest run that is run-length encoded
    const int ShortRunMax = MinRun + 31;   // largest length whose (length - 4) fits 5 bits
    const int LongRunMax = MinRun + 8191;  // largest length whose (length - 4) fits 13 bits
    const int ShortRunFlag = 0;
    const int EndFlag = 126;
    const int LongRunFlag = 127;

    int writtenBits = 0;

    while (reader.HasSeqLeft(i, 4) && writtenBits < BIT_BUFFER)
    {
        var first = reader.GetSeq(i);

        // Check for a run-length run: four identical bases in a row.
        if (first == reader.GetSeq(i + 3) &&
            first == reader.GetSeq(i + 2) &&
            first == reader.GetSeq(i + 1))
        {
            long runEnd = i + MinRun;
            long runLimit = i + LongRunMax; // keeps (length - MinRun) within 13 bits

            while (runEnd < runLimit &&
                   reader.HasSeqLeft(runEnd, 1) &&
                   reader.GetSeq(runEnd) == reader.GetSeq(runEnd - 1))
            {
                runEnd++;
            }

            int length = (int)(runEnd - i);
            bool isLongRun = length > ShortRunMax;

            bits.Write(isLongRun ? LongRunFlag : ShortRunFlag, 7);
            bits.Write(GetRandomBit(), 1);
            bits.Write(Array.IndexOf(Bases, first), 3);
            bits.Write(length - MinRun, isLongRun ? 13 : 5);
            writtenBits += isLongRun ? 24 : 16;

            i = runEnd;
        }
        else
        {
            // Plain triplet: three bases packed into one 7-bit code.
            bits.Write(Encode(first, reader.GetSeq(i + 1), reader.GetSeq(i + 2)), 7);
            bits.Write(GetRandomBit(), 1);
            i += 3;
            writtenBits += 8;
        }
    }

    bool end = false;
    string last = "";

    if (!reader.HasSeqLeft(i, 4))
    {
        // Could still have 1, 2 or 3 bases left.
        while (reader.HasSeqLeft(i, 1))
        {
            last += reader.GetSeq(i++);
        }
        end = true;
    }

    if (last != "")
    {
        // Pad the leftover bases to a full triplet.
        last = last.PadRight(3, 'N');
        bits.Write(Encode(last[0], last[1], last[2]), 7);
        bits.Write(GetRandomBit(), 1);
        writtenBits += 8;
    }

    if (end)
    {
        // Mark the end of the sequence blocks.
        bits.Write(EndFlag, 7);
        bits.Write(GetRandomBit(), 1);
        writtenBits += 8;
    }
}