//ASCIIEncoding ae = new ASCIIEncoding();

/* Top-level compression loop. Sequences are pushed through a WriteBitShepherd
 * wrapped around sequenceWriter; qualities go straight to qualityWriter.
 * A sequence block is encoded whenever the sequence index has not overtaken
 * the quality index, which keeps the two streams advancing together.
 * The loop stops once neither sequences nor qualities are left, and the
 * bit shepherd is closed afterwards. */
public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
{
    long sequencePos = 0;
    long qualityPos = 0;
    byte[] qualityBuffer = new byte[BUFFER];
    WriteBitShepherd sequenceBits = new WriteBitShepherd(sequenceWriter);

    bool finished = false;
    while (!finished)
    {
        Spin("Encoding...");

        if (sequencePos <= qualityPos && reader.HasSeqLeft(sequencePos, 1))
        {
            // Sequences are behind (or level with) the qualities: catch up.
            EncodeSeq(sequenceBits, ref sequencePos, reader);
        }
        else if (reader.HasQLeft(qualityPos, 1))
        {
            EncodeQual(qualityBuffer, ref qualityPos, qualityWriter, reader);
        }
        else
        {
            // Nothing left in either stream.
            finished = true;
        }
    }

    sequenceBits.Close();
}
/* Top-level compression loop for a single output writer.
 * On every pass exactly one kind of block is emitted, chosen in this order:
 *   1. an ID, when ID encoding is enabled and the ID counter has not passed
 *      seqI / length;
 *   2. a sequence block, when the sequence index has not overtaken the
 *      quality index (or qualities are not being encoded at all);
 *   3. a quality block, when quality encoding is enabled.
 * The loop ends when none of the three applies; a newline is then written
 * to standard error. */
public void Compress(FastqReader reader, BinaryWriter writer)
{
    long sequencePos = 0;
    long qualityPos = 0;
    int nextId = 0;
    byte[] scratch = new byte[BUFFER];

    ChooseIddc(reader, writer);

    for (;;)
    {
        Spin("Encoding...");

        bool idTurn = encodeIds
            && nextId <= sequencePos / length
            && reader.HasIDLeft(nextId, 1);
        if (idTurn)
        {
            iddc.EncodeId(ref nextId);
            continue;
        }

        bool sequenceTurn = (sequencePos <= qualityPos || !encodeQualities)
            && reader.HasSeqLeft(sequencePos, 1);
        if (sequenceTurn)
        {
            EncodeSeq(scratch, ref sequencePos, writer, reader);
            continue;
        }

        bool qualityTurn = encodeQualities && reader.HasQLeft(qualityPos, 1);
        if (!qualityTurn)
        {
            break;
        }

        EncodeQual(scratch, ref qualityPos, writer, reader);
    }

    Console.Error.WriteLine();
}
/* Top-level compression loop writing sequences and qualities to two
 * separate writers. A sequence block is encoded whenever the sequence index
 * has not overtaken the quality index; otherwise a quality block is encoded.
 * The loop ends when both streams are exhausted, after which a newline is
 * written to standard error. */
//public void Compress(FastqReader reader, BinaryWriter writer)
public void Compress(FastqReader reader, BinaryWriter sequenceWriter, BinaryWriter qualityWriter)
{
    long sequencePos = 0;
    long qualityPos = 0;
    byte[] scratch = new byte[BUFFER];

    for (;;)
    {
        Spin("Encoding...");

        bool sequenceTurn = sequencePos <= qualityPos && reader.HasSeqLeft(sequencePos, 1);
        if (sequenceTurn)
        {
            EncodeSeq(scratch, ref sequencePos, sequenceWriter, reader);
            continue;
        }

        if (!reader.HasQLeft(qualityPos, 1))
        {
            // Neither stream has anything left to encode.
            break;
        }

        EncodeQual(scratch, ref qualityPos, qualityWriter, reader);
    }

    Console.Error.WriteLine();
}
/* Encodes sequence data starting at index i and writes it to the given
 * WriteBitShepherd, stopping once at least BIT_BUFFER bits have been written
 * in this call or fewer than four bases remain. i is advanced past every
 * base that was consumed.
 *
 * Every token starts with a 7-bit code followed by one bit taken from
 * GetRandomBit():
 *   - Encode(a, b, c)  : three literal bases (8 bits in total);
 *   - 0                : short run, followed by a 3-bit base index and
 *                        (runLength - 4) in 5 bits (16 bits in total);
 *   - 127              : long run, followed by a 3-bit base index and
 *                        (runLength - 4) in 13 bits (24 bits in total);
 *   - 126              : end of the sequence blocks.
 *
 * When fewer than four bases remain, the last 1-3 bases are padded with 'N'
 * to a full triple, written as a literal token, and followed by the end
 * marker. */
void EncodeSeq(WriteBitShepherd bits, ref long i, FastqReader reader)
{
    // A run is only recognised once four identical bases are seen.
    const int MinRunLength = 4;
    // Longest run whose (length - 4) still fits the 5-bit short field.
    const int MaxShortRunLength = 35;
    // Longest run whose (length - 4) still fits the 13-bit long field:
    // 8191 + 4. A larger cap would overflow the field; longer runs are
    // simply split and continued by the next loop iteration.
    const int MaxLongRunLength = 8195;

    int writtenBits = 0;
    while (reader.HasSeqLeft(i, 4) && writtenBits < BIT_BUFFER)
    {
        bool startsRun = reader.GetSeq(i) == reader.GetSeq(i + 3)
            && reader.GetSeq(i) == reader.GetSeq(i + 2)
            && reader.GetSeq(i) == reader.GetSeq(i + 1);

        if (startsRun)
        {
            // Extend the run as far as it goes, up to the encodable maximum.
            long runEnd = i + MinRunLength;
            long runLimit = i + MaxLongRunLength;
            while (runEnd < runLimit
                && reader.HasSeqLeft(runEnd, 1)
                && reader.GetSeq(runEnd) == reader.GetSeq(runEnd - 1))
            {
                runEnd++;
            }

            int runLength = (int)(runEnd - i);
            if (runLength > MaxShortRunLength)
            {
                bits.Write(127, 7); // flag for long run length
                bits.Write(GetRandomBit(), 1);
                bits.Write(Array.IndexOf(Bases, reader.GetSeq(runEnd - 1)), 3);
                bits.Write(runLength - MinRunLength, 13);
                writtenBits += 24;
            }
            else
            {
                bits.Write(0, 7); // flag for short run length
                bits.Write(GetRandomBit(), 1);
                bits.Write(Array.IndexOf(Bases, reader.GetSeq(runEnd - 1)), 3);
                bits.Write(runLength - MinRunLength, 5);
                writtenBits += 16;
            }

            i = runEnd;
        }
        else
        {
            bits.Write(Encode(reader.GetSeq(i), reader.GetSeq(i + 1), reader.GetSeq(i + 2)), 7);
            bits.Write(GetRandomBit(), 1);
            i += 3;
            writtenBits += 8;
        }
    }

    if (reader.HasSeqLeft(i, 4))
    {
        // The bit budget ran out with more data pending; the caller will
        // call again to continue from i.
        return;
    }

    // Fewer than four bases are left: collect the remaining 1, 2 or 3.
    string last = "";
    while (reader.HasSeqLeft(i, 1))
    {
        last += reader.GetSeq(i++);
    }

    if (last != "")
    {
        last = last.PadRight(3, 'N');
        bits.Write(Encode(last[0], last[1], last[2]), 7);
        bits.Write(GetRandomBit(), 1);
    }

    bits.Write(126, 7); // mark end of sequences blocks
    bits.Write(GetRandomBit(), 1);
}
/* Encodes sequence data starting at index i into buffer and writes one block
 * to the given BinaryWriter. Encoding stops when BUFFER bytes have been
 * produced or fewer than four bases remain. i is advanced past every base
 * that was consumed.
 *
 * Each payload byte is either Encode(a, b, c) for three literal bases, or a
 * run byte built as 128 + ((runLength - 4) << 3) + index of the base in Bases,
 * covering runs of 4 to 19 identical bases.
 *
 * Block layout:
 *   - full block : marker byte 128, then the whole buffer;
 *   - sized block: marker byte 128 + 32, an Int32 payload length, the used
 *                  part of the buffer and, if 1-3 bases were left over,
 *                  one more byte holding them padded with 'N'. */
void EncodeSeq(byte[] buffer, ref long i, BinaryWriter writer, FastqReader reader)
{
    // The marker's top bit tells the decoder that this block holds sequences.
    const byte FullBlockMarker = 128;
    // Adding 32 announces a block whose length differs from BUFFER.
    const byte SizedBlockMarker = 128 + 32;

    int used = 0;
    while (reader.HasSeqLeft(i, 4) && used < BUFFER)
    {
        bool startsRun = reader.GetSeq(i) == reader.GetSeq(i + 3)
            && reader.GetSeq(i) == reader.GetSeq(i + 2)
            && reader.GetSeq(i) == reader.GetSeq(i + 1);

        if (!startsRun)
        {
            // Three literal bases packed into a single byte.
            buffer[used] = Encode(reader.GetSeq(i), reader.GetSeq(i + 1), reader.GetSeq(i + 2));
            used++;
            i += 3;
            continue;
        }

        // Run-length case: extend the run up to 19 bases in total.
        long runEnd = i + 4;
        long runLimit = i + 19;
        while (runEnd < runLimit
            && reader.HasSeqLeft(runEnd, 1)
            && reader.GetSeq(runEnd) == reader.GetSeq(i))
        {
            runEnd++;
        }

        long extraBases = runEnd - i - 4;
        buffer[used] = (byte)(128 + (extraBases << 3) + Array.IndexOf(Bases, reader.GetSeq(i)));
        used++;
        i = runEnd;
    }

    // Fewer than four bases left: gather the trailing 1, 2 or 3 of them.
    string tail = "";
    if (!reader.HasSeqLeft(i, 4))
    {
        while (reader.HasSeqLeft(i, 1))
        {
            tail += reader.GetSeq(i);
            i++;
        }
    }

    bool hasTail = tail.Length > 0;
    byte tailByte = 0;
    if (hasTail)
    {
        tail = tail.PadRight(3, 'N');
        tailByte = Encode(tail[0], tail[1], tail[2]);
    }

    if (!hasTail && used == BUFFER)
    {
        writer.Write(FullBlockMarker);
        writer.Write(buffer);
        return;
    }

    int payloadLength = hasTail ? used + 1 : used;
    writer.Write(SizedBlockMarker);
    writer.Write(payloadLength);
    writer.Write(buffer, 0, used);
    if (hasTail)
    {
        writer.Write(tailByte);
    }
}