/// <summary>
/// Computes one output block (16 words, written at 4-byte strides) from the working
/// state and the supplied counter, using ChaCha-style add/xor/rotate quarter rounds.
/// </summary>
/// <param name="Output">Destination buffer that receives the block.</param>
/// <param name="OutOffset">Index in <paramref name="Output"/> of the first word written.</param>
/// <param name="Counter">Block counter; only elements 0 and 1 are read, and it is not modified here.</param>
private void Transform(byte[] Output, int OutOffset, uint[] Counter)
{
    // Working copy of the 16-word input block. Words 0-11 come from m_wrkState[0..11],
    // words 12-13 from the counter, and words 14-15 from m_wrkState[12..13].
    uint v0 = m_wrkState[0];
    uint v1 = m_wrkState[1];
    uint v2 = m_wrkState[2];
    uint v3 = m_wrkState[3];
    uint v4 = m_wrkState[4];
    uint v5 = m_wrkState[5];
    uint v6 = m_wrkState[6];
    uint v7 = m_wrkState[7];
    uint v8 = m_wrkState[8];
    uint v9 = m_wrkState[9];
    uint v10 = m_wrkState[10];
    uint v11 = m_wrkState[11];
    uint v12 = Counter[0];
    uint v13 = Counter[1];
    uint v14 = m_wrkState[12];
    uint v15 = m_wrkState[13];

    // Each pass performs a column round followed by a diagonal round, so the round
    // budget is consumed two at a time. The loop tests for exactly zero, which means
    // Rounds is expected to be even.
    for (int rnd = Rounds; rnd != 0; rnd -= 2)
    {
        // column round: (0,4,8,12)
        v0 += v4; v12 = IntUtils.RotateLeft(v12 ^ v0, 16);
        v8 += v12; v4 = IntUtils.RotateLeft(v4 ^ v8, 12);
        v0 += v4; v12 = IntUtils.RotateLeft(v12 ^ v0, 8);
        v8 += v12; v4 = IntUtils.RotateLeft(v4 ^ v8, 7);
        // column round: (1,5,9,13)
        v1 += v5; v13 = IntUtils.RotateLeft(v13 ^ v1, 16);
        v9 += v13; v5 = IntUtils.RotateLeft(v5 ^ v9, 12);
        v1 += v5; v13 = IntUtils.RotateLeft(v13 ^ v1, 8);
        v9 += v13; v5 = IntUtils.RotateLeft(v5 ^ v9, 7);
        // column round: (2,6,10,14)
        v2 += v6; v14 = IntUtils.RotateLeft(v14 ^ v2, 16);
        v10 += v14; v6 = IntUtils.RotateLeft(v6 ^ v10, 12);
        v2 += v6; v14 = IntUtils.RotateLeft(v14 ^ v2, 8);
        v10 += v14; v6 = IntUtils.RotateLeft(v6 ^ v10, 7);
        // column round: (3,7,11,15)
        v3 += v7; v15 = IntUtils.RotateLeft(v15 ^ v3, 16);
        v11 += v15; v7 = IntUtils.RotateLeft(v7 ^ v11, 12);
        v3 += v7; v15 = IntUtils.RotateLeft(v15 ^ v3, 8);
        v11 += v15; v7 = IntUtils.RotateLeft(v7 ^ v11, 7);

        // diagonal round: (0,5,10,15)
        v0 += v5; v15 = IntUtils.RotateLeft(v15 ^ v0, 16);
        v10 += v15; v5 = IntUtils.RotateLeft(v5 ^ v10, 12);
        v0 += v5; v15 = IntUtils.RotateLeft(v15 ^ v0, 8);
        v10 += v15; v5 = IntUtils.RotateLeft(v5 ^ v10, 7);
        // diagonal round: (1,6,11,12)
        v1 += v6; v12 = IntUtils.RotateLeft(v12 ^ v1, 16);
        v11 += v12; v6 = IntUtils.RotateLeft(v6 ^ v11, 12);
        v1 += v6; v12 = IntUtils.RotateLeft(v12 ^ v1, 8);
        v11 += v12; v6 = IntUtils.RotateLeft(v6 ^ v11, 7);
        // diagonal round: (2,7,8,13)
        v2 += v7; v13 = IntUtils.RotateLeft(v13 ^ v2, 16);
        v8 += v13; v7 = IntUtils.RotateLeft(v7 ^ v8, 12);
        v2 += v7; v13 = IntUtils.RotateLeft(v13 ^ v2, 8);
        v8 += v13; v7 = IntUtils.RotateLeft(v7 ^ v8, 7);
        // diagonal round: (3,4,9,14)
        v3 += v4; v14 = IntUtils.RotateLeft(v14 ^ v3, 16);
        v9 += v14; v4 = IntUtils.RotateLeft(v4 ^ v9, 12);
        v3 += v4; v14 = IntUtils.RotateLeft(v14 ^ v3, 8);
        v9 += v14; v4 = IntUtils.RotateLeft(v4 ^ v9, 7);
    }

    // Feed-forward: add each original input word back onto the mixed word and
    // serialize the sums consecutively, four bytes apart.
    IntUtils.Le32ToBytes(v0 + m_wrkState[0], Output, OutOffset);
    IntUtils.Le32ToBytes(v1 + m_wrkState[1], Output, OutOffset + 4);
    IntUtils.Le32ToBytes(v2 + m_wrkState[2], Output, OutOffset + 8);
    IntUtils.Le32ToBytes(v3 + m_wrkState[3], Output, OutOffset + 12);
    IntUtils.Le32ToBytes(v4 + m_wrkState[4], Output, OutOffset + 16);
    IntUtils.Le32ToBytes(v5 + m_wrkState[5], Output, OutOffset + 20);
    IntUtils.Le32ToBytes(v6 + m_wrkState[6], Output, OutOffset + 24);
    IntUtils.Le32ToBytes(v7 + m_wrkState[7], Output, OutOffset + 28);
    IntUtils.Le32ToBytes(v8 + m_wrkState[8], Output, OutOffset + 32);
    IntUtils.Le32ToBytes(v9 + m_wrkState[9], Output, OutOffset + 36);
    IntUtils.Le32ToBytes(v10 + m_wrkState[10], Output, OutOffset + 40);
    IntUtils.Le32ToBytes(v11 + m_wrkState[11], Output, OutOffset + 44);
    IntUtils.Le32ToBytes(v12 + Counter[0], Output, OutOffset + 48);
    IntUtils.Le32ToBytes(v13 + Counter[1], Output, OutOffset + 52);
    IntUtils.Le32ToBytes(v14 + m_wrkState[12], Output, OutOffset + 56);
    IntUtils.Le32ToBytes(v15 + m_wrkState[13], Output, OutOffset + 60);
}
/// <summary>
/// XORs <paramref name="Input"/> with generated keystream and writes the result to
/// <paramref name="Output"/>. Runs on the calling thread, or splits the work into
/// equal block-aligned chunks processed in parallel when parallel mode is enabled
/// and the request is at least m_parallelBlockSize bytes.
/// </summary>
/// <param name="Input">Source bytes.</param>
/// <param name="InOffset">Index of the first byte read from <paramref name="Input"/>.</param>
/// <param name="Output">Destination buffer; receives keystream XOR input.</param>
/// <param name="OutOffset">Index of the first byte written to <paramref name="Output"/>.</param>
/// <param name="Length">Requested number of bytes to process.</param>
private void Process(byte[] Input, int InOffset, byte[] Output, int OutOffset, int Length)
{
    // The length is clamped to the shorter remaining span only when it reaches the
    // end of BOTH buffers; in every other case the caller's Length is used unchanged.
    int prcSze = (Length >= Input.Length - InOffset) && Length >= Output.Length - OutOffset ?
        IntUtils.Min(Input.Length - InOffset, Output.Length - OutOffset) :
        Length;

    if (!m_isParallel || prcSze < m_parallelBlockSize)
    {
        // generate the keystream directly into the output
        Generate(prcSze, m_ctrVector, Output, OutOffset);

        // xor the whole blocks with the block routine
        int sze = prcSze - (prcSze % BLOCK_SIZE);

        if (sze != 0)
        {
            IntUtils.XORBLK(Input, InOffset, Output, OutOffset, sze);
        }

        // xor any trailing partial block byte by byte
        for (int i = sze; i < prcSze; ++i)
        {
            Output[i + OutOffset] ^= Input[i + InOffset];
        }
    }
    else
    {
        // parallel CTR processing: every worker gets cnkSize bytes (a whole number of blocks)
        int cnkSize = (prcSze / BLOCK_SIZE / ProcessorCount) * BLOCK_SIZE;
        int rndSize = cnkSize * ProcessorCount;
        int subSize = (cnkSize / BLOCK_SIZE);
        // receives the counter left behind by the last worker
        uint[] tmpCtr = new uint[m_ctrVector.Length];

        System.Threading.Tasks.Parallel.For(0, m_processorCount, i =>
        {
            // thread level counter, offset from the shared counter by the number
            // of blocks handled by the lower-indexed workers
            uint[] thdCtr = Increase(m_ctrVector, subSize * i);
            // create the keystream at this worker's offset
            this.Generate(cnkSize, thdCtr, Output, OutOffset + (i * cnkSize));
            // xor with the input at the same offset
            IntUtils.XORBLK(Input, InOffset + (i * cnkSize), Output, OutOffset + (i * cnkSize), cnkSize);

            // store the last counter (presumably advanced by Generate past this chunk)
            if (i == m_processorCount - 1)
            {
                Array.Copy(thdCtr, 0, tmpCtr, 0, thdCtr.Length);
            }
        });

        // last block processing: bytes left over after the evenly divided chunks
        if (rndSize < prcSze)
        {
            // equals prcSze % rndSize whenever rndSize > 0, without dividing by a zero rndSize
            int fnlSize = prcSze - rndSize;
            // The keystream must be written where the tail is XORed below, i.e. relative
            // to OutOffset; writing at rndSize alone clobbers already-processed output
            // and leaves the real tail XORed with stale bytes when OutOffset != 0.
            Generate(fnlSize, tmpCtr, Output, OutOffset + rndSize);

            for (int i = 0; i < fnlSize; ++i)
            {
                Output[i + OutOffset + rndSize] ^= Input[i + InOffset + rndSize];
            }
        }

        // copy the last counter position to the class variable
        Array.Copy(tmpCtr, 0, m_ctrVector, 0, m_ctrVector.Length);
    }
}