private const int DSCALE = 1; // storage descaling, needed to make the error fit byte public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8SegmentInfo[] segmentInfos, Vp8EncProba proba) { const int numBlocks = 16; Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment]; int lambda = dqm.LambdaI16; int tlambda = dqm.TLambda; Span <byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span <int> scratch = it.Scratch3; var rdTmp = new Vp8ModeScore(); var res = new Vp8Residual(); Vp8ModeScore rdCur = rdTmp; Vp8ModeScore rdBest = rd; int mode; bool isFlat = IsFlatSource16(src); rd.ModeI16 = -1; for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { // Scratch buffer. Span <byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); rdCur.ModeI16 = mode; // Reconstruct. rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode); // Measure RD-score. rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst); rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.R = it.GetCostLuma16(rdCur, proba, res); if (isFlat) { // Refine the first impression (which was in pixel space). isFlat = IsFlat(rdCur.YAcLevels, numBlocks, WebpConstants.FlatnessLimitI16); if (isFlat) { // Block is very flat. We put emphasis on the distortion being very low! rdCur.D *= 2; rdCur.SD *= 2; } } // Since we always examine Intra16 first, we can overwrite *rd directly. rdCur.SetRdScore(lambda); if (mode == 0 || rdCur.Score < rdBest.Score) { Vp8ModeScore tmp = rdCur; rdCur = rdBest; rdBest = tmp; it.SwapOut(); } } if (rdBest != rd) { rd = rdBest; } // Finalize score for mode decision. rd.SetRdScore(dqm.LambdaMode); it.SetIntra16Mode(rd.ModeI16); // We have a blocky macroblock (only DCs are non-zero) with fairly high // distortion, record max delta so we can later adjust the minimal filtering // strength needed to smooth these blocks out. if ((rd.Nz & 0x100ffff) == 0x1000000 && rd.D > dqm.MinDisto) { dqm.StoreMaxDelta(rd.YDcLevels); } }
private static void RunVp8Sse16X16Test() { // arrange byte[] a = { 154, 154, 151, 151, 149, 148, 151, 157, 163, 163, 154, 132, 102, 98, 104, 108, 107, 104, 104, 103, 101, 106, 123, 119, 170, 171, 172, 171, 168, 175, 171, 173, 151, 151, 149, 150, 147, 147, 146, 159, 164, 165, 154, 129, 92, 90, 101, 105, 104, 103, 104, 101, 100, 105, 123, 117, 172, 172, 172, 168, 170, 177, 170, 175, 151, 149, 150, 150, 147, 147, 156, 161, 161, 161, 151, 126, 93, 90, 102, 107, 104, 103, 104, 101, 104, 104, 122, 117, 172, 172, 170, 168, 170, 177, 172, 175, 150, 149, 152, 151, 148, 151, 160, 159, 157, 157, 148, 133, 96, 90, 103, 107, 104, 104, 101, 100, 102, 102, 121, 117, 170, 170, 169, 171, 171, 179, 173, 175, 149, 151, 152, 151, 148, 154, 162, 157, 154, 154, 151, 132, 92, 89, 101, 108, 104, 102, 101, 101, 103, 103, 123, 118, 171, 168, 177, 173, 171, 178, 172, 176, 152, 152, 152, 151, 154, 162, 161, 155, 149, 157, 156, 129, 92, 87, 101, 107, 102, 100, 107, 100, 101, 102, 123, 118, 170, 175, 182, 172, 171, 179, 173, 175, 152, 151, 154, 155, 160, 162, 161, 153, 150, 156, 153, 129, 92, 91, 102, 106, 100, 109, 115, 99, 101, 102, 124, 120, 171, 179, 178, 172, 171, 181, 171, 173, 154, 154, 154, 162, 160, 158, 156, 152, 153, 157, 151, 128, 86, 86, 102, 105, 102, 122, 114, 99, 101, 102, 125, 120, 178, 173, 177, 172, 171, 180, 172, 173, 154, 152, 158, 163, 150, 148, 148, 156, 151, 158, 152, 129, 87, 87, 101, 105, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 151, 165, 156, 141, 137, 146, 158, 152, 159, 152, 133, 90, 88, 99, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 160, 164, 150, 126, 127, 149, 159, 155, 161, 153, 131, 84, 86, 97, 103, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 157, 167, 157, 137, 102, 128, 155, 161, 157, 159, 154, 134, 84, 82, 97, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 163, 163, 150, 113, 78, 132, 156, 162, 159, 160, 154, 132, 83, 78, 91, 97, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 163, 157, 137, 80, 78, 131, 154, 163, 157, 159, 149, 131, 82, 77, 94, 100, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 159, 151, 108, 72, 88, 132, 156, 162, 159, 157, 151, 130, 79, 78, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 151, 130, 82, 82, 89, 134, 154, 161, 161, 157, 152, 129, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204 }; byte[] b = { 150, 150, 150, 150, 146, 149, 152, 154, 164, 166, 154, 132, 99, 92, 106, 112, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 150, 150, 150, 150, 146, 149, 152, 154, 161, 164, 151, 130, 93, 86, 100, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 150, 150, 150, 150, 146, 149, 152, 154, 158, 161, 148, 127, 93, 86, 100, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 150, 150, 150, 150, 146, 149, 152, 154, 156, 159, 146, 125, 99, 92, 106, 112, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 148, 148, 148, 148, 149, 158, 162, 159, 155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 151, 151, 151, 151, 152, 159, 161, 156, 155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 154, 154, 154, 156, 161, 159, 152, 155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 156, 156, 156, 156, 159, 162, 158, 149, 155, 155, 153, 129, 94, 87, 101, 106, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 152, 153, 157, 162, 150, 149, 149, 151, 155, 160, 150, 131, 91, 90, 104, 104, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 152, 156, 158, 157, 140, 137, 145, 159, 155, 160, 150, 131, 89, 88, 102, 101, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 153, 161, 160, 149, 118, 128, 147, 162, 155, 160, 150, 131, 86, 85, 99, 98, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 154, 165, 161, 144, 96, 128, 154, 159, 155, 160, 150, 131, 83, 82, 97, 96, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 161, 160, 149, 105, 78, 127, 156, 170, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 160, 160, 133, 85, 81, 129, 155, 167, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 156, 147, 109, 76, 85, 130, 153, 163, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 152, 128, 87, 83, 88, 132, 152, 159, 156, 156, 154, 130, 81, 77, 95, 102, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204 }; int expected = 2063; // act int actual = LossyUtils.Vp8_Sse16X16(a, b); // assert Assert.Equal(expected, actual); }
// Refine intra16/intra4 sub-modes based on distortion only (not rate). public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] segmentInfos, Vp8ModeScore rd, bool tryBothModes, bool refineUvMode, int mbHeaderLimit) { long bestScore = Vp8ModeScore.MaxCost; int nz = 0; int mode; bool isI16 = tryBothModes || it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16; Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment]; // Some empiric constants, of approximate order of magnitude. const int lambdaDi16 = 106; const int lambdaDi4 = 11; const int lambdaDuv = 120; long scoreI4 = dqm.I4Penalty; long i4BitSum = 0; long bitLimit = tryBothModes ? mbHeaderLimit : Vp8ModeScore.MaxCost; // no early-out allowed. if (isI16) { int bestMode = -1; Span <byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span <byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]); long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit) { continue; } if (score < bestScore) { bestMode = mode; bestScore = score; } } if (it.X == 0 || it.Y == 0) { // Avoid starting a checkerboard resonance from the border. See bug #432 of libwebp. if (IsFlatSource16(src)) { bestMode = it.X == 0 ? 0 : 2; tryBothModes = false; // Stick to i16. } } it.SetIntra16Mode(bestMode); // We'll reconstruct later, if i16 mode actually gets selected. } // Next, evaluate Intra4. if (tryBothModes || !isI16) { // We don't evaluate the rate here, but just account for it through a // constant penalty (i4 mode usually needs more bits compared to i16). isI16 = false; it.StartI4(); do { int bestI4Mode = -1; long bestI4Score = Vp8ModeScore.MaxCost; Span <byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]); short[] modeCosts = it.GetCostModeI4(rd.ModesI4); it.MakeIntra4Preds(); for (mode = 0; mode < WebpConstants.NumBModes; ++mode) { Span <byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); if (score < bestI4Score) { bestI4Mode = mode; bestI4Score = score; } } i4BitSum += modeCosts[bestI4Mode]; rd.ModesI4[it.I4] = (byte)bestI4Mode; scoreI4 += bestI4Score; if (scoreI4 >= bestScore || i4BitSum > bitLimit) { // Intra4 won't be better than Intra16. Bail out and pick Intra16. isI16 = true; break; } else { // Reconstruct partial block inside YuvOut2 buffer Span <byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]); nz |= ReconstructIntra4(it, dqm, rd.YAcLevels.AsSpan(it.I4 * 16, 16), src, tmpDst, bestI4Mode) << it.I4; } }while (it.RotateI4(it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc))); } // Final reconstruction, depending on which mode is selected. if (!isI16) { it.SetIntra4Mode(rd.ModesI4); it.SwapOut(); bestScore = scoreI4; } else { int intra16Mode = it.Preds[it.PredIdx]; nz = ReconstructIntra16(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.YOffEnc), intra16Mode); } // ... and UV! if (refineUvMode) { int bestMode = -1; long bestUvScore = Vp8ModeScore.MaxCost; Span <byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc); for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span <byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]); long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); if (score < bestUvScore) { bestMode = mode; bestUvScore = score; } } it.SetIntraUvMode(bestMode); } nz |= ReconstructUv(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc), it.CurrentMacroBlockInfo.UvMode); rd.Nz = (uint)nz; rd.Score = bestScore; }