// Checks LossyUtils.Vp8_Sse4X4 against a precomputed result for two fixed 128-byte buffers.
private static void RunVp8Sse4X4Test()
{
    // Arrange: both buffers are written as four groups of 32 values
    // (presumably one group per row of the encoder's working buffer — confirm against LossyUtils).
    byte[] sourceBlock =
    {
        27, 27, 28, 29, 29, 28, 27, 27,
        27, 28, 28, 29, 29, 28, 28, 27,
        129, 129, 129, 129, 129, 129, 129, 129,
        128, 128, 128, 128, 128, 128, 128, 128,

        27, 27, 27, 27, 27, 27, 27, 27,
        27, 28, 28, 29, 29, 28, 28, 27,
        129, 129, 129, 129, 129, 129, 129, 129,
        128, 128, 128, 128, 128, 128, 128, 128,

        27, 27, 26, 26, 26, 26, 27, 27,
        27, 28, 28, 29, 29, 28, 28, 27,
        129, 129, 129, 129, 129, 129, 129, 129,
        128, 128, 128, 128, 128, 128, 128, 128,

        28, 27, 27, 26, 26, 27, 27, 28,
        27, 28, 28, 29, 29, 28, 28, 27,
        129, 129, 129, 129, 129, 129, 129, 129,
        128, 128, 128, 128, 128, 128, 128, 128
    };

    byte[] referenceBlock =
    {
        26, 26, 26, 26, 28, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28,
        204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204,

        26, 26, 26, 26, 28, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28,
        204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204,

        26, 26, 26, 26, 28, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28,
        204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204,

        26, 26, 26, 26, 28, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28,
        204, 204, 204, 204, 204, 204, 204, 204,
        204, 204, 204, 204, 204, 204, 204, 204
    };

    const int expectedSse = 27;

    // Act
    int computedSse = LossyUtils.Vp8_Sse4X4(sourceBlock, referenceBlock);

    // Assert
    Assert.Equal(expectedSse, computedSse);
}
/// <summary>
/// Refines the intra16/intra4 sub-modes (and optionally the UV mode) of the current macroblock
/// based on distortion only (not rate), then reconstructs the macroblock with the selected modes.
/// </summary>
/// <param name="it">Encoder iterator for the macroblock being refined. Its prediction modes are set and its output buffers are written by the reconstruction calls.</param>
/// <param name="segmentInfos">Segment information, indexed by the current macroblock's segment.</param>
/// <param name="rd">Mode score that receives the chosen intra4 modes (<c>ModesI4</c>), the luma levels, the non-zero flags (<c>Nz</c>) and the best score (<c>Score</c>).</param>
/// <param name="tryBothModes">
/// When <c>true</c>, intra16 is evaluated first and intra4 is evaluated afterwards, with <paramref name="mbHeaderLimit"/> as bit limit.
/// When <c>false</c>, only the macroblock's current type is refined and no bit limit applies.
/// </param>
/// <param name="refineUvMode">When <c>true</c>, the UV prediction mode is re-selected before the UV reconstruction.</param>
/// <param name="mbHeaderLimit">Bit limit for mode costs; only used when <paramref name="tryBothModes"/> is <c>true</c>.</param>
public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] segmentInfos, Vp8ModeScore rd, bool tryBothModes, bool refineUvMode, int mbHeaderLimit)
{
    long bestScore = Vp8ModeScore.MaxCost;
    int nz = 0;
    int mode;
    bool isI16 = tryBothModes || it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
    Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment];

    // Some empiric constants, of approximate order of magnitude.
    const int lambdaDi16 = 106;
    const int lambdaDi4 = 11;
    const int lambdaDuv = 120;
    long scoreI4 = dqm.I4Penalty;
    long i4BitSum = 0;
    long bitLimit = tryBothModes
        ? mbHeaderLimit
        : Vp8ModeScore.MaxCost; // no early-out allowed.

    if (isI16)
    {
        int bestMode = -1;
        Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
        for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
        {
            Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);

            // Widen to long BEFORE multiplying: the score is a long, and a large 16x16 SSE scaled by
            // RdDistoMult may not fit in 32 bits, which would wrap and produce a bogus score.
            // NOTE(review): assumes Vp8_Sse16X16 returns int and RdDistoMult is an int constant - confirm.
            long score = ((long)LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);

            // Mode 0 is always considered; other modes are skipped when their fixed cost exceeds the bit limit.
            if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit)
            {
                continue;
            }

            if (score < bestScore)
            {
                bestMode = mode;
                bestScore = score;
            }
        }

        if (it.X == 0 || it.Y == 0)
        {
            // Avoid starting a checkerboard resonance from the border. See bug #432 of libwebp.
            if (IsFlatSource16(src))
            {
                bestMode = it.X == 0 ? 0 : 2;
                tryBothModes = false; // Stick to i16.
            }
        }

        it.SetIntra16Mode(bestMode);

        // We'll reconstruct later, if i16 mode actually gets selected.
    }

    // Next, evaluate Intra4.
    if (tryBothModes || !isI16)
    {
        // We don't evaluate the rate here, but just account for it through a
        // constant penalty (i4 mode usually needs more bits compared to i16).
        isI16 = false;
        it.StartI4();
        do
        {
            int bestI4Mode = -1;
            long bestI4Score = Vp8ModeScore.MaxCost;
            Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]);
            short[] modeCosts = it.GetCostModeI4(rd.ModesI4);

            it.MakeIntra4Preds();
            for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
            {
                Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);

                // Widened for consistency with the intra16 score computation above.
                long score = ((long)LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4);
                if (score < bestI4Score)
                {
                    bestI4Mode = mode;
                    bestI4Score = score;
                }
            }

            i4BitSum += modeCosts[bestI4Mode];
            rd.ModesI4[it.I4] = (byte)bestI4Mode;
            scoreI4 += bestI4Score;
            if (scoreI4 >= bestScore || i4BitSum > bitLimit)
            {
                // Intra4 won't be better than Intra16. Bail out and pick Intra16.
                isI16 = true;
                break;
            }
            else
            {
                // Reconstruct partial block inside YuvOut2 buffer
                Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]);
                nz |= ReconstructIntra4(it, dqm, rd.YAcLevels.AsSpan(it.I4 * 16, 16), src, tmpDst, bestI4Mode) << it.I4;
            }
        }
        while (it.RotateI4(it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc)));
    }

    // Final reconstruction, depending on which mode is selected.
    if (!isI16)
    {
        it.SetIntra4Mode(rd.ModesI4);
        it.SwapOut();
        bestScore = scoreI4;
    }
    else
    {
        int intra16Mode = it.Preds[it.PredIdx];
        nz = ReconstructIntra16(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.YOffEnc), intra16Mode);
    }

    // ... and UV!
    if (refineUvMode)
    {
        int bestMode = -1;
        long bestUvScore = Vp8ModeScore.MaxCost;
        Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
        for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
        {
            Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);

            // Widened for consistency with the intra16 score computation above.
            long score = ((long)LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
            if (score < bestUvScore)
            {
                bestMode = mode;
                bestUvScore = score;
            }
        }

        it.SetIntraUvMode(bestMode);
    }

    nz |= ReconstructUv(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc), it.CurrentMacroBlockInfo.UvMode);

    rd.Nz = (uint)nz;
    rd.Score = bestScore;
}
/// <summary>
/// Tries every intra4x4 prediction mode on each 4x4 luma sub-block of the current macroblock and decides
/// whether the accumulated intra4x4 score beats the score already held in <paramref name="rd"/>.
/// </summary>
/// <param name="it">Encoder iterator for the current macroblock; its intra4 state, non-zero context and output buffers are updated.</param>
/// <param name="rd">Score to beat. When intra4x4 is selected it receives the new score, the chosen modes and the luma levels.</param>
/// <param name="segmentInfos">Segment information, indexed by the current macroblock's segment.</param>
/// <param name="proba">Probabilities passed on to the luma cost computation.</param>
/// <param name="maxI4HeaderBits">Upper bound for the summed header bits of the chosen modes; <c>0</c> disables intra4x4.</param>
/// <returns>
/// <c>true</c> when intra4x4 is selected over intra16x16; <c>false</c> when intra4x4 is disabled,
/// its running score is not better than <paramref name="rd"/>, or the header bit budget is exceeded.
/// </returns>
public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8SegmentInfo[] segmentInfos, Vp8EncProba proba, int maxI4HeaderBits)
{
    Vp8SegmentInfo segment = segmentInfos[it.CurrentMacroBlockInfo.Segment];
    int lambdaI4 = segment.LambdaI4;
    int textureLambda = segment.TLambda;
    Span<byte> lumaSource = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
    Span<byte> lumaBest = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
    Span<int> distoScratch = it.Scratch3;
    int headerBitsUsed = 0;
    var macroBlockScore = new Vp8ModeScore();

    if (maxI4HeaderBits == 0)
    {
        return false;
    }

    macroBlockScore.InitScore();
    macroBlockScore.H = 211; // '211' is the value of VP8BitCost(0, 145)
    macroBlockScore.SetRdScore(segment.LambdaMode);
    it.StartI4();

    // Scratch objects shared by all sub-blocks and modes.
    var blockScore = new Vp8ModeScore();
    var candidateScore = new Vp8ModeScore();
    var residual = new Vp8Residual();
    Span<short> candidateLevels = new short[16];

    do
    {
        const int numBlocks = 1;
        blockScore.Clear();
        int chosenMode = -1;
        Span<byte> blockSource = lumaSource.Slice(WebpLookupTables.Vp8Scan[it.I4]);
        short[] modeCosts = it.GetCostModeI4(rd.ModesI4);
        Span<byte> keptBlock = lumaBest.Slice(WebpLookupTables.Vp8Scan[it.I4]);
        Span<byte> workBlock = it.Scratch.AsSpan();
        workBlock.Clear();

        blockScore.InitScore();
        it.MakeIntra4Preds();

        for (int mode = 0; mode < WebpConstants.NumBModes; ++mode)
        {
            candidateScore.Clear();
            candidateLevels.Clear();

            // Reconstruct.
            candidateScore.Nz = (uint)ReconstructIntra4(it, segment, candidateLevels, blockSource, workBlock, mode);

            // Compute RD-score.
            candidateScore.D = LossyUtils.Vp8_Sse4X4(blockSource, workBlock);
            candidateScore.SD = textureLambda != 0 ? Mult8B(textureLambda, LossyUtils.Vp8Disto4X4(blockSource, workBlock, WeightY, distoScratch)) : 0;
            candidateScore.H = modeCosts[mode];

            // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode.
            candidateScore.R = mode > 0 && IsFlat(candidateLevels, numBlocks, WebpConstants.FlatnessLimitI4)
                ? WebpConstants.FlatnessPenality * numBlocks
                : 0;

            // Early-out check: the partial score is already no better than the best one found.
            candidateScore.SetRdScore(lambdaI4);
            if (chosenMode >= 0 && candidateScore.Score >= blockScore.Score)
            {
                continue;
            }

            // Finish computing score.
            candidateScore.R += it.GetCostLuma4(candidateLevels, proba, residual);
            candidateScore.SetRdScore(lambdaI4);

            if (chosenMode < 0 || candidateScore.Score < blockScore.Score)
            {
                blockScore.CopyScore(candidateScore);
                chosenMode = mode;

                // Swap the two buffers: the reconstruction just written becomes the kept one,
                // and the previously kept buffer is reused for the next candidate.
                Span<byte> swapped = workBlock;
                workBlock = keptBlock;
                keptBlock = swapped;

                candidateLevels.CopyTo(macroBlockScore.YAcLevels.AsSpan(it.I4 * 16, 16));
            }
        }

        blockScore.SetRdScore(segment.LambdaMode);
        macroBlockScore.AddScore(blockScore);
        if (macroBlockScore.Score >= rd.Score)
        {
            return false;
        }

        headerBitsUsed += (int)blockScore.H; // <- equal to modeCosts[chosenMode];
        if (headerBitsUsed > maxI4HeaderBits)
        {
            return false;
        }

        // Copy selected samples to the right place.
        LossyUtils.Vp8Copy4X4(keptBlock, lumaBest.Slice(WebpLookupTables.Vp8Scan[it.I4]));

        rd.ModesI4[it.I4] = (byte)chosenMode;
        it.TopNz[it.I4 & 3] = it.LeftNz[it.I4 >> 2] = rdi4NonZero(blockScore);
    }
    while (it.RotateI4(lumaBest));

    // Finalize state.
    rd.CopyScore(macroBlockScore);
    it.SetIntra4Mode(rd.ModesI4);
    it.SwapOut();
    macroBlockScore.YAcLevels.AsSpan().CopyTo(rd.YAcLevels);

    // Select intra4x4 over intra16x16.
    return true;

    // Maps the sub-block's non-zero flags to the 0/1 value stored in the top/left context.
    static int rdi4NonZero(Vp8ModeScore score) => score.Nz != 0 ? 1 : 0;
}