/// <summary>
/// Runs the mode selection for the current macroblock and flags it as skipped when nothing is left to code.
/// </summary>
/// <param name="it">The iterator positioned on the macroblock to process.</param>
/// <param name="rd">The mode score. It is re-initialized here and then filled in by the mode selection.</param>
/// <param name="rdOpt">The rate-distortion optimization level.</param>
/// <returns>true, if the macroblock has no non-zero coefficient flags (<c>rd.Nz == 0</c>).</returns>
private bool Decimate(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8RdLevel rdOpt)
{
    rd.InitScore();

    // Predictions for Luma16x16 and Chroma8x8 can be made up-front.
    // Luma4x4 predictions need to be done as-we-go.
    it.MakeLuma16Preds();
    it.MakeChroma8Preds();

    if (rdOpt <= Vp8RdLevel.RdOptNone)
    {
        // At this point we have heuristically decided intra16 / intra4.
        // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
        // For method <= 1, we don't re-examine the decision but just go ahead with
        // quantization/reconstruction.
        bool tryBothModes = this.method >= WebpEncodingMethod.Level2;
        bool refineUvMode = this.method >= WebpEncodingMethod.Level1;
        QuantEnc.RefineUsingDistortion(it, this.SegmentInfos, rd, tryBothModes, refineUvMode, this.MbHeaderLimit);
    }
    else
    {
        // Rate-distortion driven selection: intra16 first, optionally intra4, then chroma.
        QuantEnc.PickBestIntra16(it, ref rd, this.SegmentInfos, this.Proba);
        if (this.method >= WebpEncodingMethod.Level2)
        {
            QuantEnc.PickBestIntra4(it, ref rd, this.SegmentInfos, this.Proba, this.maxI4HeaderBits);
        }

        QuantEnc.PickBestUv(it, ref rd, this.SegmentInfos, this.Proba);
    }

    bool nothingToCode = rd.Nz == 0;
    it.SetSkip(nothingToCode);
    return nothingToCode;
}
/// <summary>
/// Sums up the residual cost of the luma DC block and the 16 luma AC blocks of an intra16 macroblock.
/// </summary>
/// <param name="rd">The mode score holding the quantized levels (YDcLevels and YAcLevels).</param>
/// <param name="proba">The probabilities handed to the residual.</param>
/// <param name="res">The residual instance used as working object.</param>
/// <returns>The accumulated residual cost.</returns>
/// <remarks>TopNz and LeftNz are re-imported first and then overwritten block by block while the cost is computed.</remarks>
public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{
    const int coeffsPerBlock = 16;

    // Re-import the non-zero context.
    this.NzToBytes();

    // DC
    res.Init(0, 1, proba);
    res.SetCoeffs(rd.YDcLevels);
    int cost = res.GetResidualCost(this.TopNz[8] + this.LeftNz[8]);

    // AC: the 16 blocks are visited row by row.
    res.Init(1, 0, proba);
    for (int block = 0; block < 16; block++)
    {
        int col = block & 3;
        int row = block >> 2;
        int ctx = this.TopNz[col] + this.LeftNz[row];

        res.SetCoeffs(rd.YAcLevels.AsSpan(block * coeffsPerBlock, coeffsPerBlock));
        cost += res.GetResidualCost(ctx);

        // The context for the following blocks only records whether a last coefficient exists.
        int hasCoeffs = res.Last >= 0 ? 1 : 0;
        this.LeftNz[row] = hasCoeffs;
        this.TopNz[col] = hasCoeffs;
    }

    return cost;
}
/// <summary>
/// Forward transforms, quantizes and inverse transforms the eight chroma blocks for one prediction mode.
/// </summary>
/// <param name="it">The iterator supplying the source samples (YuvIn), the predictions (YuvP) and the scratch buffers.</param>
/// <param name="dqm">The segment info. Its Uv matrix is passed to the DC correction and to the quantizer.</param>
/// <param name="rd">The mode score whose UvLevels receive the quantized levels.</param>
/// <param name="yuvOut">The buffer the inverse transform writes to.</param>
/// <param name="mode">The chroma prediction mode, used as index into Vp8UvModeOffsets.</param>
/// <returns>The combined quantizer results, shifted up by 16 bits.</returns>
public static int ReconstructUv(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeScore rd, Span<byte> yuvOut, int mode)
{
    const int numBlocks = 8;
    const int coeffsPerBlock = 16;
    const int coeffsPerPair = 2 * coeffsPerBlock;

    Span<byte> prediction = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);
    Span<byte> source = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
    Span<short> coeffs = it.Scratch2.AsSpan(0, numBlocks * coeffsPerBlock);
    Span<int> scratch = it.Scratch3.AsSpan(0, 16);
    int nzFlags = 0;

    // Forward transform, two blocks at a time.
    for (int n = 0; n < numBlocks; n += 2)
    {
        int sampleOffset = WebpLookupTables.Vp8ScanUv[n];
        int coeffStart = n * coeffsPerBlock;
        Vp8Encoding.FTransform2(
            source.Slice(sampleOffset),
            prediction.Slice(sampleOffset),
            coeffs.Slice(coeffStart, coeffsPerBlock),
            coeffs.Slice(coeffStart + coeffsPerBlock, coeffsPerBlock),
            scratch);
    }

    CorrectDcValues(it, ref dqm.Uv, coeffs, rd);

    // Quantize each pair; the result of pair n lands at bit position n.
    for (int n = 0; n < numBlocks; n += 2)
    {
        int coeffStart = n * coeffsPerBlock;
        int pairNz = Quantize2Blocks(coeffs.Slice(coeffStart, coeffsPerPair), rd.UvLevels.AsSpan(coeffStart, coeffsPerPair), ref dqm.Uv);
        nzFlags |= pairNz << n;
    }

    // Transform back into the output buffer.
    for (int n = 0; n < numBlocks; n += 2)
    {
        int sampleOffset = WebpLookupTables.Vp8ScanUv[n];
        Vp8Encoding.ITransformTwo(prediction.Slice(sampleOffset), coeffs.Slice(n * coeffsPerBlock, coeffsPerPair), yuvOut.Slice(sampleOffset), scratch);
    }

    return nzFlags << 16;
}
/// <summary>
/// Accumulates the values of another score into this one.
/// </summary>
/// <param name="other">The score to add.</param>
public void AddScore(Vp8ModeScore other)
{
    // The non-zero flags are merged bit-wise: here, new nz bits are accumulated.
    this.Nz = this.Nz | other.Nz;

    // All remaining members are plain sums.
    this.D = this.D + other.D;
    this.SD = this.SD + other.SD;
    this.R = this.R + other.R;
    this.H = this.H + other.H;
    this.Score = this.Score + other.Score;
}
/// <summary>
/// Overwrites the score values of this instance with the ones of another score.
/// </summary>
/// <param name="other">The score to copy from.</param>
public void CopyScore(Vp8ModeScore other)
{
    // Note that nz is not accumulated, but just copied.
    this.Nz = other.Nz;

    this.Score = other.Score;
    this.H = other.H;
    this.R = other.R;
    this.SD = other.SD;
    this.D = other.D;
}
/// <summary>
/// Same as CodeResiduals, but doesn't actually write anything.
/// Instead, it just records the event distribution.
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock. Its TopNz/LeftNz context is updated.</param>
/// <param name="rd">The mode score holding the quantized levels to record.</param>
private void RecordResiduals(Vp8EncIterator it, Vp8ModeScore rd)
{
    const int coeffsPerBlock = 16;

    var residual = new Vp8Residual();
    bool isIntra16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;

    it.NzToBytes();

    if (!isIntra16)
    {
        residual.Init(0, 3, this.Proba);
    }
    else
    {
        // i16x16: the DC levels are recorded separately, using context slot 8.
        residual.Init(0, 1, this.Proba);
        residual.SetCoeffs(rd.YDcLevels);
        int dcNz = residual.RecordCoeffs(it.TopNz[8] + it.LeftNz[8]);
        it.TopNz[8] = dcNz;
        it.LeftNz[8] = dcNz;
        residual.Init(1, 0, this.Proba);
    }

    // luma-AC: 16 blocks, visited row by row.
    for (int block = 0; block < 16; block++)
    {
        int col = block & 3;
        int row = block >> 2;
        int ctx = it.TopNz[col] + it.LeftNz[row];

        residual.SetCoeffs(rd.YAcLevels.AsSpan(coeffsPerBlock * block, coeffsPerBlock));
        int blockNz = residual.RecordCoeffs(ctx);
        it.TopNz[col] = blockNz;
        it.LeftNz[row] = blockNz;
    }

    // U/V: two planes with 2x2 blocks each; the planes use a context offset of 0 and 2.
    residual.Init(0, 2, this.Proba);
    for (int plane = 0; plane < 2; plane++)
    {
        int ch = plane * 2;
        for (int row = 0; row < 2; row++)
        {
            for (int col = 0; col < 2; col++)
            {
                int topIdx = 4 + ch + col;
                int leftIdx = 4 + ch + row;
                int ctx = it.TopNz[topIdx] + it.LeftNz[leftIdx];

                int levelsStart = coeffsPerBlock * ((ch * 2) + col + (row * 2));
                residual.SetCoeffs(rd.UvLevels.AsSpan(levelsStart, coeffsPerBlock));
                int blockNz = residual.RecordCoeffs(ctx);
                it.TopNz[topIdx] = blockNz;
                it.LeftNz[leftIdx] = blockNz;
            }
        }
    }

    it.BytesToNz();
}
/// <summary>
/// Performs one statistics pass over the macroblocks and stores either a size estimate or a PSNR value in the stats.
/// </summary>
/// <param name="width">The image width, forwarded to the iterator import.</param>
/// <param name="height">The image height, forwarded to the iterator import.</param>
/// <param name="yStride">The stride of the luma plane.</param>
/// <param name="uvStride">The stride of the chroma planes.</param>
/// <param name="rdOpt">The rate-distortion optimization level handed to Decimate.</param>
/// <param name="nbMbs">The maximum number of macroblocks to visit.</param>
/// <param name="stats">The pass statistics. Q is read, Value is written.</param>
/// <returns>The accumulated header costs plus the segment header size.</returns>
private long OneStatPass(int width, int height, int yStride, int uvStride, Vp8RdLevel rdOpt, int nbMbs, PassStats stats)
{
    Span<byte> y = this.Y.GetSpan();
    Span<byte> u = this.U.GetSpan();
    Span<byte> v = this.V.GetSpan();
    var it = new Vp8EncIterator(this.YTop, this.UvTop, this.Nz, this.MbInfo, this.Preds, this.TopDerr, this.Mbw, this.Mbh);

    long size = 0;
    long sizeP0 = 0;
    long distortion = 0;

    // Captured before the loop below counts nbMbs down.
    long pixelCount = nbMbs * 384;

    it.Init();
    this.SetLoopParams(stats.Q);

    var info = new Vp8ModeScore();
    bool keepGoing;
    do
    {
        info.Clear();
        it.Import(y, u, v, yStride, uvStride, width, height, false);

        bool skipped = this.Decimate(it, ref info, rdOpt);
        if (skipped)
        {
            // Just record the number of skips and act like skipProba is not used.
            ++this.Proba.NbSkip;
        }

        this.RecordResiduals(it, info);

        size += info.R + info.H;
        sizeP0 += info.H;
        distortion += info.D;

        it.SaveBoundary();

        // The counter is only decremented when the iterator could advance.
        keepGoing = it.Next() && --nbMbs > 0;
    }
    while (keepGoing);

    sizeP0 += this.SegmentHeader.Size;

    if (!stats.DoSizeSearch)
    {
        stats.Value = GetPsnr(distortion, pixelCount);
        return sizeP0;
    }

    size += this.Proba.FinalizeSkipProba(this.Mbw, this.Mbh);
    size += this.Proba.FinalizeTokenProbas();
    size = ((size + sizeP0 + 1024) >> 11) + HeaderSizeEstimate;
    stats.Value = size;

    return sizeP0;
}
/// <summary>
/// Forward transforms, quantizes and inverse transforms the luma samples of a macroblock for one intra16 prediction mode.
/// </summary>
/// <param name="it">The iterator supplying the source samples (YuvIn), the predictions (YuvP) and the scratch buffers.</param>
/// <param name="dqm">The segment info. Y2 is used for the DC block, Y1 for the AC blocks.</param>
/// <param name="rd">The mode score whose YDcLevels and YAcLevels receive the quantized levels.</param>
/// <param name="yuvOut">The buffer the inverse transform writes to.</param>
/// <param name="mode">The intra16 prediction mode, used as index into Vp8I16ModeOffsets.</param>
/// <returns>The quantizer results: the DC result at bit 24, the AC pair results at their block index.</returns>
public static int ReconstructIntra16(Vp8EncIterator it, Vp8SegmentInfo dqm, Vp8ModeScore rd, Span<byte> yuvOut, int mode)
{
    const int numBlocks = 16;
    const int coeffsPerBlock = 16;
    const int coeffsPerPair = 2 * coeffsPerBlock;

    Span<byte> prediction = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);
    Span<byte> source = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);

    // Both scratch buffers are wiped before use.
    Span<short> coeffBuffer = it.Scratch2.AsSpan();
    Span<int> scratch = it.Scratch3.AsSpan(0, 16);
    coeffBuffer.Clear();
    scratch.Clear();

    // Layout of the coefficient buffer: one DC block followed by the 16 AC blocks.
    Span<short> dcCoeffs = coeffBuffer.Slice(0, coeffsPerBlock);
    Span<short> acCoeffs = coeffBuffer.Slice(coeffsPerBlock, numBlocks * coeffsPerBlock);

    for (int n = 0; n < numBlocks; n += 2)
    {
        int sampleOffset = WebpLookupTables.Vp8Scan[n];
        int coeffStart = n * coeffsPerBlock;
        Vp8Encoding.FTransform2(
            source.Slice(sampleOffset),
            prediction.Slice(sampleOffset),
            acCoeffs.Slice(coeffStart, coeffsPerBlock),
            acCoeffs.Slice(coeffStart + coeffsPerBlock, coeffsPerBlock),
            scratch);
    }

    Vp8Encoding.FTransformWht(acCoeffs, dcCoeffs, scratch);

    int nzFlags = 0;
    nzFlags |= QuantizeBlock(dcCoeffs, rd.YDcLevels, ref dqm.Y2) << 24;

    for (int n = 0; n < numBlocks; n += 2)
    {
        int coeffStart = n * coeffsPerBlock;

        // Zero-out the first coeff, so that: a) nz is correct below, and
        // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
        acCoeffs[coeffStart + coeffsPerBlock] = 0;
        acCoeffs[coeffStart] = 0;

        nzFlags |= Quantize2Blocks(acCoeffs.Slice(coeffStart, coeffsPerPair), rd.YAcLevels.AsSpan(coeffStart, coeffsPerPair), ref dqm.Y1) << n;
    }

    // Transform back.
    LossyUtils.TransformWht(dcCoeffs, acCoeffs, scratch);
    for (int n = 0; n < numBlocks; n += 2)
    {
        int sampleOffset = WebpLookupTables.Vp8Scan[n];
        Vp8Encoding.ITransformTwo(prediction.Slice(sampleOffset), acCoeffs.Slice(n * coeffsPerBlock, coeffsPerPair), yuvOut.Slice(sampleOffset), scratch);
    }

    return nzFlags;
}
/// <summary>
/// Stores the diffusion errors of a mode score into the top and left error buffers at the current position.
/// </summary>
/// <param name="rd">The mode score whose Derr values get stored.</param>
public void StoreDiffusionErrors(Vp8ModeScore rd)
{
    // NOTE(review): each channel view is two elements long, but consecutive channels start only one
    // element apart, so the view of channel 1 overlaps the second element of channel 0.
    // Confirm this matches the code which reads TopDerr/LeftDerr back.
    for (int ch = 0; ch < 2; ch++)
    {
        Span<sbyte> top = this.TopDerr.AsSpan((this.X * 4) + ch, 2);
        Span<sbyte> left = this.LeftDerr.AsSpan(ch, 2);

        var err3 = rd.Derr[ch, 2];
        sbyte threeQuartersOfErr3 = (sbyte)((3 * err3) >> 2);

        // Restore err1.
        left[0] = (sbyte)rd.Derr[ch, 0];

        // 3/4th of err3.
        left[1] = threeQuartersOfErr3;

        // err2.
        top[0] = (sbyte)rd.Derr[ch, 1];

        // 1/4th of err3: whatever is left after taking the 3/4th stored above.
        top[1] = (sbyte)(err3 - threeQuartersOfErr3);
    }
}
/// <summary>
/// Sums up the residual cost of the eight chroma blocks of a macroblock.
/// </summary>
/// <param name="rd">The mode score holding the quantized chroma levels (UvLevels).</param>
/// <param name="proba">The probabilities handed to the residual.</param>
/// <param name="res">The residual instance used as working object.</param>
/// <returns>The accumulated residual cost.</returns>
/// <remarks>TopNz and LeftNz are re-imported first and then overwritten block by block while the cost is computed.</remarks>
public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{
    const int coeffsPerBlock = 16;
    int cost = 0;

    // Re-import the non-zero context.
    this.NzToBytes();

    res.Init(0, 2, proba);

    // Two planes with 2x2 blocks each; the planes use a context offset of 0 and 2.
    for (int plane = 0; plane < 2; plane++)
    {
        int ch = plane * 2;
        for (int row = 0; row < 2; row++)
        {
            for (int col = 0; col < 2; col++)
            {
                int topIdx = 4 + ch + col;
                int leftIdx = 4 + ch + row;
                int ctx = this.TopNz[topIdx] + this.LeftNz[leftIdx];

                int levelsStart = ((ch * 2) + col + (row * 2)) * coeffsPerBlock;
                res.SetCoeffs(rd.UvLevels.AsSpan(levelsStart, coeffsPerBlock));
                cost += res.GetResidualCost(ctx);

                // The context for the following blocks only records whether a last coefficient exists.
                int hasCoeffs = res.Last >= 0 ? 1 : 0;
                this.LeftNz[leftIdx] = hasCoeffs;
                this.TopNz[topIdx] = hasCoeffs;
            }
        }
    }

    return cost;
}
/// <summary>
/// Encodes the image to the specified stream from the <see cref="Image{TPixel}"/>.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="image">The <see cref="Image{TPixel}"/> to encode from.</param>
/// <param name="stream">The <see cref="Stream"/> to encode the image data to.</param>
public void Encode<TPixel>(Image<TPixel> image, Stream stream)
    where TPixel : unmanaged, IPixel<TPixel>
{
    int width = image.Width;
    int height = image.Height;

    // Convert the image into the Y, U and V planes.
    Span<byte> y = this.Y.GetSpan();
    Span<byte> u = this.U.GetSpan();
    Span<byte> v = this.V.GetSpan();
    YuvConversion.ConvertRgbToYuv(image, this.configuration, this.memoryAllocator, y, u, v);

    int yStride = width;
    int uvStride = (yStride + 1) >> 1;

    var it = new Vp8EncIterator(this.YTop, this.UvTop, this.Nz, this.MbInfo, this.Preds, this.TopDerr, this.Mbw, this.Mbh);
    int[] alphas = new int[WebpConstants.MaxAlpha + 1];
    this.alpha = this.MacroBlockAnalysis(width, height, it, y, u, v, yStride, uvStride, alphas, out this.uvAlpha);

    // Average the accumulated alpha values over all macroblocks of the image.
    // The macroblock count is width times height of the macroblock grid (not the squared width).
    int totalMb = this.Mbw * this.Mbh;
    this.alpha /= totalMb;
    this.uvAlpha /= totalMb;

    // Analysis is done, proceed to actual encoding.
    this.SegmentHeader = new Vp8EncSegmentHeader(4);
    this.AssignSegments(alphas);
    this.SetLoopParams(this.quality);

    // Initialize the bitwriter.
    int averageBytesPerMacroBlock = this.averageBytesPerMb[this.BaseQuant >> 4];
    int expectedSize = this.Mbw * this.Mbh * averageBytesPerMacroBlock;
    this.bitWriter = new Vp8BitWriter(expectedSize, this);

    // TODO: EncodeAlpha();
    bool hasAlpha = false;

    // Stats-collection loop.
    this.StatLoop(width, height, yStride, uvStride);
    it.Init();
    it.InitFilter();
    var info = new Vp8ModeScore();
    var residual = new Vp8Residual();
    do
    {
        bool dontUseSkip = !this.Proba.UseSkipProba;
        info.Clear();
        it.Import(y, u, v, yStride, uvStride, width, height, false);

        // Warning! order is important: first call VP8Decimate() and
        // *then* decide how to code the skip decision if there's one.
        if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip)
        {
            this.CodeResiduals(it, info, residual);
        }
        else
        {
            it.ResetAfterSkip();
        }

        it.SaveBoundary();
    }
    while (it.Next());

    // Store filter stats.
    this.AdjustFilterStrength();

    // Write bytes from the bitwriter buffer to the stream.
    image.Metadata.SyncProfiles();
    this.bitWriter.WriteEncodedImageToStream(stream, image.Metadata.ExifProfile, (uint)width, (uint)height, hasAlpha);
}
/// <summary>
/// Writes the residual coefficients of the current macroblock with the bit writer and tracks the amount written.
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock. Its nz context, LumaBits, UvBits and BitCount are updated.</param>
/// <param name="rd">The mode score holding the quantized levels to write.</param>
/// <param name="residual">The residual instance used as working object.</param>
private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual)
{
    const int coeffsPerBlock = 16;

    bool isIntra16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
    int segment = it.CurrentMacroBlockInfo.Segment;

    it.NzToBytes();

    int lumaStart = this.bitWriter.NumBytes();
    if (!isIntra16)
    {
        residual.Init(0, 3, this.Proba);
    }
    else
    {
        // The DC levels of an i16x16 macroblock are written separately, using context slot 8.
        residual.Init(0, 1, this.Proba);
        residual.SetCoeffs(rd.YDcLevels);
        int dcNz = this.bitWriter.PutCoeffs(it.TopNz[8] + it.LeftNz[8], residual);
        it.LeftNz[8] = dcNz;
        it.TopNz[8] = dcNz;
        residual.Init(1, 0, this.Proba);
    }

    // luma-AC: 16 blocks, visited row by row.
    for (int block = 0; block < 16; block++)
    {
        int col = block & 3;
        int row = block >> 2;
        int ctx = it.TopNz[col] + it.LeftNz[row];

        Span<short> coeffs = rd.YAcLevels.AsSpan(coeffsPerBlock * block, coeffsPerBlock);
        residual.SetCoeffs(coeffs);
        int blockNz = this.bitWriter.PutCoeffs(ctx, residual);
        it.LeftNz[row] = blockNz;
        it.TopNz[col] = blockNz;
    }

    int chromaStart = this.bitWriter.NumBytes();

    // U/V: two planes with 2x2 blocks each; the planes use a context offset of 0 and 2.
    residual.Init(0, 2, this.Proba);
    for (int plane = 0; plane < 2; plane++)
    {
        int ch = plane * 2;
        for (int row = 0; row < 2; row++)
        {
            for (int col = 0; col < 2; col++)
            {
                int topIdx = 4 + ch + col;
                int leftIdx = 4 + ch + row;
                int ctx = it.TopNz[topIdx] + it.LeftNz[leftIdx];

                int levelsStart = coeffsPerBlock * ((ch * 2) + col + (row * 2));
                residual.SetCoeffs(rd.UvLevels.AsSpan(levelsStart, coeffsPerBlock));
                int blockNz = this.bitWriter.PutCoeffs(ctx, residual);
                it.LeftNz[leftIdx] = blockNz;
                it.TopNz[topIdx] = blockNz;
            }
        }
    }

    int chromaEnd = this.bitWriter.NumBytes();

    // Differences of the bit writer's NumBytes() before and after the luma and chroma parts.
    it.LumaBits = chromaStart - lumaStart;
    it.UvBits = chromaEnd - chromaStart;

    // Luma is counted in slot 1 for i16x16 and slot 0 otherwise, chroma in slot 2.
    int lumaSlot = isIntra16 ? 1 : 0;
    it.BitCount[segment, lumaSlot] += it.LumaBits;
    it.BitCount[segment, 2] += it.UvBits;

    it.BytesToNz();
}
private const int DSCALE = 1; // storage descaling, needed to make the error fit byte

/// <summary>
/// Tries all intra16 prediction modes and keeps the one with the lowest rate-distortion score.
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock.</param>
/// <param name="rd">The mode score. On return it refers to the score of the winning mode.</param>
/// <param name="segmentInfos">The segment infos, indexed by the segment of the current macroblock.</param>
/// <param name="proba">The probabilities used for the cost calculation.</param>
public static void PickBestIntra16(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8SegmentInfo[] segmentInfos, Vp8EncProba proba)
{
    const int numBlocks = 16;
    Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment];
    int lambda = dqm.LambdaI16;
    int tlambda = dqm.TLambda;
    Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
    Span<int> scratch = it.Scratch3;
    var spareScore = new Vp8ModeScore();
    var res = new Vp8Residual();

    // Two score objects are used in turns: 'candidate' is the one being filled,
    // 'winner' holds the best mode found so far.
    Vp8ModeScore candidate = spareScore;
    Vp8ModeScore winner = rd;
    bool isFlat = IsFlatSource16(src);

    rd.ModeI16 = -1;
    for (int mode = 0; mode < WebpConstants.NumPredModes; ++mode)
    {
        // Scratch buffer.
        Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
        candidate.ModeI16 = mode;

        // Reconstruct.
        candidate.Nz = (uint)ReconstructIntra16(it, dqm, candidate, tmpDst, mode);

        // Measure RD-score.
        candidate.D = LossyUtils.Vp8_Sse16X16(src, tmpDst);
        candidate.SD = tlambda == 0 ? 0 : Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch));
        candidate.H = WebpConstants.Vp8FixedCostsI16[mode];
        candidate.R = it.GetCostLuma16(candidate, proba, res);

        // Refine the first impression (which was in pixel space).
        // Once a mode is found not to be flat, the following modes are not checked anymore.
        isFlat = isFlat && IsFlat(candidate.YAcLevels, numBlocks, WebpConstants.FlatnessLimitI16);
        if (isFlat)
        {
            // Block is very flat. We put emphasis on the distortion being very low!
            candidate.D *= 2;
            candidate.SD *= 2;
        }

        // Since we always examine Intra16 first, we can overwrite *rd directly.
        candidate.SetRdScore(lambda);
        bool isBetter = mode == 0 || candidate.Score < winner.Score;
        if (isBetter)
        {
            Vp8ModeScore previousWinner = winner;
            winner = candidate;
            candidate = previousWinner;
            it.SwapOut();
        }
    }

    if (winner != rd)
    {
        rd = winner;
    }

    // Finalize score for mode decision.
    rd.SetRdScore(dqm.LambdaMode);
    it.SetIntra16Mode(rd.ModeI16);

    // We have a blocky macroblock (only DCs are non-zero) with fairly high
    // distortion, record max delta so we can later adjust the minimal filtering
    // strength needed to smooth these blocks out.
    bool onlyDcIsNonZero = (rd.Nz & 0x100ffff) == 0x1000000;
    if (onlyDcIsNonZero && rd.D > dqm.MinDisto)
    {
        dqm.StoreMaxDelta(rd.YDcLevels);
    }
}
/// <summary>
/// Refine intra16/intra4 sub-modes based on distortion only (not rate).
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock.</param>
/// <param name="segmentInfos">The segment infos, indexed by the segment of the current macroblock.</param>
/// <param name="rd">The mode score. ModesI4, the quantized levels, Nz and Score are written.</param>
/// <param name="tryBothModes">true, if intra16 and intra4 should both be evaluated.</param>
/// <param name="refineUvMode">true, if the chroma prediction mode should be re-selected as well.</param>
/// <param name="mbHeaderLimit">The limit for the mode header costs. Only applied when <paramref name="tryBothModes"/> is set.</param>
public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] segmentInfos, Vp8ModeScore rd, bool tryBothModes, bool refineUvMode, int mbHeaderLimit)
{
    long bestScore = Vp8ModeScore.MaxCost;
    int nz = 0;
    int mode;
    bool isI16 = tryBothModes || it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
    Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment];

    // Some empiric constants, of approximate order of magnitude.
    const int lambdaDi16 = 106;
    const int lambdaDi4 = 11;
    const int lambdaDuv = 120;
    long scoreI4 = dqm.I4Penalty;
    long i4BitSum = 0;
    long bitLimit = tryBothModes
        ? mbHeaderLimit
        : Vp8ModeScore.MaxCost; // no early-out allowed.

    if (isI16)
    {
        // First, evaluate the Intra16 distortion.
        int bestMode = -1;
        Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
        for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
        {
            Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);

            // The distortion is widened to 64 bit *before* it is scaled, so the product can not wrap around.
            // NOTE(review): assumes Vp8_Sse16X16 and RdDistoMult are 32 bit integers; if they already are
            // 64 bit the casts in this method are no-ops.
            long score = ((long)LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + ((long)WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);
            if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit)
            {
                continue;
            }

            if (score < bestScore)
            {
                bestMode = mode;
                bestScore = score;
            }
        }

        if (it.X == 0 || it.Y == 0)
        {
            // Avoid starting a checkerboard resonance from the border. See bug #432 of libwebp.
            if (IsFlatSource16(src))
            {
                bestMode = it.X == 0 ? 0 : 2;
                tryBothModes = false; // Stick to i16.
            }
        }

        it.SetIntra16Mode(bestMode);

        // We'll reconstruct later, if i16 mode actually gets selected.
    }

    // Next, evaluate Intra4.
    if (tryBothModes || !isI16)
    {
        // We don't evaluate the rate here, but just account for it through a
        // constant penalty (i4 mode usually needs more bits compared to i16).
        isI16 = false;
        it.StartI4();
        do
        {
            int bestI4Mode = -1;
            long bestI4Score = Vp8ModeScore.MaxCost;
            Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]);
            short[] modeCosts = it.GetCostModeI4(rd.ModesI4);

            it.MakeIntra4Preds();
            for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
            {
                Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
                long score = ((long)LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + ((long)modeCosts[mode] * lambdaDi4);
                if (score < bestI4Score)
                {
                    bestI4Mode = mode;
                    bestI4Score = score;
                }
            }

            i4BitSum += modeCosts[bestI4Mode];
            rd.ModesI4[it.I4] = (byte)bestI4Mode;
            scoreI4 += bestI4Score;
            if (scoreI4 >= bestScore || i4BitSum > bitLimit)
            {
                // Intra4 won't be better than Intra16. Bail out and pick Intra16.
                isI16 = true;
                break;
            }
            else
            {
                // Reconstruct partial block inside YuvOut2 buffer.
                // The non-zero flag of each 4x4 block is stored at the bit position of the block.
                Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc + WebpLookupTables.Vp8Scan[it.I4]);
                nz |= ReconstructIntra4(it, dqm, rd.YAcLevels.AsSpan(it.I4 * 16, 16), src, tmpDst, bestI4Mode) << it.I4;
            }
        }
        while (it.RotateI4(it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc)));
    }

    // Final reconstruction, depending on which mode is selected.
    if (!isI16)
    {
        it.SetIntra4Mode(rd.ModesI4);
        it.SwapOut();
        bestScore = scoreI4;
    }
    else
    {
        // Any intra4 flags collected above are discarded and replaced by the intra16 result.
        int intra16Mode = it.Preds[it.PredIdx];
        nz = ReconstructIntra16(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.YOffEnc), intra16Mode);
    }

    // ... and UV!
    if (refineUvMode)
    {
        int bestMode = -1;
        long bestUvScore = Vp8ModeScore.MaxCost;
        Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
        for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
        {
            Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);
            long score = ((long)LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + ((long)WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
            if (score < bestUvScore)
            {
                bestMode = mode;
                bestUvScore = score;
            }
        }

        it.SetIntraUvMode(bestMode);
    }

    nz |= ReconstructUv(it, dqm, rd, it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc), it.CurrentMacroBlockInfo.UvMode);

    rd.Nz = (uint)nz;
    rd.Score = bestScore;
}
/// <summary>
/// Tries all chroma prediction modes and keeps the one with the lowest rate-distortion score.
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock.</param>
/// <param name="rd">The mode score. ModeUv, UvLevels and Derr are set and the score of the best mode is added.</param>
/// <param name="segmentInfos">The segment infos, indexed by the segment of the current macroblock.</param>
/// <param name="proba">The probabilities used for the cost calculation.</param>
public static void PickBestUv(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8SegmentInfo[] segmentInfos, Vp8EncProba proba)
{
    const int numBlocks = 8;
    Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment];
    int lambda = dqm.LambdaUv;
    Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);

    // Two buffers are used in turns: 'workBuffer' receives the next reconstruction,
    // 'bestBuffer' holds the reconstruction of the best mode so far.
    Span<byte> workBuffer = it.YuvOut2.AsSpan(Vp8EncIterator.UOffEnc);
    Span<byte> output = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc);
    Span<byte> bestBuffer = output;

    var rdBest = new Vp8ModeScore();
    var rdUv = new Vp8ModeScore();
    var res = new Vp8Residual();

    rd.ModeUv = -1;
    rdBest.InitScore();
    for (int mode = 0; mode < WebpConstants.NumPredModes; ++mode)
    {
        rdUv.Clear();

        // Reconstruct
        rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, workBuffer, mode);

        // Compute RD-score
        rdUv.D = LossyUtils.Vp8_Sse16X8(src, workBuffer);
        rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
        rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
        rdUv.R = it.GetCostUv(rdUv, proba, res);

        // Every mode but the first one gets a penalty when the levels are flat.
        if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv))
        {
            rdUv.R += WebpConstants.FlatnessPenality * numBlocks;
        }

        rdUv.SetRdScore(lambda);
        if (mode != 0 && rdUv.Score >= rdBest.Score)
        {
            continue;
        }

        // New best mode: remember its score, levels and diffusion errors.
        rdBest.CopyScore(rdUv);
        rd.ModeUv = mode;
        rdUv.UvLevels.CopyTo(rd.UvLevels.AsSpan());
        for (int ch = 0; ch < 2; ch++)
        {
            for (int e = 0; e < 3; e++)
            {
                rd.Derr[ch, e] = rdUv.Derr[ch, e];
            }
        }

        // Swap the buffers, so the best reconstruction is not overwritten by the next mode.
        Span<byte> previousBest = bestBuffer;
        bestBuffer = workBuffer;
        workBuffer = previousBest;
    }

    it.SetIntraUvMode(rd.ModeUv);
    rd.AddScore(rdBest);

    if (bestBuffer != output)
    {
        // copy 16x8 block if needed.
        LossyUtils.Vp8Copy16X8(bestBuffer, output);
    }

    // Store diffusion errors for next block.
    it.StoreDiffusionErrors(rd);
}
/// <summary>
/// Tries all intra4 prediction modes for each 4x4 luma block and checks whether coding the macroblock
/// as intra4 beats the score which is already stored in <paramref name="rd"/>.
/// </summary>
/// <param name="it">The iterator positioned on the current macroblock.</param>
/// <param name="rd">The mode score to beat. It is only overwritten with the intra4 result when intra4 gets selected;
/// ModesI4 however is written block by block, also when the search bails out.</param>
/// <param name="segmentInfos">The segment infos, indexed by the segment of the current macroblock.</param>
/// <param name="proba">The probabilities used for the cost calculation.</param>
/// <param name="maxI4HeaderBits">The maximum for the summed up mode header costs. Zero disables intra4.</param>
/// <returns>true, if intra4x4 is selected over intra16x16.</returns>
public static bool PickBestIntra4(Vp8EncIterator it, ref Vp8ModeScore rd, Vp8SegmentInfo[] segmentInfos, Vp8EncProba proba, int maxI4HeaderBits)
{
    Vp8SegmentInfo dqm = segmentInfos[it.CurrentMacroBlockInfo.Segment];
    int lambda = dqm.LambdaI4;
    int tlambda = dqm.TLambda;
    Span<byte> src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
    Span<byte> bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
    Span<int> scratch = it.Scratch3;
    int totalHeaderBits = 0;

    // Bail out before anything gets allocated, if intra4 is disabled.
    if (maxI4HeaderBits == 0)
    {
        return false;
    }

    var rdBest = new Vp8ModeScore();
    rdBest.InitScore();
    rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145)
    rdBest.SetRdScore(dqm.LambdaMode);
    it.StartI4();

    var rdi4 = new Vp8ModeScore();
    var rdTmp = new Vp8ModeScore();
    var res = new Vp8Residual();
    Span<short> tmpLevels = new short[16];
    do
    {
        int numBlocks = 1;
        rdi4.Clear();
        int mode;
        int bestMode = -1;
        Span<byte> src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]);
        short[] modeCosts = it.GetCostModeI4(rd.ModesI4);
        Span<byte> bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]);
        Span<byte> tmpDst = it.Scratch.AsSpan();
        tmpDst.Clear();

        rdi4.InitScore();
        it.MakeIntra4Preds();
        for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
        {
            rdTmp.Clear();
            tmpLevels.Clear();

            // Reconstruct.
            // The non-zero flag is stored at the bit position of the current 4x4 block, the same way
            // RefineUsingDistortion does it, so the accumulated Nz tells which blocks have coefficients.
            rdTmp.Nz = (uint)(ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode) << it.I4);

            // Compute RD-score.
            rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst);
            rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
            rdTmp.H = modeCosts[mode];

            // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode.
            if (mode > 0 && IsFlat(tmpLevels, numBlocks, WebpConstants.FlatnessLimitI4))
            {
                rdTmp.R = WebpConstants.FlatnessPenality * numBlocks;
            }
            else
            {
                rdTmp.R = 0;
            }

            // Early-out check: the score is first computed without the coefficient cost added below.
            rdTmp.SetRdScore(lambda);
            if (bestMode >= 0 && rdTmp.Score >= rdi4.Score)
            {
                continue;
            }

            // Finish computing score.
            rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res);
            rdTmp.SetRdScore(lambda);

            if (bestMode < 0 || rdTmp.Score < rdi4.Score)
            {
                rdi4.CopyScore(rdTmp);
                bestMode = mode;

                // Swap the buffers, so the best reconstruction is not overwritten by the next mode.
                Span<byte> tmp = tmpDst;
                tmpDst = bestBlock;
                bestBlock = tmp;
                tmpLevels.CopyTo(rdBest.YAcLevels.AsSpan(it.I4 * 16, 16));
            }
        }

        rdi4.SetRdScore(dqm.LambdaMode);
        rdBest.AddScore(rdi4);
        if (rdBest.Score >= rd.Score)
        {
            // The blocks so far already cost as much as the score to beat.
            return false;
        }

        totalHeaderBits += (int)rdi4.H; // <- equal to modeCosts[bestMode];
        if (totalHeaderBits > maxI4HeaderBits)
        {
            return false;
        }

        // Copy selected samples to the right place.
        LossyUtils.Vp8Copy4X4(bestBlock, bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]));

        rd.ModesI4[it.I4] = (byte)bestMode;
        it.TopNz[it.I4 & 3] = it.LeftNz[it.I4 >> 2] = rdi4.Nz != 0 ? 1 : 0;
    }
    while (it.RotateI4(bestBlocks));

    // Finalize state.
    rd.CopyScore(rdBest);
    it.SetIntra4Mode(rd.ModesI4);
    it.SwapOut();
    rdBest.YAcLevels.AsSpan().CopyTo(rd.YAcLevels);

    // Select intra4x4 over intra16x16.
    return true;
}