/// <summary>
/// Retains this instance, pins the backing array and returns a handle that points
/// <c>_offset + byteOffset</c> bytes past the pinned array address.
/// </summary>
/// <param name="byteOffset">Byte offset to add to the pinned address.</param>
/// <returns>A <see cref="MemoryHandle"/> carrying the computed pointer and the pinning <see cref="GCHandle"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="byteOffset"/>, compared as an unsigned value, exceeds the array length multiplied by the element size.
/// </exception>
public override MemoryHandle Pin(int byteOffset = 0)
{
    unsafe
    {
        Retain(); // this checks IsDisposed

        try
        {
            // The comparison is done unsigned so that a negative offset is rejected by the same test.
            // The arithmetic width follows the pointer size of the current process.
            bool offsetOutOfRange;
            if (IntPtr.Size == 4)
            {
                uint capacityInBytes = (uint)_array.Length * (uint)Unsafe.SizeOf<T>();
                offsetOutOfRange = (uint)byteOffset > capacityInBytes;
            }
            else
            {
                ulong capacityInBytes = (uint)_array.Length * (ulong)Unsafe.SizeOf<T>();
                offsetOutOfRange = (ulong)byteOffset > capacityInBytes;
            }

            if (offsetOutOfRange)
            {
                throw new ArgumentOutOfRangeException(nameof(byteOffset));
            }

            GCHandle pinnedArray = GCHandle.Alloc(_array, GCHandleType.Pinned);
            void* address = Unsafe.Add<byte>((void*)pinnedArray.AddrOfPinnedObject(), _offset + byteOffset);
            return new MemoryHandle(this, address, pinnedArray);
        }
        catch
        {
            // Undo the Retain() above before letting the failure propagate.
            Release();
            throw;
        }
    }
}
/// <summary>
/// Builds a string from <paramref name="length"/> characters of <paramref name="value"/>,
/// starting at <paramref name="startIndex"/>.
/// </summary>
/// <param name="value">Source character array.</param>
/// <param name="startIndex">Index of the first character to copy.</param>
/// <param name="length">Number of characters to copy.</param>
/// <returns><c>Empty</c> when <paramref name="length"/> is zero; otherwise a newly allocated string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="length"/> is negative, or the requested range does not fit in the array.
/// </exception>
string Ctor(char[] value, int startIndex, int length)
{
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }

    if (startIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
    }

    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
    }

    // Written as a subtraction so the range test cannot overflow.
    int lastValidStart = value.Length - length;
    if (startIndex > lastValidStart)
    {
        throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
    }

    if (length == 0)
    {
        return Empty;
    }

    string result = FastAllocateString(length);

    // derefing Length first allows JIT to prove 'result' not null below
    uint charCount = (uint)result.Length;

    Buffer.Memmove(
        elementCount: charCount,
        destination: ref result._firstChar,
        source: ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(value), startIndex));

    return result;
}
/// <summary>
/// Compares a slice of <paramref name="strA"/> with a slice of <paramref name="strB"/> by delegating to
/// <c>SpanHelpers.SequenceCompareTo</c>. Arguments are only validated through debug assertions.
/// </summary>
/// <returns>The value returned by <c>SpanHelpers.SequenceCompareTo</c> for the two slices.</returns>
private static int CompareOrdinalHelper(string strA, int indexA, int countA, string strB, int indexB, int countB)
{
    Debug.Assert(strA != null);
    Debug.Assert(strB != null);
    Debug.Assert(indexA >= 0 && indexB >= 0);
    Debug.Assert(countA >= 0 && countB >= 0);
    Debug.Assert(indexA + countA <= strA.Length && indexB + countB <= strB.Length);

    // Resolve the first character of each slice, then hand both ranges to the shared helper.
    ref char sliceA = ref Unsafe.Add(ref strA.GetRawStringData(), indexA);
    ref char sliceB = ref Unsafe.Add(ref strB.GetRawStringData(), indexB);

    return SpanHelpers.SequenceCompareTo(ref sliceA, countA, ref sliceB, countB);
}
/// <summary>
/// Computes a 64-bit hash using the Marvin algorithm.
/// </summary>
/// <param name="data">Reference to the first byte to hash.</param>
/// <param name="count">Number of bytes to hash.</param>
/// <param name="seed">Seed; the low 32 bits initialize <c>p0</c> and the high 32 bits initialize <c>p1</c>.</param>
/// <returns><c>p1</c> in the upper 32 bits combined with <c>p0</c> in the lower 32 bits.</returns>
public static long ComputeHash(ref byte data, int count, ulong seed)
{
    uint remaining = (uint)count;
    uint p0 = (uint)seed;
    uint p1 = (uint)(seed >> 32);

    // Kept as a signed int because it is only ever handed to Unsafe.Add().
    int offset = 0;

    // Bulk phase: two 32-bit words per iteration, one block round after each word.
    for (; remaining >= 8; remaining -= 8, offset += 8)
    {
        p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, offset));
        Block(ref p0, ref p1);

        p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, offset + 4));
        Block(ref p0, ref p1);
    }

    // 4..7 bytes left: consume one more full word, leaving 0..3 trailing bytes.
    if (remaining >= 4)
    {
        p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, offset));
        Block(ref p0, ref p1);
        offset += 4;
        remaining -= 4;
    }

    // Fold in the trailing bytes together with the 0x80 marker placed right after them.
    switch (remaining)
    {
        case 0:
            p0 += 0x80u;
            break;

        case 1:
            p0 += 0x8000u | Unsafe.Add(ref data, offset);
            break;

        case 2:
            p0 += 0x800000u | Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, offset));
            break;

        case 3:
            p0 += 0x80000000u
                | (((uint)(Unsafe.Add(ref data, offset + 2))) << 16)
                | (uint)(Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, offset)));
            break;

        default:
            Debug.Fail("Should not get here.");
            break;
    }

    // Two finishing rounds.
    Block(ref p0, ref p1);
    Block(ref p0, ref p1);

    return (((long)p1) << 32) | p0;
}
private static int GetIndexOfFirstInvalidUtf8Sequence(ref byte inputBuffer, int inputLength, out int scalarCount, out int surrogatePairCount) { // The fields below control where we read from the buffer. IntPtr inputBufferCurrentOffset = IntPtr.Zero; int tempScalarCount = inputLength; int tempSurrogatePairCount = 0; // If the sequence is long enough, try running vectorized "is this sequence ASCII?" // logic. We perform a small test of the first few bytes to make sure they're all // ASCII before we incur the cost of invoking the vectorized code path. if (Vector.IsHardwareAccelerated) { if (IntPtr.Size >= 8) { // Test first 16 bytes and check for all-ASCII. if ((inputLength >= 2 * sizeof(ulong) + 3 * Vector <byte> .Count) && QWordAllBytesAreAscii(ReadAndFoldTwoQWordsUnaligned(ref inputBuffer))) { inputBufferCurrentOffset = ConsumeAsciiBytesVectorized(ref Unsafe.Add(ref inputBuffer, 2 * sizeof(ulong)), inputLength - 2 * sizeof(ulong)) + 2 * sizeof(ulong); } } else { // Test first 8 bytes and check for all-ASCII. if ((inputLength >= 2 * sizeof(uint) + 3 * Vector <byte> .Count) && DWordAllBytesAreAscii(ReadAndFoldTwoDWordsUnaligned(ref inputBuffer))) { inputBufferCurrentOffset = ConsumeAsciiBytesVectorized(ref Unsafe.Add(ref inputBuffer, 2 * sizeof(uint)), inputLength - 2 * sizeof(uint)) + 2 * sizeof(uint); } } } int inputBufferRemainingBytes = inputLength - ConvertIntPtrToInt32WithoutOverflowCheck(inputBufferCurrentOffset); // Begin the main loop. #if DEBUG long lastOffsetProcessed = -1; // used for invariant checking in debug builds #endif while (inputBufferRemainingBytes >= sizeof(uint)) { // Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar. 
Debug.Assert(inputLength - (int)inputBufferCurrentOffset >= sizeof(uint)); uint thisDWord = Unsafe.ReadUnaligned <uint>(ref Unsafe.Add(ref inputBuffer, inputBufferCurrentOffset)); AfterReadDWord: #if DEBUG Debug.Assert(lastOffsetProcessed < (long)inputBufferCurrentOffset, "Algorithm should've made forward progress since last read."); lastOffsetProcessed = (long)inputBufferCurrentOffset; #endif // First, check for the common case of all-ASCII bytes. if (DWordAllBytesAreAscii(thisDWord)) { // We read an all-ASCII sequence. inputBufferCurrentOffset += 4; inputBufferRemainingBytes -= 4; // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII. // Below is basically unrolled loops with poor man's vectorization. if (inputBufferRemainingBytes >= 5 * sizeof(uint)) { // The JIT produces better codegen for aligned reads than it does for // unaligned reads, and we want the processor to operate at maximum // efficiency in the loop that follows, so we'll align the references // now. It's OK to do this without pinning because the GC will never // move a heap-allocated object in a manner that messes with its // alignment. { ref byte refToCurrentDWord = ref Unsafe.Add(ref inputBuffer, inputBufferCurrentOffset); thisDWord = Unsafe.ReadUnaligned <uint>(ref refToCurrentDWord); if (!DWordAllBytesAreAscii(thisDWord)) { goto AfterReadDWordSkipAllBytesAsciiCheck; } int adjustment = GetNumberOfBytesToNextDWordAlignment(ref refToCurrentDWord); inputBufferCurrentOffset += adjustment; // will adjust 'bytes remaining' value after below loop } // At this point, the input buffer offset points to an aligned DWORD. // We also know that there's enough room to read at least four DWORDs from the stream. 
IntPtr inputBufferFinalOffsetAtWhichCanSafelyLoop = (IntPtr)(inputLength - 4 * sizeof(uint)); do { ref uint currentReadPosition = ref Unsafe.As <byte, uint>(ref Unsafe.Add(ref inputBuffer, inputBufferCurrentOffset)); if (!DWordAllBytesAreAscii(currentReadPosition | Unsafe.Add(ref currentReadPosition, 1))) { goto LoopTerminatedEarlyDueToNonAsciiData; } if (!DWordAllBytesAreAscii(Unsafe.Add(ref currentReadPosition, 2) | Unsafe.Add(ref currentReadPosition, 3))) { inputBufferCurrentOffset += 2 * sizeof(uint); goto LoopTerminatedEarlyDueToNonAsciiData; } inputBufferCurrentOffset += 4 * sizeof(uint); } while (IntPtrIsLessThanOrEqualTo(inputBufferCurrentOffset, inputBufferFinalOffsetAtWhichCanSafelyLoop)); inputBufferRemainingBytes = inputLength - ConvertIntPtrToInt32WithoutOverflowCheck(inputBufferCurrentOffset); continue; // need to perform a bounds check because we might be running out of data LoopTerminatedEarlyDueToNonAsciiData: // We know that there's *at least* two DWORDs of data remaining in the buffer. // We also know that one of them (or both of them) contains non-ASCII data somewhere. // Let's perform a quick check here to bypass the logic at the beginning of the main loop. thisDWord = Unsafe.As <byte, uint>(ref Unsafe.Add(ref inputBuffer, inputBufferCurrentOffset)); if (DWordAllBytesAreAscii(thisDWord)) { inputBufferCurrentOffset += 4; thisDWord = Unsafe.As <byte, uint>(ref Unsafe.Add(ref inputBuffer, inputBufferCurrentOffset)); } inputBufferRemainingBytes = inputLength - ConvertIntPtrToInt32WithoutOverflowCheck(inputBufferCurrentOffset); goto AfterReadDWordSkipAllBytesAsciiCheck; } continue; }
/// <summary>
/// Writes <c>default(IntPtr)</c> to <paramref name="pointerSizeLength"/> consecutive native words
/// starting at <paramref name="ip"/>.
/// </summary>
/// <param name="ip">Reference to the first word to clear; asserted (debug only) to be word aligned.</param>
/// <param name="pointerSizeLength">Number of native words to clear.</param>
public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
{
    Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");

    // Phase 1: clear from the tail, eight words per pass. After each pass the still
    // uncleared region is the first 'pointerSizeLength' words.
    while (pointerSizeLength >= 8)
    {
        ref IntPtr tail = ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength);

        Unsafe.Add(ref tail, -1) = default(IntPtr);
        Unsafe.Add(ref tail, -2) = default(IntPtr);
        Unsafe.Add(ref tail, -3) = default(IntPtr);
        Unsafe.Add(ref tail, -4) = default(IntPtr);
        Unsafe.Add(ref tail, -5) = default(IntPtr);
        Unsafe.Add(ref tail, -6) = default(IntPtr);
        Unsafe.Add(ref tail, -7) = default(IntPtr);
        Unsafe.Add(ref tail, -8) = default(IntPtr);

        pointerSizeLength -= 8;
    }

    Debug.Assert(pointerSizeLength <= 7);

    // Phase 2: at most seven words remain. Each tier below writes from both ends of the
    // remaining range and then falls into the next smaller tier; overlapping writes are
    // accepted in exchange for fewer length-dependent branches.
    if (pointerSizeLength >= 4)
    {
        // Together with the tiers below: first four and last three.
        Unsafe.Add(ref ip, 2) = default(IntPtr);
        Unsafe.Add(ref ip, 3) = default(IntPtr);
        Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -3) = default(IntPtr);
        Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -2) = default(IntPtr);
    }

    if (pointerSizeLength >= 2)
    {
        // Together with the tier below: first two and last one.
        Unsafe.Add(ref ip, 1) = default(IntPtr);
        Unsafe.Add(ref Unsafe.Add(ref ip, (IntPtr)pointerSizeLength), -1) = default(IntPtr);
    }

    if (pointerSizeLength > 0)
    {
        // First (or only) element.
        ip = default(IntPtr);
    }
}
/// <summary>
/// Computes a 32-bit Marvin hash over <paramref name="count"/> bytes starting at <paramref name="data"/>.
/// </summary>
/// <param name="data">Reference to the first byte to hash.</param>
/// <param name="count">Number of bytes to hash.</param>
/// <param name="p0">First half of the initial state.</param>
/// <param name="p1">Second half of the initial state.</param>
/// <returns><c>p1 ^ p0</c> after the final two block rounds, as a signed integer.</returns>
public static int ComputeHash32(ref byte data, uint count, uint p0, uint p1)
{
    uint partialResult;

    if (count >= 4)
    {
        if (count >= 8)
        {
            // Main loop - read 8 bytes at a time, with the block function unrolled 2x.
            uint loopCount = count / 8;
            Debug.Assert(loopCount > 0, "Shouldn't reach this code path for small inputs.");

            do
            {
                // Two independent 32-bit reads instead of one 64-bit read.
                p0 += Unsafe.ReadUnaligned<uint>(ref data);
                uint nextUInt32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref data, 4));

                // One block round for each of the 32-bit integers we just read.
                Block(ref p0, ref p1);
                p0 += nextUInt32;
                Block(ref p0, ref p1);

                // Advance the data reference; the loop counts down to zero.
                data = ref Unsafe.AddByteOffset(ref data, 8);
            }
            while (--loopCount > 0);
        }

        // 'count' still holds the original length; its low three bits give the number of bytes
        // (0 .. 7) that follow 'data'. Bit 2 set means at least four remain, so take one more
        // full word. For original lengths 4 .. 7 that bit is always set.
        if ((count & 0b_0100) != 0)
        {
            Debug.Assert(count >= 4, "Only should've gotten here if the original count was >= 4.");
            p0 += Unsafe.ReadUnaligned<uint>(ref data);
            Block(ref p0, ref p1);
        }

        // 0 .. 3 bytes are still unread. Because the original length was at least four, the last
        // four bytes of the buffer can be read safely; the read overlaps bytes that were already
        // consumed, and those are shifted out below.
        Debug.Assert(count >= 4, "Only should've gotten here if the original count was >= 4.");
        partialResult = Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref Unsafe.AddByteOffset(ref data, (nuint)count & 7), -4));

        // Little-endian examples (AA BB CC are unread bytes, ## are already-consumed bytes):
        //   count mod 4 = 0 -> 0x####_#### -> 0x0000_0080
        //   count mod 4 = 1 -> 0xAA##_#### -> 0x0000_80AA
        //   count mod 4 = 2 -> 0xBBAA_#### -> 0x0080_BBAA
        //   count mod 4 = 3 -> 0xCCBB_AA## -> 0x80CC_BBAA
        uint discardShift = ~count << 3;

        if (BitConverter.IsLittleEndian)
        {
            partialResult >>= 8; // make some room for the 0x80 byte
            partialResult |= 0x8000_0000u; // put the 0x80 byte at the beginning
            partialResult >>= (int)discardShift & 0x1F; // shift out all previously consumed bytes
        }
        else
        {
            partialResult <<= 8; // make some room for the 0x80 byte
            partialResult |= 0x80u; // put the 0x80 byte at the end
            partialResult <<= (int)discardShift & 0x1F; // shift out all previously consumed bytes
        }
    }
    else
    {
        // Only 0 .. 3 bytes in total: no 32-bit read is possible, so the partial result is
        // assembled from a byte and/or a ushort. Start from the "no data" marker.
        partialResult = BitConverter.IsLittleEndian ? 0x80u : 0x80000000u;

        if ((count & 0b_0001) != 0)
        {
            // Length 1 or 3: read the single byte at offset 0 (length 1) or offset 2 (length 3).
            //   (little-endian / big-endian)
            //   [ AA ]       -> 0x0000_80AA / 0xAA80_0000
            //   [ AA BB CC ] -> 0x0000_80CC / 0xCC80_0000
            partialResult = Unsafe.AddByteOffset(ref data, (nuint)count & 2);

            if (BitConverter.IsLittleEndian)
            {
                partialResult |= 0x8000;
            }
            else
            {
                partialResult <<= 24;
                partialResult |= 0x800000u;
            }
        }

        if ((count & 0b_0010) != 0)
        {
            // Length 2 or 3: merge in the leading ushort.
            //   (little-endian / big-endian)
            //   [ AA BB ]    -> 0x0080_BBAA / 0xAABB_8000
            //   [ AA BB CC ] -> 0x80CC_BBAA / 0xAABB_CC80 (carried over from above)
            if (BitConverter.IsLittleEndian)
            {
                partialResult <<= 16;
                partialResult |= (uint)Unsafe.ReadUnaligned<ushort>(ref data);
            }
            else
            {
                partialResult |= (uint)Unsafe.ReadUnaligned<ushort>(ref data);
                partialResult = BitOperations.RotateLeft(partialResult, 16);
            }
        }
    }

    // Merge the final partial result and run the two finishing rounds.
    p0 += partialResult;
    Block(ref p0, ref p1);
    Block(ref p0, ref p1);

    return (int)(p1 ^ p0);
}
/// <summary>
/// Writes the two <c>HexTable</c> characters for <paramref name="value"/> into <paramref name="buffer"/>:
/// the high nibble at <paramref name="index"/> and the low nibble at <c>index + 1</c>.
/// No bounds checking is performed.
/// </summary>
public static void WriteHexByte(byte value, ref char buffer, int index)
{
    int highNibble = value >> 4;
    int lowNibble = value & 0xF;

    Unsafe.Add(ref buffer, index) = HexTable[highNibble];
    Unsafe.Add(ref buffer, index + 1) = HexTable[lowNibble];
}
/// <summary>
/// Stores <paramref name="v"/> at linear position <c>y * width + x</c>, addressed by reference
/// arithmetic relative to the first array element. The computed position is not bounds checked.
/// </summary>
public void IndexWithUnsafeReferenceArithmeticsOnArray0Impl(int x, int y, Vector4 v)
{
    int linearIndex = (y * this.width) + x;

    ref Vector4 origin = ref this.array[0];
    ref Vector4 target = ref Unsafe.Add(ref origin, linearIndex);
    target = v;
}
/// <summary>
/// Gets the character at the specified position.
/// </summary>
/// <param name="index">Zero-based position of the character.</param>
/// <returns>The character stored <paramref name="index"/> elements after <c>_firstChar</c>.</returns>
/// <exception cref="IndexOutOfRangeException">
/// <paramref name="index"/> is negative or not less than <c>Length</c>.
/// </exception>
public unsafe char this[int index]
{
    [Intrinsic]
    get
    {
        // Unsafe.Add performs no range validation, so an unchecked index would read memory
        // outside the string. One unsigned comparison rejects negative and too-large values.
        if ((uint)index >= (uint)Length)
        {
            throw new IndexOutOfRangeException();
        }

        return Unsafe.Add(ref _firstChar, index);
    }
}
/// <summary>
/// Stores the low 24 bits of <paramref name="i0"/> into three consecutive bytes starting at
/// <paramref name="destBytes"/>, most significant byte first.
/// </summary>
private static void WriteThreeBytes(ref byte destBytes, int i0)
{
    ref byte cursor = ref destBytes;
    cursor = (byte)(i0 >> 16);

    cursor = ref Unsafe.Add(ref destBytes, 1);
    cursor = (byte)(i0 >> 8);

    cursor = ref Unsafe.Add(ref destBytes, 2);
    cursor = (byte)i0;
}
/// <inheritdoc /> protected override void OnFrameApply(ImageFrame <TPixel> source, Rectangle sourceRectangle, Configuration configuration) { DenseMatrix <float>[] kernels = { this.North, this.NorthWest, this.West, this.SouthWest, this.South, this.SouthEast, this.East, this.NorthEast }; int startY = sourceRectangle.Y; int endY = sourceRectangle.Bottom; int startX = sourceRectangle.X; int endX = sourceRectangle.Right; // Align start/end positions. int minX = Math.Max(0, startX); int maxX = Math.Min(source.Width, endX); int minY = Math.Max(0, startY); int maxY = Math.Min(source.Height, endY); // we need a clean copy for each pass to start from using (ImageFrame <TPixel> cleanCopy = source.Clone()) { new ConvolutionProcessor <TPixel>(kernels[0]).Apply(source, sourceRectangle, configuration); if (kernels.Length == 1) { return; } int shiftY = startY; int shiftX = startX; // Reset offset if necessary. if (minX > 0) { shiftX = 0; } if (minY > 0) { shiftY = 0; } // Additional runs. // ReSharper disable once ForCanBeConvertedToForeach for (int i = 1; i < kernels.Length; i++) { using (ImageFrame <TPixel> pass = cleanCopy.Clone()) { new ConvolutionProcessor <TPixel>(kernels[i]).Apply(pass, sourceRectangle, configuration); Buffer2D <TPixel> passPixels = pass.PixelBuffer; Buffer2D <TPixel> targetPixels = source.PixelBuffer; ParallelFor.WithConfiguration( minY, maxY, configuration, y => { int offsetY = y - shiftY; ref TPixel passPixelsBase = ref MemoryMarshal.GetReference(passPixels.GetRowSpan(offsetY)); ref TPixel targetPixelsBase = ref MemoryMarshal.GetReference(targetPixels.GetRowSpan(offsetY)); for (int x = minX; x < maxX; x++) { int offsetX = x - shiftX; // Grab the max components of the two pixels ref TPixel currentPassPixel = ref Unsafe.Add(ref passPixelsBase, offsetX); ref TPixel currentTargetPixel = ref Unsafe.Add(ref targetPixelsBase, offsetX); var pixelValue = Vector4.Max( currentPassPixel.ToVector4(), currentTargetPixel.ToVector4()); 
currentTargetPixel.PackFromVector4(pixelValue); } });
/// <summary>
/// Passes the element located <paramref name="i"/> positions after <paramref name="first"/> to <c>Consume</c>.
/// </summary>
private static void Test<T>(ref T first, int i)
{
    ref T element = ref Unsafe.Add(ref first, i);
    Consume(element);
}
/// <summary>
/// Determines whether the last <c>value.Length</c> characters of <paramref name="span"/> equal
/// <paramref name="value"/> according to <c>Ordinal.EqualsIgnoreCase</c>.
/// </summary>
/// <returns>
/// <c>false</c> when <paramref name="value"/> is longer than <paramref name="span"/>; otherwise the
/// result of <c>Ordinal.EqualsIgnoreCase</c> on the span's tail and <paramref name="value"/>.
/// </returns>
internal static bool EndsWithOrdinalIgnoreCase(this ReadOnlySpan<char> span, ReadOnlySpan<char> value)
{
    int tailStart = span.Length - value.Length;
    if (tailStart < 0)
    {
        // The candidate suffix is longer than the span itself.
        return false;
    }

    ref char tail = ref Unsafe.Add(ref MemoryMarshal.GetReference(span), tailStart);
    ref char suffix = ref MemoryMarshal.GetReference(value);

    return Ordinal.EqualsIgnoreCase(ref tail, ref suffix, value.Length);
}
/// <summary>
/// Returns the pinnable address of the unwrapped instance, advanced by <c>adjustment</c>.
/// </summary>
public override ref byte GetPinnableMemoryAddress()
{
    ref byte innerAddress = ref this.Unwrap().GetPinnableMemoryAddress();
    return ref Unsafe.Add(ref innerAddress, this.adjustment);
}
/// <summary>
/// Reads the byte at offset <paramref name="i"/> within this block by reinterpreting the block as
/// raw bytes. The offset is not range checked.
/// </summary>
public byte GetPart(int i)
{
    ref byte firstByte = ref Unsafe.As<BitBlock4, byte>(ref this);
    return Unsafe.Add(ref firstByte, i);
}
/// <summary>
/// Writes <paramref name="value"/> to the byte at offset <paramref name="i"/> within this block by
/// reinterpreting the block as raw bytes. The offset is not range checked.
/// </summary>
public void SetPart(int i, byte value)
{
    ref byte firstByte = ref Unsafe.As<BitBlock4, byte>(ref this);
    Unsafe.Add(ref firstByte, i) = value;
}
/// <summary>
/// Reads the value at linear position <c>y * width + x</c>, addressed by reference arithmetic
/// relative to <c>pinnable.Data</c>. The computed position is not bounds checked.
/// </summary>
public Vector4 GetReferencesImpl(int x, int y)
{
    int linearIndex = (y * this.width) + x;
    ref Vector4 element = ref Unsafe.Add(ref this.pinnable.Data, linearIndex);
    return element;
}
/// <summary>
/// Compares at most <paramref name="length"/> characters of two strings, starting at the given
/// indexes, using the requested comparison rules.
/// </summary>
/// <param name="strA">First string; may be null.</param>
/// <param name="indexA">Start position within <paramref name="strA"/>.</param>
/// <param name="strB">Second string; may be null.</param>
/// <param name="indexB">Start position within <paramref name="strB"/>.</param>
/// <param name="length">Maximum number of characters to compare from each string.</param>
/// <param name="comparisonType">Comparison rules; validated first by <c>CheckStringComparison</c>.</param>
/// <returns>
/// 0 when both strings are null, -1 when only <paramref name="strA"/> is null, 1 when only
/// <paramref name="strB"/> is null; 0 when <paramref name="length"/> is zero or both arguments are the
/// same instance with equal indexes; otherwise the result of the comparer selected by
/// <paramref name="comparisonType"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> or an index is negative, or an index is greater than its string's length.
/// </exception>
public static int Compare(string? strA, int indexA, string? strB, int indexB, int length, StringComparison comparisonType)
{
    CheckStringComparison(comparisonType);

    // Null handling happens before any range validation.
    if (strA == null)
    {
        return strB == null ? 0 : -1;
    }

    if (strB == null)
    {
        return 1;
    }

    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
    }

    // indexA is always reported in preference to indexB.
    if (indexA < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(indexA), SR.ArgumentOutOfRange_Index);
    }

    if (indexB < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(indexB), SR.ArgumentOutOfRange_Index);
    }

    int availableA = strA.Length - indexA;
    int availableB = strB.Length - indexB;

    if (availableA < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(indexA), SR.ArgumentOutOfRange_Index);
    }

    if (availableB < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(indexB), SR.ArgumentOutOfRange_Index);
    }

    bool sameSlice = object.ReferenceEquals(strA, strB) && indexA == indexB;
    if (length == 0 || sameSlice)
    {
        return 0;
    }

    // Clamp the requested length to what each string has left.
    int lengthA = Math.Min(length, availableA);
    int lengthB = Math.Min(length, availableB);

    switch (comparisonType)
    {
        case StringComparison.CurrentCulture:
        case StringComparison.CurrentCultureIgnoreCase:
        {
            CompareOptions options = GetCaseCompareOfComparisonCulture(comparisonType);
            return CultureInfo.CurrentCulture.CompareInfo.Compare(strA, indexA, lengthA, strB, indexB, lengthB, options);
        }

        case StringComparison.InvariantCulture:
        case StringComparison.InvariantCultureIgnoreCase:
        {
            CompareOptions options = GetCaseCompareOfComparisonCulture(comparisonType);
            return CompareInfo.Invariant.Compare(strA, indexA, lengthA, strB, indexB, lengthB, options);
        }

        case StringComparison.Ordinal:
            return CompareOrdinalHelper(strA, indexA, lengthA, strB, indexB, lengthB);

        default:
            Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); // CheckStringComparison validated these earlier
            return Ordinal.CompareStringIgnoreCase(
                ref Unsafe.Add(ref strA.GetRawStringData(), indexA),
                lengthA,
                ref Unsafe.Add(ref strB.GetRawStringData(), indexB),
                lengthB);
    }
}
/// <summary>
/// Creates a <c>RetainedVec</c> over <paramref name="memorySource"/> and returns the sub-range
/// produced by <c>Clone(start, length, externallyOwned)</c>.
/// </summary>
/// <param name="memorySource">Memory to wrap; its <c>IsBlittableOffheap</c> must be true.</param>
/// <param name="start">Start argument forwarded to <c>Clone</c>.</param>
/// <param name="length">Length argument forwarded to <c>Clone</c>.</param>
/// <param name="externallyOwned">When true, <c>null</c> is passed in place of <paramref name="memorySource"/> as the first constructor argument.</param>
public static unsafe RetainedVec Create<T>(RetainableMemory<T> memorySource, int start, int length, bool externallyOwned = false)
{
    if (!memorySource.IsBlittableOffheap)
    {
        ThrowHelper.ThrowInvalidOperationException("Memory source must have IsBlittableOffheap = true to be used in RetainedVec.");
    }

    RetainedVec whole;

    if (!TypeHelper<T>.IsReferenceOrContainsReferences)
    {
        ThrowHelper.DebugAssert(memorySource.Pointer != default && memorySource._array == default);

        // RM's offset added to _pointerOrOffset
        IntPtr firstElement = (IntPtr)Unsafe.Add<T>(memorySource.Pointer, memorySource._offset);

        whole = new RetainedVec(
            externallyOwned ? null : memorySource,
            array: null,
            firstElement,
            memorySource.Length,
            VecTypeHelper<T>.RuntimeTypeId);
    }
    else
    {
        ThrowHelper.DebugAssert(memorySource.Pointer == default && memorySource._array != default);

        // RM's offset goes to _pointerOrOffset
        whole = new RetainedVec(
            externallyOwned ? null : memorySource,
            memorySource._array,
            (IntPtr)memorySource._offset,
            memorySource.Length,
            VecTypeHelper<T>.RuntimeTypeId);
    }

    return whole.Clone(start, length, externallyOwned);
}
/// <summary>
/// Zeroes <paramref name="byteLength"/> bytes starting at <paramref name="b"/>.
/// </summary>
/// <param name="b">Reference to the first byte to clear.</param>
/// <param name="byteLength">Number of bytes to clear; zero is a no-op.</param>
/// <remarks>
/// On CoreCLR AMD64/ARM64 builds, lengths up to 4096 use <c>Unsafe.InitBlockUnaligned</c> and larger
/// ones go to <c>RuntimeImports.RhZeroMemory</c>. On other builds, lengths 1..22 are handled by a
/// switch, lengths of 512 or more go to <c>RuntimeImports.RhZeroMemory</c>, and everything in between
/// is cleared by an alignment prologue, a 16-byte unrolled loop and a tail.
/// </remarks>
public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
{
    if (byteLength == 0)
    {
        return;
    }

#if CORECLR && (AMD64 || ARM64)
    if (byteLength > 4096)
    {
        goto PInvoke;
    }

    Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
    return;
#else
    // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
    // Note: It's important that this switch handles lengths at least up to 22.
    // See notes below near the main loop for why.

    // The switch will be very fast since it can be implemented using a jump
    // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
    switch (byteLength)
    {
        case 1:
            b = 0;
            return;
        case 2:
            Unsafe.As<byte, short>(ref b) = 0;
            return;
        case 3:
            Unsafe.As<byte, short>(ref b) = 0;
            Unsafe.Add<byte>(ref b, 2) = 0;
            return;
        case 4:
            Unsafe.As<byte, int>(ref b) = 0;
            return;
        case 5:
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.Add<byte>(ref b, 4) = 0;
            return;
        case 6:
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            return;
        case 7:
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.Add<byte>(ref b, 6) = 0;
            return;
        case 8:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            return;
        case 9:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.Add<byte>(ref b, 8) = 0;
            return;
        case 10:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            return;
        case 11:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.Add<byte>(ref b, 10) = 0;
            return;
        case 12:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            return;
        case 13:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.Add<byte>(ref b, 12) = 0;
            return;
        case 14:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
            return;
        case 15:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
            Unsafe.Add<byte>(ref b, 14) = 0;
            return;
        case 16:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            return;
        case 17:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.Add<byte>(ref b, 16) = 0;
            return;
        case 18:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
            return;
        case 19:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
            Unsafe.Add<byte>(ref b, 18) = 0;
            return;
        case 20:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
            return;
        case 21:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
            Unsafe.Add<byte>(ref b, 20) = 0;
            return;
        case 22:
#if BIT64
            Unsafe.As<byte, long>(ref b) = 0;
            Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
#else
            Unsafe.As<byte, int>(ref b) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 4)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 12)) = 0;
#endif
            Unsafe.As<byte, int>(ref Unsafe.Add<byte>(ref b, 16)) = 0;
            Unsafe.As<byte, short>(ref Unsafe.Add<byte>(ref b, 20)) = 0;
            return;
    }

    // P/Invoke into the native version for large lengths
    if (byteLength >= 512)
    {
        goto PInvoke;
    }

    nuint i = 0; // byte offset at which we're copying

    // The alignment decisions below must look at the ADDRESS of 'b'. Inspecting the bytes stored
    // at 'b' (as a reinterpreting read would) says nothing about alignment and leaves the wide
    // stores that follow potentially misaligned.
    if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
    {
        if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
        {
            // Odd address: clear one byte to reach an even address.
            b = 0;
            i += 1;
            if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
            {
                // Address % 4 == 3, so the single byte already reached a 4-byte boundary.
                goto IntAligned;
            }
        }

        Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        i += 2;
    }

IntAligned:

    // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
    // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
    // bytes to the next aligned address (respectively), so do nothing. On the other hand,
    // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
    // we're aligned.
    // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
    // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
    if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
    {
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        i += 4;
    }

    nuint end = byteLength - 16;
    byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop

    // We know due to the above switch-case that this loop will always run 1 iteration; max
    // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
    // the switch handles lengths 0-22.
    Debug.Assert(end >= 7 && i <= end);

    // This is separated out into a different variable, so the i + 16 addition can be
    // performed at the start of the pipeline and the loop condition does not have
    // a dependency on the writes.
    nuint counter;

    do
    {
        counter = i + 16;

        // This loop looks very costly since there appear to be a bunch of temporary values
        // being created with the adds, but the jit (for x86 anyways) will convert each of
        // these to use memory addressing operands.

        // So the only cost is a bit of code size, which is made up for by the fact that
        // we save on writes to b.

#if BIT64
        Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
#else
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 12)) = 0;
#endif

        i = counter;

        // See notes above for why this wasn't used instead
        // i += 16;
    }
    while (counter <= end);

    // Tail: the low four bits of the adjusted byteLength select an 8, 4, 2 and 1 byte store.
    if ((byteLength & 8) != 0)
    {
#if BIT64
        Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
#else
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i + 4)) = 0;
#endif
        i += 8;
    }

    if ((byteLength & 4) != 0)
    {
        Unsafe.As<byte, int>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        i += 4;
    }

    if ((byteLength & 2) != 0)
    {
        Unsafe.As<byte, short>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
        i += 2;
    }

    if ((byteLength & 1) != 0)
    {
        Unsafe.AddByteOffset<byte>(ref b, i) = 0;
        // We're not using i after this, so not needed
        // i += 1;
    }

    return;
#endif

PInvoke:
    RuntimeImports.RhZeroMemory(ref b, byteLength);
}
// Implementation notes:
// - One convolution pass is run per kernel returned by this.Kernels.Flatten(). The per-pixel
//   result kept in 'source' is the component-wise maximum (Vector4.Max) over all passes.
// - The first kernel is applied directly to 'source'. Every further kernel is applied to a fresh
//   clone of 'cleanCopy' (a clone of the input taken before the first pass), so no pass reads
//   the output of another pass.
// - When Flatten() yields a single kernel the method returns right after the first pass.
// - minX/maxX/minY/maxY are SourceRectangle clamped to the frame; the merge loop only visits
//   that clamped rectangle (workingRect).
// - shiftX/shiftY start as the rectangle origin and are reset to 0 when the clamped minimum is
//   positive; they are subtracted from x/y to form the offsets used for pixel access.
// - NOTE(review): when startX/startY is negative the shift stays negative, so offsetX/offsetY
//   are larger than x/y. offsetX is applied through Unsafe.Add without a bounds check — confirm
//   callers never pass a rectangle with a negative origin.
/// <inheritdoc /> protected override void OnFrameApply(ImageFrame <TPixel> source) { DenseMatrix <float>[] kernels = this.Kernels.Flatten(); int startY = this.SourceRectangle.Y; int endY = this.SourceRectangle.Bottom; int startX = this.SourceRectangle.X; int endX = this.SourceRectangle.Right; // Align start/end positions. int minX = Math.Max(0, startX); int maxX = Math.Min(source.Width, endX); int minY = Math.Max(0, startY); int maxY = Math.Min(source.Height, endY); // we need a clean copy for each pass to start from using (ImageFrame <TPixel> cleanCopy = source.Clone()) { using (var processor = new ConvolutionProcessor <TPixel>(this.Configuration, kernels[0], true, this.Source, this.SourceRectangle)) { processor.Apply(source); } if (kernels.Length == 1) { return; } int shiftY = startY; int shiftX = startX; // Reset offset if necessary. if (minX > 0) { shiftX = 0; } if (minY > 0) { shiftY = 0; } var workingRect = Rectangle.FromLTRB(minX, minY, maxX, maxY); // Additional runs. // ReSharper disable once ForCanBeConvertedToForeach for (int i = 1; i < kernels.Length; i++) { using (ImageFrame <TPixel> pass = cleanCopy.Clone()) { using (var processor = new ConvolutionProcessor <TPixel>(this.Configuration, kernels[i], true, this.Source, this.SourceRectangle)) { processor.Apply(pass); } Buffer2D <TPixel> passPixels = pass.PixelBuffer; Buffer2D <TPixel> targetPixels = source.PixelBuffer; ParallelHelper.IterateRows( workingRect, this.Configuration, rows => { for (int y = rows.Min; y < rows.Max; y++) { int offsetY = y - shiftY; ref TPixel passPixelsBase = ref MemoryMarshal.GetReference(passPixels.GetRowSpan(offsetY)); ref TPixel targetPixelsBase = ref MemoryMarshal.GetReference(targetPixels.GetRowSpan(offsetY)); for (int x = minX; x < maxX; x++) { int offsetX = x - shiftX; // Grab the max components of the two pixels ref TPixel currentPassPixel = ref Unsafe.Add(ref passPixelsBase, offsetX); ref TPixel currentTargetPixel = ref Unsafe.Add(ref targetPixelsBase, offsetX); var 
pixelValue = Vector4.Max( currentPassPixel.ToVector4(), currentTargetPixel.ToVector4()); currentTargetPixel.FromVector4(pixelValue); } } });
/// <summary>
/// Creates an enumerator positioned <paramref name="offset"/> elements of type T past
/// <paramref name="origin"/>, with <paramref name="length"/> recorded as the remaining count.
/// </summary>
/// <param name="origin">Base pointer the offset is applied to.</param>
/// <param name="offset">Offset from <paramref name="origin"/>, in units of T (not bytes).</param>
/// <param name="length">Value stored as the number of remaining elements.</param>
public PointerBasedEnumerator(void* origin, int offset, int length)
{
    // No validation is performed here: the caller is responsible for the range being valid.
    _remaining = length;
    _ptr = Unsafe.Add<T>(origin, offset);
}
/// <summary>
/// Visits the first <paramref name="contactCount"/> contacts, pairing the i-th element counted from
/// <paramref name="sourceContacts"/> with the i-th element counted from <paramref name="targetContacts"/>.
/// </summary>
/// <param name="contactCount">Number of contact slots to visit.</param>
/// <param name="sourceContacts">Reference to the first source contact.</param>
/// <param name="targetContacts">Reference to the first target slot.</param>
// NOTE(review): both references are advanced with Unsafe.Add in units of their own declared type and
// without bounds checks — confirm that each location really holds at least contactCount consecutive elements.
private static void CopyContactData(int contactCount, ref NonconvexConstraintContactData sourceContacts, ref NonconvexPrestepData targetContacts) { for (int i = 0; i < contactCount; ++i) { ref var sourceContact = ref Unsafe.Add(ref sourceContacts, i); ref var targetContact = ref Unsafe.Add(ref targetContacts, i);
// Implementation notes:
// - The processing matrix is inverted so that every destination coordinate can be mapped back
//   to a position in the source frame.
// - Nearest-neighbour path (Sampler is NearestNeighborResampler): each destination pixel is
//   transformed and, when the resulting point lies inside the source bounds, the source pixel is
//   copied as-is; destination pixels whose point falls outside are not written. The method then returns.
// - General path: GetSamplingRadius supplies a radius and scale per axis. yBuffer/xBuffer are
//   allocated with one row per destination row, and destination row y uses row y of each buffer
//   as scratch space for the weights.
// - For each destination pixel the sampling window is floor(point + radius + .5) / ceil(point - radius - .5),
//   then clamped to [0, source size - 1]. The unclamped edges (left/top/right/bottom) are still
//   passed to CalculateWeightsDown together with the clamped ones.
// - A destination pixel is skipped (left unwritten) when the clamped window has minX == maxX or minY == maxY.
// - CalculateWeightsDown is used only when both xScale and yScale are greater than 1; every other
//   case uses CalculateWeightsScaleUp.
// - Source samples are premultiplied before being weighted and summed; the sum is un-premultiplied
//   before it is written to the destination pixel.
/// <inheritdoc/> protected override void OnFrameApply( ImageFrame <TPixel> source, ImageFrame <TPixel> destination, Rectangle sourceRectangle, Configuration configuration) { int height = this.TargetDimensions.Height; int width = this.TargetDimensions.Width; Rectangle sourceBounds = source.Bounds(); var targetBounds = new Rectangle(0, 0, width, height); // Since could potentially be resizing the canvas we might need to re-calculate the matrix Matrix3x2 matrix = this.GetProcessingMatrix(sourceBounds, targetBounds); // Convert from screen to world space. Matrix3x2.Invert(matrix, out matrix); if (this.Sampler is NearestNeighborResampler) { ParallelHelper.IterateRows( targetBounds, configuration, rows => { for (int y = rows.Min; y < rows.Max; y++) { Span <TPixel> destRow = destination.GetPixelRowSpan(y); for (int x = 0; x < width; x++) { var point = Point.Transform(new Point(x, y), matrix); if (sourceBounds.Contains(point.X, point.Y)) { destRow[x] = source[point.X, point.Y]; } } } }); return; } int maxSourceX = source.Width - 1; int maxSourceY = source.Height - 1; (float radius, float scale, float ratio)xRadiusScale = this.GetSamplingRadius(source.Width, destination.Width); (float radius, float scale, float ratio)yRadiusScale = this.GetSamplingRadius(source.Height, destination.Height); float xScale = xRadiusScale.scale; float yScale = yRadiusScale.scale; var radius = new Vector2(xRadiusScale.radius, yRadiusScale.radius); IResampler sampler = this.Sampler; var maxSource = new Vector4(maxSourceX, maxSourceY, maxSourceX, maxSourceY); int xLength = (int)MathF.Ceiling((radius.X * 2) + 2); int yLength = (int)MathF.Ceiling((radius.Y * 2) + 2); MemoryAllocator memoryAllocator = configuration.MemoryAllocator; using (Buffer2D <float> yBuffer = memoryAllocator.Allocate2D <float>(yLength, height)) using (Buffer2D <float> xBuffer = memoryAllocator.Allocate2D <float>(xLength, height)) { ParallelHelper.IterateRows( targetBounds, configuration, rows => { for (int y = rows.Min; y < 
rows.Max; y++) { ref TPixel destRowRef = ref MemoryMarshal.GetReference(destination.GetPixelRowSpan(y)); ref float ySpanRef = ref MemoryMarshal.GetReference(yBuffer.GetRowSpan(y)); ref float xSpanRef = ref MemoryMarshal.GetReference(xBuffer.GetRowSpan(y)); for (int x = 0; x < width; x++) { // Use the single precision position to calculate correct bounding pixels // otherwise we get rogue pixels outside of the bounds. var point = Vector2.Transform(new Vector2(x, y), matrix); // Clamp sampling pixel radial extents to the source image edges Vector2 maxXY = point + radius; Vector2 minXY = point - radius; // max, maxY, minX, minY var extents = new Vector4( MathF.Floor(maxXY.X + .5F), MathF.Floor(maxXY.Y + .5F), MathF.Ceiling(minXY.X - .5F), MathF.Ceiling(minXY.Y - .5F)); int right = (int)extents.X; int bottom = (int)extents.Y; int left = (int)extents.Z; int top = (int)extents.W; extents = Vector4.Clamp(extents, Vector4.Zero, maxSource); int maxX = (int)extents.X; int maxY = (int)extents.Y; int minX = (int)extents.Z; int minY = (int)extents.W; if (minX == maxX || minY == maxY) { continue; } // It appears these have to be calculated on-the-fly. // Precalculating transformed weights would require prior knowledge of every transformed pixel location // since they can be at sub-pixel positions on both axis. // I've optimized where I can but am always open to suggestions. 
if (yScale > 1 && xScale > 1) { CalculateWeightsDown( top, bottom, minY, maxY, point.Y, sampler, yScale, ref ySpanRef, yLength); CalculateWeightsDown( left, right, minX, maxX, point.X, sampler, xScale, ref xSpanRef, xLength); } else { CalculateWeightsScaleUp(minY, maxY, point.Y, sampler, ref ySpanRef); CalculateWeightsScaleUp(minX, maxX, point.X, sampler, ref xSpanRef); } // Now multiply the results against the offsets Vector4 sum = Vector4.Zero; for (int yy = 0, j = minY; j <= maxY; j++, yy++) { float yWeight = Unsafe.Add(ref ySpanRef, yy); for (int xx = 0, i = minX; i <= maxX; i++, xx++) { float xWeight = Unsafe.Add(ref xSpanRef, xx); // Values are first premultiplied to prevent darkening of edge pixels var current = source[i, j].ToVector4(); Vector4Utils.Premultiply(ref current); sum += current * xWeight * yWeight; } } ref TPixel dest = ref Unsafe.Add(ref destRowRef, x); // Reverse the premultiplication Vector4Utils.UnPremultiply(ref sum); dest.FromVector4(sum); }
/// <summary>
/// Stores a boxed value-type field value into a value-type instance addressed by a typed reference.
/// </summary>
/// <param name="typedReference">Typed reference to the value-type instance that owns the field.</param>
/// <param name="fieldOffset">Byte offset of the field, measured from <c>typedReference.Value</c>.</param>
/// <param name="fieldValue">Boxed value handed to <c>RuntimeImports.RhUnbox</c> as the source.</param>
/// <param name="fieldTypeHandle">Type handle of the field; converted with <c>ToEETypePtr()</c> for the runtime call.</param>
public static void StoreValueTypeFieldValueIntoValueType(TypedReference typedReference, int fieldOffset, object fieldValue, RuntimeTypeHandle fieldTypeHandle)
{
    // Debug-only sanity check: the typed reference must point at a value type.
    Debug.Assert(TypedReference.TargetTypeToken(typedReference).ToEETypePtr().IsValueType);

    // First byte of the field inside the target instance.
    ref byte fieldStart = ref Unsafe.Add<byte>(ref typedReference.Value, fieldOffset);
    var fieldType = fieldTypeHandle.ToEETypePtr();

    RuntimeImports.RhUnbox(fieldValue, ref fieldStart, fieldType);
}
// Implementation notes:
// - The tile size per axis is ceil(dimension / Tiles). CdfTileData.CalculateLookupTables is run
//   over the whole source before any pixel is rewritten.
// - Work is split into (tileCount - 1) horizontal bands. Band k starts at
//   y = halfTileHeight + k * tileHeight and is paired with lookup row cdfY = k. The bands are
//   processed with Parallel.For, limited by Configuration.MaxDegreeOfParallelism.
// - Inside a band, (tileCount - 1) cells are visited starting at x = halfTileWidth and advancing
//   by tileWidth; each cell's extent is clamped to the image size (yEnd / xEnd).
// - Every visited pixel is replaced by (l, l, l, original W), where l is the value returned by
//   InterpolateBetweenFourTiles for that pixel and its position (tileX, tileY) inside the cell.
// - Pixels are addressed as sourceBase + dy * sourceWidth + dx through Unsafe.Add.
//   NOTE(review): this assumes the pixel rows are stored contiguously with a stride of exactly
//   sourceWidth elements — confirm against the frame's buffer layout.
/// <inheritdoc/> protected override void OnFrameApply(ImageFrame <TPixel> source) { int sourceWidth = source.Width; int sourceHeight = source.Height; int tileWidth = (int)MathF.Ceiling(sourceWidth / (float)this.Tiles); int tileHeight = (int)MathF.Ceiling(sourceHeight / (float)this.Tiles); int tileCount = this.Tiles; int halfTileWidth = tileWidth / 2; int halfTileHeight = tileHeight / 2; int luminanceLevels = this.LuminanceLevels; // The image is split up into tiles. For each tile the cumulative distribution function will be calculated. using (var cdfData = new CdfTileData(this.Configuration, sourceWidth, sourceHeight, this.Tiles, this.Tiles, tileWidth, tileHeight, luminanceLevels)) { cdfData.CalculateLookupTables(source, this); var tileYStartPositions = new List <(int y, int cdfY)>(); int cdfY = 0; int yStart = halfTileHeight; for (int tile = 0; tile < tileCount - 1; tile++) { tileYStartPositions.Add((yStart, cdfY)); cdfY++; yStart += tileHeight; } Parallel.For( 0, tileYStartPositions.Count, new ParallelOptions { MaxDegreeOfParallelism = this.Configuration.MaxDegreeOfParallelism }, index => { int y = tileYStartPositions[index].y; int cdfYY = tileYStartPositions[index].cdfY; // It's unfortunate that we have to do this per iteration. 
ref TPixel sourceBase = ref source.GetPixelReference(0, 0); int cdfX = 0; int x = halfTileWidth; for (int tile = 0; tile < tileCount - 1; tile++) { int tileY = 0; int yEnd = Math.Min(y + tileHeight, sourceHeight); int xEnd = Math.Min(x + tileWidth, sourceWidth); for (int dy = y; dy < yEnd; dy++) { int dyOffSet = dy * sourceWidth; int tileX = 0; for (int dx = x; dx < xEnd; dx++) { ref TPixel pixel = ref Unsafe.Add(ref sourceBase, dyOffSet + dx); float luminanceEqualized = InterpolateBetweenFourTiles( pixel, cdfData, tileCount, tileCount, tileX, tileY, cdfX, cdfYY, tileWidth, tileHeight, luminanceLevels); pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; } tileY++; } cdfX++; x += tileWidth; } });
/// <summary>
/// Writes an object reference into a reference-type field of a value-type instance addressed by a typed reference.
/// </summary>
/// <param name="typedReference">Typed reference to the value-type instance that owns the field.</param>
/// <param name="fieldOffset">Byte offset of the field, measured from <c>typedReference.Value</c>.</param>
/// <param name="fieldValue">Reference to store in the field.</param>
public static void StoreReferenceTypeFieldValueIntoValueType(TypedReference typedReference, int fieldOffset, object fieldValue)
{
    // Debug-only sanity check: the typed reference must point at a value type.
    Debug.Assert(TypedReference.TargetTypeToken(typedReference).ToEETypePtr().IsValueType);

    // Locate the field's first byte, then view that location as an object slot.
    ref byte fieldStart = ref Unsafe.Add<byte>(ref typedReference.Value, fieldOffset);
    ref object fieldSlot = ref Unsafe.As<byte, object>(ref fieldStart);

    fieldSlot = fieldValue;
}
// Implementation notes:
// - Input validation: a null matrix throws ArgumentNullException; more rows than columns throws ArgumentException.
// - Costs are copied into float storage. Negative infinity becomes float.MinValue; positive infinity
//   and NaN become float.MaxValue. Each row then has its minimum subtracted from every entry.
// - Initial marking: scanning rows then columns, a cell whose reduced cost is zero gets mask 1 if
//   neither its row nor its column has been used yet; the covered flags are cleared afterwards.
// - The solver is a state machine: 'step' selects RunStep1..RunStep4 and the loop ends when a step
//   returns -1. Any other step number throws.
// - Result: agentsTasks[row] is the first column in that row whose mask equals 1.
// - RunStep1 marks a column as covered when any mask in that column equals 1 and counts such columns.
//   When AVX2 is available and there are at least Vector256<byte>.Count rows, the column is scanned
//   Vector256<byte>.Count masks at a time (the indexing column * rows relies on the column-major
//   backing store), followed by a scalar loop over the remaining rows.
/// <summary> /// Finds the assignments with the lowest global assignment cost. /// See https://en.wikipedia.org/wiki/Hungarian_algorithm for an explanation of the algorithm used. /// </summary> /// <param name="assignmentCostsDoubles">The assignment costs.</param> /// <returns>System.Int32[].</returns> /// <exception cref="ArgumentNullException">assignmentCosts</exception> /// <exception cref="ArgumentException">This algorithm implementation does not support cost matrices with fewer columns than rows - assignmentCosts</exception> public static int[] FindAssignments([NotNull] Matrix <double> assignmentCostsDoubles) { if (assignmentCostsDoubles == null) { throw new ArgumentNullException(nameof(assignmentCostsDoubles)); } var rows = assignmentCostsDoubles.RowCount; var columns = assignmentCostsDoubles.ColumnCount; if (rows > columns) { throw new ArgumentException("This algorithm implementation does not support cost matrices with fewer columns than rows", nameof(assignmentCostsDoubles)); } var costs = new Storage <float>(rows, columns); for (var row = 0; row < rows; row++) { var min = float.MaxValue; for (var column = 0; column < columns; column++) { var cost = (float)assignmentCostsDoubles[row, column]; if (float.IsNegativeInfinity(cost)) { costs[row, column] = cost = float.MinValue; } else if (float.IsPositiveInfinity(cost) || float.IsNaN(cost)) { costs[row, column] = cost = float.MaxValue; } else { costs[row, column] = cost; } min = Math.Min(min, cost); } if (float.IsInfinity(min)) { min = float.MinValue; } for (var column = 0; column < columns; column++) { costs[row, column] -= min; } } var masks = new Storage <byte>(rows, columns); var rowsCovered = new bool[rows]; var colsCovered = new bool[columns]; for (var row = 0; row < rows; row++) { for (var column = 0; column < columns; column++) { if (Math.Abs(costs[row, column]) <= 0 && !rowsCovered[row] && !colsCovered[column]) { masks[row, column] = 1; rowsCovered[row] = colsCovered[column] = true; } } } 
ClearCoveredFlags(); var path = new Location[columns * rows]; var pathStart = default(Location); var step = 1; while (step != -1) { switch (step) { case 1: step = RunStep1(); break; case 2: step = RunStep2(); break; case 3: step = RunStep3(); break; case 4: step = RunStep4(); break; default: throw new Exception($"Unknown step number {step}"); } } var agentsTasks = new int[rows]; for (var row = 0; row < rows; row++) { for (var column = 0; column < columns; column++) { if (masks[row, column] == 1) { agentsTasks[row] = column; break; } } } return(agentsTasks); int RunStep1() { // The covered flags are reset before this step var coveredColsCount = 0; if (Avx2.IsSupported && rows >= Vector256 <byte> .Count) { var maxVectorOffset = rows - rows % Vector256 <byte> .Count; if (maxVectorOffset > 0) { var onesVector = Vector256.Create((byte)1); for (var column = 0; column < columns; column++) { var currentColCovered = false; ref var rowRef = ref masks.ColumnMajorBackingStore[column * rows]; for (var row = 0; row < maxVectorOffset; row += Vector256 <byte> .Count) { var masksRowVector = Unsafe.ReadUnaligned <Vector256 <byte> >(ref Unsafe.Add(ref rowRef, row)); var comparison = Avx2.CompareEqual(masksRowVector, onesVector); var comparisonMask = Avx2.MoveMask(comparison); if (comparisonMask != 0) { colsCovered[column] = true; currentColCovered = true; coveredColsCount++; break; } } if (!currentColCovered && maxVectorOffset < rows) { for (var row = maxVectorOffset; row < rows; row++) { if (Unsafe.Add(ref rowRef, row) == 1) { colsCovered[column] = true; coveredColsCount++; break; } } } } } }
/// <summary>
/// Reads the object reference held in a reference-type field of a value-type instance addressed by a typed reference.
/// </summary>
/// <param name="typedReference">Typed reference to the value-type instance that owns the field.</param>
/// <param name="fieldOffset">Byte offset of the field, measured from <c>typedReference.Value</c>.</param>
/// <returns>The reference currently stored at that offset.</returns>
public static object LoadReferenceTypeFieldValueFromValueType(TypedReference typedReference, int fieldOffset)
{
    // Debug-only sanity check: the typed reference must point at a value type.
    Debug.Assert(TypedReference.TargetTypeToken(typedReference).ToEETypePtr().IsValueType);

    // Locate the field's first byte and reinterpret that location as an object slot.
    ref byte fieldStart = ref Unsafe.Add<byte>(ref typedReference.Value, fieldOffset);
    return Unsafe.As<byte, object>(ref fieldStart);
}