// Escapes a string that is meant to be URI data (a piece of a URI), not an entire URI.
// Every character that is not an unreserved character is escaped, including percent signs,
// because existing escape sequences are not checked for (checkExistingEscaped is false).
public static string EscapeDataString(string stringToEscape)
{
    return UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.UnreservedTable);
}
//
// Combines an absolute base Uri with a (possibly null) relative reference.
//
// Outcomes:
//   - relativeUri is itself absolute: relativeUri is returned unchanged.
//   - the combined string is the very same string instance as baseUri._string: baseUri is returned.
//   - otherwise: null is returned and newUriString carries the resulting URI text.
//
// userEscaped is overwritten with relativeUri.UserEscaped when a non-null, non-absolute
// relative Uri is supplied.
//
internal static Uri? ResolveHelper(Uri baseUri, Uri? relativeUri, ref string? newUriString, ref bool userEscaped)
{
    Debug.Assert(!baseUri.IsNotAbsoluteUri && !baseUri.UserDrivenParsing, "Uri::ResolveHelper()|baseUri is not Absolute or is controlled by User Parser.");

    string reference;
    if (relativeUri is null)
    {
        reference = string.Empty;
    }
    else if (relativeUri.IsAbsoluteUri)
    {
        return relativeUri;
    }
    else
    {
        reference = relativeUri.OriginalString;
        userEscaped = relativeUri.UserEscaped;
    }

    // From here on the reference is known to be a relative one.
    // Trim only when the first or last character is linear white space.
    int referenceLength = reference.Length;
    if (referenceLength > 0 &&
        (UriHelper.IsLWS(reference[0]) || UriHelper.IsLWS(reference[referenceLength - 1])))
    {
        reference = reference.Trim(UriHelper.s_WSchars);
    }

    // Nothing to combine: the result is the base itself.
    if (reference.Length == 0)
    {
        UriFormat format = baseUri.UserEscaped ? UriFormat.UriEscaped : UriFormat.SafeUnescaped;
        newUriString = baseUri.GetParts(UriComponents.AbsoluteUri, format);
        return null;
    }

    char lead = reference[0];

    // A bare fragment replaces only the fragment of the base.
    if (lead == '#' && !baseUri.IsImplicitFile && baseUri.Syntax!.InFact(UriSyntaxFlags.MayHaveFragment))
    {
        newUriString = baseUri.GetParts(UriComponents.AbsoluteUri & ~UriComponents.Fragment, UriFormat.UriEscaped) + reference;
        return null;
    }

    // A bare query replaces the query and the fragment of the base.
    if (lead == '?' && !baseUri.IsImplicitFile && baseUri.Syntax!.InFact(UriSyntaxFlags.MayHaveQuery))
    {
        newUriString = baseUri.GetParts(UriComponents.AbsoluteUri & ~UriComponents.Query & ~UriComponents.Fragment, UriFormat.UriEscaped) + reference;
        return null;
    }

    // Special case: the reference starts with a DOS drive specification such as "C:\" or "C|/".
    bool startsWithDrive =
        reference.Length >= 3 &&
        (reference[1] == ':' || reference[1] == '|') &&
        UriHelper.IsAsciiLetter(reference[0]) &&
        (reference[2] == '\\' || reference[2] == '/');

    if (startsWithDrive)
    {
        if (baseUri.IsImplicitFile)
        {
            // file:/// could be prepended, but the result is kept as an *implicit* file Uri.
            newUriString = reference;
            return null;
        }

        if (baseUri.Syntax!.InFact(UriSyntaxFlags.AllowDOSPath))
        {
            // The scheme stays; only the path is replaced.
            string separator;
            if (baseUri.InFact(Flags.AuthorityFound))
            {
                separator = baseUri.Syntax.InFact(UriSyntaxFlags.PathIsRooted) ? ":///" : "://";
            }
            else
            {
                separator = baseUri.Syntax.InFact(UriSyntaxFlags.PathIsRooted) ? ":/" : ":";
            }

            newUriString = baseUri.Scheme + separator + reference;
            return null;
        }

        // Otherwise fall through: "http://host/path/" + "C:\x" is combined like any other relative path.
    }

    GetCombinedString(baseUri, reference, userEscaped, ref newUriString);

    return ReferenceEquals(newUriString, baseUri._string) ? baseUri : null;
}
//
// Need to check for invalid utf sequences that may not have given any chars.
// We got the unescaped chars, we then re-encode them and match off the bytes
// to get the invalid sequence bytes that we just copy off
//
// Parameters:
//   pDest / dest    - destination buffer, written through pDest; dest is passed to EscapeAsciiChar
//                     and used in the capacity assertions (presumably pDest points at dest - confirm at caller)
//   destOffset      - next write position in the destination, advanced as output is produced
//   unescapedChars  - chars decoded from 'bytes'; only the first charCount entries are read
//   bytes/byteCount - the raw bytes that were percent-decoded from the input
//   isQuery         - forwarded to IriHelper.CheckIriUnicodeRange
//   iriParsing      - when true, decoded chars outside the IRI range are written back in escaped form
//
internal static unsafe void MatchUTF8Sequence(char *pDest, char[] dest, ref int destOffset, Span <char> unescapedChars, int charCount, byte[] bytes, int byteCount, bool isQuery, bool iriParsing)
{
    // Scratch space for the UTF-8 encoding of one char or surrogate pair (at most 4 bytes); allocated once, outside the loop.
    Span <byte> maxUtf8EncodedSpan = stackalloc byte[4];
    // Index of the next unconsumed entry in 'bytes'.
    int count = 0;
    fixed(char *unescapedCharsPtr = unescapedChars)
    {
        for (int j = 0; j < charCount; ++j)
        {
            bool isHighSurr = char.IsHighSurrogate(unescapedCharsPtr[j]);
            // Re-encode the current char (two chars when it is a high surrogate) to know which bytes to look for.
            Span <byte> encodedBytes = maxUtf8EncodedSpan;
            int bytesWritten = Encoding.UTF8.GetBytes(unescapedChars.Slice(j, isHighSurr ? 2 : 1), encodedBytes);
            encodedBytes = encodedBytes.Slice(0, bytesWritten);
            // we have to keep unicode chars outside Iri range escaped
            bool inIriRange = false;
            if (iriParsing)
            {
                if (!isHighSurr)
                {
                    inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
                }
                else
                {
                    // surrPair is required by the callee's signature; its value is not used here.
                    bool surrPair = false;
                    inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], ref surrPair, isQuery);
                }
            }
            // Loop until the encoded form of this char has been located in 'bytes'.
            while (true)
            {
                // Escape any invalid bytes that were before this character
                // NOTE(review): no bound check on 'count' here; relies on the encoded bytes being present in 'bytes'.
                while (bytes[count] != encodedBytes[0])
                {
                    Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                    EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                }
                // check if all bytes match
                bool allBytesMatch = true;
                // k is declared outside the for so the mismatch position is still known afterwards.
                int k = 0;
                for (; k < encodedBytes.Length; ++k)
                {
                    if (bytes[count + k] != encodedBytes[k])
                    {
                        allBytesMatch = false;
                        break;
                    }
                }
                if (allBytesMatch)
                {
                    count += encodedBytes.Length;
                    if (iriParsing)
                    {
                        if (!inIriRange)
                        {
                            // need to keep chars not allowed as escaped
                            for (int l = 0; l < encodedBytes.Length; ++l)
                            {
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                EscapeAsciiChar((char)encodedBytes[l], dest, ref destOffset);
                            }
                        }
                        // A bidi control character is dropped (nothing is written) when
                        // UriParser.DontKeepUnicodeBidiFormattingCharacters is true.
                        else if (!UriHelper.IsBidiControlCharacter(unescapedCharsPtr[j]) || !UriParser.DontKeepUnicodeBidiFormattingCharacters)
                        {
                            //copy chars
                            Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                            pDest[destOffset++] = unescapedCharsPtr[j];
                            if (isHighSurr)
                            {
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = unescapedCharsPtr[j + 1];
                            }
                        }
                    }
                    else
                    {
                        //copy chars
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = unescapedCharsPtr[j];
                        if (isHighSurr)
                        {
                            Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                            pDest[destOffset++] = unescapedCharsPtr[j + 1];
                        }
                    }
                    break; // break out of while (true) since we've matched this char bytes
                }
                else
                {
                    // copy bytes till place where bytes don't match
                    // (the k bytes that did match are emitted escaped and consumed, then the search restarts)
                    for (int l = 0; l < k; ++l)
                    {
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                    }
                }
            }
            // The low surrogate was handled together with the high one; skip it.
            if (isHighSurr)
            {
                j++;
            }
        }
    }
    // Include any trailing invalid sequences
    while (count < byteCount)
    {
        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
        EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
    }
}
//
// Builds the Unicode form of an (already validated) host name, label by label.
//
// For each label:
//   a) ascii, no ace prefix            -> lowercased
//   b) ascii, ace prefix, invalid idn  -> lowercased
//   c) ascii, ace prefix, valid idn    -> replaced by its unicode equivalent
//   d) non-ascii                       -> round-tripped through IdnMapping (GetAscii, then GetUnicode)
// Every label separator that is found is written to the result as '.'.
//
// allAscii is set to false as soon as a character above 0x7F is seen; atLeastOneValidIdn is set
// to true when case (c) occurs. Returns null when end <= start.
// Throws UriFormatException when IdnMapping.GetAscii rejects a non-ascii label.
//
internal unsafe static string UnicodeEquivalent(char *hostname, int start, int end, ref bool allAscii, ref bool atLeastOneValidIdn)
{
    IdnMapping mapping = new IdnMapping();

    // hostname already validated
    allAscii = true;
    atLeastOneValidIdn = false;

    if (end <= start)
    {
        return null;
    }

    string host = UriHelper.StripBidiControlCharacter(hostname, start, (end - start));
    int hostLength = host.Length;

    string result = null;
    int labelStart = 0;

    do
    {
        // The ace prefix can only sit at the very beginning of a label.
        bool startsWithAce =
            (labelStart + 3 < hostLength) &&
            (host[labelStart] == 'x') &&
            IsIdnAce(host, labelStart);

        // Scan to the next label separator or to the end of the host.
        bool labelIsAscii = true;
        bool separatorFound = false;
        int labelEnd = labelStart;
        for (; labelEnd < hostLength; ++labelEnd)
        {
            char current = host[labelEnd];

            if (labelIsAscii && (current > '\x7F'))
            {
                labelIsAscii = false;
                allAscii = false;
            }

            if ((current == '.') ||
                (current == '\u3002') ||    // IDEOGRAPHIC FULL STOP
                (current == '\uFF0E') ||    // FULLWIDTH FULL STOP
                (current == '\uFF61'))      // HALFWIDTH IDEOGRAPHIC FULL STOP
            {
                separatorFound = true;
                break;
            }
        }

        string label = host.Substring(labelStart, labelEnd - labelStart);
        string converted;

        if (labelIsAscii)
        {
            bool aceDecoded = false;
            converted = null;

            if (startsWithAce)
            {
                try
                {
                    converted = mapping.GetUnicode(label);
                    aceDecoded = true;
                    atLeastOneValidIdn = true;
                }
                catch (ArgumentException)
                {
                    // not valid ace so treat it as a normal ascii label
                }
            }

            if (!aceDecoded)
            {
                // plain labels and invalid aces are just lowercased
                converted = label.ToLowerInvariant();
            }
        }
        else
        {
            string asciiForm;
            try
            {
                asciiForm = mapping.GetAscii(label);
            }
            catch (ArgumentException)
            {
                throw new UriFormatException(SR.net_uri_BadUnicodeHostForIdn);
            }

            converted = mapping.GetUnicode(asciiForm);
        }

        result += converted;
        if (separatorFound)
        {
            result += ".";
        }

        labelStart = separatorFound ? labelEnd + 1 : labelEnd;
    }
    while (labelStart < hostLength);

    return result;
}
public static unsafe int UnescapePercentEncodedUTF8Sequence(char *input, int length, ref ValueStringBuilder dest, bool isQuery, bool iriParsing) { // The following assertions rely on the input not mutating mid-operation, as is the case currently since callers are working with strings // If we start accepting input such as spans, this method must be audited to ensure no buffer overruns/infinite loops could occur // As an optimization, this method should only be called after the first character is known to be a part of a non-ascii UTF8 sequence Debug.Assert(length >= 3); Debug.Assert(input[0] == '%'); Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) != Uri.c_DummyChar); Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) >= 128); uint fourByteBuffer = 0; int bytesLeftInBuffer = 0; int totalCharsConsumed = 0; int charsToCopy = 0; int bytesConsumed = 0; RefillBuffer: int i = totalCharsConsumed + (bytesLeftInBuffer * 3); ReadByteFromInput: if ((uint)(length - i) <= 2 || input[i] != '%') { goto NoMoreOrInvalidInput; } uint value = input[i + 1]; if ((uint)((value - 'A') & ~0x20) <= ('F' - 'A')) { value = (value | 0x20) - 'a' + 10; } else if ((value - '8') <= ('9' - '8')) { value -= '0'; } else { goto NoMoreOrInvalidInput; // First character wasn't hex or was <= 7F (Ascii) } uint second = (uint)input[i + 2] - '0'; if (second <= 9) { // second is already [0, 9] } else if ((uint)((second - ('A' - '0')) & ~0x20) <= ('F' - 'A')) { second = ((second + '0') | 0x20) - 'a' + 10; } else { goto NoMoreOrInvalidInput; // Second character wasn't Hex } value = (value << 4) | second; Debug.Assert(value >= 128); // Rotate the buffer and overwrite the last byte if (BitConverter.IsLittleEndian) { fourByteBuffer = (fourByteBuffer >> 8) | (value << 24); } else { fourByteBuffer = (fourByteBuffer << 8) | value; } if (++bytesLeftInBuffer != 4) { i += 3; goto ReadByteFromInput; } DecodeRune: Debug.Assert(totalCharsConsumed % 3 == 0); Debug.Assert(bytesLeftInBuffer == 2 || 
bytesLeftInBuffer == 3 || bytesLeftInBuffer == 4); Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00000080 : 0x80000000)) != 0); Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00008000 : 0x00800000)) != 0); Debug.Assert(bytesLeftInBuffer < 3 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00800000 : 0x00008000)) != 0); Debug.Assert(bytesLeftInBuffer < 4 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x80000000 : 0x00000080)) != 0); uint temp = fourByteBuffer; // make a copy so that the *copy* (not the original) is marked address-taken if (Rune.DecodeFromUtf8(new ReadOnlySpan <byte>(&temp, bytesLeftInBuffer), out Rune rune, out bytesConsumed) == OperationStatus.Done) { Debug.Assert(bytesConsumed >= 2, $"Rune.DecodeFromUtf8 consumed {bytesConsumed} bytes, likely indicating input was modified concurrently during UnescapePercentEncodedUTF8Sequence's execution"); if (!iriParsing || IriHelper.CheckIriUnicodeRange((uint)rune.Value, isQuery)) { if (charsToCopy != 0) { dest.Append(input + totalCharsConsumed - charsToCopy, charsToCopy); charsToCopy = 0; } dest.Append(rune); goto AfterDecodeRune; } }
//
// IRI normalization for strings containing characters that are not allowed or
// escaped characters that should be unescaped in the context of the specified Uri component.
//
// Parameters:
//   pInput    - pointer to the characters to normalize
//   start/end - the range [start, end) of pInput that is processed
//   component - selects the reserved set via CheckIsReserved; UriComponents.Query additionally
//               switches CheckIriUnicodeRange to its query mode
//
// Returns the normalized text.
//
internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
{
    int size = end - start;
    ValueStringBuilder dest = new ValueStringBuilder(size);
    byte[]? bytes = null;

    // Scratch space for the UTF-8 form of one code point. It is allocated once here because a
    // stackalloc inside the loop would take new stack space on every escaped character.
    Span<byte> utf8Scratch = stackalloc byte[4];

    int next = start;
    char ch;

    for (; next < end; ++next)
    {
        if ((ch = pInput[next]) == '%')
        {
            if (next + 2 < end)
            {
                ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                // Do not unescape a reserved char
                if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                {
                    // keep as is
                    dest.Append(pInput[next++]);
                    dest.Append(pInput[next++]);
                    dest.Append(pInput[next]);
                    continue;
                }
                else if (ch <= '\x7F')
                {
                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                    // ASCII: emit the decoded char and skip the two hex digits
                    dest.Append(ch);
                    next += 2;
                    continue;
                }
                else
                {
                    // possibly utf8 encoded sequence of unicode
                    // check if safe to unescape according to Iri rules
                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                    int startSeq = next;
                    int byteCount = 1;

                    // lazy initialization of max size, will reuse the array for next sequences
                    if ((object?)bytes == null)
                    {
                        bytes = new byte[end - next];
                    }

                    bytes[0] = (byte)ch;
                    next += 3;

                    // Collect the run of consecutive %XX escapes whose value is >= 0x80.
                    while (next < end)
                    {
                        // Check on exit criterion
                        if ((ch = pInput[next]) != '%' || next + 2 >= end)
                        {
                            break;
                        }

                        // already made sure we have 3 characters in str
                        ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                        if (ch == Uri.c_DummyChar)
                        {
                            // invalid hex sequence
                            break;
                        }
                        else if (ch < '\x80')
                        {
                            // character is not part of a UTF-8 sequence
                            break;
                        }
                        else
                        {
                            // a UTF-8 sequence
                            bytes[byteCount++] = (byte)ch;
                            next += 3;
                        }

                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                    }

                    next--; // for loop will increment

                    // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                    Encoding noFallbackCharUTF8 = Encoding.GetEncoding(
                        Encoding.UTF8.CodePage,
                        new EncoderReplacementFallback(""),
                        new DecoderReplacementFallback(""));

                    char[] unescapedChars = new char[bytes.Length];
                    int charCount = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                    if (charCount != 0)
                    {
                        // If invalid sequences were present in the original escaped string, we need to
                        // copy the escaped versions of those sequences.
                        // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                        // rules.
                        UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                            byteCount, component == UriComponents.Query, true);
                    }
                    else
                    {
                        // nothing decoded: copy escaped sequence as is
                        for (int i = startSeq; i <= next; ++i)
                        {
                            dest.Append(pInput[i]);
                        }
                    }
                }
            }
            else
            {
                // '%' too close to the end to be an escape: copy it
                dest.Append(pInput[next]);
            }
        }
        else if (ch > '\x7f')
        {
            // unicode
            bool escape;
            bool surrogatePair = false;
            char ch2 = '\0';

            if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
            {
                ch2 = pInput[next + 1];
                escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
            }
            else
            {
                escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
            }

            if (escape)
            {
                Rune rune;
                if (surrogatePair)
                {
                    rune = new Rune(ch, ch2);
                }
                else if (!Rune.TryCreate(ch, out rune))
                {
                    // lone surrogate: encode the replacement character instead
                    rune = Rune.ReplacementChar;
                }

                int bytesWritten = rune.EncodeToUtf8(utf8Scratch);
                Span<byte> encodedBytes = utf8Scratch.Slice(0, bytesWritten);

                foreach (byte b in encodedBytes)
                {
                    UriHelper.EscapeAsciiChar(b, ref dest);
                }
            }
            else
            {
                dest.Append(ch);
                if (surrogatePair)
                {
                    dest.Append(ch2);
                }
            }

            // Both halves of the pair were handled above; skip the low surrogate.
            if (surrogatePair)
            {
                next++;
            }
        }
        else
        {
            // just copy the character
            dest.Append(pInput[next]);
        }
    }

    string result = dest.ToString();
    return result;
}
//
// Tells whether 'ch' is a reserved character (RFC 3987) for the given component.
//
//   - component is not exactly one of Scheme/UserInfo/Host/Port/Path/Query/Fragment:
//     only the value 0 can yield true, and then the answer is UriHelper.IsGenDelim(ch).
//   - strict RFC 3986 sets enabled: membership in UriHelper.RFC3986ReservedMarks.
//   - otherwise: the legacy per-component sets below, kept for app-compat
//     (a broken mix of RFCs 2396 and 3986).
//
internal static bool CheckIsReserved(char ch, UriComponents component)
{
    bool isSingleKnownComponent =
        (component == UriComponents.Scheme) ||
        (component == UriComponents.UserInfo) ||
        (component == UriComponents.Host) ||
        (component == UriComponents.Port) ||
        (component == UriComponents.Path) ||
        (component == UriComponents.Query) ||
        (component == UriComponents.Fragment);

    if (!isSingleKnownComponent)
    {
        return (component == (UriComponents)0) && UriHelper.IsGenDelim(ch);
    }

    if (!UriParser.DontEnableStrictRFC3986ReservedCharacterSets)
    {
        return UriHelper.RFC3986ReservedMarks.IndexOf(ch) >= 0;
    }

    // Legacy behavior: each component has its own reserved set.
    switch (component)
    {
        case UriComponents.UserInfo:
            return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

        case UriComponents.Host:
            return ch == ':' || ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

        case UriComponents.Path:
            return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']';

        case UriComponents.Query:
        case UriComponents.Fragment:
            return ch == '#' || ch == '[' || ch == ']';

        default:
            // Scheme and Port have no legacy reserved set.
            return false;
    }
}
//
// IRI normalization for strings containing characters that are not allowed or
// escaped characters that should be unescaped in the context of the specified Uri component.
//
// Parameters:
//   pInput    - pointer to the characters to normalize
//   start/end - the range [start, end) of pInput that is processed
//   component - selects the reserved set via CheckIsReserved; UriComponents.Query additionally
//               switches CheckIriUnicodeRange to its query mode
//
// Returns the normalized text.
//
// The output is written through a pointer into a pinned char[]; the pin is released in a
// finally block so that it is not leaked when anything in the body throws.
//
internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
{
    const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB.
    const int bufferCapacityIncrease = 30 * percentEncodingLen;

    char[] dest = new char[end - start];
    byte[] bytes = null;

    // Pin the array to do pointer accesses
    GCHandle destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);
    try
    {
        char* pDest = (char*)destHandle.AddrOfPinnedObject();

        // Extra room (in chars) known to be available for escaping beyond a 1:1 copy of the input.
        int bufferRemaining = 0;

        int next = start;
        int destOffset = 0;
        char ch;
        bool escape = false;
        bool surrogatePair = false;

        for (; next < end; ++next)
        {
            escape = false;
            surrogatePair = false;

            if ((ch = pInput[next]) == '%')
            {
                if (next + 2 < end)
                {
                    ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                    // Do not unescape a reserved char
                    if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                    {
                        // keep as is
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next++];
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next++];
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next];
                        continue;
                    }
                    else if (ch <= '\x7F')
                    {
                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        // ASCII: emit the decoded char and skip the two hex digits
                        pDest[destOffset++] = ch;
                        next += 2;
                        continue;
                    }
                    else
                    {
                        // possibly utf8 encoded sequence of unicode
                        // check if safe to unescape according to Iri rules
                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                        int startSeq = next;
                        int byteCount = 1;

                        // lazy initialization of max size, will reuse the array for next sequences
                        if ((object)bytes == null)
                        {
                            bytes = new byte[end - next];
                        }

                        bytes[0] = (byte)ch;
                        next += 3;

                        // Collect the run of consecutive %XX escapes whose value is >= 0x80.
                        while (next < end)
                        {
                            // Check on exit criterion
                            if ((ch = pInput[next]) != '%' || next + 2 >= end)
                            {
                                break;
                            }

                            // already made sure we have 3 characters in str
                            ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                            if (ch == Uri.c_DummyChar)
                            {
                                // invalid hex sequence
                                break;
                            }
                            else if (ch < '\x80')
                            {
                                // character is not part of a UTF-8 sequence
                                break;
                            }
                            else
                            {
                                // a UTF-8 sequence
                                bytes[byteCount++] = (byte)ch;
                                next += 3;
                            }

                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                        }

                        next--; // for loop will increment

                        // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                        Encoding noFallbackCharUTF8 = Encoding.GetEncoding(
                            Encoding.UTF8.CodePage,
                            new EncoderReplacementFallback(""),
                            new DecoderReplacementFallback(""));

                        char[] unescapedChars = new char[bytes.Length];
                        int charCount = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                        if (charCount != 0)
                        {
                            // If invalid sequences were present in the original escaped string, we need to
                            // copy the escaped versions of those sequences.
                            // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                            // rules.
                            UriHelper.MatchUTF8Sequence(pDest, dest, ref destOffset, unescapedChars, charCount, bytes,
                                byteCount, component == UriComponents.Query, true);
                        }
                        else
                        {
                            // nothing decoded: copy escaped sequence as is
                            for (int i = startSeq; i <= next; ++i)
                            {
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[i];
                            }
                        }
                    }
                }
                else
                {
                    // '%' too close to the end to be an escape: copy it
                    Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                    pDest[destOffset++] = pInput[next];
                }
            }
            else if (ch > '\x7f')
            {
                // unicode
                char ch2;

                if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                {
                    ch2 = pInput[next + 1];
                    escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
                    if (!escape)
                    {
                        // copy the two chars
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next++];
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next];
                    }
                }
                else
                {
                    if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
                    {
                        // A bidi control character is dropped when
                        // UriParser.DontKeepUnicodeBidiFormattingCharacters is true.
                        if (!UriHelper.IsBidiControlCharacter(ch) || !UriParser.DontKeepUnicodeBidiFormattingCharacters)
                        {
                            // copy it
                            Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                            pDest[destOffset++] = pInput[next];
                        }
                    }
                    else
                    {
                        // escape it
                        escape = true;
                    }
                }
            }
            else
            {
                // just copy the character
                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                pDest[destOffset++] = pInput[next];
            }

            if (escape)
            {
                const int MaxNumberOfBytesEncoded = 4;

                if (bufferRemaining < MaxNumberOfBytesEncoded * percentEncodingLen)
                {
                    int newBufferLength = 0;

                    checked
                    {
                        // may need more memory since we didn't anticipate escaping
                        newBufferLength = dest.Length + bufferCapacityIncrease;
                        bufferRemaining += bufferCapacityIncrease;
                    }

                    char[] newDest = new char[newBufferLength];

                    fixed (char* pNewDest = newDest)
                    {
                        Buffer.MemoryCopy((byte*)pDest, (byte*)pNewDest,
                            newBufferLength * sizeof(char),
                            destOffset * sizeof(char));
                    }

                    if (destHandle.IsAllocated)
                    {
                        destHandle.Free();
                    }

                    dest = newDest;

                    // re-pin new dest[] array
                    destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);
                    pDest = (char*)destHandle.AddrOfPinnedObject();
                }

                byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
                fixed (byte* pEncodedBytes = &encodedBytes[0])
                {
                    int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);
                    Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

                    bufferRemaining -= encodedBytesCount * percentEncodingLen;

                    for (int count = 0; count < encodedBytesCount; ++count)
                    {
                        UriHelper.EscapeAsciiChar((char)encodedBytes[count], dest, ref destOffset);
                    }
                }

                // When a pair was escaped, both of its chars were just encoded. Skip the low
                // surrogate so the next iteration does not process it a second time as a lone char.
                if (surrogatePair)
                {
                    next++;
                }
            }
        }

        Debug.Assert(destOffset <= dest.Length, "Destination length met or exceeded destination offset.");
        return new string(dest, 0, destOffset);
    }
    finally
    {
        // Always release the pin, including on exceptional exit.
        if (destHandle.IsAllocated)
        {
            destHandle.Free();
        }
    }
}
//
// IRI normalization for strings containing characters that are not allowed or
// escaped characters that should be unescaped in the context of the specified Uri component.
//
// Parameters:
//   pInput    - pointer to the characters to normalize
//   start/end - the range [start, end) of pInput that is processed
//   component - selects the reserved set via CheckIsReserved; UriComponents.Query additionally
//               switches CheckIriUnicodeRange to its query mode
//
// Returns the normalized text.
//
internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
{
    int size = end - start;
    ValueStringBuilder dest = new ValueStringBuilder(size);
    byte[]? bytes = null;

    int next = start;
    char ch;
    bool escape = false;
    bool surrogatePair = false;

    for (; next < end; ++next)
    {
        escape = false;
        surrogatePair = false;

        if ((ch = pInput[next]) == '%')
        {
            if (next + 2 < end)
            {
                ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                // Do not unescape a reserved char
                if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                {
                    // keep as is
                    dest.Append(pInput[next++]);
                    dest.Append(pInput[next++]);
                    dest.Append(pInput[next]);
                    continue;
                }
                else if (ch <= '\x7F')
                {
                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                    // ASCII: emit the decoded char and skip the two hex digits
                    dest.Append(ch);
                    next += 2;
                    continue;
                }
                else
                {
                    // possibly utf8 encoded sequence of unicode
                    // check if safe to unescape according to Iri rules
                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                    int startSeq = next;
                    int byteCount = 1;

                    // lazy initialization of max size, will reuse the array for next sequences
                    if ((object?)bytes == null)
                    {
                        bytes = new byte[end - next];
                    }

                    bytes[0] = (byte)ch;
                    next += 3;

                    // Collect the run of consecutive %XX escapes whose value is >= 0x80.
                    while (next < end)
                    {
                        // Check on exit criterion
                        if ((ch = pInput[next]) != '%' || next + 2 >= end)
                        {
                            break;
                        }

                        // already made sure we have 3 characters in str
                        ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                        if (ch == Uri.c_DummyChar)
                        {
                            // invalid hex sequence
                            break;
                        }
                        else if (ch < '\x80')
                        {
                            // character is not part of a UTF-8 sequence
                            break;
                        }
                        else
                        {
                            // a UTF-8 sequence
                            bytes[byteCount++] = (byte)ch;
                            next += 3;
                        }

                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                    }

                    next--; // for loop will increment

                    // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                    Encoding noFallbackCharUTF8 = Encoding.GetEncoding(
                        Encoding.UTF8.CodePage,
                        new EncoderReplacementFallback(""),
                        new DecoderReplacementFallback(""));

                    char[] unescapedChars = new char[bytes.Length];
                    int charCount = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                    if (charCount != 0)
                    {
                        // If invalid sequences were present in the original escaped string, we need to
                        // copy the escaped versions of those sequences.
                        // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                        // rules.
                        UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                            byteCount, component == UriComponents.Query, true);
                    }
                    else
                    {
                        // nothing decoded: copy escaped sequence as is
                        for (int i = startSeq; i <= next; ++i)
                        {
                            dest.Append(pInput[i]);
                        }
                    }
                }
            }
            else
            {
                // '%' too close to the end to be an escape: copy it
                dest.Append(pInput[next]);
            }
        }
        else if (ch > '\x7f')
        {
            // unicode
            char ch2;

            if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
            {
                ch2 = pInput[next + 1];
                escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
                if (!escape)
                {
                    // copy the two chars
                    dest.Append(pInput[next++]);
                    dest.Append(pInput[next]);
                }
            }
            else
            {
                if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
                {
                    // copy it
                    dest.Append(pInput[next]);
                }
                else
                {
                    // escape it
                    escape = true;
                }
            }
        }
        else
        {
            // just copy the character
            dest.Append(pInput[next]);
        }

        if (escape)
        {
            const int MaxNumberOfBytesEncoded = 4;

            byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
            fixed (byte* pEncodedBytes = &encodedBytes[0])
            {
                int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);
                Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

                for (int count = 0; count < encodedBytesCount; ++count)
                {
                    UriHelper.EscapeAsciiChar((char)encodedBytes[count], ref dest);
                }
            }

            // When a pair was escaped, both of its chars were just encoded. Skip the low
            // surrogate so the next iteration does not process it a second time as a lone char.
            if (surrogatePair)
            {
                next++;
            }
        }
    }

    string result = dest.ToString();
    return result;
}
//
// Parse
//
//  Convert this IPv6 address into a sequence of 8 16-bit numbers
//
// Inputs:
//  address  - the validated IPv6 address text
//  numbers  - caller-provided buffer that receives the 16-bit groups
//  start    - index in address where parsing begins; a leading '[' at this index is skipped
//  scopeId  - set to the scope id text, including the leading '%', when one is present
//
// Assumes:
//  address has been validated and contains only hex digits in groups of
//  16-bit numbers, the characters ':' and '/', and a possible IPv4 address.
//  The address may be terminated by ']' or by the end of the string: every
//  scan below stops at whichever comes first, so an unbracketed address does
//  not index past the end of the string.
//
// Returns:
//  true if this is a loopback, false otherwise. There is no failure indication
//  as the string must be a valid one. A prefix length ("/nn"), if present, is
//  skipped and not reported.
//
unsafe internal static bool Parse(string address, ushort* numbers, int start, ref string scopeId)
{
    int number = 0;
    int index = 0;
    int compressorIndex = -1;
    bool numberIsValid = true;
    int length = address.Length;

    if (address[start] == '[')
    {
        ++start;
    }

    for (int i = start; i < length && address[i] != ']';)
    {
        switch (address[i])
        {
            case '%':
                if (numberIsValid)
                {
                    numbers[index++] = (ushort)number;
                    numberIsValid = false;
                }

                // The scope id runs up to the prefix delimiter or the end of the address.
                start = i;
                for (++i; i < length && address[i] != ']' && address[i] != '/'; ++i)
                {
                }
                scopeId = address.Substring(start, i - start);

                // ignore prefix if any
                for (; i < length && address[i] != ']'; ++i)
                {
                }
                break;

            case ':':
                numbers[index++] = (ushort)number;
                number = 0;
                ++i;
                if (i < length && address[i] == ':')
                {
                    compressorIndex = index;
                    ++i;
                }
                else if ((compressorIndex < 0) && (index < 6))
                {
                    // no point checking for IPv4 address if we don't
                    // have a compressor or we haven't seen 6 16-bit
                    // numbers yet
                    break;
                }

                // check to see if the upcoming number is really an IPv4
                // address. If it is, convert it to 2 ushort numbers.
                // Only the next 4 characters need to be inspected for a '.'.
                for (int j = i;
                     j < length &&
                     (j < i + 4) &&
                     (address[j] != ']') &&
                     (address[j] != ':') &&
                     (address[j] != '%') &&
                     (address[j] != '/');
                     ++j)
                {
                    if (address[j] == '.')
                    {
                        // we have an IPv4 address. Find the end of it:
                        // since we have a valid IPv6 address, the only
                        // things that will terminate the IPv4 address are
                        // the scope or prefix delimiters, ']' or the end
                        // of the string
                        while (j < length && (address[j] != ']') && (address[j] != '/') && (address[j] != '%'))
                        {
                            ++j;
                        }

                        number = IPv4AddressHelper.ParseHostNumber(address, i, j);
                        numbers[index++] = (ushort)(number >> 16);
                        numbers[index++] = (ushort)number;
                        i = j;

                        // set this to avoid adding another number to
                        // the array if there's a prefix
                        number = 0;
                        numberIsValid = false;
                        break;
                    }
                }
                break;

            case '/':
                if (numberIsValid)
                {
                    numbers[index++] = (ushort)number;
                    numberIsValid = false;
                }

                // since we have a valid IPv6 address string, the prefix
                // length is the last token in the string; it is not used,
                // so it is simply skipped
                for (++i; i < length && address[i] != ']'; ++i)
                {
                }
                break;

            default:
                number = number * 16 + UriHelper.FromHex(address[i++]);
                break;
        }
    }

    // add number to the array if its not the prefix length or part of
    // an IPv4 address that's already been handled
    if (numberIsValid)
    {
        numbers[index++] = (ushort)number;
    }

    // if we had a compressor sequence ("::") then we need to expand the
    // numbers array: move the groups that followed it to the end and zero
    // the slots they came from
    if (compressorIndex > 0)
    {
        int toIndex = NumberOfLabels - 1;
        int fromIndex = index - 1;

        for (int i = index - compressorIndex; i > 0; --i)
        {
            numbers[toIndex--] = numbers[fromIndex];
            numbers[fromIndex--] = 0;
        }
    }

    // is the address loopback? Loopback is defined as one of:
    //
    //  0:0:0:0:0:0:0:1
    //  0:0:0:0:0:0:127.0.0.1       == 0:0:0:0:0:0:7F00:0001
    //  0:0:0:0:0:FFFF:127.0.0.1    == 0:0:0:0:0:FFFF:7F00:0001
    return ((numbers[0] == 0) &&
            (numbers[1] == 0) &&
            (numbers[2] == 0) &&
            (numbers[3] == 0) &&
            (numbers[4] == 0))
           && (((numbers[5] == 0) &&
                (numbers[6] == 0) &&
                (numbers[7] == 1))
               || (((numbers[6] == 0x7F00) &&
                    (numbers[7] == 0x0001))
                   && ((numbers[5] == 0) ||
                       (numbers[5] == 0xFFFF))));
}
//
// InternalIsValid
//
//  Determine whether a name is a valid IPv6 address. Rules are:
//
//   *  8 groups of 16-bit hex numbers, separated by ':'
//   *  a *single* run of zeros can be compressed using the symbol '::'
//   *  an optional string of a ScopeID delimited by '%'
//   *  an optional (last) 1 or 2 character prefix length field delimited by '/'
//   *  the last 32 bits in an address can be represented as an IPv4 address
//
// Inputs:
//  <argument>  name
//      Domain name field of a URI to check for pattern match with
//      IPv6 address
//  <argument>  start
//      Index in name of the first character to examine
//  <argument>  end
//      On input, the index at which scanning stops. On a successful return
//      it is set to the index just past the closing ']'.
//  validateStrictAddress: if set to true, it expects strict ipv6 address (a '/' prefix is rejected).
//      Otherwise it expects part of the string in ipv6 format.
//
// Assumes:
//  the correct name is terminated by  ']' character
//
// Returns:
//  true if <name> has IPv6 format/ipv6 address based on validateStrictAddress, else false
//
// Throws:
//  Nothing
//
//  Remarks: MUST NOT be used unless all input indexes are verified and trusted.
//           start must be next to '[' position, or error is reported
unsafe private static bool InternalIsValid(char *name, int start, ref int end, bool validateStrictAddress)
{
    // Number of completed groups seen so far.
    int sequenceCount = 0;
    // Number of digits in the group currently being read.
    int sequenceLength = 0;
    bool haveCompressor = false;
    bool haveIPv4Address = false;
    bool havePrefix = false;
    // True right after a single ':' or a '/', i.e. while a number must still follow.
    bool expectingNumber = true;
    // Start index of the most recent group; handed to the IPv4 validator when a '.' is met.
    int lastSequence = 1;
    int i;
    for (i = start; i < end; ++i)
    {
        // After '/' only decimal digits count as part of a number; before it, hex digits do.
        if (havePrefix ? (name[i] >= '0' && name[i] <= '9') : UriHelper.IsHexDigit(name[i]))
        {
            ++sequenceLength;
            expectingNumber = false;
        }
        else
        {
            // A delimiter was reached: close the group that precedes it.
            if (sequenceLength > 4)
            {
                return(false);
            }
            if (sequenceLength != 0)
            {
                ++sequenceCount;
                lastSequence = i - sequenceLength;
            }
            switch (name[i])
            {
            case '%':
                while (true)
                {
                    //accept anything in scopeID
                    if (++i == end)
                    {
                        // no closing ']', fail
                        return(false);
                    }
                    if (name[i] == ']')
                    {
                        goto case ']';
                    }
                    else if (name[i] == '/')
                    {
                        goto case '/';
                    }
                }
            case ']':
                // Remember where ']' was found (reusing 'start') and force the loop to end.
                start = i;
                i = end;
                //this will make i = end+1
                // Note: 'continue' skips the sequenceLength reset below.
                continue;
            case ':':
                // A ':' directly after another ':' is the compressor "::".
                if ((i > 0) && (name[i - 1] == ':'))
                {
                    if (haveCompressor)
                    {
                        //
                        // can only have one per IPv6 address
                        //
                        return(false);
                    }
                    haveCompressor = true;
                    expectingNumber = false;
                }
                else
                {
                    expectingNumber = true;
                }
                break;
            case '/':
                if (validateStrictAddress)
                {
                    return(false);
                }
                // A prefix needs at least one group before it and may appear only once.
                if ((sequenceCount == 0) || havePrefix)
                {
                    return(false);
                }
                havePrefix = true;
                expectingNumber = true;
                break;
            case '.':
                if (haveIPv4Address)
                {
                    return(false);
                }
                // Validate the embedded IPv4 address starting at the last group;
                // i is passed by ref and is updated by the IPv4 validator.
                i = end;
                if (!IPv4AddressHelper.IsValid(name, lastSequence, ref i, true, false, false))
                {
                    return(false);
                }
                // ipv4 address takes 2 slots in ipv6 address, one was just counted meeting the '.'
                ++sequenceCount;
                haveIPv4Address = true;
                --i;
                // it will be incremented back on the next loop
                break;
            default:
                return(false);
            }
            sequenceLength = 0;
        }
    }
    //
    // if the last token was a prefix, check number of digits
    //
    if (havePrefix && ((sequenceLength < 1) || (sequenceLength > 2)))
    {
        return(false);
    }
    //
    // these sequence counts are -1 because it is implied in end-of-sequence
    //
    int expectedSequenceCount = 8 + (havePrefix ? 1 : 0);
    // With a compressor fewer groups than expected are allowed; without one the count must match exactly.
    if (!expectingNumber && (sequenceLength <= 4) && (haveCompressor ? (sequenceCount < expectedSequenceCount) : (sequenceCount == expectedSequenceCount)))
    {
        // i == end + 1 only when the ']' case above ended the loop.
        if (i == end + 1)
        {
            // ']' was found
            end = start + 1;
            return(true);
        }
        return(false);
    }
    return(false);
}