/// <summary>
/// Appends a node called <paramref name="name"/> to <paramref name="unsortedNodes"/> and then
/// recurses into every child definition whose name is a valid identifier.
/// </summary>
/// <param name="reader">Metadata reader handed to <c>LookupMetadataDefinitions</c>.</param>
/// <param name="name">Name of the node to create.</param>
/// <param name="parentIndex">Parent index stored on the newly created node.</param>
/// <param name="definitionsWithSameName">Definitions sharing <paramref name="name"/>; their members become child nodes.</param>
/// <param name="unsortedNodes">List that receives the created nodes.</param>
private static void GenerateMetadataNodes(
    MetadataReader reader,
    string name,
    int parentIndex,
    OrderPreservingMultiDictionary<string, MetadataDefinition>.ValueSet definitionsWithSameName,
    List<Node> unsortedNodes)
{
    var created = new Node(name, parentIndex);

    // The node is appended at the end of the list, so the count before the add is its index.
    var createdIndex = unsortedNodes.Count;
    unsortedNodes.Add(created);

    // Gather every child of the same-named definitions, grouped by child name.
    var childrenByName = OrderPreservingMultiDictionary<string, MetadataDefinition>.GetInstance();
    try
    {
        foreach (var sameNamedDefinition in definitionsWithSameName)
        {
            LookupMetadataDefinitions(reader, sameNamedDefinition, childrenByName);
        }

        foreach (var entry in childrenByName)
        {
            if (!UnicodeCharacterUtilities.IsValidIdentifier(entry.Key))
            {
                // Names that are not valid identifiers get no node.
                continue;
            }

            GenerateMetadataNodes(reader, entry.Key, createdIndex, entry.Value, unsortedNodes);
        }
    }
    finally
    {
        // Always release the dictionary, even when lookup or recursion throws.
        childrenByName.Free();
    }
}
/// <summary>
/// Validates a reported diagnostic: it must be non-null, supported according to
/// <paramref name="isSupportedDiagnostic"/>, and carry an ID that is a valid identifier.
/// Diagnostics of type <c>DiagnosticWithInfo</c> (compiler diagnostics) bypass all validation.
/// </summary>
/// <param name="diagnostic">The diagnostic to validate.</param>
/// <param name="compilationOpt">Optional compilation; when non-null the diagnostic's locations are verified against it.</param>
/// <param name="isSupportedDiagnostic">Predicate deciding whether the diagnostic is supported.</param>
/// <exception cref="ArgumentNullException"><paramref name="diagnostic"/> is null.</exception>
/// <exception cref="ArgumentException">The diagnostic is unsupported or has an invalid ID.</exception>
internal static void VerifyArguments(Diagnostic diagnostic, Compilation compilationOpt, Func<Diagnostic, bool> isSupportedDiagnostic)
{
    if (diagnostic == null)
    {
        throw new ArgumentNullException(nameof(diagnostic));
    }

    if (diagnostic is DiagnosticWithInfo)
    {
        // Compiler diagnostic: nothing further to validate.
        return;
    }

    if (compilationOpt != null)
    {
        VerifyDiagnosticLocationsInCompilation(diagnostic, compilationOpt);
    }

    var isSupported = isSupportedDiagnostic(diagnostic);
    if (!isSupported)
    {
        throw new ArgumentException(string.Format(CodeAnalysisResources.UnsupportedDiagnosticReported, diagnostic.Id), nameof(diagnostic));
    }

    var hasValidId = UnicodeCharacterUtilities.IsValidIdentifier(diagnostic.Id);
    if (hasValidId)
    {
        return;
    }

    // Invalid diagnostic IDs are rejected.
    // The Csc/Vbc MSBuild tasks parse command line compiler output and rely on every diagnostic having a valid ID.
    // See https://github.com/dotnet/roslyn/issues/4376 for details.
    throw new ArgumentException(string.Format(CodeAnalysisResources.InvalidDiagnosticIdReported, diagnostic.Id), nameof(diagnostic));
}
/// <summary>
/// Scans the rest of an identifier whose start character has already been consumed and
/// returns either an identifier token or a keyword token.
/// </summary>
/// <param name="verbatim">When true, the keyword lookup is skipped and the result is always an identifier token.</param>
/// <returns>A <c>TokenKind.Keyword</c> token when the text maps to a keyword kind; otherwise a <c>TokenKind.Identifier</c> token.</returns>
private Token ScanIdentifierAfterStartCharacter(bool verbatim)
{
    // Preconditions: the offset sits right after an identifier start character that
    // is not itself the continuation of an earlier identifier.
    Debug.Assert(_offset > 0);
    Debug.Assert(UnicodeCharacterUtilities.IsIdentifierStartCharacter(_text[_offset - 1]));
    Debug.Assert(_offset == 1 || !UnicodeCharacterUtilities.IsIdentifierPartCharacter(_text[_offset - 2]));

    int end = _text.Length;
    int identifierStart = _offset - 1;

    // Advance over every following identifier-part character.
    for (; _offset < end; _offset++)
    {
        if (!UnicodeCharacterUtilities.IsIdentifierPartCharacter(_text[_offset]))
        {
            break;
        }
    }

    var identifierText = _text.Substring(identifierStart, _offset - identifierStart);

    if (!verbatim)
    {
        var kind = SyntaxFacts.GetKeywordKind(identifierText);
        if (kind != SyntaxKind.None)
        {
            return new Token(TokenKind.Keyword, identifierText, kind);
        }
    }

    return new Token(TokenKind.Identifier, identifierText);
}
/// <summary>
/// Allocates a child node named <paramref name="nodeName"/>, records it under
/// <paramref name="parentNode"/>, and recurses into the members of the supplied definitions.
/// Does nothing when <paramref name="nodeName"/> is not a valid identifier.
/// </summary>
/// <param name="parentNode">Node the new child is registered under.</param>
/// <param name="nodeName">Name of the child node.</param>
/// <param name="definitionsWithSameName">Definitions sharing <paramref name="nodeName"/>; their members become grandchildren.</param>
private void GenerateMetadataNodes(
    MetadataNode parentNode,
    string nodeName,
    OrderPreservingMultiDictionary<string, MetadataDefinition>.ValueSet definitionsWithSameName)
{
    var nameIsValid = UnicodeCharacterUtilities.IsValidIdentifier(nodeName);
    if (!nameIsValid)
    {
        return;
    }

    var child = MetadataNode.Allocate(nodeName);
    _parentToChildren.Add(parentNode, child);

    // Gather the members of every same-named definition, grouped by member name.
    var membersByName = OrderPreservingMultiDictionary<string, MetadataDefinition>.GetInstance();
    try
    {
        foreach (var sameNamedDefinition in definitionsWithSameName)
        {
            LookupMetadataDefinitions(sameNamedDefinition, membersByName);
        }

        foreach (var entry in membersByName)
        {
            GenerateMetadataNodes(child, entry.Key, entry.Value);
        }
    }
    finally
    {
        // Always release the dictionary, even when lookup or recursion throws.
        membersByName.Free();
    }
}
/// <summary>
/// Skips leading whitespace and scans the next token from the text at the current offset.
/// </summary>
/// <returns>
/// An end token when only whitespace remains, an identifier or keyword token when an
/// identifier (optionally prefixed with '@') follows, or otherwise a token whose kind is
/// the consumed character cast to <c>TokenKind</c>.
/// </returns>
private Token Scan()
{
    int end = _text.Length;

    // Skip whitespace.
    for (; _offset < end; _offset++)
    {
        if (!char.IsWhiteSpace(_text[_offset]))
        {
            break;
        }
    }

    if (_offset == end)
    {
        return new Token(TokenKind.End);
    }

    // Consume one character and decide what kind of token it starts.
    char current = _text[_offset];
    _offset++;

    if (UnicodeCharacterUtilities.IsIdentifierStartCharacter(current))
    {
        return ScanIdentifierAfterStartCharacter(verbatim: false);
    }

    bool startsVerbatimIdentifier =
        current == '@' &&
        _offset < end &&
        UnicodeCharacterUtilities.IsIdentifierStartCharacter(_text[_offset]);

    if (!startsVerbatimIdentifier)
    {
        return new Token((TokenKind)current);
    }

    // Step past the identifier start character that follows the '@'.
    _offset++;
    return ScanIdentifierAfterStartCharacter(verbatim: true);
}
/// <summary>
/// Allocates a child node named <paramref name="nodeName"/>, records it under
/// <paramref name="parentNode"/>, records receiver type information for member definitions,
/// and recurses into the members of the supplied definitions.
/// Does nothing when <paramref name="nodeName"/> is not a valid identifier.
/// </summary>
/// <param name="parentNode">Node the new child is registered under.</param>
/// <param name="nodeName">Name of the child node.</param>
/// <param name="definitionsWithSameName">Definitions sharing <paramref name="nodeName"/>.</param>
private void GenerateMetadataNodes(
    MetadataNode parentNode,
    string nodeName,
    OrderPreservingMultiDictionary<string, MetadataDefinition>.ValueSet definitionsWithSameName)
{
    var nameIsValid = UnicodeCharacterUtilities.IsValidIdentifier(nodeName);
    if (!nameIsValid)
    {
        return;
    }

    var child = MetadataNode.Allocate(nodeName);
    _parentToChildren.Add(parentNode, child);

    // Gather the members of every same-named definition, grouped by member name.
    var membersByName = OrderPreservingMultiDictionary<string, MetadataDefinition>.GetInstance();
    try
    {
        foreach (var sameNamedDefinition in definitionsWithSameName)
        {
            var isMember = sameNamedDefinition.Kind == MetadataDefinitionKind.Member;
            if (isMember)
            {
                // Several methods may share a name yet differ in receiver type,
                // so each one contributes its own entry for this node.
                _extensionMethodToParameterTypeInfo.Add(child, sameNamedDefinition.ReceiverTypeInfo);
            }

            LookupMetadataDefinitions(sameNamedDefinition, membersByName);
        }

        foreach (var (memberName, memberDefinitions) in membersByName)
        {
            GenerateMetadataNodes(child, memberName, memberDefinitions);
        }
    }
    finally
    {
        // Always release the dictionary, even when lookup or recursion throws.
        membersByName.Free();
    }
}
/// <summary>
/// Normalizes the value of the "DefineConstants" property for the compiler.
///
/// Old VS projects could carry fairly mangled "DefineConstants" values that still worked
/// in the IDE because the IDE massaged them into something valid. The same massaging is
/// done here: the incoming string is split on comma, semicolon, and space; pieces that are
/// not legal identifiers are dropped (with a warning when non-empty); the rest are joined
/// with semicolons.
///
/// C# constants cannot be assigned a value: a constant is either defined or not defined.
/// </summary>
/// <param name="originalDefineConstants">The raw property value, or null.</param>
/// <param name="log">Logger that receives a warning for each non-empty invalid piece.</param>
/// <returns>
/// The semicolon-separated valid identifiers, or null when the input is null or
/// no valid identifier remains.
/// </returns>
internal static string? GetDefineConstantsSwitch(
    string? originalDefineConstants,
    TaskLoggingHelper log)
{
    if (originalDefineConstants == null)
    {
        return null;
    }

    StringBuilder accepted = new StringBuilder();

    // Break the incoming string apart on comma/semicolon/space.
    string[] candidates = originalDefineConstants.Split(new char[] { ',', ';', ' ' });

    foreach (string candidate in candidates)
    {
        if (!UnicodeCharacterUtilities.IsValidIdentifier(candidate))
        {
            // Empty pieces (e.g. from consecutive separators) are dropped silently;
            // anything else that is not a legal identifier is reported.
            if (candidate.Length > 0)
            {
                log.LogWarningWithCodeFromResources(
                    "Csc_InvalidParameterWarning",
                    "/define:",
                    candidate);
            }

            continue;
        }

        // A semicolon is needed only between entries, never in front of the first.
        if (accepted.Length > 0)
        {
            accepted.Append(";");
        }

        accepted.Append(candidate);
    }

    // An empty /define: switch must not reach the csc.exe command line, so return null instead.
    return accepted.Length > 0 ? accepted.ToString() : null;
}
// Counts the characters of the identifier that begins at the current offset.
// Returns 0 when no text remains or the character at the offset cannot start an identifier.
private int ScanIdentifier()
{
    int remaining = _text.Length - _offset;
    if (remaining <= 0 || !UnicodeCharacterUtilities.IsIdentifierStartCharacter(_text[_offset]))
    {
        return 0;
    }

    // The start character counts as one; extend over the following part characters.
    int count = 1;
    for (; count < remaining; count++)
    {
        if (!UnicodeCharacterUtilities.IsIdentifierPartCharacter(_text[_offset + count]))
        {
            break;
        }
    }

    return count;
}
/// <summary>
/// Turns <paramref name="value"/> into a dot-separated sequence of identifier-safe segments:
/// the value is trimmed, and each character that is not allowed at its position is replaced
/// with (or prefixed by) an underscore.
/// </summary>
/// <param name="forms">Not used by this implementation.</param>
/// <param name="value">The text to sanitize.</param>
/// <returns>The sanitized text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
public virtual string Process(IReadOnlyDictionary<string, IValueForm>? forms, string value)
{
    const char Replacement = '_';

    if (value is null)
    {
        throw new ArgumentNullException(nameof(value));
    }

    string trimmed = value.Trim();
    int lastIndex = trimmed.Length - 1;
    var result = new StringBuilder(trimmed.Length);

    int i = 0;
    while (i < trimmed.Length)
    {
        char current = trimmed[i];

        if (i < lastIndex && char.IsSurrogatePair(current, trimmed[i + 1]))
        {
            // A surrogate pair is one symbol: emit a single replacement and skip both chars.
            result.Append(Replacement);
            i += 2;
            continue;
        }

        // A segment starts at the very beginning of the output or right after a dot.
        bool atSegmentStart = result.Length == 0 || result[result.Length - 1] == '.';
        bool canStartIdentifier = UnicodeCharacterUtilities.IsIdentifierStartCharacter(current);
        bool canContinueIdentifier = UnicodeCharacterUtilities.IsIdentifierPartCharacter(current);

        // A dot is kept only when something precedes it in the output and something follows it in the input.
        bool isInteriorDot = current == '.' && result.Length > 0 && i < lastIndex;

        if (atSegmentStart)
        {
            if (canStartIdentifier)
            {
                result.Append(current);
            }
            else if (canContinueIdentifier)
            {
                // Valid inside an identifier but not as its first character: prefix it with something valid.
                result.Append(Replacement);
                result.Append(current);
            }
            else if (isInteriorDot)
            {
                result.Append(current);
            }
            else
            {
                result.Append(Replacement);
            }
        }
        else if (canContinueIdentifier || isInteriorDot)
        {
            result.Append(current);
        }
        else
        {
            result.Append(Replacement);
        }

        i++;
    }

    return result.ToString();
}
/// <summary>
/// Reports whether <paramref name="name"/> contains characters that are dropped when
/// identifiers are parsed: a leading "@" or any unicode formatting character.
/// Per spec section 2.4.2, identifiers are compared ignoring such characters, and (as in
/// dev10) that is achieved by discarding them during parsing. Metadata names may still
/// contain them, in which case the name cannot be referenced from source because the
/// search string will have had those characters removed.
/// See DevDiv #14432 for more.
/// </summary>
/// <param name="name">The name to inspect; null or empty yields false.</param>
/// <returns>True when the name starts with '@' or contains a formatting character.</returns>
internal static bool ContainsDroppedIdentifierCharacters(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return false;
    }

    if (name[0] == '@')
    {
        return true;
    }

    foreach (char ch in name)
    {
        if (UnicodeCharacterUtilities.IsFormattingChar(ch))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Generates nodes for every validly named definition found in
/// <paramref name="globalNamespace"/>, parenting each one to index 0 (the root node).
/// </summary>
/// <param name="reader">Metadata reader handed to <c>LookupMetadataDefinitions</c>.</param>
/// <param name="globalNamespace">Namespace whose definitions seed the traversal.</param>
/// <param name="unsortedNodes">List that receives the created nodes.</param>
private static void GenerateMetadataNodes(
    MetadataReader reader,
    NamespaceDefinition globalNamespace,
    List<Node> unsortedNodes)
{
    const int RootNodeIndex = 0;

    var topLevelByName = OrderPreservingMultiDictionary<string, MetadataDefinition>.GetInstance();
    try
    {
        LookupMetadataDefinitions(reader, globalNamespace, topLevelByName);

        foreach (var entry in topLevelByName)
        {
            if (!UnicodeCharacterUtilities.IsValidIdentifier(entry.Key))
            {
                // Names that are not valid identifiers get no node.
                continue;
            }

            GenerateMetadataNodes(reader, entry.Key, RootNodeIndex, entry.Value, unsortedNodes);
        }
    }
    finally
    {
        // Always release the dictionary, even when lookup or recursion throws.
        topLevelByName.Free();
    }
}
/// <summary>
/// Determines whether <paramref name="name"/> is a valid identifier.
/// </summary>
/// <param name="name">The candidate identifier.</param>
/// <returns>The result of <c>UnicodeCharacterUtilities.IsValidIdentifier</c> for <paramref name="name"/>.</returns>
public static bool IsValidIdentifier(string name)
    => UnicodeCharacterUtilities.IsValidIdentifier(name);
/// <summary>
/// Determines whether the Unicode character may appear as part of a C# identifier.
/// </summary>
/// <param name="ch">The Unicode character.</param>
/// <returns>The result of <c>UnicodeCharacterUtilities.IsIdentifierPartCharacter</c> for <paramref name="ch"/>.</returns>
public static bool IsIdentifierPartCharacter(char ch)
    => UnicodeCharacterUtilities.IsIdentifierPartCharacter(ch);
/// <summary>
/// Determines whether <paramref name="name"/> is a valid identifier.
/// </summary>
/// <param name="name">The candidate identifier; annotated as non-null when the result is true.</param>
/// <returns>The result of <c>UnicodeCharacterUtilities.IsValidIdentifier</c> for <paramref name="name"/>.</returns>
public static bool IsValidIdentifier([NotNullWhen(true)] string? name)
    => UnicodeCharacterUtilities.IsValidIdentifier(name);
/// <summary>
/// Checks that the text at least roughly has the shape of a stack frame's method signature.
/// The class and method identifiers themselves are not validated.
/// Example line:
///    at ConsoleApp4.MyClass.ThrowAtOne(p1, p2)
///       |-------------------||--------||-------|
///               Class          Method     Args
/// </summary>
/// <param name="line">The text to scan.</param>
/// <param name="classSpan">Receives the type span recorded by the state machine.</param>
/// <param name="methodSpan">Receives the method span recorded by the state machine.</param>
/// <param name="argsSpan">Receives the arguments span recorded by the state machine.</param>
/// <returns>
/// True when parsing reached the finished state and all three spans are non-default.
/// </returns>
/// <remarks>
/// See https://docs.microsoft.com/en-us/dotnet/api/system.environment.stacktrace for more information
/// on expected stacktrace form. At time of writing, this is based on the following "ToString" implementation in the runtime:
/// https://github.com/dotnet/runtime/blob/72d643d05ab23888f30a57d447154e36f979f3d1/src/libraries/System.Private.CoreLib/src/System/Diagnostics/StackTrace.cs#L206
/// </remarks>
public static bool TryParseMethodSignature(ReadOnlySpan<char> line, out TextSpan classSpan, out TextSpan methodSpan, out TextSpan argsSpan)
{
    var state = new ParseStateMachine();

    // Stop as soon as the state machine reports that a full signature was parsed.
    for (var i = 0; i < line.Length && state.CurrentParsingSpan != CurrentParsingSpan.Finished; i++)
    {
        var current = line[i];
        state.CurrentSpanLength++;

        //
        // Each top-level "if" below is one self-contained arm that ends with "continue".
        // A switch would be awkward here because several arms depend on more than the
        // character itself. New arms should follow the same rule: once an arm is entered,
        // nothing after it runs for this character.
        //

        //
        // First character of a span: outside the arguments section it has to be able to start an identifier.
        //
        if (state.CurrentSpanLength == 1 && state.CurrentParsingSpan != CurrentParsingSpan.Arguments)
        {
            // The span must also begin at the start of the line or right after a space,
            // so that an identifier start buried inside other text is not picked up.
            if (!UnicodeCharacterUtilities.IsIdentifierStartCharacter(current) ||
                (i > 0 && line[i - 1] != ' '))
            {
                state.Reset();
            }

            continue;
        }

        if (current == ' ')
        {
            // A space where none is expected means this was not a signature;
            // start over and try to parse the next block.
            if (!state.AllowSpace)
            {
                state.Reset();
            }

            continue;
        }

        if (current == '.')
        {
            // While parsing the type or the arguments, a dot is accepted only directly after
            // an identifier character or a ']' (generic closure). In any other span the dot
            // is passed over without a check.
            if (state.CurrentParsingSpan == CurrentParsingSpan.Type ||
                state.CurrentParsingSpan == CurrentParsingSpan.Arguments)
            {
                var previousAllowsDot =
                    i > 0 &&
                    (UnicodeCharacterUtilities.IsIdentifierPartCharacter(line[i - 1]) || line[i - 1] == ']');

                if (!previousAllowsDot)
                {
                    state.Reset();
                }
            }

            continue;
        }

        if (current == '[' || current == '<')
        {
            state.GenericDepth++;
            continue;
        }

        if (current == ']' || current == '>')
        {
            // A closing bracket with no matching opener invalidates the attempt.
            if (state.GenericDepth != 0)
            {
                state.GenericDepth--;
            }
            else
            {
                state.Reset();
            }

            continue;
        }

        if (current == '(')
        {
            // A '(' starts the arguments section only when the type is being parsed.
            // Anywhere else (for example, already inside the arguments) parsing is abandoned;
            // that loses cases such as string values containing '(' but is accepted for now.
            if (state.CurrentParsingSpan != CurrentParsingSpan.Type)
            {
                state.Reset();
            }
            else
            {
                state.StartParsingArguments(line);
            }

            continue;
        }

        if (current == ')')
        {
            // ')' is only valid as the end of the arguments list, and the first one seen
            // is assumed to end it. Cases where that is not true are not supported.
            if (state.CurrentParsingSpan == CurrentParsingSpan.Arguments)
            {
                state.StopParsingArguments();
            }
            else
            {
                state.Reset();
            }

            continue;
        }

        if (current == ',')
        {
            // Commas may appear in the arguments or inside a generic list.
            // Their exact position is not validated.
            var commaAllowed =
                state.CurrentParsingSpan == CurrentParsingSpan.Arguments ||
                state.GenericDepth != 0;

            if (!commaAllowed)
            {
                state.Reset();
            }

            continue;
        }

        // Anything not handled above must be an identifier character.
        // Characters needing special treatment belong in an arm before this one.
        if (!UnicodeCharacterUtilities.IsIdentifierPartCharacter(current))
        {
            state.Reset();
            continue;
        }
    }

    classSpan = state.TypeSpan;
    methodSpan = state.MethodSpan;
    argsSpan = state.ArgumentsSpan;

    return state.CurrentParsingSpan == CurrentParsingSpan.Finished
        && classSpan != default
        && methodSpan != default
        && argsSpan != default;
}