/// <summary>
/// Take a list of scheme names like 'ftp', 'http', 'attachment' and register each one, both as a
/// bare name and as a full searchable instance to find in the input, like
/// 'ftp://', 'ftp%3a//', 'http://', 'http%3a//' etc.
/// Any previously registered schemes are discarded first.
/// </summary>
/// <param name="validSchemes">
/// Scheme names without the trailing separator. Each name is trimmed and lower-cased
/// (invariant culture) before being stored; null or blank entries are ignored.
/// </param>
private void SetValidSchemes(IEnumerable<string> validSchemes)
{
    ValidSchemesNames.Clear();
    ValidSchemesSuffixed.Clear();

    foreach (var validScheme in validSchemes)
    {
        // A null entry would throw on Trim(), and a blank entry would register the bare
        // suffixes "://" and "%3a//" as if they were schemes, so skip both.
        if (string.IsNullOrWhiteSpace(validScheme))
        {
            continue;
        }

        var lowerInvariant = validScheme.Trim().ToLowerInvariant();
        ValidSchemesNames.Add(lowerInvariant);

        // Register both the plain and the URL-encoded-colon form of the scheme separator.
        ValidSchemesSuffixed.Add(lowerInvariant + "://");
        ValidSchemesSuffixed.Add(lowerInvariant + "%3a//");
    }
}
/// <summary>
/// Reads characters from the input until the second '/' of a scheme separator is seen, then checks
/// whether the buffered content ends with a scheme from our allowed collection
/// (e.g. http(s?):// or ftp(s?)://).
/// </summary>
/// <returns>
/// True if an allowed scheme was found at the end of the buffer. If a character that cannot belong
/// to a scheme is encountered, the result of <c>ReadUserPass(0)</c> is returned instead.
/// Otherwise false.
/// </returns>
private bool ReadScheme()
{
    var originalLength = _buffer.Length;
    var numSlashes = 0;

    while (!_reader.Eof())
    {
        var curr = _reader.Read();

        // If we match a slash, look for a second one.
        if (curr == '/')
        {
            _buffer.Append(curr);
            if (numSlashes == 1)
            {
                // Return true only if the buffer currently ends with an approved protocol.
                // When we have buffered a string like ":u(https://", we consider this to have
                // found a scheme (the "https://" bit only; the leading characters are dropped below).
                var bufferedUrlContent = _buffer.ToString().ToLowerInvariant();

                // For efficiency, first check for an exact match of the whole buffer.
                if (ValidSchemesSuffixed.Contains(bufferedUrlContent))
                {
                    _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                    return true;
                }

                // If no exact match was found, try to find a valid scheme in the trailing content of
                // the buffer. The array is walked from its last element backwards; per the original
                // note this tries the longest matches first (e.g. sftp:// rather than ftp://).
                // NOTE(review): that relies on ValidSchemesSuffixedOrdered being sorted by ascending
                // length - confirm where it is built.
                for (var i = ValidSchemesSuffixedOrdered.Length - 1; i >= 0; i--)
                {
                    var vss = ValidSchemesSuffixedOrdered[i];

                    // Ordinal comparison is required: the culture-sensitive overload can report a
                    // match when the buffer contains ignorable characters, in which case the length
                    // arithmetic below would trim the wrong number of characters.
                    if (bufferedUrlContent.EndsWith(vss, System.StringComparison.Ordinal))
                    {
                        // See if we need to remove extra characters from the start of the buffer.
                        if (bufferedUrlContent.Length > vss.Length)
                        {
                            _buffer.Remove(0, bufferedUrlContent.Length - vss.Length);
                        }

                        _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                        return true;
                    }
                }

                return false;
            }

            numSlashes++;
        }
        else if (curr == ' ' || CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
        {
            // If we find a space or a matching character, then nothing was found.
            _buffer.Append(curr);
            return false;
        }
        else if (curr == '[')
        {
            // We're starting to see an ipv6 address: unread the '[' so that we can start looking for ipv6.
            _reader.GoBack();
            return false;
        }
        else if (originalLength > 0 || numSlashes > 0 || !CharUtils.IsAlpha(curr))
        {
            // If it's not a character a-z or A-Z (or the buffer already had content, or a slash has
            // already been seen) then assume we aren't matching a scheme, but instead matching
            // username and password.
            _reader.GoBack();
            return ReadUserPass(0);
        }

        // Otherwise (an alphabetic character, empty buffer on entry, no slash yet) no branch
        // applies and the loop simply moves on to the next character.
    }

    return false;
}