Beispiel #1
0
        /// <summary>
        /// Rebuilds the scheme lookup collections from a list of bare scheme names such as
        /// 'ftp', 'http' or 'attachment'. Each name is trimmed and lower-cased, then stored
        /// as-is in <c>ValidSchemesNames</c> and in its two searchable forms in
        /// <c>ValidSchemesSuffixed</c>: with a literal colon ('ftp://') and with a url-encoded
        /// colon ('ftp%3a//'). Any previously registered schemes are discarded first.
        /// </summary>
        /// <param name="validSchemes">
        /// Bare scheme names without the '://' suffix. The sequence itself must not be null;
        /// null or whitespace-only entries are ignored.
        /// </param>
        private void SetValidSchemes(IEnumerable<string> validSchemes)
        {
            ValidSchemesNames.Clear();
            ValidSchemesSuffixed.Clear();

            // NOTE(review): ReadScheme also scans ValidSchemesSuffixedOrdered; confirm that it is
            // derived from ValidSchemesSuffixed and does not need to be refreshed here.
            foreach (var validScheme in validSchemes)
            {
                // A null entry would throw on Trim(), and a blank entry would register the bare
                // suffixes "://" and "%3a//" as valid schemes, so neither is accepted.
                if (string.IsNullOrWhiteSpace(validScheme))
                {
                    continue;
                }

                var lowerInvariant = validScheme.Trim().ToLowerInvariant();
                ValidSchemesNames.Add(lowerInvariant);
                ValidSchemesSuffixed.Add(lowerInvariant + "://");
                ValidSchemesSuffixed.Add(lowerInvariant + "%3a//");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads characters from the input until the second '/' of a scheme separator has been
        /// buffered, then checks whether the buffer ends with one of the allowed schemes
        /// (e.g. 'http://' or 'ftp%3a//'). When the allowed scheme is preceded by other
        /// buffered characters, those leading characters are removed from the buffer.
        /// </summary>
        /// <returns>
        /// True if the buffer ends with an allowed scheme once the second '/' is read. False if
        /// no allowed scheme matches, if a space, a character matched by
        /// <c>CheckMatchingCharacter</c> or a '[' is read first, or if the input ends. When the
        /// content does not look like a scheme, the result of <c>ReadUserPass(0)</c> is returned.
        /// </returns>
        private bool ReadScheme()
        {
            var originalLength = _buffer.Length;
            var numSlashes     = 0;

            while (!_reader.Eof())
            {
                var curr = _reader.Read();

                // If we match a slash, look for a second one.
                if (curr == '/')
                {
                    _buffer.Append(curr);
                    if (numSlashes == 1)
                    {
                        // This is the second slash, so the buffer now ends with "//". The buffer may
                        // hold leading content in front of the scheme, e.g. ":u(https://", in which
                        // case only the trailing "https://" counts as the scheme.
                        var bufferedUrlContent = _buffer.ToString().ToLowerInvariant();

                        // For efficiency, first check whether the whole buffer is exactly an
                        // approved scheme.
                        if (ValidSchemesSuffixed.Contains(bufferedUrlContent))
                        {
                            _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                            return true;
                        }

                        // Otherwise look for an approved scheme at the end of the buffer. The array
                        // is walked from its last element down so that longer schemes are tried
                        // first (e.g. sftp:// rather than ftp://).
                        for (var i = ValidSchemesSuffixedOrdered.Length - 1; i >= 0; i--)
                        {
                            var vss = ValidSchemesSuffixedOrdered[i];

                            // Ordinal comparison is required here: a culture-sensitive EndsWith can
                            // ignore characters such as a soft hyphen and report a match whose
                            // length differs from vss.Length, which would make the Remove below
                            // trim the wrong number of characters.
                            if (bufferedUrlContent.EndsWith(vss, System.StringComparison.Ordinal))
                            {
                                // Drop any extra characters in front of the scheme.
                                if (bufferedUrlContent.Length > vss.Length)
                                {
                                    _buffer.Remove(0, bufferedUrlContent.Length - vss.Length);
                                }
                                _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                                return true;
                            }
                        }
                        return false;
                    }

                    numSlashes++;
                }
                else if (curr == ' ' || CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
                {
                    // A space or a character matched by CheckMatchingCharacter means no scheme was
                    // found; the character is still kept in the buffer.
                    _buffer.Append(curr);
                    return false;
                }
                else if (curr == '[')
                {
                    // We may be starting to see an ipv6 address: unread the '[' so that the ipv6
                    // handling can start from it.
                    _reader.GoBack();
                    return false;
                }
                else if (originalLength > 0 || numSlashes > 0 || !CharUtils.IsAlpha(curr))
                {
                    // The buffer already had content, a slash was already seen, or this is not a
                    // character a-z or A-Z: assume we are not matching a scheme but a username and
                    // password instead. Unread the character and hand over.
                    _reader.GoBack();
                    return ReadUserPass(0);
                }
            }

            // Input ended before a scheme separator was completed.
            return false;
        }