Пример #1
0
        /// <summary>
        /// Consumes the port section of the url (the characters that follow the ":" after the host)
        /// and then hands over to the reader for whichever url part comes next.
        /// </summary>
        /// <returns>
        /// The result of the reader that takes over: ReadPath, ReadQueryString, ReadFragment or ReadEnd.
        /// </returns>
        private bool ReadPort()
        {
            // The port begins at the current end of the buffer.
            _currentUrlMarker.SetIndex(UrlPart.PORT, _buffer.Length);

            // "consumed" counts the characters taken from the reader so far, the current one included.
            for (var consumed = 1; !_reader.Eof(); consumed++)
            {
                var next = _reader.Read();

                switch (next)
                {
                case '/':
                    // A path follows; keep the separator and let the path reader continue.
                    _buffer.Append(next);
                    return ReadPath();

                case '?':
                    // A query string follows.
                    _buffer.Append(next);
                    return ReadQueryString();

                case '#':
                    // A fragment follows.
                    _buffer.Append(next);
                    return ReadFragment();
                }

                // The character matcher is consulted first for every non-separator character;
                // the numeric test only runs when the matcher did not report a stop.
                var endsPort = CheckMatchingCharacter(next) == CharacterMatch.CharacterMatchStop ||
                               !CharUtils.IsNumeric(next);

                if (!endsPort)
                {
                    // Still inside the port digits.
                    _buffer.Append(next);
                    continue;
                }

                // Whatever has been buffered so far is treated as a valid url; the terminating
                // character is pushed back and never appended.
                _reader.GoBack();

                if (consumed == 1)
                {
                    // Nothing numeric followed the colon (e.g. google.com:hello.world), so drop the
                    // last buffered character - presumably the ":" that introduced the port.
                    _buffer.Remove(_buffer.Length - 1, 1);
                }

                // NOTE(review): the PORT index is cleared here even when digits were read before the
                // terminating character - confirm this is intended.
                _currentUrlMarker.UnsetIndex(UrlPart.PORT);
                return ReadEnd(ReadEndState.ValidUrl);
            }

            // Input ended while reading the port; finish with what was buffered.
            return ReadEnd(ReadEndState.ValidUrl);
        }
Пример #2
0
        /// <summary>
        /// The default input reader. Scans the input one character at a time, buffering it, and
        /// dispatches to the specialised readers whenever it meets a character that can mark part of a
        /// url: space, "%", a dot (ASCII or one of the non-standard dots), "@", "[", "/" or ":".
        /// </summary>
        private void ReadDefault()
        {
            //Keeps track of the number of characters read to be able to later cut out the domain name.
            var length = 0;

            // keep track of how many times each character in each index of the raw input has been read
            var contentReadByIndexCount = new byte[_reader.ContentLength];

            //until end of string read the contents
            while (!_reader.Eof())
            {
                // We want to ensure that backtracking and looping on content is limited from infinite-loops, so
                // we take the hit and track each time an element in the input is read, and if its been hit too
                // many times, we step forwards until we find an element that has NOT been read too many times
                var currentIndex = _reader.GetPosition();

                // Saturate rather than wrap: a byte counter that overflowed back to 0 would silently
                // re-enable a position that has already been visited too often.
                if (contentReadByIndexCount[currentIndex] < byte.MaxValue)
                {
                    contentReadByIndexCount[currentIndex] += 1;
                }

                // Stop skipping once the end of the input is reached; otherwise both the counter array and
                // the reader would be indexed past their last element when the over-read positions sit at
                // the tail of the input.
                while (!_reader.Eof() &&
                       currentIndex < contentReadByIndexCount.Length &&
                       contentReadByIndexCount[currentIndex] >= ContentReadByIndexMaximum)
                {
                    // Forcibly step to the next character in the input, so we jump out of infinite loops
                    _reader.Read();
                    currentIndex = _reader.GetPosition();
                }

                if (_reader.Eof())
                {
                    // The skip above consumed the rest of the input; fall through to the end-of-input handling.
                    break;
                }

                // Read the next char to process.
                var curr = _reader.Read();

                switch (curr)
                {
                case ' ':
                    //space was found, check if it's a valid single level domain.
                    if (_options.HasFlag(UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN) && _buffer.Length > 0 && _hasScheme)
                    {
                        _reader.GoBack();
                        if (!ReadDomainName(_buffer.ToString().Substring(length)))
                        {
                            ReadEnd(ReadEndState.InvalidUrl);
                        }
                    }

                    _buffer.Append(curr);
                    ReadEnd(ReadEndState.InvalidUrl);
                    length = 0;
                    break;

                case '%':
                    if (_reader.CanReadChars(2))
                    {
                        // "%3a" is a url-encoded ":"; this is protocol text, so compare ordinally.
                        if (_reader.Peek(2).Equals("3a", StringComparison.OrdinalIgnoreCase))
                        {
                            _buffer.Append(curr);
                            _buffer.Append(_reader.Read());
                            _buffer.Append(_reader.Read());
                            length = ProcessColon(length);
                        }
                        else if (CharUtils.IsHex(_reader.PeekChar(0)) && CharUtils.IsHex(_reader.PeekChar(1)))
                        {
                            // Any other "%" followed by two hex digits is buffered whole and tried as a domain name.
                            _buffer.Append(curr);
                            _buffer.Append(_reader.Read());
                            _buffer.Append(_reader.Read());

                            if (!ReadDomainName(_buffer.ToString().Substring(length)))
                            {
                                ReadEnd(ReadEndState.InvalidUrl);
                            }
                            length = 0;
                        }
                    }

                    break;

                case '\u3002':                         //non-standard dots
                case '\uFF0E':
                case '\uFF61':
                case '.':                         //"." was found, read the domain name using the start from length.
                    _buffer.Append(curr);
                    if (!ReadDomainName(_buffer.ToString().Substring(length)))
                    {
                        ReadEnd(ReadEndState.InvalidUrl);
                    }
                    length = 0;
                    break;

                case '@':                         //Check the domain name after a username
                    if (_buffer.Length > 0)
                    {
                        _currentUrlMarker.SetIndex(UrlPart.USERNAME_PASSWORD, length);
                        _buffer.Append(curr);
                        if (!ReadDomainName(null))
                        {
                            ReadEnd(ReadEndState.InvalidUrl);
                        }
                        length = 0;
                    }

                    break;

                case '[':
                    if (_dontMatchIpv6)
                    {
                        //Check if we need to match characters. If we match characters and this is a start or stop of range,
                        //either way reset the world and start processing again.
                        if (CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
                        {
                            ReadEnd(ReadEndState.InvalidUrl);
                            length = 0;
                        }
                    }

                    // Remember where the bracket content starts so it can be re-scanned if this is not ipv6.
                    var beginning = _reader.GetPosition();

                    //if it doesn't have a scheme, clear the buffer.
                    if (!_hasScheme)
                    {
                        _buffer.Remove(0, _buffer.Length);
                    }

                    _buffer.Append(curr);

                    if (!ReadDomainName(_buffer.ToString().Substring(length)))
                    {
                        //if we didn't find an ipv6 address, then check inside the brackets for urls
                        ReadEnd(ReadEndState.InvalidUrl);
                        _reader.Seek(beginning);
                        _dontMatchIpv6 = true;
                    }

                    length = 0;
                    break;

                case '/':
                    // "/" was found, then we either read a scheme, or if we already read a scheme, then
                    // we are reading a url in the format http://123123123/asdf

                    if (_hasScheme || (_options.HasFlag(UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN) && _buffer.Length > 1))
                    {
                        //we already have the scheme, so then we already read:
                        //http://something/ <- if something is all numeric then its a valid url.
                        //OR we are searching for single level domains. We have buffer length > 1 condition
                        //to weed out infinite backtrack in cases of html5 roots

                        //unread this "/" and continue to check the domain name starting from the beginning of the domain
                        _reader.GoBack();
                        if (!ReadDomainName(_buffer.ToString().Substring(length)))
                        {
                            ReadEnd(ReadEndState.InvalidUrl);
                        }

                        length = 0;
                    }
                    else
                    {
                        //we don't have a scheme already, then clear state, then check for html5 root such as: "//google.com/"
                        // remember the state of the quote when clearing state just in case its "//google.com" so its not cleared.
                        ReadEnd(ReadEndState.InvalidUrl);
                        _buffer.Append(curr);
                        _hasScheme = ReadHtml5Root();
                        length     = _buffer.Length;
                    }

                    break;

                case ':':
                    //add the ":" to the url and check for scheme/username
                    _buffer.Append(curr);
                    length = ProcessColon(length);
                    break;

                default:
                    //Check if we need to match characters. If we match characters and this is a start or stop of range,
                    //either way reset the world and start processing again.
                    if (CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
                    {
                        ReadEnd(ReadEndState.InvalidUrl);
                        length = 0;
                    }
                    else
                    {
                        _buffer.Append(curr);
                    }

                    break;
                }
            }

            // End of input: with single level domains allowed and a scheme already seen, whatever is
            // left in the buffer gets one last chance to be read as a domain name.
            if (_options.HasFlag(UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN) && _buffer.Length > 0 && _hasScheme)
            {
                if (!ReadDomainName(_buffer.ToString().Substring(length)))
                {
                    ReadEnd(ReadEndState.InvalidUrl);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Reads forward looking for the "//" that completes a scheme and reports whether the buffer then
        /// ends with one of the allowed schemes (e.g. http(s?):// or ftp(s?)://). Any buffered content in
        /// front of the matched scheme is removed from the buffer.
        /// </summary>
        /// <returns>
        /// True if an allowed scheme was found. When the input looks like a username/password rather
        /// than a scheme, the result of <c>ReadUserPass(0)</c> is returned instead. False otherwise.
        /// </returns>
        private bool ReadScheme()
        {
            var originalLength = _buffer.Length;
            var numSlashes     = 0;

            while (!_reader.Eof())
            {
                var curr = _reader.Read();

                //if we match a slash, look for a second one.
                if (curr == '/')
                {
                    _buffer.Append(curr);
                    if (numSlashes == 1)
                    {
                        // return only if the buffer currently ends with an approved protocol.
                        // When we have buffered a string like: ":u(https://test.co" and are scanning the ':', we
                        // consider this to have found a scheme (the https bit only, which will be parsed out later)
                        var bufferedUrlContent = _buffer.ToString().ToLowerInvariant();

                        // For efficiency, first check for an exact match.
                        if (ValidSchemesSuffixed.Contains(bufferedUrlContent))
                        {
                            _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                            return true;
                        }

                        // If no exact match found, try to find a valid scheme in the trailing content of the current buffer,
                        // starting with the longest matches available (e.g. sftp:// rather than ftp://)
                        for (var i = ValidSchemesSuffixedOrdered.Length - 1; i >= 0; i--)
                        {
                            var vss = ValidSchemesSuffixedOrdered[i];

                            // Schemes are protocol text: compare ordinally so the result does not depend
                            // on the culture of the thread doing the detection.
                            if (bufferedUrlContent.EndsWith(vss, StringComparison.Ordinal))
                            {
                                // see if we need to remove extra characters from the start of the buffer
                                if (bufferedUrlContent.Length > vss.Length)
                                {
                                    _buffer.Remove(0, bufferedUrlContent.Length - vss.Length);
                                }

                                _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                                return true;
                            }
                        }

                        return false;
                    }

                    numSlashes++;
                }
                else if (curr == ' ' || CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
                {
                    //a space or a matched (range start/stop) character means no scheme was found.
                    _buffer.Append(curr);
                    return false;
                }
                else if (curr == '[')
                {
                    //if we're starting to see an ipv6 address
                    _reader.GoBack();                     //unread the '[', so that we can start looking for ipv6
                    return false;
                }
                else if (originalLength > 0 || numSlashes > 0 || !CharUtils.IsAlpha(curr))
                {
                    // Assume we aren't matching a scheme, but a username and password instead: the buffer
                    // already held content, a slash was already seen, or this is not a character a-z or A-Z.
                    _reader.GoBack();
                    return ReadUserPass(0);
                }
            }

            // Ran out of input before a "//" was seen.
            return false;
        }
Пример #4
0
        /// <summary>
        /// Takes the character at the current index of the content and moves the index forward by one.
        /// A character that <c>CharUtils.IsWhiteSpace</c> reports as whitespace is returned as a plain space.
        /// </summary>
        /// <returns>The character just consumed, or ' ' when it was whitespace.</returns>
        public char Read()
        {
            var consumed = _content[_index++];

            if (CharUtils.IsWhiteSpace(consumed))
            {
                return ' ';
            }

            return consumed;
        }
Пример #5
0
        /// <summary>
        /// Validates a bracketed ipv6 literal such as "[::1]". Walks the characters between "[" and "]",
        /// counting ":"-separated sections and stopping at the first invalid character.
        /// Handles ipv4 formatted ipv6 addresses, zone indices, truncated ("::") notation.
        /// </summary>
        /// <param name="testDomain">The candidate address, including the surrounding brackets.</param>
        /// <returns>True if it is a valid ipv6 address.</returns>
        private bool IsValidIpv6(string testDomain)
        {
            var domainLength = testDomain.Length;

            // Return false if we don't see [....]
            // or if we only have '[]'
            // or if we detect [:8000: ...]; only [::8000: ...] is okay
            if (domainLength < 3 || testDomain[domainLength - 1] != ']' || testDomain[0] != '[' ||
                (testDomain[1] == ':' && testDomain[2] != ':'))
            {
                return false;
            }

            var numSections = 1;
            var hexDigits   = 0;
            var prevChar    = '\0';

            //used to check ipv4 addresses at the end of ipv6 addresses.
            var lastSection = new StringBuilder();
            var hexSection  = true;

            // If we see a '%'. Example: http://[::ffff:0xC0.0x00.0x02.0xEB%251]
            var zoneIndiceMode = false;

            //If doubleColonFlag is true, that means we've already seen one "::"; we're not allowed to have more than one.
            var doubleColonFlag = false;

            for (var index = 0; index < domainLength; index++)
            {
                switch (testDomain[index])
                {
                case '[':                         //found beginning of ipv6 address
                    break;

                case '%':
                case ']':                         //found end of ipv6 address
                    if (testDomain[index] == '%')
                    {
                        // See if there's a urlencoded dot. Two more characters must exist after the '%'
                        // (hence ">= 3"), and the hex digit is accepted in either case, matching how the
                        // domain reader compares the encoded dot ignoring case.
                        if (domainLength - index >= 3 && testDomain[index + 1] == '2' &&
                            (testDomain[index + 2] == 'e' || testDomain[index + 2] == 'E'))
                        {
                            lastSection.Append("%2e");
                            index     += 2;
                            hexSection = false;
                            break;
                        }

                        zoneIndiceMode = true;
                    }

                    // A non-hex last section must be an embedded ipv4 address. It is checked when the
                    // address ends without a zone index, or at the '%' that starts the zone index.
                    if (!hexSection && (!zoneIndiceMode || testDomain[index] == '%'))
                    {
                        if (IsValidIpv4(lastSection.ToString()))
                        {
                            numSections++;                                     //ipv4 takes up 2 sections.
                        }
                        else
                        {
                            return false;
                        }
                    }

                    break;

                case ':':
                    if (prevChar == ':')
                    {
                        if (doubleColonFlag)
                        {
                            //only allowed to have one "::" in an ipv6 address.
                            return false;
                        }

                        doubleColonFlag = true;
                    }

                    //This means that we reached invalid characters in the previous section
                    if (!hexSection)
                    {
                        return false;
                    }

                    hexSection = true;                         //reset hex to true
                    hexDigits  = 0;                            //reset count for hex digits
                    numSections++;
                    lastSection.Remove(0, lastSection.Length); //clear last section
                    break;

                default:
                    if (zoneIndiceMode)
                    {
                        // Inside the zone index only unreserved characters are accepted.
                        if (!CharUtils.IsUnreserved(testDomain[index]))
                        {
                            return false;
                        }
                    }
                    else
                    {
                        lastSection.Append(testDomain[index]);                 //collect our possible ipv4 address
                        if (hexSection && CharUtils.IsHex(testDomain[index]))
                        {
                            hexDigits++;
                        }
                        else
                        {
                            hexSection = false;                                //non hex digit.
                        }
                    }

                    break;
                }

                // A section holds at most 4 hex digits and an address at most 8 sections.
                if (hexDigits > 4 || numSections > 8)
                {
                    return false;
                }

                prevChar = testDomain[index];
            }

            //numSections != 1 checks for things like: [adf]
            //An address with fewer than 8 sections is only valid when it contains a double colon.
            return numSections != 1 && (numSections >= 8 || doubleColonFlag);
        }
Пример #6
0
        /// <summary>
        /// Checks whether the given text is a valid ipv4 address.
        /// Handles Hexadecimal, octal, decimal, dotted decimal, dotted hex, dotted octal.
        /// Which form is expected depends on the number of dots counted so far (<c>_dots</c>):
        /// 0 means a single number, 3 means the dotted form; any other count is not an ipv4 address.
        /// </summary>
        /// <param name="testDomain">The string we're testing.</param>
        /// <returns>True if it's a valid ipv4 address.</returns>
        private bool IsValidIpv4(string testDomain)
        {
            var valid = false;

            if (testDomain.Length > 0)
            {
                //handling format without dots. Ex: http://2123123123123/path/a, http://0x8242343/aksdjf
                if (_dots == 0)
                {
                    try
                    {
                        long value;
                        if (testDomain.Length > 2 && testDomain[0] == '0' && testDomain[1] == 'x')
                        {
                            // hex: text that does not parse as a hex number is not an address
                            if (!long.TryParse(testDomain.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out value))
                            {
                                return false;
                            }
                        }
                        else if (testDomain[0] == '0')
                        {
                            // octal
                            var possibleDomain = testDomain.Substring(1);
                            if (!OctalEncodingHelper.LooksLikeOctal(possibleDomain.AsSpan()))
                            {
                                return false;
                            }

                            value = Convert.ToInt64(possibleDomain, 8);
                        }
                        else
                        {
                            // decimal: parsed with the invariant culture so the result does not depend
                            // on the current thread's number formatting
                            if (!long.TryParse(testDomain, NumberStyles.Integer, CultureInfo.InvariantCulture, out value))
                            {
                                return false;
                            }
                        }

                        valid = value <= MAX_NUMERIC_DOMAIN_VALUE && value >= MIN_NUMERIC_DOMAIN_VALUE;
                    }
                    catch (Exception)
                    {
                        // Deliberate best-effort: a conversion failure (Convert.ToInt64 throws on
                        // malformed or overflowing input) means "not an address", not an error.
                        valid = false;
                    }
                }
                else if (_dots == 3)
                {
                    //Dotted decimal/hex/octal format
                    var parts = CharUtils.SplitByDot(testDomain);
                    valid = true;

                    //check each part of the ip and make sure its valid.
                    for (var i = 0; i < parts.Length && valid; i++)
                    {
                        var part = parts[i];
                        if (part.Length == 0)
                        {
                            // An empty part (e.g. from "1..2.3") can never be valid.
                            valid = false;
                            continue;
                        }

                        string parsedNum;
                        int    @base;
                        if (part.Length > 2 && part[0] == '0' && part[1] == 'x')
                        {
                            //dotted hex
                            parsedNum = part.Substring(2);
                            @base     = 16;
                        }
                        else if (part[0] == '0')
                        {
                            //dotted octal
                            parsedNum = part.Substring(1);
                            @base     = 8;
                        }
                        else
                        {
                            //dotted decimal
                            parsedNum = part;
                            @base     = 10;
                        }

                        int section;
                        if (parsedNum.Length == 0)
                        {
                            // Only the radix prefix was present (a lone "0"), which is zero.
                            section = 0;
                        }
                        else if (@base == 16)
                        {
                            // For efficiency, we try to avoid try/catch and instead use tryparse
                            if (!int.TryParse(parsedNum, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out section))
                            {
                                return false;
                            }
                        }
                        else if (@base == 10)
                        {
                            // Digits only: an octet is never signed and never carries a decimal point.
                            if (!int.TryParse(parsedNum, NumberStyles.None, CultureInfo.InvariantCulture, out section))
                            {
                                return false;
                            }
                        }
                        else
                        {
                            // for octal, fall back to try/catch
                            if (!OctalEncodingHelper.LooksLikeOctal(parsedNum.AsSpan()))
                            {
                                return false;
                            }

                            try
                            {
                                section = Convert.ToInt32(parsedNum, @base);
                            }
                            catch (Exception)
                            {
                                return false;
                            }
                        }

                        if (section < MIN_IP_PART || section > MAX_IP_PART)
                        {
                            valid = false;
                        }
                    }
                }
            }

            return valid;
        }
Пример #7
0
        /// <summary>
        /// Reads the Dns and returns the next state the state machine should take in throwing this out, or continue processing
        /// if this is a valid domain name.
        /// @return The next state to take.
        /// </summary>
        public ReaderNextState ReadDomainName()
        {
            //Read the current, and if its bad, just return.
            if (ReadCurrent() == ReaderNextState.InvalidDomainName)
            {
                return(ReaderNextState.InvalidDomainName);
            }

            //If this is the first domain part, check if it's ip address in is hexa
            //similar to what is done on 'readCurrent' method
            bool isAllHexSoFar = (_current == null || _current.Equals("")) &&
                                 _reader.CanReadChars(3) &&
                                 ("0x".Equals(_reader.Peek(2), StringComparison.InvariantCultureIgnoreCase));

            if (isAllHexSoFar)
            {
                //Append hexa radix symbol characters (0x)
                _buffer.Append(_reader.Read());
                _buffer.Append(_reader.Read());
                _currentLabelLength += 2;
                _topLevelLength      = _currentLabelLength;
            }

            //while not done and not end of string keep reading.
            var done = false;

            while (!done && !_reader.Eof())
            {
                var curr = _reader.Read();

                if (curr == '/')
                {
                    //continue by reading the path
                    return(CheckDomainNameValid(ReaderNextState.ReadPath, curr));
                }

                if (curr == ':' && (!_seenBracket || _seenCompleteBracketSet))
                {
                    //Don't check for a port if it's in the middle of an ipv6 address
                    //continue by reading the port.
                    return(CheckDomainNameValid(ReaderNextState.ReadPort, curr));
                }

                if (curr == '?')
                {
                    //continue by reading the query string
                    return(CheckDomainNameValid(ReaderNextState.ReadQueryString, curr));
                }

                if (curr == '#')
                {
                    //continue by reading the fragment
                    return(CheckDomainNameValid(ReaderNextState.ReadFragment, curr));
                }
                else if (curr == '@')
                {
                    //this may not have been a domain after all, but rather a username/password instead
                    _reader.GoBack();
                    return(ReaderNextState.ReadUserPass);
                }
                else if (CharUtils.IsDot(curr) ||
                         curr == '%' && _reader.CanReadChars(2) && _reader.Peek(2).Equals(HEX_ENCODED_DOT, StringComparison.InvariantCultureIgnoreCase))
                {
                    //if the current character is a dot or a urlEncodedDot

                    //handles the case: hello..
                    if (_currentLabelLength < 1)
                    {
                        done = true;
                    }
                    else
                    {
                        //append the "." to the domain name
                        _buffer.Append(curr);

                        //if it was not a normal dot, then it is url encoded
                        //read the next two chars, which are the hex representation
                        if (!CharUtils.IsDot(curr))
                        {
                            _buffer.Append(_reader.Read());
                            _buffer.Append(_reader.Read());
                        }

                        //increment the dots only if it's not part of the zone index and reset the last length.
                        if (!_zoneIndex)
                        {
                            _dots++;
                            _currentLabelLength = 0;
                        }

                        //if the length of the last section is longer than or equal to 64, it's too long to be a valid domain
                        if (_currentLabelLength >= MAX_LABEL_LENGTH)
                        {
                            return(ReaderNextState.InvalidDomainName);
                        }
                    }
                }
                else if (_seenBracket && (CharUtils.IsHex(curr) || curr == ':' || curr == '[' || curr == ']' || curr == '%') &&
                         !_seenCompleteBracketSet)
                {
                    //if this is an ipv6 address.
                    switch (curr)
                    {
                    case ':':
                        _currentLabelLength = 0;
                        break;

                    case '[':
                        // if we read another '[', we need to restart by re-reading from this bracket instead.
                        _reader.GoBack();
                        return(ReaderNextState.InvalidDomainName);

                    case ']':
                        _seenCompleteBracketSet = true;                     //means that we already have a complete ipv6 address.
                        _zoneIndex = false;                                 //set this back off so that we can keep counting dots after ipv6 is over.
                        break;

                    case '%':                             //set flag to subtract subsequent dots because it's part of the zone index
                        _zoneIndex = true;
                        break;

                    default:
                        _currentLabelLength++;
                        break;
                    }

                    _numeric = false;
                    _buffer.Append(curr);
                }
                else if (CharUtils.IsAlphaNumeric(curr) || curr == '-' || curr >= INTERNATIONAL_CHAR_START)
                {
                    //Valid domain name character. Either a-z, A-Z, 0-9, -, or international character
                    if (_seenCompleteBracketSet)
                    {
                        //covers case of [fe80::]www.google.com
                        _reader.GoBack();
                        done = true;
                    }
                    else
                    {
                        if (isAllHexSoFar && !CharUtils.IsHex(curr))
                        {
                            _numeric = false;
                        }
                        //if its not numeric, remember that;
                        if (!isAllHexSoFar && !CharUtils.IsNumeric(curr))
                        {
                            _numeric = false;
                        }

                        //append to the states.
                        _buffer.Append(curr);
                        _currentLabelLength++;
                        _topLevelLength = _currentLabelLength;
                    }
                }
                else if (curr == '[' && !_seenBracket)
                {
                    _seenBracket = true;
                    _numeric     = false;
                    _buffer.Append(curr);
                }
                else if (curr == '[' && _seenCompleteBracketSet)
                {
                    //Case where [::][ ...
                    _reader.GoBack();
                    done = true;
                }
                else if (curr == '%' && _reader.CanReadChars(2) && CharUtils.IsHex(_reader.PeekChar(0)) &&
                         CharUtils.IsHex(_reader.PeekChar(1)))
                {
                    //append to the states.
                    _buffer.Append(curr);
                    _buffer.Append(_reader.Read());
                    _buffer.Append(_reader.Read());
                    _currentLabelLength += 3;
                    _topLevelLength      = _currentLabelLength;
                }
                else
                {
                    //called to increment the count of matching characters
                    //_characterHandler.addCharacter(curr);

                    _characterHandler(curr);

                    //invalid character, we are done.
                    done = true;
                }
            }

            //Check the domain name to make sure its ok.
            return(CheckDomainNameValid(ReaderNextState.ValidDomainName, null));
        }
Пример #8
0
        /// <summary>
        /// Scans the text already collected in <c>_current</c> to make sure the domain name started where it
        /// was supposed to, updating the dot count, label lengths and numeric flag along the way. If a
        /// character that cannot be part of a domain is found, the buffer is trimmed so that the domain
        /// restarts right after that character.
        /// </summary>
        /// <returns>
        /// <see cref="ReaderNextState.ValidDomainName"/> when reading of the domain may continue;
        /// <see cref="ReaderNextState.InvalidDomainName"/> when the current text is just a dot (literal or
        /// url encoded), a label exceeds <c>MAX_LABEL_LENGTH</c>, or nothing usable remains after trimming.
        /// </returns>
        private ReaderNextState ReadCurrent()
        {
            if (_current == null)
            {
                //nothing was read beforehand, so the domain name starts at the current end of the buffer.
                _startDomainName = _buffer.Length;
                return(ReaderNextState.ValidDomainName);
            }

            //Handles the case where the string is ".hello"
            if (_current.Length == 1 && CharUtils.IsDot(_current[0]))
            {
                return(ReaderNextState.InvalidDomainName);
            }

            //Same case, but with a url encoded dot. Ordinal comparison: this is a protocol token, not linguistic text.
            if (_current.Length == 3 && _current.Equals("%" + HEX_ENCODED_DOT, StringComparison.OrdinalIgnoreCase))
            {
                return(ReaderNextState.InvalidDomainName);
            }

            //The location where the domain name started.
            _startDomainName = _buffer.Length - _current.Length;

            //flag that the domain is currently all numbers and/or dots.
            _numeric = true;

            //If an invalid char is found, we can just restart the domain from there.
            var newStart = 0;
            var length   = _current.Length;

            //hex special case: a leading "0x"/"0X" followed by at least one more character.
            var isAllHexSoFar = length > 2 && _current[0] == '0' && (_current[1] == 'x' || _current[1] == 'X');

            //skip the "0x" prefix when present.
            var index = isAllHexSoFar ? 2 : 0;
            var done  = false;

            while (index < length && !done)
            {
                //get the current character and update length counts.
                //NOTE(review): the "0x" prefix is not counted, a re-scanned character (see the backtrack below)
                //is counted twice, and a "%XX" escape counts as one character - confirm this is intended.
                var curr = _current[index];
                _currentLabelLength++;
                _topLevelLength = _currentLabelLength;

                //Is the label now longer than the maximum allowed (checked after the increment above)
                if (_currentLabelLength > MAX_LABEL_LENGTH)
                {
                    return(ReaderNextState.InvalidDomainName);
                }

                if (CharUtils.IsDot(curr))
                {
                    //found a dot. Increment dot count, and reset last length
                    _dots++;
                    _currentLabelLength = 0;
                }
                else if (curr == '[')
                {
                    _seenBracket = true;
                    _numeric     = false;
                }
                else if (curr == '%' && index + 2 < length && CharUtils.IsHex(_current[index + 1]) &&
                         CharUtils.IsHex(_current[index + 2]))
                {
                    //handle url encoded dot; hex digits in an escape are case-insensitive, so accept %2e and %2E.
                    var isEncodedDot = _current[index + 1] == '2' &&
                                       (_current[index + 2] == 'e' || _current[index + 2] == 'E');

                    if (isEncodedDot)
                    {
                        _dots++;
                        _currentLabelLength = 0;
                    }
                    else
                    {
                        _numeric = false;
                    }

                    //skip the two hex digits of the escape.
                    index += 2;
                }
                else if (isAllHexSoFar)
                {
                    //if it's a valid character in the domain that is not numeric
                    if (!CharUtils.IsHex(curr))
                    {
                        _numeric      = false;
                        isAllHexSoFar = false;
                        index--;                         //backtrack to rerun last character knowing it isn't hex.
                    }
                }
                else if (CharUtils.IsAlpha(curr) || curr == '-' || curr >= INTERNATIONAL_CHAR_START)
                {
                    _numeric = false;
                }
                else if (!CharUtils.IsNumeric(curr) && !_options.HasFlag(UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN))
                {
                    //if its not numeric and not alphabetical, then restart searching for a domain from this point.
                    newStart            = index + 1;
                    _currentLabelLength = 0;
                    _topLevelLength     = 0;
                    _numeric            = true;
                    _dots = 0;
                    done  = true;
                }

                index++;
            }

            //An invalid character for the domain was found somewhere in the current buffer.
            //cut the first part of the domain out. For example:
            // http://asdf%asdf.google.com <- asdf.google.com is still valid, so restart from the %
            if (newStart > 0)
            {
                //make sure the location is not at the end. Otherwise the thing is just invalid.
                if (newStart < _current.Length)
                {
                    _buffer.Clear();
                    _buffer.Append(_current.Substring(newStart));

                    //cut out the previous part, so now the domain name has to be from here.
                    _startDomainName = 0;
                }

                //now after cutting if the buffer is just "." newStart > current (last character in current is invalid)
                if (newStart >= _current.Length || _buffer.ToString().Equals("."))
                {
                    return(ReaderNextState.InvalidDomainName);
                }
            }

            //all else is good, return OK
            return(ReaderNextState.ValidDomainName);
        }