C# (CSharp) urldetector.detection CharUtils.IsAlpha Examples

Programming Language: C# (CSharp)

Namespace/Package Name: urldetector.detection

Class/Type: CharUtils

Method/Function: IsAlpha

Examples at hotexamples.com: 2

C# (CSharp) urldetector.detection CharUtils.IsAlpha - 2 examples found. These are the top rated real world C# (CSharp) examples of urldetector.detection.CharUtils.IsAlpha extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

IsHex(4)

IsDot(3)

IsNumeric(3)

IsAlpha(2)

IsAlphaNumeric(1)

IsUnreserved(1)

IsWhiteSpace(1)

SplitByDot(1)

Example #1

Show file

        /// <summary>
        /// Reads characters from the reader looking for the "//" that ends a scheme and checks whether the
        /// buffered text ends with one of the allowed schemes (e.g. http(s?):// or ftp(s?)://).
        /// </summary>
        /// <returns>
        /// True if an allowed scheme was found at the end of the buffer; otherwise false. When the input
        /// does not look like a scheme, the result of <c>ReadUserPass(0)</c> is returned instead.
        /// </returns>
        private bool ReadScheme()
        {
            var originalLength = _buffer.Length;
            var numSlashes     = 0;

            while (!_reader.Eof())
            {
                var curr = _reader.Read();

                // A scheme is terminated by two slashes; count them as they arrive.
                if (curr == '/')
                {
                    _buffer.Append(curr);
                    if (numSlashes == 1)
                    {
                        // Second slash seen: succeed only if the buffer ends with an approved scheme.
                        // The buffer may carry leading junk, e.g. ":u(https://"; in that case only the
                        // trailing "https://" is kept and the characters before it are dropped below.
                        var bufferedUrlContent = _buffer.ToString().ToLowerInvariant();

                        // For efficiency, first check whether the whole buffer is exactly a valid scheme.
                        if (ValidSchemesSuffixed.Contains(bufferedUrlContent))
                        {
                            _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                            return(true);
                        }

                        // No exact match: look for a valid scheme at the end of the buffer, walking
                        // ValidSchemesSuffixedOrdered from its last entry backwards (presumably longest
                        // first, so that sftp:// is preferred over ftp:// - confirm against the array's order).
                        for (var i = ValidSchemesSuffixedOrdered.Length - 1; i >= 0; i--)
                        {
                            var vss = ValidSchemesSuffixedOrdered[i];

                            // Ordinal comparison: a culture-sensitive EndsWith may report a match whose
                            // length differs from vss.Length (e.g. when ignorable characters are present),
                            // which would make the Remove() arithmetic below cut the wrong characters.
                            if (bufferedUrlContent.EndsWith(vss, System.StringComparison.Ordinal))
                            {
                                // Drop any extra characters that precede the scheme in the buffer.
                                if (bufferedUrlContent.Length > vss.Length)
                                {
                                    _buffer.Remove(0, bufferedUrlContent.Length - vss.Length);
                                }
                                _currentUrlMarker.SetIndex(UrlPart.SCHEME, 0);
                                return(true);
                            }
                        }
                        return(false);
                    }

                    numSlashes++;
                }
                else if (curr == ' ' || CheckMatchingCharacter(curr) != CharacterMatch.CharacterNotMatched)
                {
                    // A space, or a character that CheckMatchingCharacter reports as matched, ends the
                    // attempt: keep the character in the buffer and report that no scheme was found.
                    _buffer.Append(curr);
                    return(false);
                }
                else if (curr == '[')
                {
                    // Possibly the start of an ipv6 address: unread the '[' so the caller can look for it.
                    _reader.GoBack();
                    return(false);
                }
                else if (originalLength > 0 || numSlashes > 0 || !CharUtils.IsAlpha(curr))
                {
                    // Either the buffer already had content on entry, a single slash was followed by a
                    // non-slash, or the character is not a-z / A-Z: assume this is not a scheme and try
                    // to read a username and password instead.
                    _reader.GoBack();
                    return(ReadUserPass(0));
                }

                // Otherwise (alphabetic character, buffer empty on entry, no slash seen yet) the character
                // is consumed without being appended and scanning continues.
            }

            // Reached the end of input without finding "//".
            return(false);
        }

Example #2

Show file

File: DomainNameReader.cs Project: eladaus/URL-Detector

        /// <summary>
        /// Parses the text already held in <c>_current</c> to establish where the domain name starts in the
        /// buffer and to initialise the dot, label-length and numeric tracking state for it.
        /// If an invalid character is found, the domain is restarted from the character after it.
        /// </summary>
        /// <returns>
        /// <c>ReaderNextState.InvalidDomainName</c> when the current text cannot be (the start of) a domain
        /// name; otherwise <c>ReaderNextState.ValidDomainName</c>.
        /// </returns>
        private ReaderNextState ReadCurrent()
        {
            if (_current != null)
            {
                // A lone dot cannot start a domain name (covers input such as ".hello").
                if (_current.Length == 1 && CharUtils.IsDot(_current[0]))
                {
                    return(ReaderNextState.InvalidDomainName);
                }

                // Same check for a lone URL-encoded dot; hex digits are compared case-insensitively.
                if (_current.Length == 3 && _current.Equals("%" + HEX_ENCODED_DOT, StringComparison.OrdinalIgnoreCase))
                {
                    return(ReaderNextState.InvalidDomainName);
                }

                // The location in the buffer where the domain name started.
                _startDomainName = _buffer.Length - _current.Length;

                // Flag that the domain is currently all numbers and/or dots.
                _numeric = true;

                // If an invalid char is found, we can just restart the domain from there.
                var newStart = 0;

                var currArray = _current.ToCharArray();
                var length    = currArray.Length;

                // Hex special case: text starting with "0x"/"0X" is treated as hex until proven otherwise.
                var isAllHexSoFar = length > 2 && currArray[0] == '0' && (currArray[1] == 'x' || currArray[1] == 'X');

                // Skip the "0x" prefix when scanning a hex candidate.
                var index = isAllHexSoFar ? 2 : 0;
                var done  = false;

                while (index < length && !done)
                {
                    // Get the current character and update length counts.
                    var curr = currArray[index];
                    _currentLabelLength++;
                    _topLevelLength = _currentLabelLength;

                    // Reject labels longer than the maximum allowed label length.
                    if (_currentLabelLength > MAX_LABEL_LENGTH)
                    {
                        return(ReaderNextState.InvalidDomainName);
                    }

                    if (CharUtils.IsDot(curr))
                    {
                        // Found a dot: increment the dot count and start a new label.
                        _dots++;
                        _currentLabelLength = 0;
                    }
                    else if (curr == '[')
                    {
                        _seenBracket = true;
                        _numeric     = false;
                    }
                    else if (curr == '%' && index + 2 < length && CharUtils.IsHex(currArray[index + 1]) &&
                             CharUtils.IsHex(currArray[index + 2]))
                    {
                        // Percent-encoded character. Hex digits in a percent-encoding are case-insensitive,
                        // so both "%2e" and "%2E" are treated as an encoded dot (this also matches the
                        // case-insensitive whole-string check at the top of this method).
                        var firstHexDigit  = currArray[index + 1];
                        var secondHexDigit = currArray[index + 2];

                        if (firstHexDigit == '2' && (secondHexDigit == 'e' || secondHexDigit == 'E'))
                        {
                            _dots++;
                            _currentLabelLength = 0;
                        }
                        else
                        {
                            _numeric = false;
                        }

                        // Skip the two hex digits that were just consumed.
                        index += 2;
                    }
                    else if (isAllHexSoFar)
                    {
                        // Still inside a "0x..." candidate: a non-hex character ends the hex interpretation.
                        if (!CharUtils.IsHex(curr))
                        {
                            _numeric      = false;
                            isAllHexSoFar = false;
                            index--;                             // backtrack to rerun this character knowing it isn't hex.
                        }
                    }
                    else if (CharUtils.IsAlpha(curr) || curr == '-' || curr >= INTERNATIONAL_CHAR_START)
                    {
                        // Valid domain character that is not numeric.
                        _numeric = false;
                    }
                    else if (!CharUtils.IsNumeric(curr) && !_options.HasFlag(UrlDetectorOptions.ALLOW_SINGLE_LEVEL_DOMAIN))
                    {
                        // Neither numeric nor alphabetical: restart searching for a domain after this
                        // character and reset all tracking state.
                        newStart            = index + 1;
                        _currentLabelLength = 0;
                        _topLevelLength     = 0;
                        _numeric            = true;
                        _dots = 0;
                        done  = true;
                    }

                    index++;
                }

                // An invalid character for the domain was found somewhere in the current text.
                // Cut the first part of the domain out. For example:
                // http://asdf%asdf.google.com <- asdf.google.com is still valid, so restart after the %
                if (newStart > 0)
                {
                    // Only restart if something remains after the invalid character.
                    if (newStart < _current.Length)
                    {
                        _buffer.Clear();
                        _buffer.Append(_current.Substring(newStart));

                        // The previous part was cut out, so the domain name now starts at the buffer start.
                        _startDomainName = 0;
                    }

                    // Invalid if the invalid character was the last one in _current (nothing left to
                    // restart from) or if all that remains in the buffer is a single ".".
                    if (newStart >= _current.Length || _buffer.ToString().Equals("."))
                    {
                        return(ReaderNextState.InvalidDomainName);
                    }
                }
            }
            else
            {
                // Nothing read so far: the domain name starts at the current end of the buffer.
                _startDomainName = _buffer.Length;
            }

            // All else is good, return OK.
            return(ReaderNextState.ValidDomainName);
        }