Exemplo n.º 1
0
        /// <summary>
        /// Splits <c>_filename</c> into sections separated by the bracket pairs listed in <c>Brackets</c>.
        /// Each non-empty section is handed to <see cref="TokenizeByPreidentified"/> (with
        /// <c>enclosed</c> set to whether a bracket is currently open), and every bracket character
        /// found is added as its own <c>Bracket</c> token.
        /// </summary>
        private void TokenizeByBrackets()
        {
            // Closing bracket that pairs with the most recently found opening bracket.
            string matchingBracket = null;

            // Returns the index of the first opening bracket in [start, end), or -1 if there is none.
            // As a side effect it records the corresponding closing bracket in matchingBracket.
            Func <int, int, int> findFirstBracket = (start, end) =>
            {
                for (var i = start; i < end; i++)
                {
                    foreach (var bracket in Brackets)
                    {
                        if (_filename[i].Equals(char.Parse(bracket.Item1)))
                        {
                            matchingBracket = bracket.Item2;
                            return(i);
                        }
                    }
                }

                return(-1);
            };

            var isBracketOpen = false;

            for (var i = 0; i < _filename.Length;)
            {
                int foundIdx;
                if (!isBracketOpen)
                {
                    // Look for opening brace
                    foundIdx = findFirstBracket(i, _filename.Length);
                }
                else
                {
                    // Look for closing brace; brackets are symbols, so compare ordinally
                    // rather than with the current culture's linguistic rules.
                    foundIdx = _filename.IndexOf(matchingBracket, i, StringComparison.Ordinal);
                }

                // The section runs from i up to the bracket, or to the end of the filename when
                // no bracket is left. The size is always measured from i so that the range never
                // extends past the end of the filename.
                var sectionEnd = foundIdx == -1 ? _filename.Length : foundIdx;
                var range      = new TokenRange(i, sectionEnd - i);
                if (range.Size > 0)
                {
                    // Check if our range contains any known anime identifiers
                    TokenizeByPreidentified(isBracketOpen, range);
                }

                if (foundIdx == -1)
                {
                    // No more brackets: the remainder of the filename has been processed.
                    break;
                }

                // mark as bracket
                AddToken(Token.TokenCategory.Bracket, true, new TokenRange(foundIdx, 1));
                isBracketOpen = !isBracketOpen;
                i             = foundIdx + 1;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Searches the part of <paramref name="filename"/> covered by <paramref name="range"/> for the
        /// keywords listed in <c>PeekEntries</c>, so they can be pre-identified before the main parsing logic.
        /// For every keyword found, an <see cref="Element"/> is appended to <paramref name="elements"/> and the
        /// keyword's position (relative to the whole filename) is appended to <paramref name="preidentifiedTokens"/>.
        /// Only the first occurrence of each keyword inside the range is reported.
        /// </summary>
        /// <param name="filename">the filename</param>
        /// <param name="range">the search range; the part extending past the end of the filename is ignored</param>
        /// <param name="elements">elements array that any pre-identified elements will be added to</param>
        /// <param name="preidentifiedTokens">elements array that any pre-identified token ranges will be added to</param>
        public void PeekAndAdd(string filename, TokenRange range, List <Element> elements,
                               List <TokenRange> preidentifiedTokens)
        {
            // Clamp the search window to the filename; an empty window cannot contain a keyword.
            var start = range.Offset;
            var end   = Math.Min(range.Offset + range.Size, filename.Length);
            if (end <= start)
            {
                return;
            }

            var search = filename.Substring(start, end - start);

            foreach (var entry in PeekEntries)
            {
                foreach (var keyword in entry.Item2)
                {
                    // Keywords are fixed identifiers, not user-facing text, so match them ordinally
                    // instead of with culture-dependent linguistic rules.
                    var foundIdx = search.IndexOf(keyword, StringComparison.Ordinal);
                    if (foundIdx == -1)
                    {
                        continue;
                    }

                    elements.Add(new Element(entry.Item1, keyword));
                    // Translate the index inside the search window back to a filename index.
                    preidentifiedTokens.Add(new TokenRange(start + foundIdx, keyword.Length));
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Collects the distinct delimiter characters that occur in the part of <c>_filename</c> covered by
        /// <paramref name="range"/>. A character counts as a delimiter when it is not alphanumeric
        /// (per <c>StringHelper.IsAlphanumericChar</c>) and is contained in <c>_options.AllowedDelimiters</c>.
        /// </summary>
        /// <param name="range">the token range to scan; the part past the end of the filename is ignored</param>
        /// <returns>the delimiters found, each once, in order of first appearance (empty if none)</returns>
        private string GetDelimiters(TokenRange range)
        {
            var delimiters = new StringBuilder();

            // True when c has already been appended to the result.
            bool AlreadyCollected(char c)
            {
                for (var k = 0; k < delimiters.Length; k++)
                {
                    if (delimiters[k] == c)
                    {
                        return(true);
                    }
                }
                return(false);
            }

            // Never read past the end of the filename, even if the range claims to extend further.
            var end = Math.Min(_filename.Length, range.Offset + range.Size);

            for (var i = range.Offset; i < end; i++)
            {
                var c = _filename[i];
                if (StringHelper.IsAlphanumericChar(c))
                {
                    continue;
                }

                if (_options.AllowedDelimiters.Contains(c.ToString()) && !AlreadyCollected(c))
                {
                    delimiters.Append(c);
                }
            }

            return(delimiters.ToString());
        }
Exemplo n.º 4
0
        /// <summary>
        /// Tokenize by delimiters allowed in <see cref="Options"/>.AllowedDelimiters.
        /// The text between delimiters is added as <c>Unknown</c> tokens and each delimiter character as a
        /// <c>Delimiter</c> token. If the range contains no delimiter at all, the whole range is added as a
        /// single <c>Unknown</c> token. When at least one delimiter was present,
        /// <c>ValidateDelimiterTokens</c> is called afterwards.
        /// </summary>
        /// <param name="enclosed">whether or not the current <code>range</code> is enclosed in braces</param>
        /// <param name="range">the token range</param>
        private void TokenizeByDelimiters(bool enclosed, TokenRange range)
        {
            var delimiters = GetDelimiters(range);

            if (string.IsNullOrEmpty(delimiters))
            {
                AddToken(Token.TokenCategory.Unknown, enclosed, range);
                return;
            }

            var delimiterChars = delimiters.ToCharArray();

            // Clamp the end of the range to the filename so that no token range is ever
            // produced beyond the last character.
            var end = Math.Min(range.Offset + range.Size, _filename.Length);

            for (var i = range.Offset; i < end;)
            {
                // Index of the next delimiter in [i, end), or end when there is none.
                var found = _filename.IndexOfAny(delimiterChars, i, end - i);
                if (found == -1)
                {
                    found = end;
                }

                TokenRange subRange = new TokenRange(i, found - i);
                if (subRange.Size > 0)
                {
                    AddToken(Token.TokenCategory.Unknown, enclosed, subRange);
                }

                if (found == end)
                {
                    // Reached the end of the range without another delimiter.
                    break;
                }

                AddToken(Token.TokenCategory.Delimiter, enclosed, new TokenRange(found, 1));
                i = found + 1;
            }

            ValidateDelimiterTokens();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Splits <paramref name="range"/> around the keyword ranges reported by
        /// <c>KeywordManager.Instance.PeekAndAdd</c>. Each reported range is added as an
        /// <c>Identifier</c> token; the text before, between and after them is passed on to
        /// <see cref="TokenizeByDelimiters"/>.
        /// </summary>
        /// <param name="enclosed">whether or not the current <code>range</code> is enclosed in braces</param>
        /// <param name="range">the token range</param>
        private void TokenizeByPreidentified(bool enclosed, TokenRange range)
        {
            var knownRanges = new List <TokenRange>();

            // Ask the keyword manager which parts of the range are already identifiable
            KeywordManager.Instance.PeekAndAdd(_filename, range, _elements, knownRanges);

            var rangeEnd = range.Offset + range.Size;
            var cursor   = range.Offset;

            // Text accumulated since the last identifier (or since the start of the range)
            var pending = new TokenRange(range.Offset, 0);

            while (cursor < rangeEnd)
            {
                foreach (var known in knownRanges)
                {
                    if (cursor != known.Offset)
                    {
                        continue;
                    }

                    // Flush whatever precedes the identifier before emitting the identifier itself
                    if (pending.Size > 0)
                    {
                        TokenizeByDelimiters(enclosed, pending);
                    }

                    AddToken(Token.TokenCategory.Identifier, enclosed, known);

                    // Restart accumulation right after the identifier; step the cursor one back
                    // because it is advanced again at the bottom of the loop
                    pending.Offset = known.Offset + known.Size;
                    cursor         = pending.Offset - 1;
                }

                cursor++;
                pending.Size = cursor - pending.Offset;
            }

            // Nothing was pre-identified, or there is text left after the last identifier
            if (pending.Size > 0)
            {
                TokenizeByDelimiters(enclosed, pending);
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Appends a new token to the internal token list. The token text is taken from
 /// <c>_filename</c> via <c>StringHelper.SubstringWithCheck</c> using the offset and size of
 /// <paramref name="range"/>.
 /// </summary>
 /// <param name="category">the token category</param>
 /// <param name="enclosed">whether or not the token is enclosed in braces</param>
 /// <param name="range">the token range</param>
 private void AddToken(Token.TokenCategory category, bool enclosed, TokenRange range)
 {
     var content = StringHelper.SubstringWithCheck(_filename, range.Offset, range.Size);
     var token   = new Token(category, content, enclosed);

     _tokens.Add(token);
 }