/// <summary>
/// Splits the filename at bracket characters. Text between brackets is passed on to
/// <c>TokenizeByPreidentified</c>, and each bracket character itself is recorded as a
/// bracket token.
/// </summary>
private void TokenizeByBrackets()
{
    // Closing bracket paired with the most recently found opening bracket.
    string matchingBracket = null;

    // Returns the index of the first opening bracket in [start, end), or -1 if there is none.
    // As a side effect, remembers the closing bracket that pairs with the one found.
    int FindFirstBracket(int start, int end)
    {
        for (var i = start; i < end; i++)
        {
            foreach (var bracket in Brackets)
            {
                if (!_filename[i].Equals(char.Parse(bracket.Item1)))
                {
                    continue;
                }

                matchingBracket = bracket.Item2;
                return i;
            }
        }

        return -1;
    }

    var isBracketOpen = false;
    for (var i = 0; i < _filename.Length;)
    {
        // Outside brackets look for any opening bracket; inside, look for the matching closer.
        var foundIdx = !isBracketOpen
            ? FindFirstBracket(i, _filename.Length)
            : _filename.IndexOf(matchingBracket, i, StringComparison.Ordinal);

        // The size is always measured from i. When no bracket remains, the range covers
        // exactly the rest of the filename rather than extending past its end.
        var rangeEnd = foundIdx == -1 ? _filename.Length : foundIdx;
        var range = new TokenRange(i, rangeEnd - i);
        if (range.Size > 0)
        {
            // Check if our range contains any known anime identifiers
            TokenizeByPreidentified(isBracketOpen, range);
        }

        if (foundIdx == -1)
        {
            // No more brackets: the remainder has been handled above.
            break;
        }

        // Mark the bracket character itself as a token.
        AddToken(Token.TokenCategory.Bracket, true, new TokenRange(foundIdx, 1));
        isBracketOpen = !isBracketOpen;
        i = foundIdx + 1;
    }
}
/// <summary>
/// Searches the part of <paramref name="filename"/> covered by <paramref name="range"/> for the
/// keywords listed in <c>PeekEntries</c>, so they can be identified before the main parsing logic runs.
/// </summary>
/// <param name="filename">the filename</param>
/// <param name="range">the search range; a range that runs past the end of the filename is clamped</param>
/// <param name="elements">list that receives an element for every keyword found</param>
/// <param name="preidentifiedTokens">list that receives the token range (relative to the whole filename) of every keyword found</param>
public static void PeekAndAdd(string filename, TokenRange range, List<Element> elements, List<TokenRange> preidentifiedTokens)
{
    // Clamp the end of the search window to the filename length.
    var endR = Math.Min(range.Offset + range.Size, filename.Length);
    var search = filename.Substring(range.Offset, endR - range.Offset);

    foreach (var entry in PeekEntries)
    {
        foreach (var keyword in entry.Item2)
        {
            // Ordinal search: the token range below assumes the match is exactly
            // keyword.Length characters long starting at the returned index, which a
            // culture-sensitive search does not guarantee.
            var foundIdx = search.IndexOf(keyword, StringComparison.Ordinal);
            if (foundIdx == -1)
            {
                continue;
            }

            elements.Add(new Element(entry.Item1, keyword));

            // Translate the index from the search window back to the whole filename.
            preidentifiedTokens.Add(new TokenRange(range.Offset + foundIdx, keyword.Length));
        }
    }
}
/// <summary>
/// Collects the distinct delimiter characters that occur inside <paramref name="range"/>.
/// A character counts as a delimiter when it is not alphanumeric and is listed in
/// the options' <c>AllowedDelimiters</c>.
/// </summary>
/// <param name="range">the part of the filename to inspect; clamped to the filename length</param>
/// <returns>each delimiter found, once, in order of first appearance</returns>
private string GetDelimiters(TokenRange range)
{
    var collected = new StringBuilder();
    var stop = Math.Min(_filename.Length, range.Offset + range.Size);

    foreach (var position in Enumerable.Range(range.Offset, stop - range.Offset))
    {
        var candidate = _filename[position];
        if (StringHelper.IsAlphanumericChar(candidate))
        {
            continue;
        }

        var candidateText = candidate.ToString();
        if (!_options.AllowedDelimiters.Contains(candidateText))
        {
            continue;
        }

        // Skip delimiters that were already recorded.
        if (collected.ToString().Contains(candidateText))
        {
            continue;
        }

        collected.Append(candidate);
    }

    return collected.ToString();
}
/// <summary>
/// Tokenize by looking for known anime identifiers. Every pre-identified keyword becomes an
/// identifier token; the text before, between and after those keywords is passed on to
/// <c>TokenizeByDelimiters</c>.
/// </summary>
/// <param name="enclosed">whether or not the current <paramref name="range"/> is enclosed in brackets</param>
/// <param name="range">the token range</param>
private void TokenizeByPreidentified(bool enclosed, TokenRange range)
{
    var preidentifiedTokens = new List<TokenRange>();

    // Find known anime identifiers
    KeywordManager.PeekAndAdd(_filename, range, _elements, preidentifiedTokens);

    var offset = range.Offset;

    // Text accumulated since the last identifier (or since the start of the range).
    var subRange = new TokenRange(range.Offset, 0);
    while (offset < range.Offset + range.Size)
    {
        foreach (var preidentifiedToken in preidentifiedTokens)
        {
            if (offset != preidentifiedToken.Offset)
            {
                continue;
            }

            // Flush the plain text that precedes the identifier.
            if (subRange.Size > 0)
            {
                TokenizeByDelimiters(enclosed, subRange);
            }

            AddToken(Token.TokenCategory.Identifier, enclosed, preidentifiedToken);
            subRange.Offset = preidentifiedToken.Offset + preidentifiedToken.Size;
            offset = subRange.Offset - 1; // It's going to be incremented below

            // Stop scanning: offset has moved and subRange.Size is stale until it is
            // recomputed below, so matching another entry in this pass would emit a bogus range.
            break;
        }

        subRange.Size = ++offset - subRange.Offset;
    }

    // Either there was no preidentified token range, or we're now about to process the tail of our current range
    if (subRange.Size > 0)
    {
        TokenizeByDelimiters(enclosed, subRange);
    }
}
/// <summary>
/// Splits <paramref name="range"/> at the delimiters allowed in <see cref="Options"/>.AllowedDelimiters.
/// Text between delimiters is added as unknown tokens and each delimiter as its own
/// delimiter token. A range without any delimiter is added as a single unknown token.
/// </summary>
/// <param name="enclosed">whether or not the current <paramref name="range"/> is enclosed in brackets</param>
/// <param name="range">the token range</param>
private void TokenizeByDelimiters(bool enclosed, TokenRange range)
{
    var delimiters = GetDelimiters(range);
    if (string.IsNullOrEmpty(delimiters))
    {
        // Nothing to split on: the whole range is one token.
        AddToken(Token.TokenCategory.Unknown, enclosed, range);
        return;
    }

    var end = range.Offset + range.Size;

    // Characters are only inspected up to the end of the filename, even if the range is longer.
    var scanLimit = Math.Min(end, _filename.Length);
    var cursor = range.Offset;

    while (cursor < end)
    {
        // Locate the next delimiter at or after the cursor.
        var hit = cursor;
        while (hit < scanLimit && delimiters.IndexOf(_filename[hit]) < 0)
        {
            hit++;
        }

        // No delimiter left: treat everything up to the end of the range as text.
        if (hit == scanLimit)
        {
            hit = end;
        }

        var textRange = new TokenRange(cursor, hit - cursor);
        if (textRange.Size > 0)
        {
            AddToken(Token.TokenCategory.Unknown, enclosed, textRange);
        }

        if (hit == end)
        {
            break;
        }

        AddToken(Token.TokenCategory.Delimiter, enclosed, new TokenRange(hit, 1));
        cursor = hit + 1;
    }

    ValidateDelimiterTokens();
}
/// <summary>
/// Creates a token from the text covered by <paramref name="range"/> and appends it to the
/// internal list of tokens.
/// </summary>
/// <param name="category">the token category</param>
/// <param name="enclosed">whether or not the token is enclosed in brackets</param>
/// <param name="range">the part of the filename that makes up the token's content</param>
private void AddToken(Token.TokenCategory category, bool enclosed, TokenRange range)
{
    var content = StringHelper.SubstringWithCheck(_filename, range.Offset, range.Size);
    var token = new Token(category, content, enclosed);
    _tokens.Add(token);
}