Exemplo n.º 1
0
        /// <summary>
        /// Finds the position of the first character of the span that is a member of <paramref name="chars"/>.
        /// </summary>
        /// <param name="this">The span to scan, from its first element to its last.</param>
        /// <param name="chars">The set whose <c>Contains</c> method decides whether a character counts as a hit.</param>
        /// <returns>The zero-based index of the first hit, or -1 when no character of the span is in the set.</returns>
        public static int IndexOfAny(this ReadOnlySpan <char> @this, CharacterSet chars)
        {
            var position = 0;

            foreach (var candidate in @this)
            {
                if (chars.Contains(candidate))
                {
                    return position;
                }

                position++;
            }

            // Walked the whole span without a hit.
            return -1;
        }
        /// <summary>
        /// Recognizes a token starting at position <paramref name="from"/> of <paramref name="s"/>.
        /// A run of characters from <c>_DefaultPunctCharset</c> becomes a general-punctuation token;
        /// otherwise a match of <c>_DefaultWordRegex</c> anchored at <paramref name="from"/> becomes a
        /// word token; otherwise the base recognizer is consulted.
        /// </summary>
        /// <param name="s">The text being tokenized.</param>
        /// <param name="from">Index in <paramref name="s"/> at which recognition starts.</param>
        /// <param name="allowTokenBundles">Passed through unchanged to the base recognizer.</param>
        /// <param name="consumedLength">
        /// Receives the number of characters covered by the returned token. It is left untouched
        /// when the input is null/empty or <paramref name="from"/> is at or past the end.
        /// </param>
        /// <returns>The recognized token, or null when there is nothing to recognize at <paramref name="from"/>.</returns>
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return null;
            }

            consumedLength = 0;

            if (_DefaultPunctCharset.Contains(s[from]))
            {
                // Extend the run for as long as punctuation characters keep coming.
                int runEnd = from + 1;
                while (runEnd < s.Length && _DefaultPunctCharset.Contains(s[runEnd]))
                {
                    runEnd++;
                }

                consumedLength = runEnd - from;
                Token punctuation = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
                return punctuation;
            }

            System.Text.RegularExpressions.Match wordMatch = _DefaultWordRegex.Match(s, from);
            bool matchedAtCursor = wordMatch != null && wordMatch.Success && wordMatch.Index == from;

            if (matchedAtCursor)
            {
                consumedLength = wordMatch.Length;
                Token word = new SimpleToken(wordMatch.Value, TokenType.Word);
                return word;
            }

            /*
             *      AUTOMATON PUNCT [U+3000-U+303FU+3200-U+32FFU+FF01-U+FF0FU+FF1A-U+FF20U+FF3B-U+FF3DU+FF5B-U+FF64]
             *      NFA WORD [U+30A0-U+30FFU+FF65-U+FF9F]+
             *      NFA WORD [U+3040-U+3091U+3093-U+309F]+
             *      NFA WORD [U+3092]
             *      NFA WORD [U+4E00-U+9FFF]+
             *      NFA WORD [U+FF21-U+FF3AU+FF41-U+FF5A]+
             */

            // Neither branch applied: defer to the base recognizer.
            return base.Recognize(s, from, allowTokenBundles, ref consumedLength);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Recognizes a token starting at position <paramref name="from"/> of <paramref name="s"/>.
        /// A run of characters from <c>_DefaultPunctCharset</c> becomes a general-punctuation token;
        /// a run of characters in the range U+4E00..U+9FFF becomes a character-sequence token;
        /// anything else is handed to the base recognizer.
        /// </summary>
        /// <param name="s">The text being tokenized.</param>
        /// <param name="from">Index in <paramref name="s"/> at which recognition starts.</param>
        /// <param name="allowTokenBundles">Passed through unchanged to the base recognizer.</param>
        /// <param name="consumedLength">
        /// Receives the number of characters covered by the returned token. It is left untouched
        /// when the input is null/empty or <paramref name="from"/> is at or past the end.
        /// </param>
        /// <returns>The recognized token, or null when there is nothing to recognize at <paramref name="from"/>.</returns>
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return null;
            }

            consumedLength = 0;
            char firstChar = s[from];

            if (_DefaultPunctCharset.Contains(firstChar))
            {
                // Extend the run for as long as punctuation characters keep coming.
                int punctEnd = from + 1;
                while (punctEnd < s.Length && _DefaultPunctCharset.Contains(s[punctEnd]))
                {
                    punctEnd++;
                }

                consumedLength = punctEnd - from;
                Token punctuation = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
                return punctuation;
            }

            if (firstChar >= 0x4e00 && firstChar <= 0x9fff)
            {
                // Extend the run while characters stay inside U+4E00..U+9FFF.
                int rangeEnd = from + 1;
                while (rangeEnd < s.Length && s[rangeEnd] >= 0x4e00 && s[rangeEnd] <= 0x9fff)
                {
                    rangeEnd++;
                }

                consumedLength = rangeEnd - from;
                Token sequence = new SimpleToken(s.Substring(from, consumedLength), TokenType.CharSequence);
                return sequence;
            }

            // TODO CJK punctuation etc.

            return base.Recognize(s, from, allowTokenBundles, ref consumedLength);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Deletes, in place, every character of the builder that is a member of <paramref name="chars"/>.
        /// Surviving characters keep their relative order.
        /// </summary>
        /// <param name="this">The builder to compact.</param>
        /// <param name="chars">The set of characters to strip out.</param>
        public static void RemoveChars(this StringBuilder @this, CharacterSet chars)
        {
            var originalLength = @this.Length;
            var keptCount      = 0;
            var readIndex      = 0;

            // Slide every surviving character down to the next free slot.
            while (readIndex < originalLength)
            {
                var current = @this[readIndex];
                readIndex++;

                if (chars.Contains(current))
                {
                    continue;
                }

                @this[keptCount] = current;
                keptCount++;
            }

            // Everything past the compacted prefix is stale; cut it off.
            @this.Remove(keptCount, originalLength - keptCount);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Consumes consecutive characters from the current position for as long as their membership
        /// in <paramref name="charSet"/> equals <paramref name="shouldIncludeChars"/>, advancing
        /// <c>index</c> and counting newlines into <c>lineIndex</c>.
        /// </summary>
        /// <param name="charSet">The set each character is tested against.</param>
        /// <param name="shouldIncludeChars">
        /// True to consume characters that are in the set; false to consume characters that are not.
        /// </param>
        /// <param name="maxCount">Upper bound on the number of characters consumed; -1 means no limit.</param>
        /// <returns>The consumed text, or null when no character was consumed.</returns>
        public string ParseCharactersFromCharSet(CharacterSet charSet, bool shouldIncludeChars = true, int maxCount = -1)
        {
            int limit = (maxCount == -1) ? int.MaxValue : maxCount;

            // Optimisation from profiling: index and lineIndex are properties that go through
            // the rule stack, so work on local copies and write them back once at the end.
            int start  = index;
            int cursor = start;
            int line   = lineIndex;

            for (int taken = 0;
                 cursor < _chars.Length && charSet.Contains(_chars [cursor]) == shouldIncludeChars && taken < limit;
                 taken++)
            {
                if (_chars [cursor] == '\n')
                {
                    line++;
                }
                cursor++;
            }

            index     = cursor;
            lineIndex = line;

            return cursor > start ? new string (_chars, start, cursor - start) : null;
        }
Exemplo n.º 6
0
            /// <summary>
            /// Returns the score of <paramref name="input"/> when matched against <c>textBoxCache</c>
            /// used as the abbreviation. Results are memoized in <c>scoreCache</c>, keyed by the input string.
            /// </summary>
            /// <param name="input">The candidate string to score.</param>
            /// <returns>
            /// 0 for a null or empty input; otherwise the cached or freshly computed abbreviation score
            /// (0 means the abbreviation could not be matched).
            /// </returns>
            private float GetScore(string input)
            {
                if (string.IsNullOrEmpty(input))
                {
                    return(0);
                }

                float finalScore;
                bool  cacheHit = scoreCache.TryGetValue(input, out finalScore);

                if (!cacheHit)
                {
                    finalScore = ScoreForAbbreviation(input, textBoxCache);
                    scoreCache.Add(input, finalScore);
                }
                return(finalScore);

                // https://github.com/quicksilver/Quicksilver/blob/8be7395b795179cf51cf30ebf82779e0f9ba2138/Quicksilver/Code-QuickStepFoundation/QSSense.m
                // Scores the whole of str against the whole of abbr.
                float ScoreForAbbreviation(String str, String abbr)
                {
                    return(ScoreForAbbreviationWithRanges(str, abbr, new StringRange(0, str.Length), new StringRange(0, abbr.Length)));
                }

                // Scores the part of str described by strRange against the part of abbr described
                // by abbrRange. Tries the longest prefix of the abbreviation range first, then
                // recurses on what is left of both the string and the abbreviation.
                float ScoreForAbbreviationWithRanges(String str, String abbr, StringRange strRange, StringRange abbrRange)
                {
                    const float IGNORED_SCORE = 0.9f;
                    const float SKIPPED_SCORE = 0.15f;

                    if (abbrRange.Length == 0)
                    {
                        return(IGNORED_SCORE); //deduct some points for all remaining letters
                    }
                    if (abbrRange.Length > strRange.Length)
                    {
                        // The abbreviation cannot fit into what is left of the string.
                        return(0.0f);
                    }

                    float       score = 0.0f, remainingScore = 0.0f;
                    int         i, j;
                    StringRange remainingStrRange = new StringRange(0, 0), adjustedStrRange = strRange;

                    for (i = abbrRange.Length; i > 0; i--)
                    {
                        //Search for steadily smaller portions of the abbreviation
                        String curAbbr = abbr.Substring(abbrRange.Start, i);
                        // The search window shrinks with i so that the rest of the abbreviation still fits behind the match.
                        int    idx     = str.IndexOf(curAbbr, adjustedStrRange.Start, adjustedStrRange.Length - abbrRange.Length + i, StringComparison.CurrentCultureIgnoreCase);
                        if (idx == -1)
                        {
                            // not found
                            continue;
                        }

                        // Only build the matched range once a real position is known.
                        StringRange matchedRange = new StringRange(idx, curAbbr.Length);

                        remainingStrRange.Start  = matchedRange.Start + matchedRange.Length;
                        remainingStrRange.Length = strRange.Start + strRange.Length - remainingStrRange.Start;

                        // Search what is left of the string with the rest of the abbreviation
                        remainingScore = ScoreForAbbreviationWithRanges(str, abbr, remainingStrRange, new StringRange(abbrRange.Start + i, abbrRange.Length - i));

                        if (remainingScore != 0)
                        {
                            // Start from one point per character between the range start and the end of the match.
                            score = remainingStrRange.Start - strRange.Start;
                            // ignore skipped characters if is first letter of a word
                            if (matchedRange.Start > strRange.Start)
                            {//if some letters were skipped
                                // The string indexer is used for character access: Enumerable.ElementAt
                                // would walk the string from its beginning on every call.
                                if (WordSeperators.Contains(str[matchedRange.Start - 1]))
                                {
                                    // Match starts right after a separator: earlier separators cost a
                                    // full point each, other skipped characters only SKIPPED_SCORE.
                                    for (j = matchedRange.Start - 2; j >= strRange.Start; j--)
                                    {
                                        if (WordSeperators.Contains(str[j]))
                                        {
                                            score--;
                                        }
                                        else
                                        {
                                            score -= SKIPPED_SCORE;
                                        }
                                    }
                                }
                                else if (Uppercase.Contains(str[matchedRange.Start]))
                                {
                                    // Match starts on an uppercase letter: earlier uppercase letters cost
                                    // a full point each, other skipped characters only SKIPPED_SCORE.
                                    for (j = matchedRange.Start - 1; j >= strRange.Start; j--)
                                    {
                                        if (Uppercase.Contains(str[j]))
                                        {
                                            score--;
                                        }
                                        else
                                        {
                                            score -= SKIPPED_SCORE;
                                        }
                                    }
                                }
                                else
                                {
                                    // Integer division: an odd number of skipped characters rounds the penalty down.
                                    score -= (matchedRange.Start - strRange.Start) / 2;
                                }
                            }
                            // Blend in the score of the remainder, weighted by its length, and normalize by the range length.
                            score += remainingScore * remainingStrRange.Length;
                            score /= strRange.Length;
                            return(score);
                        }
                    }
                    return(0.0f);
                }
            }
Exemplo n.º 7
0
        /// <summary>
        /// Accumulates text from the current position until <paramref name="stopRule"/> would succeed,
        /// an end character is reached, or the input is exhausted.
        /// </summary>
        /// <param name="stopRule">Rule that is peeked at every stopping point; a non-null result ends the parse.</param>
        /// <param name="pauseCharacters">
        /// Characters at which <paramref name="stopRule"/> is tried; if it fails there, the character
        /// is added to the result and parsing continues. May be null.
        /// </param>
        /// <param name="endCharacters">
        /// Characters at which parsing stops if <paramref name="stopRule"/> fails there (unless the
        /// character is also a pause character). May be null.
        /// </param>
        /// <returns>
        /// The result of <c>SucceedRule</c> with the accumulated text when at least one character was
        /// gathered; otherwise the result of <c>FailRule</c>.
        /// </returns>
        public string ParseUntil(ParseRule stopRule, CharacterSet pauseCharacters = null, CharacterSet endCharacters = null)
        {
            int ruleId = BeginRule();

            // Every character at which scanning has to stop: pause points and end points together.
            CharacterSet stopSet = new CharacterSet();
            if (pauseCharacters != null)
            {
                stopSet.UnionWith(pauseCharacters);
            }
            if (endCharacters != null)
            {
                stopSet.UnionWith(endCharacters);
            }

            StringBuilder collected = new StringBuilder();

            // Read up to each stopping point, try the stop rule there, and carry on only
            // when the stopping point was a pause character.
            while (true)
            {
                // TODO: Perhaps if no pause or end characters are passed, we should check *every* character for stopRule?
                string chunk = ParseUntilCharactersFromCharSet(stopSet);
                if (chunk != null)
                {
                    collected.Append(chunk);
                }

                // The stop rule matches here: done.
                object ruleResult = Peek(stopRule);
                if (ruleResult != null)
                {
                    break;
                }

                if (endOfInput)
                {
                    break;
                }

                // The rule failed at this stopping point; only a pause character lets us go on.
                char stopChar = currentCharacter;
                bool isPause  = pauseCharacters != null && pauseCharacters.Contains(stopChar);
                if (!isPause)
                {
                    break;
                }

                // Step past the pause character, keeping it in the output.
                collected.Append(stopChar);
                if (stopChar == '\n')
                {
                    lineIndex++;
                }
                index++;
            }

            if (collected.Length == 0)
            {
                return (string)FailRule(ruleId);
            }

            return (string)SucceedRule(ruleId, collected.ToString());
        }
Exemplo n.º 8
0
        /// <summary>
        /// Tries every entry of <c>_Patterns</c> at position <paramref name="from"/> and returns the best token.
        /// The longest match wins. Among matches of equal length, either the higher-priority pattern
        /// replaces the current winner (bundles disallowed) or the tokens are collected into a
        /// <c>TokenBundle</c> (bundles allowed). On a full tie the earlier pattern stays the winner.
        /// </summary>
        /// <param name="s">The text being tokenized.</param>
        /// <param name="from">Index in <paramref name="s"/> at which every match has to start.</param>
        /// <param name="allowTokenBundles">Whether equally long matches are bundled instead of ranked by priority.</param>
        /// <param name="consumedLength">Receives the length of the winning match; untouched when nothing matched.</param>
        /// <returns>The winning token or bundle, or null when no pattern produced a usable token.</returns>
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            // TODO this could test whether the match is followed by whitespace or non-word characters (punctuation etc) -
            //  that would be simpler than including the respective constraints in the RX

            Token best         = null;
            int   bestLength   = 0;
            int   bestPriority = 0;

            for (int p = 0; p < _Patterns.Count; ++p)
            {
                var pattern = _Patterns[p];
                System.Text.RegularExpressions.Regex rx = pattern.Regex;
                CharacterSet firstChars = pattern.First;

                // NOTE if the requirement that m.Index == from is dropped, we cannot use FIRST any more
                if (firstChars != null && from < s.Length && !firstChars.Contains(s[from]))
                {
                    continue;
                }

                System.Text.RegularExpressions.Match m = rx.Match(s, from);
                if (m == null || !m.Success || m.Index != from)
                {
                    continue;
                }

                if (!VerifyContextConstraints(s, m.Index + m.Value.Length))
                {
                    continue;
                }

                Token candidate = CreateToken(m.Value, m.Groups);
                // TODO set other token values?
                if (candidate == null || m.Length <= 0)
                {
                    continue;
                }

                int  priority   = pattern.Priority;
                bool sameLength = m.Length == bestLength;

                // longest wins, if two matches are found with equal length, the one with
                //  higher prio wins, or if both have same prio, first match wins
                bool replacesBest = m.Length > bestLength
                                    || best == null
                                    || (sameLength && priority > bestPriority && !allowTokenBundles);

                if (replacesBest)
                {
                    bestLength   = m.Length;
                    best         = candidate;
                    bestPriority = priority;
                }
                else if (allowTokenBundles && sameLength)
                {
                    // Equally long alternative: keep both, wrapping the current winner on first use.
                    if (!(best is TokenBundle))
                    {
                        best = new TokenBundle(best, bestPriority);
                    }

                    ((TokenBundle)best).Add(candidate, priority);
                    bestPriority = Math.Max(bestPriority, priority);
                }
            }

            if (best == null)
            {
                return null;
            }

            consumedLength = bestLength;
            return best;
        }