Esempio n. 1
0
        /// <summary>
        /// Searches a text node for term matches and adds the result to the segment being built.
        /// The match-case and boundary-character options are read first. The text is then searched
        /// with the primary trie and regex trie (FindMatches / FindRegexMatches), followed by the
        /// secondary regex trie and secondary trie. If anything matched, overlapping matches are
        /// removed with RemoveOverLaps and the term translations are inserted with InjectMatches;
        /// otherwise the text node is added to the segment unchanged.
        /// </summary>
        /// <param name="text">The text node to search; its ToString() value is the string that is matched.</param>
        public void VisitText(Text text)
        {
            // Each flag is enabled only when its option equals the literal "true".
            bool matchCase        = _options.MatchCase == "true";
            bool useBoundaryChars = _options.UseBoundaryCharacters == "true";

            // Read the searched string and the boundary characters once; all four lookups share them.
            string sourceText = text.ToString();
            var boundaryChars = _options.TokenBoundaryCharacters;

            // List containing all four types of matches: normal and regex matches
            // plus normal and regex replace matches.
            List<PositionAndTranslation> allMatches = new List<PositionAndTranslation>();

            // Primary tries: normal matches first, then regex matches.
            allMatches.AddRange(_trieProcessor.FindMatches(this._trie, sourceText, boundaryChars, matchCase, useBoundaryChars));
            allMatches.AddRange(_trieProcessor.FindRegexMatches(this._regexTrie, sourceText, boundaryChars, useBoundaryChars));

            // Secondary tries: regex matches first, then normal matches.
            // TODO: discarding matches of secondary regex tries that use groups should be
            // implemented here, possibly with a switch.
            allMatches.AddRange(_trieProcessor.FindRegexMatches(this.SndRegexTrie, sourceText, boundaryChars, useBoundaryChars));
            allMatches.AddRange(_trieProcessor.FindMatches(this.SndTrie, sourceText, boundaryChars, matchCase, useBoundaryChars));

            if (allMatches.Count > 0)
            {
                // Remove the overlaps, inject the translations and flag the segment as modified.
                this._positionAndTranslationOfTerms = _trieProcessor.RemoveOverLaps(allMatches);
                this._segment.Add(_trieProcessor.InjectMatches(sourceText, _positionAndTranslationOfTerms));
                this._originalSegmentChanged = true;
            }
            else
            {
                // Nothing matched: keep the original text node as it is.
                _segment.Add(text);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Collects the term matches found in a text node. The match-case and boundary-character
        /// options are read first, then the term translations and their source term positions are
        /// retrieved with FindMatches (trie) and FindRegexMatches (regex trie). Overlapping matches
        /// are removed with RemoveOverLaps and the remaining ones are appended to the term list.
        /// </summary>
        /// <param name="text">The text node to search; its ToString() value is the string that is matched.</param>
        /// <remarks>
        /// This used to be broken (only the last element would be taken into account);
        /// fixed 15.11.2011.
        /// </remarks>
        public void VisitText(Text text)
        {
            // Each flag is enabled only when its option equals the literal "true".
            bool matchCase        = _options.MatchCase == "true";
            bool useBoundaryChars = _options.UseBoundaryCharacters == "true";

            // Read the searched string once; both lookups share it.
            string sourceText = text.ToString();

            List<PositionAndTranslation> allMatches = new List<PositionAndTranslation>();
            allMatches.AddRange(_trieProcessor.FindMatches(_trie, sourceText, _options.TokenBoundaryCharacters, matchCase, useBoundaryChars));
            allMatches.AddRange(_trieProcessor.FindRegexMatches(_regexTrie, sourceText, _options.TokenBoundaryCharacters, useBoundaryChars));

            // Append rather than assign, so entries already in the term list are kept.
            _TermList.AddRange(_trieProcessor.RemoveOverLaps(allMatches));
        }