예제 #1
0
        /// <summary>
        /// Calculates the score contributed by <paramref name="regExp"/> for <paramref name="text"/>
        /// and, when at least one filtered match is found, updates the expression's
        /// document and match counters.
        /// </summary>
        /// <param name="regExp">Expression that supplies the filtered matches, <c>Score</c> and <c>Factor</c>.</param>
        /// <param name="text">Text passed to <c>GetFilteredMatches</c>.</param>
        /// <param name="isMatch">Set to <c>true</c> when at least one filtered match was found; otherwise <c>false</c>.</param>
        /// <returns>
        /// <c>n * Score + (n * (n - 1) / 2) * Factor</c> for <c>n</c> matches (a missing Score/Factor counts as 0),
        /// or 0 when there are no matches. Exceptions are passed to <c>Logger.HandleException</c>
        /// and the value computed so far is returned.
        /// </returns>
        protected int CalcRegExpScore(RegExpBase regExp, string text, out bool isMatch)
        {
            var scoreResult = 0;

            isMatch = false;

            try
            {
                var matches      = regExp.GetFilteredMatches(text);
                var totalMatches = matches.Count;

                if (totalMatches > 0)
                {
                    isMatch = true;

                    regExp.IncrementTotalDocuments();
                    regExp.AddTotalMatches(totalMatches);

                    ///////////////////////////////////////////////////////////////////////////////

                    var score  = regExp.Score ?? 0;
                    var factor = regExp.Factor ?? 0;

                    // n * (n - 1) is always even, so halve whichever operand is even BEFORE multiplying.
                    // The previous form "(n / 2) * (n - 1)" truncated for odd n (e.g. n = 3 gave 2 instead of 3).
                    var pairCount = totalMatches % 2 == 0
                                    ? (totalMatches / 2) * (totalMatches - 1)
                                    : totalMatches * ((totalMatches - 1) / 2);

                    scoreResult  = totalMatches * score;
                    scoreResult += pairCount * factor;
                }
            }
            catch (Exception ex)
            {
                // Best effort: log and fall through with whatever has been computed so far.
                Logger.HandleException(ex);
            }

            return(scoreResult);
        }
예제 #2
0
        /// <summary>
        /// Moves to the nearest document, in the given direction, that contains at least one
        /// filtered match, and returns the first match collected for that document.
        /// </summary>
        /// <param name="regExp">Expression whose <c>GetFilteredMatches</c> is applied to each column text.</param>
        /// <param name="totalDocsCount">Number of documents; valid positions are <c>0 .. totalDocsCount - 1</c>.</param>
        /// <param name="documents">Passed through to <c>GetDocumentTextByPosition</c>.</param>
        /// <param name="blockStartPosition">Passed through to <c>GetDocumentTextByPosition</c>.</param>
        /// <param name="position">Position to start from; scanning begins at the adjacent position.</param>
        /// <param name="forward"><c>true</c> to scan towards higher positions, <c>false</c> towards lower ones.</param>
        /// <returns>
        /// The first match of the first matching document; <c>RegExpMatchResult.NeedMoreDataResult()</c>
        /// when <c>GetDocumentTextByPosition</c> reports the text is unavailable; or
        /// <c>RegExpMatchResult.EmptyResult()</c> when the scan leaves the valid range without a match.
        /// </returns>
        protected RegExpMatchResult Navigate_Document(RegExpBase regExp, int totalDocsCount, List <double> documents, int blockStartPosition, int position, bool forward)
        {
            var increment = forward ? 1 : -1;

            _position = position + increment;

            _unique = false;

            ///////////////////////////////////////////////////////////////////////////////

            // Wrap around once when the very first step leaves the valid range.
            if (forward && _position >= totalDocsCount)
            {
                _position = 0;
            }
            else if (!forward && _position < 0)
            {
                _position = totalDocsCount - 1;
            }

            ///////////////////////////////////////////////////////////////////////////////

            while (_position >= 0 && _position < totalDocsCount)
            {
                // _matches can be null here (Navigate_UniqueMatch resets it to null), so guard the
                // Clear() call instead of dereferencing unconditionally. The old list is still
                // emptied, as before, before being replaced by a fresh one.
                if (_matches != null)
                {
                    _matches.Clear();
                }

                _matches = new List <RegExpMatchResult>();

                for (int columnIndex = 0; columnIndex < _noteColumnCount; columnIndex++)
                {
                    string documentText;
                    if (!GetDocumentTextByPosition(documents, blockStartPosition, _position, forward, out documentText, columnIndex))
                    {
                        return(RegExpMatchResult.NeedMoreDataResult());
                    }

                    ///////////////////////////////////////////////////////////////////////////////

                    // Collect the matches of every column of the current document.
                    _matches.AddRange(regExp.GetFilteredMatches(documentText)
                                      .Select(x => new RegExpMatchResult(regExp, _position, x, columnIndex))
                                      .ToList());
                }

                if (_matches.Count > 0)
                {
                    _currentMatchIndex = 0;

                    return(_matches[_currentMatchIndex]);
                }

                ///////////////////////////////////////////////////////////////////////////////

                // No match in this document: keep scanning in the same direction.
                _position += increment;
            }

            ///////////////////////////////////////////////////////////////////////////////

            return(RegExpMatchResult.EmptyResult());
        }
예제 #3
0
        /// <summary>
        /// Runs <c>_regExp</c> over every text column of <paramref name="record"/> and adds one
        /// <c>RegExpStatisticsSingleProcessingResult</c> per filtered match to <paramref name="results"/>.
        /// </summary>
        /// <param name="columnIndexID">Index of the record column holding the document ID (read with <c>GetDouble</c>).</param>
        /// <param name="columnIndexList">Indices of the record columns holding document text (read with <c>GetString</c>).</param>
        /// <param name="record">Current data record.</param>
        /// <param name="results">Collection that receives the per-match results.</param>
        /// <remarks>
        /// Nothing is added when the ID column is NULL. A NULL text column is skipped and the
        /// remaining columns are still processed.
        /// </remarks>
        private void CalcDocumentStatisticsSingle(int columnIndexID, List <int> columnIndexList, IDataRecord record, RegExpProcessingResultsCollection <RegExpStatisticsSingleProcessingResult> results)
        {
            if (record.IsDBNull(columnIndexID))
            {
                return;
            }

            ///////////////////////////////////////////////////////////////////////////////

            // The ID does not depend on the text column, so read it once.
            var documentID = record.GetDouble(columnIndexID);

            foreach (var columnIndex in columnIndexList)
            {
                // Skip only this column; previously a single NULL text column aborted the whole
                // record and silently dropped the matches of all following columns.
                if (record.IsDBNull(columnIndex))
                {
                    continue;
                }

                ///////////////////////////////////////////////////////////////////////////////

                var docText = record.GetString(columnIndex);

                ///////////////////////////////////////////////////////////////////////////////

                foreach (var match in _regExp.GetFilteredMatches(docText))
                {
                    results.Add(new RegExpStatisticsSingleProcessingResult
                    {
                        Word       = match.Value,
                        DocumentID = documentID,
                        // ColumnID is the record column index minus one
                        // (presumably because the ID column occupies index 0 — TODO confirm).
                        ColumnID   = columnIndex - 1,
                        Start      = match.Index,
                        Length     = match.Length
                    });
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Steps to the next (or previous) match whose matched text (<c>Match.Value</c>) is not yet
        /// present in the cached <c>_matches</c> list, scanning further documents when the cache is exhausted.
        /// </summary>
        /// <param name="regExp">Expression whose <c>GetFilteredMatches</c> is applied to each column text.</param>
        /// <param name="totalDocsCount">Number of documents; valid positions are <c>0 .. totalDocsCount - 1</c>.</param>
        /// <param name="documentsBlock">Passed through to <c>GetDocumentTextByPosition</c>.</param>
        /// <param name="blockStartPosition">Passed through to <c>GetDocumentTextByPosition</c>.</param>
        /// <param name="position">Caller's current position; if it differs from <c>_position</c> the cached state is reset.</param>
        /// <param name="forward"><c>true</c> to move towards higher positions/indices, <c>false</c> towards lower ones.</param>
        /// <returns>
        /// The selected match; <c>RegExpMatchResult.NeedMoreDataResult()</c> when
        /// <c>GetDocumentTextByPosition</c> returns <c>false</c>; or <c>RegExpMatchResult.EmptyResult()</c>
        /// when the scan leaves the valid position range without finding a new unique match.
        /// </returns>
        protected RegExpMatchResult Navigate_UniqueMatch(RegExpBase regExp, int totalDocsCount, List <double> documentsBlock, int blockStartPosition, int position, bool forward)
        {
            var increment = forward ? 1 : -1;

            // The caller moved to a different position than the one tracked here:
            // drop the cached matches and restart navigation from that position.
            if (_position != position)
            {
                _matches           = null;
                _currentMatchIndex = -1;

                _position = position;
                _navigationStartPosition = _position;
            }

            // The unique-mode flag was not set: discard the cache and set the flag.
            if (!_unique)
            {
                _matches           = null;
                _currentMatchIndex = -1;

                _unique = true;
            }

            ///////////////////////////////////////////////////////////////////////////////

            // A cache exists: first try to step within the already collected unique matches.
            if (_matches != null)
            {
                // -1 means "no current match"; start from the end we are moving away from.
                // NOTE(review): the index is advanced again right below, so starting from -1
                // yields index 1 (forward) or Count - 2 (backward) — confirm this is intended.
                if (_currentMatchIndex == -1)
                {
                    _currentMatchIndex = forward ? 0 : _matches.Count - 1;
                }

                _currentMatchIndex += increment;

                if (_currentMatchIndex >= 0 && _currentMatchIndex < _matches.Count)
                {
                    _position = _matches[_currentMatchIndex]
                                .Position;

                    _navigationStartPosition = _position;

                    return(_matches[_currentMatchIndex]);
                }

                // Cache exhausted in this direction: continue scanning with the adjacent document.
                _position += increment;
            }

            // Already outside the valid range: restore the caller's position, pin the index to the
            // cache end in the direction of travel, and report that there is nothing further.
            if (forward && _position >= totalDocsCount || !forward && _position < 0)
            {
                _position = position;
                _navigationStartPosition = _position;

                if (_matches != null)
                {
                    _currentMatchIndex = forward ? _matches.Count - 1 : 0;
                }

                return(RegExpMatchResult.EmptyResult());
            }

            ///////////////////////////////////////////////////////////////////////////////

            // Used to de-duplicate the matches found within a single column text.
            var comparer = new MatchEqualityComparer();

            ///////////////////////////////////////////////////////////////////////////////

            // Scan documents one by one until one contributes a match value not seen before.
            while (_position >= 0 && _position < totalDocsCount)
            {
                var hasUniqueMatches = false;

                for (int columnIndex = 0; columnIndex < _noteColumnCount; columnIndex++)
                {
                    string documentText;
                    // NOTE(review): returning here can leave matches from earlier columns of this
                    // document already added to _matches — confirm callers tolerate that.
                    if (!GetDocumentTextByPosition(documentsBlock, blockStartPosition, _position, forward, out documentText, columnIndex))
                    {
                        return(RegExpMatchResult.NeedMoreDataResult());
                    }

                    var matches = regExp.GetFilteredMatches(documentText)
                                  .Distinct(comparer)
                                  .ToList();
                    if (matches.Any())
                    {
                        if (_matches != null)
                        {
                            foreach (var match in matches)
                            {
                                // Only values not yet in the cache count as new unique matches.
                                if (_matches.All(x => x.Match.Value != match.Value))
                                {
                                    // Append when moving forward, prepend when moving backward.
                                    if (forward)
                                    {
                                        _matches.Add(new RegExpMatchResult(regExp, _position, match, columnIndex));
                                    }
                                    else
                                    {
                                        _matches.Insert(0, new RegExpMatchResult(regExp, _position, match, columnIndex));
                                    }

                                    hasUniqueMatches = true;
                                }
                            }
                        }
                        else
                        {
                            // No cache yet: every distinct match of this column seeds it.
                            _matches = matches.Select(x => new RegExpMatchResult(regExp, _position, x, columnIndex))
                                       .ToList();

                            hasUniqueMatches = true;
                        }
                    }
                    ///////////////////////////////////////////////////////////////////////////////
                }

                if (hasUniqueMatches)
                {
                    // Bring the index back into the (now larger) cache before returning.
                    // NOTE(review): -1 is both the "no current match" sentinel and the value reached
                    // by stepping backward from index 0; in the backward case new items were inserted
                    // at the front, yet Count - 1 is selected here — confirm this is intended.
                    if (_currentMatchIndex == -1)
                    {
                        if (forward)
                        {
                            _currentMatchIndex = 0;
                        }
                        else
                        {
                            _currentMatchIndex = _matches.Count - 1;
                        }
                    }
                    else if (_currentMatchIndex < 0)
                    {
                        _currentMatchIndex = 0;
                    }
                    else if (_currentMatchIndex >= _matches.Count)
                    {
                        _currentMatchIndex = _matches.Count - 1;
                    }

                    _position = _matches[_currentMatchIndex]
                                .Position;
                    _navigationStartPosition = _position;

                    return(_matches[_currentMatchIndex]);
                }

                ///////////////////////////////////////////////////////////////////////////////

                // Nothing new in this document: move on in the same direction.
                _position += increment;
            }

            ///////////////////////////////////////////////////////////////////////////////

            // Ran off the end without a new unique match: reset the index and go back to
            // the position where this navigation last produced a result.
            _currentMatchIndex = -1;
            _position          = _navigationStartPosition;

            return(RegExpMatchResult.EmptyResult());
        }