/// <summary>
/// Computes the score that <paramref name="regExp"/> contributes for one document text and,
/// when the text contains at least one filtered match, updates the expression's
/// document and match counters.
/// </summary>
/// <param name="regExp">Expression that supplies the matches, the Score/Factor weights and the counters.</param>
/// <param name="text">Document text passed to <c>GetFilteredMatches</c>.</param>
/// <param name="isMatch">Set to <c>true</c> once at least one filtered match is found; <c>false</c> otherwise.</param>
/// <returns>
/// <c>n * Score + n * (n - 1) / 2 * Factor</c>, where <c>n</c> is the number of filtered matches
/// and a missing Score or Factor counts as 0. Returns 0 when there are no matches.
/// Exceptions are handed to <c>Logger.HandleException</c> and the value computed so far is returned.
/// </returns>
protected int CalcRegExpScore(RegExpBase regExp, string text, out bool isMatch)
{
    var scoreResult = 0;
    isMatch = false;

    try
    {
        var matches = regExp.GetFilteredMatches(text);
        if (matches.Any())
        {
            isMatch = true;

            var totalMatches = matches.Count;
            regExp.IncrementTotalDocuments();
            regExp.AddTotalMatches(totalMatches);

            var score = regExp.Score ?? 0;
            var factor = regExp.Factor ?? 0;

            scoreResult = totalMatches * score;

            // n * (n - 1) is always even, so multiplying before dividing keeps the result exact.
            // The previous form, (n / 2) * (n - 1), truncated n / 2 for every odd n
            // (e.g. n = 3 produced 2 instead of 3).
            scoreResult += totalMatches * (totalMatches - 1) / 2 * factor;
        }
    }
    catch (Exception ex)
    {
        Logger.HandleException(ex);
    }

    return scoreResult;
}
/// <summary>
/// Steps one document at a time from <paramref name="position"/> in the requested direction and
/// returns the first match of the first document that has any filtered match in any note column.
/// </summary>
/// <param name="regExp">Expression used to find matches in each document text.</param>
/// <param name="totalDocsCount">Number of documents; valid positions are 0 .. totalDocsCount - 1.</param>
/// <param name="documents">Passed through to <c>GetDocumentTextByPosition</c>.</param>
/// <param name="blockStartPosition">Passed through to <c>GetDocumentTextByPosition</c>.</param>
/// <param name="position">Position to start from; the search begins at the neighbouring document.</param>
/// <param name="forward"><c>true</c> to move towards higher positions, <c>false</c> towards lower ones.</param>
/// <returns>
/// The first match of the first matching document; <c>NeedMoreDataResult()</c> when
/// <c>GetDocumentTextByPosition</c> reports that the text is unavailable; <c>EmptyResult()</c>
/// when the scan runs off the end of the range without finding a match.
/// </returns>
/// <remarks>
/// Updates <c>_position</c>, <c>_matches</c> and <c>_currentMatchIndex</c>, and clears <c>_unique</c>.
/// </remarks>
protected RegExpMatchResult Navigate_Document(RegExpBase regExp, int totalDocsCount, List<double> documents, int blockStartPosition, int position, bool forward)
{
    var increment = forward ? 1 : -1;

    _position = position + increment;
    _unique = false;

    // Stepping past either end wraps to the opposite end once. The scan below does not wrap again:
    // it ends when it leaves the valid range in the direction of travel.
    if (forward && _position >= totalDocsCount)
    {
        _position = 0;
    }
    else if (!forward && _position < 0)
    {
        _position = totalDocsCount - 1;
    }

    while (_position >= 0 && _position < totalDocsCount)
    {
        // Start each document with a fresh list. The earlier _matches.Clear() call was redundant
        // (the list was replaced right after) and threw NullReferenceException whenever _matches
        // was null, which Navigate_UniqueMatch sets it to when it resets its state.
        _matches = new List<RegExpMatchResult>();

        for (int columnIndex = 0; columnIndex < _noteColumnCount; columnIndex++)
        {
            string documentText;
            if (!GetDocumentTextByPosition(documents, blockStartPosition, _position, forward, out documentText, columnIndex))
            {
                return RegExpMatchResult.NeedMoreDataResult();
            }

            // AddRange enumerates the projection immediately, so no intermediate list is needed.
            _matches.AddRange(regExp.GetFilteredMatches(documentText)
                                    .Select(x => new RegExpMatchResult(regExp, _position, x, columnIndex)));
        }

        if (_matches.Count > 0)
        {
            _currentMatchIndex = 0;
            return _matches[_currentMatchIndex];
        }

        _position += increment;
    }

    return RegExpMatchResult.EmptyResult();
}
/// <summary>
/// Runs <c>_regExp</c> over every text column of one data record and appends one
/// <c>RegExpStatisticsSingleProcessingResult</c> per filtered match to <paramref name="results"/>.
/// </summary>
/// <param name="columnIndexID">Index of the record column holding the document ID (read with <c>GetDouble</c>).</param>
/// <param name="columnIndexList">Indices of the record columns holding document text (read with <c>GetString</c>).</param>
/// <param name="record">Record to read the ID and text columns from.</param>
/// <param name="results">Collection that receives one entry per match.</param>
/// <remarks>
/// Nothing is added when the ID column is NULL. A NULL text column is skipped and the remaining
/// columns are still processed; previously the first NULL text column ended the whole method,
/// discarding later columns after earlier ones had already been recorded.
/// </remarks>
private void CalcDocumentStatisticsSingle(int columnIndexID, List<int> columnIndexList, IDataRecord record, RegExpProcessingResultsCollection<RegExpStatisticsSingleProcessingResult> results)
{
    if (record.IsDBNull(columnIndexID))
    {
        return;
    }

    foreach (var columnIndex in columnIndexList)
    {
        if (record.IsDBNull(columnIndex))
        {
            // No text in this column; move on to the next one instead of abandoning the record.
            continue;
        }

        var documentID = record.GetDouble(columnIndexID);
        var docText = record.GetString(columnIndex);

        foreach (var match in _regExp.GetFilteredMatches(docText))
        {
            results.Add(new RegExpStatisticsSingleProcessingResult
            {
                Word = match.Value,
                DocumentID = documentID,
                // Stored as the record column index minus one
                // (presumably because the ID occupies the first column - confirm against the query).
                ColumnID = columnIndex - 1,
                Start = match.Index,
                Length = match.Length
            });
        }
    }
}
/// <summary>
/// Moves to the next (or previous) entry in the cached list of distinct matches, scanning further
/// documents in the direction of travel and extending the cache when the cached list is exhausted.
/// </summary>
/// <param name="regExp">Expression used to find matches in each document text.</param>
/// <param name="totalDocsCount">Number of documents; valid positions are 0 .. totalDocsCount - 1.</param>
/// <param name="documentsBlock">Passed through to <c>GetDocumentTextByPosition</c>.</param>
/// <param name="blockStartPosition">Passed through to <c>GetDocumentTextByPosition</c>.</param>
/// <param name="position">Position the caller is currently at.</param>
/// <param name="forward"><c>true</c> to move towards higher positions, <c>false</c> towards lower ones.</param>
/// <returns>
/// The match navigated to; <c>NeedMoreDataResult()</c> when <c>GetDocumentTextByPosition</c>
/// returns <c>false</c>; <c>EmptyResult()</c> when no further new match exists in that direction.
/// </returns>
/// <remarks>
/// Reads and writes <c>_position</c>, <c>_navigationStartPosition</c>, <c>_matches</c>,
/// <c>_currentMatchIndex</c> and <c>_unique</c>. A match found in a later column or document is
/// added to the cache only when no cached entry has the same <c>Match.Value</c>.
/// </remarks>
protected RegExpMatchResult Navigate_UniqueMatch(RegExpBase regExp, int totalDocsCount, List<double> documentsBlock, int blockStartPosition, int position, bool forward)
{
    var step = forward ? 1 : -1;

    // The caller is at a different document than the one tracked here: drop the cache and restart there.
    if (_position != position)
    {
        _matches = null;
        _currentMatchIndex = -1;
        _position = position;
        _navigationStartPosition = _position;
    }

    // The previous navigation was not a unique-match navigation: drop the cache as well.
    if (!_unique)
    {
        _matches = null;
        _currentMatchIndex = -1;
        _unique = true;
    }

    // First try to serve the request from the matches collected so far.
    if (_matches != null)
    {
        if (_currentMatchIndex == -1)
        {
            _currentMatchIndex = forward ? 0 : _matches.Count - 1;
        }

        _currentMatchIndex += step;

        if (_currentMatchIndex >= 0 && _currentMatchIndex < _matches.Count)
        {
            var cached = _matches[_currentMatchIndex];
            _position = cached.Position;
            _navigationStartPosition = _position;
            return cached;
        }

        // Cache exhausted in this direction: continue with the neighbouring document.
        _position += step;
    }

    // Already beyond the last (or before the first) document: restore the caller's position
    // and park the index on the cache entry at that end.
    var outOfRange = forward ? _position >= totalDocsCount : _position < 0;
    if (outOfRange)
    {
        _position = position;
        _navigationStartPosition = _position;

        if (_matches != null)
        {
            _currentMatchIndex = forward ? _matches.Count - 1 : 0;
        }

        return RegExpMatchResult.EmptyResult();
    }

    var comparer = new MatchEqualityComparer();

    for (; _position >= 0 && _position < totalDocsCount; _position += step)
    {
        var foundNew = false;

        for (int columnIndex = 0; columnIndex < _noteColumnCount; columnIndex++)
        {
            string documentText;
            if (!GetDocumentTextByPosition(documentsBlock, blockStartPosition, _position, forward, out documentText, columnIndex))
            {
                return RegExpMatchResult.NeedMoreDataResult();
            }

            var distinctMatches = regExp.GetFilteredMatches(documentText)
                                        .Distinct(comparer)
                                        .ToList();

            if (distinctMatches.Count == 0)
            {
                continue;
            }

            // Nothing cached yet: everything found in this column seeds the cache.
            if (_matches == null)
            {
                _matches = distinctMatches.Select(x => new RegExpMatchResult(regExp, _position, x, columnIndex))
                                          .ToList();
                foundNew = true;
                continue;
            }

            foreach (var match in distinctMatches)
            {
                // Skip values that are already present in the cache.
                if (_matches.Any(x => x.Match.Value == match.Value))
                {
                    continue;
                }

                var result = new RegExpMatchResult(regExp, _position, match, columnIndex);

                // New entries go to the end when moving forward and to the front when moving backward.
                if (forward)
                {
                    _matches.Add(result);
                }
                else
                {
                    _matches.Insert(0, result);
                }

                foundNew = true;
            }
        }

        if (foundNew)
        {
            // -1 selects the entry at the end matching the direction of travel;
            // any other out-of-range index is clamped into the list.
            if (_currentMatchIndex == -1)
            {
                _currentMatchIndex = forward ? 0 : _matches.Count - 1;
            }
            else if (_currentMatchIndex < 0)
            {
                _currentMatchIndex = 0;
            }
            else if (_currentMatchIndex >= _matches.Count)
            {
                _currentMatchIndex = _matches.Count - 1;
            }

            var current = _matches[_currentMatchIndex];
            _position = current.Position;
            _navigationStartPosition = _position;
            return current;
        }
    }

    // Ran out of documents without finding a new value: go back to where navigation last stood.
    _currentMatchIndex = -1;
    _position = _navigationStartPosition;
    return RegExpMatchResult.EmptyResult();
}