/// <summary>
/// Applies <paramref name="regExp"/> to <paramref name="text"/> and selects a single value
/// from the filtered matches, as directed by <c>regExp.ExtractOptions.InstanceNo</c>.
/// </summary>
/// <remarks>
/// Selection by <c>InstanceNo</c>:
/// <list type="bullet">
/// <item><description>1 - the first match.</description></item>
/// <item><description>2 - the last match.</description></item>
/// <item><description>3 - the match at 1-based position <c>NthInstaceNumber</c> (nothing if out of range).</description></item>
/// <item><description>4 - all matches serialized as a JSON array when there is more than one, otherwise the single match value.</description></item>
/// <item><description>any other value - nothing is selected.</description></item>
/// </list>
/// Any exception raised while processing is passed to <c>Logger.HandleException</c>.
/// </remarks>
/// <param name="regExp">Expression definition supplying the matches, the column id and the extract options.</param>
/// <param name="documentID">Identifier copied into the result's <c>DocumentID</c>.</param>
/// <param name="text">Text handed to <c>regExp.GetFilteredMatches</c>.</param>
/// <returns>
/// A populated result when a non-empty value was selected; otherwise <c>null</c>
/// (no matches, nothing selected, or an exception was handled).
/// </returns>
protected ColRegExpExtractProcessingResult ExtractRegExpValues(ColRegExp regExp, double documentID, string text)
{
    try
    {
        var matches = regExp.GetFilteredMatches(text);
        var values = matches.Select(m => m.Value).ToList();

        if (values.Count == 0)
        {
            return null;
        }

        var options = regExp.ExtractOptions;
        string selected;

        switch (options.InstanceNo)
        {
            case 1:
                selected = values[0];
                break;

            case 2:
                // One value per match, so the last index of the value list is the last match.
                selected = values[values.Count - 1];
                break;

            case 3:
                // NthInstaceNumber is 1-based; an out-of-range position yields null.
                selected = values.ElementAtOrDefault(options.NthInstaceNumber.Value - 1);
                break;

            case 4:
                if (values.Count > 1)
                {
                    selected = Newtonsoft.Json.JsonConvert.SerializeObject(values.ToArray());
                }
                else
                {
                    selected = values[0];
                }
                break;

            default:
                selected = string.Empty;
                break;
        }

        if (string.IsNullOrEmpty(selected))
        {
            return null;
        }

        return new ColRegExpExtractProcessingResult()
        {
            DocumentID = documentID,
            ColumnID = regExp.ColumnID,
            Value = selected,
            ExtractOptions = options
        };
    }
    catch (Exception ex)
    {
        Logger.HandleException(ex);
    }

    // Reached only after an exception has been handed to the logger.
    return null;
}
/// <summary>
/// Updates the counters on <paramref name="regExp"/> for one piece of text.
/// </summary>
/// <remarks>
/// When the expression produces at least one filtered match, the total-documents counter is
/// incremented, the positive-documents counter is incremented if <paramref name="score"/> is
/// greater than zero, and the number of matches is added to the total-matches counter.
/// Text without matches leaves the counters untouched. Any exception raised while processing
/// is passed to <c>Logger.HandleException</c>.
/// </remarks>
/// <param name="regExp">Expression definition whose counters are updated.</param>
/// <param name="text">Text handed to <c>regExp.GetFilteredMatches</c>.</param>
/// <param name="score">Score for the text; only its sign is inspected here.</param>
protected void CalcRegExpScore(ColRegExp regExp, string text, int score)
{
    try
    {
        var found = regExp.GetFilteredMatches(text);

        if (!found.Any())
        {
            return;
        }

        regExp.IncrementTotalDocuments();

        if (score > 0)
        {
            regExp.IncrementPositiveDocuments();
        }

        regExp.AddTotalMatches(found.Count);
    }
    catch (Exception ex)
    {
        Logger.HandleException(ex);
    }
}