/// <summary>
/// Extracts the percentage entities from the sentence.
/// </summary>
/// <param name="source">The sentence to scan. A null or empty value yields an empty list.</param>
/// <returns>
/// One <see cref="ExtractResult"/> for each maximal run of consecutive characters that is
/// covered by at least one match of the patterns in <c>Regexes</c>, after the list has been
/// handed to <c>PostProcessing</c>.
/// </returns>
public List<ExtractResult> Extract(string source)
{
    // Nothing can match in a null/empty sentence; returning early also keeps the
    // preprocessing helper and the source.Length accesses below away from null.
    if (string.IsNullOrEmpty(source))
    {
        return new List<ExtractResult>();
    }

    string originSource = source;
    Dictionary<int, int> positionMap;
    IList<ExtractResult> numExtResults;

    // preprocess the source sentence via extracting and replacing the numbers in it
    source = this.PreprocessStrWithNumberExtracted(originSource, out positionMap, out numExtResults);

    // matched[k] is true when character k of the preprocessed sentence lies inside
    // at least one regex match. A new bool[] already starts out all false.
    bool[] matched = new bool[source.Length];

    // match percentage with regexes
    foreach (Regex regex in Regexes)
    {
        foreach (Match match in regex.Matches(source))
        {
            for (int j = 0; j < match.Length; j++)
            {
                matched[match.Index + j] = true;
            }
        }
    }

    List<ExtractResult> result = new List<ExtractResult>();

    // Index of the most recent unmatched character; -1 means "none seen yet".
    int last = -1;

    // get index of each matched result
    for (int i = 0; i < source.Length; i++)
    {
        if (!matched[i])
        {
            last = i;
            continue;
        }

        // Emit a result only at the last character of a run of matched characters.
        bool endOfRun = i + 1 == source.Length || !matched[i + 1];
        if (!endOfRun)
        {
            continue;
        }

        int start = last + 1;
        int length = i - last;

        result.Add(new ExtractResult
        {
            Start = start,
            Length = length,
            Text = source.Substring(start, length),
            Type = ExtractType
        });
    }

    // post-processing, restoring the extracted numbers
    this.PostProcessing(result, originSource, positionMap, numExtResults);

    return result;
}
/// <summary>
/// Extracts entities from the sentence using every pattern in <c>Regexes</c>.
/// </summary>
/// <param name="source">The sentence to scan. A null or empty value yields an empty list.</param>
/// <returns>
/// One <see cref="ExtractResult"/> for each maximal run of matched characters whose start and
/// length coincide exactly with a single regex match; its <c>Data</c> is the value that
/// <c>Regexes</c> associates with the pattern that produced that match.
/// </returns>
public virtual List<ExtractResult> Extract(string source)
{
    var result = new List<ExtractResult>();

    if (string.IsNullOrEmpty(source))
    {
        return result;
    }

    // Maps every individual match to the value paired with its regex.
    var matchSource = new Dictionary<Match, string>();

    // matched[k] is true when character k lies inside at least one match.
    // A new bool[] already starts out all false.
    var matched = new bool[source.Length];

    foreach (var pattern in Regexes)
    {
        foreach (Match m in pattern.Key.Matches(source))
        {
            for (var j = 0; j < m.Length; j++)
            {
                matched[m.Index + j] = true;
            }

            // Keep source data for extra information
            matchSource.Add(m, pattern.Value);
        }
    }

    // Index of the most recent unmatched character; -1 means "none seen yet".
    var last = -1;

    for (var i = 0; i < source.Length; i++)
    {
        if (!matched[i])
        {
            last = i;
            continue;
        }

        // Only act at the last character of a run of matched characters.
        var endOfRun = i + 1 == source.Length || !matched[i + 1];
        if (!endOfRun)
        {
            continue;
        }

        var start = last + 1;
        var length = i - last;

        // A run is reported only when one match spans it exactly; a run that exists
        // solely as the union of several overlapping matches produces no result.
        var srcMatch = matchSource.Keys.FirstOrDefault(o => o.Index == start && o.Length == length);
        if (srcMatch == null)
        {
            continue;
        }

        result.Add(new ExtractResult
        {
            Start = start,
            Length = length,
            Text = source.Substring(start, length),
            Type = ExtractType,
            Data = matchSource[srcMatch]
        });
    }

    return result;
}