示例#1
0
        /// <summary>
        /// Extracts percentage entities from a sentence.
        /// </summary>
        /// <param name="source">The sentence to scan for percentage expressions.</param>
        /// <returns>
        /// One <see cref="ExtractResult"/> per maximal run of characters covered by at least
        /// one regex match, after <c>PostProcessing</c> has been applied to the list.
        /// </returns>
        public List<ExtractResult> Extract(string source)
        {
            string rawText = source;
            Dictionary<int, int> positionMap;
            IList<ExtractResult> numberResults;

            // Work on a preprocessed copy of the sentence in which the numbers have been
            // extracted and replaced; the out values are handed back to PostProcessing below.
            source = this.PreprocessStrWithNumberExtracted(rawText, out positionMap, out numberResults);

            // Run every percentage regex over the preprocessed text.
            var matchSets = new List<MatchCollection>();
            foreach (Regex pattern in Regexes)
            {
                matchSets.Add(pattern.Matches(source));
            }

            // covered[k] is true when character k belongs to at least one match.
            // A new bool[] already starts out all false, so no explicit reset is needed.
            var covered = new bool[source.Length];
            foreach (MatchCollection matchSet in matchSets)
            {
                foreach (Match hit in matchSet)
                {
                    int hitEnd = hit.Index + hit.Length;
                    for (int k = hit.Index; k < hitEnd; k++)
                    {
                        covered[k] = true;
                    }
                }
            }

            // Turn each maximal run of covered characters into a single result, so that
            // overlapping or adjacent matches are merged into one span.
            var spans = new List<ExtractResult>();
            int cursor = 0;
            while (cursor < source.Length)
            {
                if (!covered[cursor])
                {
                    cursor++;
                    continue;
                }

                int runStart = cursor;
                while (cursor < source.Length && covered[cursor])
                {
                    cursor++;
                }

                int runLength = cursor - runStart;
                string runText = source.Substring(runStart, runLength);
                spans.Add(new ExtractResult
                {
                    Start = runStart,
                    Length = runLength,
                    Text = runText,
                    Type = ExtractType
                });
            }

            // Post-process against the original sentence (restores the extracted numbers).
            this.PostProcessing(spans, rawText, positionMap, numberResults);

            return spans;
        }
示例#2
0
        /// <summary>
        /// Extracts entities from <paramref name="source"/> using the configured regexes.
        /// </summary>
        /// <param name="source">The text to scan; may be null or empty.</param>
        /// <returns>
        /// An empty list when <paramref name="source"/> is null or empty. Otherwise one
        /// <see cref="ExtractResult"/> for each maximal run of matched characters whose start and
        /// length coincide exactly with a single regex match; <c>Data</c> carries the value
        /// associated with the regex that produced that match.
        /// </returns>
        public virtual List<ExtractResult> Extract(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return new List<ExtractResult>();
            }

            var result = new List<ExtractResult>();

            // Remembers, for every individual match, the value attached to the regex that produced it.
            var matchSource = new Dictionary<Match, string>();

            // matched[k] is true when character k is covered by at least one match.
            // A new bool[] is already all false, so no explicit initialization is required.
            var matched = new bool[source.Length];

            foreach (var regexEntry in Regexes)
            {
                foreach (Match m in regexEntry.Key.Matches(source))
                {
                    for (var j = 0; j < m.Length; j++)
                    {
                        matched[m.Index + j] = true;
                    }

                    // Keep source data for extra information.
                    matchSource.Add(m, regexEntry.Value);
                }
            }

            // Index of the most recent unmatched character; the current run starts right after it.
            var last = -1;

            for (var i = 0; i < source.Length; i++)
            {
                if (!matched[i])
                {
                    last = i;
                    continue;
                }

                // Only act on the final character of a run.
                if (i + 1 != source.Length && matched[i + 1])
                {
                    continue;
                }

                var start = last + 1;
                var length = i - last;

                // A run is reported only when it coincides exactly with one recorded match;
                // runs formed by merging several overlapping matches have no single source
                // match and are skipped. One scan finds the match (first recorded wins).
                var srcMatch = matchSource.Keys.FirstOrDefault(o => o.Index == start && o.Length == length);
                if (srcMatch == null)
                {
                    continue;
                }

                result.Add(new ExtractResult
                {
                    Start = start,
                    Length = length,
                    Text = source.Substring(start, length),
                    Type = ExtractType,
                    Data = matchSource[srcMatch]
                });
            }

            return result;
        }