Exemplo n.º 1
0
        /// <summary>
        /// Reads HTML tokens from <paramref name="stream"/> and hands every accepted
        /// text fragment to <paramref name="callback"/>.
        /// </summary>
        /// <remarks>
        /// A text token is reported only when all of the following hold:
        /// it is not between a script/style start tag and the matching end tag,
        /// its length satisfies <c>IsBetween(MinimumLength, MaximumLength)</c>,
        /// it is not already in the de-duplication cache (used only when
        /// <c>Distinct</c> is set), and the filter resolved from <c>Filter</c> accepts it.
        /// Each call replaces the <c>cache</c> member, so de-duplication state does not
        /// carry over between calls.
        /// </remarks>
        /// <param name="stream">Source of the HTML to tokenize; wrapped in an <c>HtmlReader</c>.</param>
        /// <param name="callback">Invoked once for each fragment that passes the filter.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="stream"/> or <paramref name="callback"/> is null.
        /// </exception>
        /// <exception cref="FetchoException"><c>GetFilter(Filter)</c> returned null.</exception>
        public void Parse(Stream stream, Action<BracketPipeTextFragment> callback)
        {
            // Fail fast on bad arguments, before any state is touched or input is consumed.
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            if (callback == null)
            {
                throw new ArgumentNullException(nameof(callback));
            }

            if (Distinct)
            {
                cache = new FastLookupCache<string>(CacheSize);
            }
            else
            {
                cache = null;
            }

            var filter = GetFilter(Filter);

            if (filter == null)
            {
                throw new FetchoException("{0} is an invalid filter type", Filter);
            }

            // True while inside a script or style element, whose text must be ignored.
            bool skip = false;

            // Values of the start tags seen so far that have not been popped by an end tag.
            var tag = new Stack<string>();

            using (var reader = new HtmlReader(stream))
            {
                foreach (var c in reader)
                {
                    switch (c.Type)
                    {
                    case HtmlTokenType.StartTag:
                        if (c.Value == ScriptHtmlTag || c.Value == StyleHtmlTag)
                        {
                            skip = true;
                        }
                        tag.Push(c.Value);
                        break;

                    case HtmlTokenType.EndTag:
                        if (c.Value == ScriptHtmlTag || c.Value == StyleHtmlTag)
                        {
                            skip = false;
                        }
                        // Pops whatever is on top; the end tag is not matched against it.
                        if (tag.Count > 0)
                        {
                            tag.Pop();
                        }
                        break;

                    case HtmlTokenType.Text:
                        // Each "break" below only leaves the switch; the loop moves on to the next token.
                        if (skip || !c.Value.Length.IsBetween(MinimumLength, MaximumLength))
                        {
                            break;
                        }
                        if (cache != null && cache.Contains(c.Value))
                        {
                            break;
                        }

                        // Recorded before filtering, so text rejected by the filter still counts as seen.
                        cache?.Enqueue(c.Value);

                        string enclosingTag = tag.Count > 0 ? tag.Peek() : string.Empty;
                        var fragment = new BracketPipeTextFragment(enclosingTag, c.Value);
                        if (filter(fragment))
                        {
                            callback(fragment);
                        }
                        break;

                    default:
                        break;
                    }
                }
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Tells whether the tag of <paramref name="fragment"/> is one of <paramref name="validTags"/>.
 /// </summary>
 /// <param name="validTags">Tag values that are accepted.</param>
 /// <param name="fragment">Fragment whose <c>Tag</c> is compared with <c>==</c> against each entry.</param>
 /// <returns>True if at least one entry equals the fragment's tag; otherwise false.</returns>
 private bool GenericTagFilter(string[] validTags, BracketPipeTextFragment fragment)
 {
     return validTags.Any(candidate => candidate == fragment.Tag);
 }