Exemple #1
0
        /// <summary>
        /// Checks whether <paramref name="item"/> satisfies this match: it must be of the
        /// configured type, have exactly <c>AttributesCount</c> attributes (unless that is -1),
        /// and pass every configured attribute matcher.
        /// </summary>
        /// <param name="item">The item to test.</param>
        /// <returns>true if every configured condition holds; otherwise false.</returns>
        public bool Matches(HtmlItem item)
        {
            // wrong tag type: nothing else is worth checking
            if (!item.IsType(Type))
            {
                return false;
            }

            // -1 disables the attribute-count check; any other value must match exactly
            bool countIsConstrained = AttributesCount != -1;
            if (countIsConstrained && item.Attributes.Count != AttributesCount)
            {
                return false;
            }

            // no attribute matchers configured, so the type (and count) were enough
            if (Attributes == null)
            {
                return true;
            }

            // a single failing matcher rejects the whole item
            foreach (var matcher in Attributes)
            {
                if (!matcher.Matches(item))
                {
                    return false;
                }
            }

            return true;
        }
Exemple #2
0
        /// <summary>
        /// Returns the first item of the given type that also has the given attributes.
        /// The children of this item are searched first; if nothing is found and
        /// <paramref name="continueUp"/> is set, the item returned by <c>next()</c> is tested
        /// and then searched in the same way.
        /// </summary>
        /// <param name="type">The item type to look for.</param>
        /// <param name="attributes">Attributes the item must have; passed through to <c>HasAttributes</c>.</param>
        /// <param name="continueUp">When true, the search carries on from the item returned by <c>next()</c>.</param>
        /// <param name="recursive">When false, only the direct children are searched.</param>
        /// <returns>The first matching item, or null when there is none.</returns>
        public HtmlItem Get(string type, Dictionary <string, string> attributes = null, bool continueUp = false, bool recursive = true)
        {
            // children first
            HtmlItem found = get(type, attributes, recursive);
            if (found != null)
            {
                return found;
            }

            // not allowed to leave this subtree
            if (!continueUp)
            {
                return null;
            }

            HtmlItem following = next();
            if (following == null)
            {
                return null;
            }

            // the following item itself may be the match; otherwise repeat the search from it
            bool followingMatches = following.IsType(type) && following.HasAttributes(attributes);
            return followingMatches
                ? following
                : following.Get(type, attributes, continueUp, recursive);
        }
Exemple #3
0
        /// <summary>
        /// Depth-first search through <c>Children</c> for the first item of the given type that
        /// has the given attributes. Each child is tested before its own children are searched.
        /// </summary>
        /// <param name="type">The item type to look for.</param>
        /// <param name="attributes">Attributes the item must have; passed through to <c>HasAttributes</c>.</param>
        /// <param name="recursive">When false, only the direct children are tested.</param>
        /// <returns>The first matching item, or null when there is none.</returns>
        private HtmlItem get(string type, Dictionary <string, string> attributes, bool recursive)
        {
            foreach (HtmlItem child in Children)
            {
                // the child itself is the match
                if (child.IsType(type) && child.HasAttributes(attributes))
                {
                    return child;
                }

                if (!recursive)
                {
                    continue;
                }

                // otherwise look inside the child before moving to its sibling
                HtmlItem nested = child.get(type, attributes, recursive);
                if (nested != null)
                {
                    return nested;
                }
            }

            return null;
        }
Exemple #4
0
        /// <summary>
        /// Executes this instruction against <paramref name="item"/> and, where the instruction
        /// chains to <c>Next</c>, hands the result on to it.
        /// </summary>
        /// <param name="item">The item the instruction is applied to.</param>
        /// <returns>The extracted string, or null when the value could not be obtained.</returns>
        public string Get(HtmlItem item)
        {
            switch (Instruction)
            {
            case InstructionType.Start:
            {
                // a start instruction only forwards to the rest of the chain
                return Next == null ? null : Next.Get(item);
            }

            case InstructionType.GetValue:
                return item.GetText();

            case InstructionType.GetImmediateValue:
                return item.GetImmediateText();

            case InstructionType.GetAttributeValue:
            {
                string fetched;
                bool present = item.Attributes.TryGetValue(AttributeToFetch, out fetched);
                return present ? fetched : null;
            }

            case InstructionType.GetAnyItem:
            case InstructionType.GetNextItem:
            {
                // locating an item is pointless without a following instruction to consume it
                if (Next == null)
                {
                    return null;
                }

                // GetAnyItem searches every level below the item; GetNextItem only the next level
                bool searchAllLevels = Instruction == InstructionType.GetAnyItem;
                HtmlItem located = item.Get(Type, AttributesToMatch, false, searchAllLevels);

                return located == null ? null : Next.Get(located);
            }
            }

            // unknown instruction type
            return null;
        }
Exemple #5
0
            /// <summary>
            /// Checks whether <paramref name="item"/> carries the attribute described by this matcher.
            /// With no <c>AttributeValue</c> only the presence of <c>AttributeName</c> is required;
            /// otherwise the stored value must equal <c>AttributeValue</c> as well.
            /// </summary>
            /// <param name="item">The item whose attributes are inspected.</param>
            /// <returns>true if the attribute (and value, when given) is present.</returns>
            public bool Matches(HtmlItem item)
            {
                if (AttributeValue != null)
                {
                    // the key must exist and hold exactly the expected value
                    string actual;
                    return item.Attributes.TryGetValue(AttributeName, out actual)
                           && string.Equals(actual, AttributeValue);
                }

                // existence check only
                return item.Attributes.ContainsKey(AttributeName);
            }
Exemple #6
0
            /// <summary>
            /// Builds a new dictionary that has every attribute name of <paramref name="item"/>
            /// as a key, with each value set to null.
            /// </summary>
            /// <param name="item">The item whose attribute names are copied.</param>
            /// <returns>A new dictionary mapping each attribute name to null.</returns>
            protected Dictionary <string, string> copyAttributeKeys(HtmlItem item)
            {
                var keysOnly = new Dictionary <string, string>();

                foreach (string name in item.Attributes.Keys)
                {
                    keysOnly[name] = null;
                }

                return keysOnly;
            }
Exemple #7
0
            /// <summary>
            /// Walks <paramref name="item"/> and the items reached through
            /// <c>Next(HtmlItem.NextType.SameLevel)</c>, looking for one that either is an end
            /// point (according to <c>Found</c>) or leads to one through
            /// <c>Next(HtmlItem.NextType.Down)</c>. For the first such item a "get next item"
            /// instruction is created and linked in front of the instruction chain found below it.
            /// </summary>
            /// <param name="item">The first item to examine; may be null.</param>
            /// <returns>The head of the instruction chain, or null when no end point is reachable.</returns>
            private HtmlInstruction createInstructions(HtmlItem item)
            {
                for (HtmlItem current = item; current != null; current = current.Next(HtmlItem.NextType.SameLevel))
                {
                    // either this item is the end point, or the end point lies further down
                    HtmlInstruction tail = Found(current);
                    if (tail == null)
                    {
                        tail = createInstructions(current.Next(HtmlItem.NextType.Down));
                    }

                    // dead end: try the next item on the same level
                    if (tail == null)
                    {
                        continue;
                    }

                    // optionally constrain the step by the item's attributes (keys only, or keys and values)
                    Dictionary <string, string> attributes = null;
                    bool constrainByAttributes = UseAttributeKeys && current.Attributes.Count > 0;
                    if (constrainByAttributes)
                    {
                        attributes = UseAttributeValues
                            ? copyAttributesWithValues(current)
                            : copyAttributeKeys(current);
                    }

                    HtmlInstruction head = HtmlInstruction.CreateGetNextItem(current.Type, attributes);
                    head.Next = tail;
                    return head;
                }

                return null;
            }
Exemple #8
0
        /// <summary>
        /// Creates an <c>HtmlItem</c> for the tag at the parser's current position <c>i</c>.
        /// The text between '&lt;' and '&gt;' is grabbed with <c>StringTools.Grab</c>; the part
        /// before the first space becomes the item's type and every <c>name="value"</c> pair
        /// after it becomes an attribute. The parser position is moved to whatever index
        /// <c>StringTools.Grab</c> returns.
        /// </summary>
        /// <param name="data">The full html text being parsed.</param>
        /// <param name="parent">The item that becomes the new item's <c>Parent</c>.</param>
        /// <returns>
        /// The populated item, or a blank <c>HtmlItem</c> when the tag could not be grabbed
        /// (<c>StringTools.Grab</c> returned -1).
        /// </returns>
        private HtmlItem createItem(string data, HtmlItem parent)
        {
            HtmlItem item = new HtmlItem()
            {
                Attributes = new Dictionary <string, string>(),
                Children   = new List <HtmlItem>(),
                Parent     = parent
            };

            //move to end of tag while grabbing contents inside tag
            i = StringTools.Grab(data, i, "<~>", arr);

            if (i == -1)
            {
                //NOTE(review): i stays at -1 here; callers that index data with i afterwards should be checked
                return(new HtmlItem());
            }

            int    index = 0;
            string inner = arr[0];

            //a space separates the type from the attribute list
            if (inner.Contains(" "))
            {
                //grab the type
                index     = StringTools.Grab(inner, index, "~ ", arr);
                item.Type = arr[0].Trim();

                //grab every name="value" pair that follows
                while ((index = StringTools.Grab(inner, index, "~=\"~\"", items)) != -1)
                {
                    item.Attributes[items[0].Trim()] = items[1];

                    //skip the single space separating this attribute from the next
                    if (index < inner.Length && inner[index] == ' ')
                    {
                        index++;
                    }
                }
            }

            //no space: the whole tag content is the type
            else
            {
                item.Type = inner.Trim();
            }

            return(item);
        }
Exemple #9
0
        /// <summary>
        /// Finds the first item that satisfies <paramref name="match"/>.
        /// The item is located by type and attributes, then verified with
        /// <c>match.Matches</c>.
        /// </summary>
        /// <param name="match">The type and attribute requirements to search for. Its attribute list may be null.</param>
        /// <returns>The matching item, or null when no item of that type with those attributes exists.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when the first item found by type and attributes fails <c>match.Matches</c>
        /// (for example because its attribute count differs); searching on for a later candidate is not supported yet.
        /// </exception>
        public HtmlItem Get(HtmlMatch match)
        {
            //a match without attribute requirements has a null list; search by type only in that case
            Dictionary <string, string> attributes = null;

            if (match.Attributes != null)
            {
                attributes = match.Attributes.ToDictionary(a => a.AttributeName, a => a.AttributeValue);
            }

            HtmlItem item = Get(match.Type, attributes);

            if (item == null)
            {
                return(null);
            }

            if (match.Matches(item))
            {
                return(item);
            }

            //right now if the attributes match but the count didn't match, it'll throw an exception. I should in the future add support for having it search for the next possible item, but then I have to add in a little extra magic.
            throw new NotImplementedException();
        }
Exemple #10
0
        /// <summary>
        /// Collects, in traversal order, every item of the given type with the given attributes
        /// that <c>Next()</c> visits after this item and before the item returned by <c>Next(false)</c>.
        /// </summary>
        /// <param name="type">The item type to collect.</param>
        /// <param name="attributes">Attributes each collected item must have; passed through to <c>HasAttributes</c>.</param>
        /// <returns>A list of the matching items; empty when there are none.</returns>
        public LinkedList <HtmlItem> GetRange(string type, Dictionary <string, string> attributes)
        {
            // the traversal ends as soon as it reaches this item
            HtmlItem boundary = Next(false);

            var matches = new LinkedList <HtmlItem>();

            for (HtmlItem current = Next(); current != boundary; current = current.Next())
            {
                bool wanted = current.IsType(type) && current.HasAttributes(attributes);
                if (wanted)
                {
                    matches.AddLast(current);
                }
            }

            return matches;
        }
Exemple #11
0
        /// <summary>
        /// Constructs a HtmlObject from the html text data. The text is parsed under a
        /// temporary typeless container item, and the container's first child (if any)
        /// becomes <c>Root</c>.
        /// </summary>
        /// <param name="data">The html text; ignoring trailing whitespace it must end with "&lt;/html&gt;" (any casing).</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="data"/> does not end with a closing html tag.</exception>
        public HtmlObject(string data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            //ordinal comparison: tag names are not linguistic text, and a culture-sensitive
            //ignore-case comparison rejects "</HTML>" under cultures such as tr-TR
            if (!data.TrimEnd().EndsWith("</html>", StringComparison.OrdinalIgnoreCase))
            {
                throw new ArgumentException("The provided string wasn't a full html document.\n\n" + data + "\n\n");
            }

            //temporary container that collects the top-level items
            HtmlItem king = new HtmlItem()
            {
                Type       = "",
                Attributes = new Dictionary <string, string>(),
                Children   = new List <HtmlItem>()
            };

            parse(data, king);

            if (king.Children.Count > 0)
            {
                Root = king.Children[0];
            }
        }
Exemple #12
0
        /// <summary>
        /// Reads the content of <paramref name="item"/> starting at the parser's current
        /// position <c>i</c>: nested tags are handed to <c>parse</c>, plain content is stored in
        /// <c>item.Value</c>. When the closing tag that is reached belongs to
        /// <paramref name="item"/>, the position is advanced past it.
        /// </summary>
        /// <param name="data">The full html text being parsed.</param>
        /// <param name="parent">Not used by this method.</param>
        /// <param name="item">The item whose content is being read.</param>
        /// <returns>
        /// true when the closing tag that was reached is not of <paramref name="item"/>'s type;
        /// otherwise false, after the scan past the end tag has run.
        /// </returns>
        private bool parseInner(string data, HtmlItem parent, HtmlItem item)
        {
            while (i + 1 < data.Length)
            {
                //trim any leading space
                trimSpace(data);

                //if there's an inner tag
                if (isStart(data))
                {
                    //read inner tags
                    parse(data, item);

                    //test the tag at the current position; this previously passed the literal
                    //index 1, so the check looked at the start of the document instead of at i
                    if (i + 1 < data.Length && isEnd(data, i))
                    {
                        //a closing tag of another type means this item was never closed
                        if (!item.IsType(StringTools.Grab(data, i, "</~>", 1)[0]))
                        {
                            return(true);
                        }

                        break;
                    }
                }

                //otherwise it just had a value
                else
                {
                    //find end tag; everything before it is the item's value
                    for (int x = i; x < data.Length; x++)
                    {
                        if (isEnd(data, x))
                        {
                            item.Value = data.Substring(i, x - i);
                            i          = x;
                            break;
                        }
                    }

                    //a closing tag of another type means this item was never closed
                    if (!item.IsType(StringTools.Grab(data, i, "</~>", 1)[0]))
                    {
                        return(true);
                    }

                    break;
                }
            }

            //now advance past the end tag
            for (; i < data.Length; i++)
            {
                if (isClose(data, i))
                {
                    i++;
                    break;
                }
            }

            return(false);
        }
Exemple #13
0
        /// <summary>
        /// Parses <paramref name="data"/> from the parser's current position <c>i</c>, adding
        /// every item it finds (tags and "Text" items) to <paramref name="parent"/>'s children.
        /// Nested tags are handled by recursive calls. Parsing stops at a closing tag or at the
        /// end of the text.
        /// </summary>
        /// <param name="data">The full html text being parsed.</param>
        /// <param name="parent">The item that receives the parsed children.</param>
        /// <returns>
        /// The local <c>value</c>, which is initialised to null and never reassigned in this
        /// method, so the result is always null.
        /// </returns>
        private string parse(string data, HtmlItem parent)
        {
            //never reassigned below, so every return in this method yields null
            string value = null;

            while (i < data.Length)
            {
                //if at opening tag
                if (isStart(data))
                {
                    //create item (this also moves i; see createItem)
                    HtmlItem item = createItem(data, parent);

                    //if it's a list item, and the parent is also a list item, the new item is
                    //attached as a sibling of the parent instead of as its child
                    if (item.IsType("li") && parent.IsType("li"))
                    {
                        //add to parent's parent
                        parent.Parent.Children.Add(item);

                        //reassign the parent
                        item.Parent = parent.Parent;

                        //scan next stuff
                        item.Value = parse(data, item);

                        //return the value to exit this inner
                        return(value);
                    }

                    //add to parent
                    parent.Children.Add(item);

                    //if it's not self closing and it's not a break or image tag
                    //NOTE(review): presumably i is just past the tag's '>' here, making data[i - 2]
                    //the character before it; if createItem failed and left i at -1 this index is invalid - confirm
                    if (data[i - 2] != '/' && !item.IsType("br") && !item.IsType("img"))
                    {
                        //parse the inner data; the children are attached inside the recursive call
                        item.Value = parse(data, item);
                    }
                }

                //else if at closing tag
                else if (isEnd(data, i))
                {
                    int x;

                    //if the closing tag could be grabbed (its name ends up in arr[0])
                    if ((x = StringTools.Grab(data, i, "</~>", arr)) != -1)
                    {
                        //if it was a </br> tag, treat it as a break item rather than as a close
                        if (arr[0].Equals("br", StringComparison.CurrentCultureIgnoreCase))
                        {
                            //add the break to the parent
                            parent.Children.Add(new HtmlItem()
                            {
                                Type       = arr[0],
                                Attributes = new Dictionary <string, string>(),
                                Children   = new List <HtmlItem>(),
                                Parent     = parent
                            });

                            //advance past close
                            i = x;

                            continue;
                        }

                        //if tag type matches parent
                        //else if (arr[0].Equals(parent.Type))
                        else if (parent.IsType(arr[0]))
                        {
                            //advance past close
                            i = x;
                        }

                        //a closing tag of any other type is left unconsumed (i is not moved)
                    }

                    //return the value
                    return(value);
                }

                //else if at a "<!" construct, skip it
                else if (i + 1 < data.Length && data[i] == '<' && data[i + 1] == '!')
                {
                    //move forward past the first '>' that follows
                    for (; i < data.Length && data[i] != '>'; i++)
                    {
                        ;
                    }
                    i++;
                }

                //else we just have the value
                else
                {
                    StringBuilder builder = new StringBuilder();

                    //move forward until '<'
                    bool firstWhitespace = true;
                    for (; i < data.Length && data[i] != '<'; i++)
                    {
                        //only include one whitespace character (removes multiple consecutive spaces) like proper HTML;
                        //any whitespace character is written as a plain space
                        if (char.IsWhiteSpace(data[i]))
                        {
                            if (firstWhitespace)
                            {
                                firstWhitespace = false;
                                builder.Append(' ');
                            }
                        }

                        else
                        {
                            firstWhitespace = true;
                            builder.Append(data[i]);
                        }
                    }

                    //temp = temp.Trim();

                    //store the collected text as a child item of type "Text"
                    parent.Children.Add(new HtmlItem()
                    {
                        Type       = "Text",
                        Value      = builder.ToString(),
                        Attributes = new Dictionary <string, string>(),
                        Children   = new List <HtmlItem>(),
                        Parent     = parent
                    });
                }
            }

            return(value);
        }
Exemple #14
0
 /// <summary>
 /// Returns a new dictionary holding every attribute name and value of <paramref name="item"/>.
 /// Can be overridden, for example to leave out select values such as a unique link value.
 /// </summary>
 /// <param name="item">The item whose attributes are copied.</param>
 /// <returns>A new dictionary; changing it does not affect the item's own attributes.</returns>
 protected virtual Dictionary <string, string> copyAttributesWithValues(HtmlItem item)
 {
     Dictionary <string, string> copy = new Dictionary <string, string>(item.Attributes);

     return copy;
 }
Exemple #15
0
 /// <summary>
 /// Builds the instruction chain for the content below <paramref name="fromStart"/>, beginning
 /// with the item returned by <c>fromStart.Next(HtmlItem.NextType.Down)</c>.
 /// </summary>
 /// <param name="fromStart">This would be the top item like the "p" item from the search results.</param>
 /// <returns>The head of the instruction chain, or null when none could be created.</returns>
 public HtmlInstruction CreateInstructions(HtmlItem fromStart)
 {
     HtmlItem firstBelow = fromStart.Next(HtmlItem.NextType.Down);

     return createInstructions(firstBelow);
 }
Exemple #16
0
 /// <summary>
 /// Decides whether <paramref name="item"/> is an end point of the search.
 /// <c>createInstructions</c> calls this for every item it visits: a non-null result is used
 /// as the instruction that follows the item, while null makes it keep searching below the item.
 /// </summary>
 /// <param name="item">The item being examined.</param>
 /// <returns>The instruction to run on the item when it is an end point; otherwise null.</returns>
 protected abstract HtmlInstruction Found(HtmlItem item);