Esempio n. 1
0
 /// <summary>
 /// Detaches this element from its current parent, if it has one, and then stores
 /// <paramref name="parent"/> as the new parent.
 /// </summary>
 /// <remarks>
 /// Only the previous parent's <c>Children</c> collection is touched here; this element
 /// is not added to the new parent's <c>Children</c> by this method.
 /// </remarks>
 /// <param name="parent">Value assigned to <c>Parent</c>; it is stored as-is, without a null check.</param>
 public void SafelyChangeParent(PageElement parent)
 {
     var previousParent = this.Parent;
     if (previousParent != null)
     {
         // Unlink from the old parent so it no longer lists this element as a child.
         previousParent.Children.Remove(this);
     }

     this.Parent = parent;
 }
Esempio n. 2
0
        /// <summary>
        /// Appends <paramref name="child"/> to this element's <c>Children</c>.
        /// When the child is a <c>PageElement</c> it is first re-parented to this element
        /// through <c>SafelyChangeParent</c>.
        /// </summary>
        /// <param name="child">The object to append; non-element objects are appended without any parent bookkeeping.</param>
        public void SafelyAddChild(PageObject child)
        {
            // Only elements track a parent, so only they need re-parenting.
            if (child is PageElement)
            {
                ((PageElement)child).SafelyChangeParent(this);
            }

            Children.Add(child);
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a new <c>PageElement</c> for this page when the given characters are
        /// accepted by <c>IsPossibleStart</c>.
        /// </summary>
        /// <param name="lastChar">The character before <paramref name="currentChar"/>, or null.</param>
        /// <param name="currentChar">The character being examined; it becomes the first character of the new element's html.</param>
        /// <param name="nextChar">The character after <paramref name="currentChar"/>, or null.</param>
        /// <returns>The new element, or null when <c>IsPossibleStart</c> rejects the characters.</returns>
        public override PageObject Create(char?lastChar, char currentChar, char?nextChar)
        {
            if (!this.IsPossibleStart(lastChar, currentChar, nextChar))
            {
                return null;
            }

            PageElement element = new PageElement(this.Page);
            element.html.Append(currentChar);
            return element;
        }
Esempio n. 4
0
 /// <summary>
 /// Appends the html of every non-element descendant of <paramref name="currentObject"/>
 /// to <paramref name="stringBuilder"/>, walking the children depth-first in order and
 /// skipping elements named "script" together with everything below them.
 /// </summary>
 /// <param name="currentObject">The element whose <c>Children</c> are walked.</param>
 /// <param name="stringBuilder">The builder that receives the text; any content it already holds is kept.</param>
 /// <returns>The full content of <paramref name="stringBuilder"/> after the walk.</returns>
 string recursiveGetTextualObjects(PageElement currentObject, StringBuilder stringBuilder)
 {
     appendTextualObjects(currentObject, stringBuilder);

     // Build the string once, here, rather than at every recursion level: the nested
     // results were never used, and each one copied the whole accumulated text.
     return stringBuilder.ToString();
 }

 /// <summary>
 /// Recursive worker for <c>recursiveGetTextualObjects</c>: appends the text below
 /// <paramref name="currentObject"/> to <paramref name="stringBuilder"/> without
 /// materializing an intermediate string.
 /// </summary>
 void appendTextualObjects(PageElement currentObject, StringBuilder stringBuilder)
 {
     foreach (var child in currentObject.Children)
     {
         PageElement childElement = child as PageElement;
         if (childElement == null)
         {
             // Not an element: its html is contributed as text.
             stringBuilder.Append(child.html);
             continue;
         }

         if (childElement.ElementName == "script")
         {
             // Script subtrees are excluded from the textual output.
             continue;
         }

         appendTextualObjects(childElement, stringBuilder);
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Feeds one character into this element. The character is appended to <c>html</c>;
        /// when <c>IsEndingCharacter</c> reports the end of the tag, the element is marked
        /// completed, its <c>TagType</c> and <c>ElementName</c> are derived from the accumulated
        /// text, <c>Page.ElementsStack</c> is updated, and any href value of an "a" element is
        /// added to <c>Page.PageLinks</c>. Finally the character is forwarded to every pending
        /// attribute candidate in <c>possibles</c>; completed candidates move to <c>Attributes</c>.
        /// </summary>
        /// <param name="lastChar">The character before <paramref name="currentChar"/>, or null.</param>
        /// <param name="currentChar">The character to consume.</param>
        /// <param name="nextChar">The character after <paramref name="currentChar"/>, or null.</param>
        public override void AddCharacter(char?lastChar, char currentChar, char?nextChar)
        {
            html.Append(currentChar);

            if (currentChar == '=')
            {
                nextIsValue = true;
            }

            if (IsEndingCharacter(lastChar, currentChar, nextChar))
            {
                IsCompleted = true;

                // NOTE(review): the index arithmetic below assumes the text has the shape
                // "<...>" (one leading and one trailing delimiter character) - confirm
                // against IsPossibleStart / IsEndingCharacter.
                string tagText = html.ToString();

                // Element names are compared against lowercase literals ("script", "a", ...),
                // so they are lowered with the invariant culture; the culture-sensitive
                // ToLower() can map 'I' to a dotless i under some cultures (e.g. tr-TR).
                // First check if it is a closing element, if not check if it is self closed,
                // if not it is an opening element.
                if (tagText[1] == '/')
                {
                    TagType     = PageElementTagType.Closing;
                    ElementName = tagText.Substring(2, tagText.Length - 3).ToLowerInvariant();
                }
                else if (tagText[tagText.Length - 2] == '/')
                {
                    TagType = PageElementTagType.SelfClosed;

                    // The name starts at index 1 and stops before the first space or slash,
                    // so its length is (delimiter index - 1). Without the "- 1" the delimiter
                    // itself ended up in the name ("<br/>" gave "br/").
                    int nameEnd = tagText.IndexOfAny(new char[] { ' ', '/' });
                    ElementName = tagText.Substring(1, nameEnd - 1).ToLowerInvariant();
                }
                else
                {
                    TagType = PageElementTagType.Opening;

                    int nameEnd = tagText.IndexOfAny(new char[] { ' ', '/', '>' });
                    ElementName = tagText.Substring(1, nameEnd - 1).ToLowerInvariant();
                }

                if (TagType == PageElementTagType.Opening)
                {
                    this.Page.ElementsStack.Push(this);
                }
                else if (TagType == PageElementTagType.Closing)
                {
                    // Look for a matching open element before unwinding. Without this check a
                    // closing tag that has no open counterpart would pop every element and
                    // then fail on the empty stack.
                    bool hasOpenMatch = false;
                    foreach (PageElement openElement in this.Page.ElementsStack)
                    {
                        if (openElement.ElementName == this.ElementName)
                        {
                            hasOpenMatch = true;
                            break;
                        }
                    }

                    if (hasOpenMatch)
                    {
                        // Unwind to the matching open element. Every element popped on the way
                        // was never closed: mark it broken and hand its children to the next
                        // element on the stack.
                        PageElement current = this.Page.ElementsStack.Pop();
                        while (current.ElementName != this.ElementName)
                        {
                            PageElement unclosed = current;
                            unclosed.TagType = PageElementTagType.Broken;
                            current          = this.Page.ElementsStack.Pop();
                            current.AddChildren(unclosed.Children);
                            unclosed.Children.Clear();
                        }

                        // The closing tag becomes a sibling of the element it closes.
                        current.Parent.SafelyAddChild(this);
                    }
                    else
                    {
                        // Stray closing tag: leave the stack untouched and flag this tag as
                        // broken so the caller can treat it like any other completed object.
                        TagType = PageElementTagType.Broken;
                    }
                }

                if (hasLink && ElementName == "a")
                {
                    // The attribute value that directly follows an "href" attribute is the link.
                    bool nextLink = false;
                    foreach (var attribute in Attributes)
                    {
                        if (nextLink && attribute is ElementAttributeValue)
                        {
                            string link = fixLink(attribute.html.ToString());
                            if (link != null)
                            {
                                Page.PageLinks.Add(link);
                            }
                            nextLink = false;
                        }

                        if (attribute.html.ToString().ToLowerInvariant() == "href")
                        {
                            nextLink = true;
                        }
                    }
                }
            }

            // Walk backwards so completed candidates can be removed by index while iterating.
            for (int j = possibles.Count - 1; j >= 0; j--)
            {
                var candidate = possibles[j];
                candidate.AddCharacter(lastChar, currentChar, nextChar);

                if (!candidate.IsCompleted)
                {
                    continue;
                }

                if (candidate.html.ToString().ToLowerInvariant() == "href")
                {
                    hasLink = true;
                }

                if (candidate is ElementAttributeValue)
                {
                    inValue     = false;
                    nextIsValue = false;
                }

                Attributes.Add(candidate);
                possibles.RemoveAt(j);
            }

            if (!inValue)
            {
                possibles.AddRange(ObjectFactory(lastChar, currentChar, nextChar));
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Parses <paramref name="pageString"/> one character at a time. Each character is fed
        /// to every object still being built and then to <c>ObjectFactory</c>, which may start
        /// new objects. Completed objects are attached to the current container, which is
        /// re-read from the top of <c>Page.ElementsStack</c> after every completion, and text
        /// values are collected into <c>Page.SearchableText</c> and <c>Page.Title</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>Page.ElementsStack</c>, <c>Page.SearchableText</c> and
        /// <c>Page.Title</c> are not reset here, only <c>Page.Children</c> is - confirm that
        /// this method is never called twice for the same page.
        /// </remarks>
        /// <param name="pageString">The html text to parse.</param>
        internal void Parse(string pageString)
        {
            Page.Children.Clear();

            // Temporary root so that top-level objects always have a container to attach to.
            PageElement tempContainer = new PageElement(Page);
            Page.ElementsStack.Push(tempContainer);

            List <PageObject> currentObjects = new List <FoogleEngine.PageObject>();
            char?lastChar = null;

            for (int i = 0; i < pageString.Length; i++)
            {
                char currentChar = pageString[i];
                char?nextChar    = (i + 1) == pageString.Length ? (char?)null : pageString[i + 1];

                // Walk backwards so finished or rejected objects can be removed by index.
                for (int j = currentObjects.Count - 1; j >= 0; j--)
                {
                    PageObject candidate = currentObjects[j];
                    candidate.AddCharacter(lastChar, currentChar, nextChar);

                    if (candidate.IsCompleted)
                    {
                        PageElement completedElement = candidate as PageElement;
                        bool isClosingTag = completedElement != null
                            && completedElement.TagType == PageElement.PageElementTagType.Closing;

                        // Closing tags are not attached here; everything else becomes a child
                        // of the current container.
                        if (!isClosingTag)
                        {
                            tempContainer.SafelyAddChild(candidate);

                            if (candidate is PageElementValue)
                            {
                                string text = candidate.html.ToString();
                                if (tempContainer.ElementName != "script")
                                {
                                    Page.SearchableText += text.ToLower();
                                }
                                if (tempContainer.ElementName == "title")
                                {
                                    Page.Title += text;
                                }
                            }
                        }

                        // The completed object may have changed the element stack, so the
                        // container is refreshed from its top.
                        tempContainer = Page.ElementsStack.Peek();
                    }

                    if (candidate.IsCompleted || !candidate.IsOK)
                    {
                        // RemoveAt avoids the linear, equality-based search that
                        // Remove(currentObjects[j]) performs for an index already known.
                        currentObjects.RemoveAt(j);
                    }
                }

                currentObjects.AddRange(ObjectFactory(lastChar, currentChar, nextChar));

                lastChar = currentChar;
            }

            // Flatten the final container: it is marked broken and its children are moved
            // to the page.
            // NOTE(review): tempContainer is whatever was on top of the stack after the last
            // completed object, not necessarily the root created above; elements deeper in
            // the stack are not flattened - confirm this is intended.
            tempContainer.Parent  = null;
            tempContainer.TagType = PageElement.PageElementTagType.Broken;
            for (int i = tempContainer.Children.Count - 1; i >= 0; i--)
            {
                var child = tempContainer.Children[i];
                Page.Children.Add(child);

                PageElement childElement = child as PageElement;
                if (childElement != null)
                {
                    childElement.Parent = null;
                }

                tempContainer.Children.RemoveAt(i);
            }

            // Children were appended in reverse order followed by the container, so the
            // final Reverse() yields: container first, then the children in document order.
            Page.Children.Add(tempContainer);
            Page.Children.Reverse();
        }