/// <summary>
/// Re-parents this element: it is first removed from the child list of its
/// current parent (when it has one) and then <paramref name="parent"/> is
/// stored as the new parent. The element is NOT added to the new parent's
/// child list here; the caller is responsible for that.
/// </summary>
/// <param name="parent">The element to record as the new parent.</param>
public void SafelyChangeParent(PageElement parent)
{
    var previousParent = this.Parent;

    // Detach from the old parent so this element is never listed under two parents.
    if (previousParent != null)
    {
        previousParent.Children.Remove(this);
    }

    this.Parent = parent;
}
/// <summary>
/// Appends <paramref name="child"/> to this element's children. When the child
/// is itself a <c>PageElement</c> it is re-parented to this element first
/// (which also detaches it from any previous parent).
/// </summary>
/// <param name="child">The object to append to <c>Children</c>.</param>
public void SafelyAddChild(PageObject child)
{
    // Only PageElement children go through the re-parenting step; any other
    // kind of page object is simply appended.
    if (child is PageElement)
    {
        ((PageElement)child).SafelyChangeParent(this);
    }

    this.Children.Add(child);
}
/// <summary>
/// Creates a new <c>PageElement</c> seeded with <paramref name="currentChar"/>
/// when the three-character window is accepted by <c>IsPossibleStart</c>.
/// </summary>
/// <param name="lastChar">The character before the current one, or null when there is none.</param>
/// <param name="currentChar">The character currently being examined.</param>
/// <param name="nextChar">The character after the current one, or null when there is none.</param>
/// <returns>
/// A new element bound to the same page whose text buffer already contains
/// <paramref name="currentChar"/>, or null when this position cannot start an element.
/// </returns>
public override PageObject Create(char? lastChar, char currentChar, char? nextChar)
{
    if (!this.IsPossibleStart(lastChar, currentChar, nextChar))
    {
        return null;
    }

    PageElement element = new PageElement(this.Page);
    element.html.Append(currentChar);
    return element;
}
/// <summary>
/// Collects the text of every non-element object below
/// <paramref name="currentObject"/> (depth first, in child order) into
/// <paramref name="stringBuilder"/>, skipping the contents of elements named
/// "script".
/// </summary>
/// <param name="currentObject">The element whose subtree is walked.</param>
/// <param name="stringBuilder">Receives the collected text; existing content is kept.</param>
/// <returns>The full content of <paramref name="stringBuilder"/> after the walk.</returns>
string recursiveGetTextualObjects(PageElement currentObject, StringBuilder stringBuilder)
{
    appendTextualObjects(currentObject, stringBuilder);

    // Materialise the string once, at the top level only, instead of once per
    // nested element as the recursion unwinds.
    return stringBuilder.ToString();
}

// Depth-first walk that appends the text of non-element children to output.
private void appendTextualObjects(PageElement element, StringBuilder output)
{
    foreach (var child in element.Children)
    {
        PageElement childElement = child as PageElement;
        if (childElement == null)
        {
            // Not an element: contribute its raw text.
            output.Append(child.html);
            continue;
        }

        // Script bodies are not treated as page text.
        if (childElement.ElementName == "script")
        {
            continue;
        }

        appendTextualObjects(childElement, output);
    }
}
/// <summary>
/// Feeds one character of the page into this element. The character is
/// appended to the element's text buffer; when <c>IsEndingCharacter</c> reports
/// the end of the tag, the tag is classified (closing, self-closed or opening),
/// its lower-cased name is extracted, the page's open-element stack is updated
/// and any collected link is recorded. The character is then forwarded to the
/// attribute candidates that are still being built.
/// </summary>
/// <param name="lastChar">The character before the current one, or null when there is none.</param>
/// <param name="currentChar">The character being added.</param>
/// <param name="nextChar">The character after the current one, or null when there is none.</param>
public override void AddCharacter(char? lastChar, char currentChar, char? nextChar)
{
    html.Append(currentChar);

    if (currentChar == '=')
    {
        nextIsValue = true;
    }

    if (IsEndingCharacter(lastChar, currentChar, nextChar))
    {
        IsCompleted = true;
        string tagText = html.ToString();

        // NOTE(review): assumes the buffered text is at least two characters
        // long (an opening bracket plus the ending character) - confirm
        // against IsPossibleStart / IsEndingCharacter.
        // A slash right after the first character marks a closing tag, a slash
        // right before the last character marks a self-closed tag, anything
        // else is an opening tag.
        if (tagText[1] == '/')
        {
            TagType = PageElementTagType.Closing;
            ElementName = readElementName(tagText, 2);
        }
        else if (tagText[tagText.Length - 2] == '/')
        {
            TagType = PageElementTagType.SelfClosed;
            ElementName = readElementName(tagText, 1);
        }
        else
        {
            TagType = PageElementTagType.Opening;
            ElementName = readElementName(tagText, 1);
        }

        if (TagType == PageElementTagType.Opening)
        {
            this.Page.ElementsStack.Push(this);
        }
        else if (TagType == PageElementTagType.Closing)
        {
            if (hasOpenElement(ElementName))
            {
                // Unwind to the matching opener. Every element skipped on the
                // way was never closed: it is marked broken and its children
                // are handed to the next element popped from the stack.
                PageElement current = this.Page.ElementsStack.Pop();
                while (current.ElementName != this.ElementName)
                {
                    PageElement unclosed = current;
                    unclosed.TagType = PageElementTagType.Broken;
                    current = this.Page.ElementsStack.Pop();
                    current.AddChildren(unclosed.Children);
                    unclosed.Children.Clear();
                }

                // The closing tag becomes a sibling of its opener.
                if (current.Parent != null)
                {
                    current.Parent.SafelyAddChild(this);
                }
            }
            else
            {
                // No open element has this name. Popping would empty the stack
                // and throw, so leave the stack alone and flag this tag instead.
                TagType = PageElementTagType.Broken;
            }
        }

        if (hasLink && ElementName == "a")
        {
            // The first attribute value that follows an "href" attribute name
            // is treated as the link target.
            bool expectLinkValue = false;
            foreach (var attribute in Attributes)
            {
                if (expectLinkValue && attribute is ElementAttributeValue)
                {
                    string link = fixLink(attribute.html.ToString());
                    if (link != null)
                    {
                        Page.PageLinks.Add(link);
                    }
                    expectLinkValue = false;
                }

                if (string.Equals(attribute.html.ToString(), "href", System.StringComparison.OrdinalIgnoreCase))
                {
                    expectLinkValue = true;
                }
            }
        }
    }

    // Walk backwards so completed candidates can be removed while iterating.
    for (int j = possibles.Count - 1; j >= 0; j--)
    {
        var candidate = possibles[j];
        candidate.AddCharacter(lastChar, currentChar, nextChar);

        if (!candidate.IsCompleted)
        {
            continue;
        }

        if (string.Equals(candidate.html.ToString(), "href", System.StringComparison.OrdinalIgnoreCase))
        {
            hasLink = true;
        }

        if (candidate is ElementAttributeValue)
        {
            inValue = false;
            nextIsValue = false;
        }

        Attributes.Add(candidate);
        possibles.RemoveAt(j);
    }

    if (!inValue)
    {
        possibles.AddRange(ObjectFactory(lastChar, currentChar, nextChar));
    }
}

// Characters that terminate a tag name: whitespace, the self-closing slash and the closing bracket.
private static readonly char[] elementNameDelimiters = { ' ', '\t', '\r', '\n', '\f', '/', '>' };

// Returns the lower-cased (culture-invariant) tag name that begins at index start of tagText.
private static string readElementName(string tagText, int start)
{
    if (start >= tagText.Length)
    {
        return string.Empty;
    }

    int end = tagText.IndexOfAny(elementNameDelimiters, start);
    if (end < 0)
    {
        end = tagText.Length;
    }

    return tagText.Substring(start, end - start).ToLowerInvariant();
}

// Reports whether any element currently on the page's open-element stack has the given name.
private bool hasOpenElement(string elementName)
{
    foreach (PageElement open in this.Page.ElementsStack)
    {
        if (open.ElementName == elementName)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Walks <paramref name="pageString"/> one character at a time, feeding each
/// character (with its neighbours) to every page object still being built and
/// asking <c>ObjectFactory</c> for new objects that may start at that position.
/// Completed objects are attached to the current container element, text
/// values are accumulated into the page's searchable text and title, and at
/// the end the container's children are moved to the page's top-level list.
/// </summary>
/// <param name="pageString">The raw HTML text to parse.</param>
internal void Parse(string pageString)
{
    Page.Children.Clear();

    // Temporary root that collects everything found outside any open element.
    PageElement container = new PageElement(Page);
    Page.ElementsStack.Push(container);

    List<PageObject> inProgress = new List<PageObject>();
    char? previous = null;

    for (int position = 0; position < pageString.Length; position++)
    {
        char current = pageString[position];
        char? upcoming = (position + 1 < pageString.Length)
            ? pageString[position + 1]
            : (char?)null;

        // Iterate backwards so entries can be removed during the walk.
        for (int index = inProgress.Count - 1; index >= 0; index--)
        {
            PageObject candidate = inProgress[index];
            candidate.AddCharacter(previous, current, upcoming);

            if (candidate.IsCompleted)
            {
                PageElement asElement = candidate as PageElement;
                bool isClosingTag = asElement != null
                    && asElement.TagType == PageElement.PageElementTagType.Closing;

                // Closing tags are not attached here; everything else becomes
                // a child of the current container.
                if (!isClosingTag)
                {
                    container.SafelyAddChild(candidate);

                    if (candidate is PageElementValue)
                    {
                        if (container.ElementName != "script")
                        {
                            Page.SearchableText += candidate.html.ToString().ToLower();
                        }

                        if (container.ElementName == "title")
                        {
                            Page.Title += candidate.html.ToString();
                        }
                    }
                }

                // The completed object may have changed the element stack, so
                // the container is re-read from its top.
                container = Page.ElementsStack.Peek();
            }

            if (candidate.IsCompleted || !candidate.IsOK)
            {
                inProgress.Remove(candidate);
            }
        }

        inProgress.AddRange(ObjectFactory(previous, current, upcoming));
        previous = current;
    }

    // Whatever element is the container when the input ends is detached and
    // flagged as broken.
    container.Parent = null;
    container.TagType = PageElement.PageElementTagType.Broken;

    // Children are moved last-to-first and the container is appended after
    // them; the final Reverse then puts the container first, followed by its
    // former children in their original order.
    for (int last = container.Children.Count - 1; last >= 0; last--)
    {
        var child = container.Children[last];
        Page.Children.Add(child);

        PageElement childElement = child as PageElement;
        if (childElement != null)
        {
            childElement.Parent = null;
        }

        container.Children.Remove(child);
    }

    Page.Children.Add(container);
    Page.Children.Reverse();
}