/// <summary>
/// Decides whether <paramref name="item"/> satisfies this match: the type must match,
/// the attribute count must equal <c>AttributesCount</c> (unless that is -1), and every
/// entry in <c>Attributes</c> (when the array is not null) must match the item.
/// </summary>
/// <param name="item">The item to test.</param>
/// <returns>True when every enabled check passes, otherwise false.</returns>
public bool Matches(HtmlItem item)
{
    // Wrong type: none of the other checks matter.
    if (!item.IsType(Type))
    {
        return false;
    }

    // An AttributesCount of -1 switches the count check off.
    bool countIsConstrained = AttributesCount != -1;
    if (countIsConstrained && item.Attributes.Count != AttributesCount)
    {
        return false;
    }

    // No per-attribute requirements to verify.
    if (Attributes == null)
    {
        return true;
    }

    // A single failing requirement rejects the whole item.
    foreach (var requirement in Attributes)
    {
        if (!requirement.Matches(item))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Returns the first item of the given type that also has the requested attributes.
/// The children of this item are searched first; when <paramref name="continueUp"/> is set,
/// the search then moves on to the item returned by the private <c>next()</c> helper,
/// tests that item itself, and repeats from there.
/// </summary>
/// <param name="type">Type the item must have.</param>
/// <param name="attributes">Attributes handed to <c>HasAttributes</c>; may be null.</param>
/// <param name="continueUp">When true, keep searching from the item returned by <c>next()</c> once the children are exhausted.</param>
/// <param name="recursive">When false, only the direct children of each visited item are inspected.</param>
/// <returns>The first matching item, or null when none is found.</returns>
public HtmlItem Get(string type, Dictionary<string, string> attributes = null, bool continueUp = false, bool recursive = true)
{
    HtmlItem origin = this;

    while (true)
    {
        // Search below the current starting point.
        HtmlItem hit = origin.get(type, attributes, recursive);
        if (hit != null)
        {
            return hit;
        }

        // Without continueUp the search ends with the children.
        if (!continueUp)
        {
            return null;
        }

        HtmlItem following = origin.next();
        if (following == null)
        {
            return null;
        }

        // The following item is a candidate itself before its children are searched.
        if (following.IsType(type) && following.HasAttributes(attributes))
        {
            return following;
        }

        origin = following;
    }
}
/// <summary>
/// Searches <c>Children</c> in order for the first item of the given type that has the
/// requested attributes. With <paramref name="recursive"/> set, each child's own subtree
/// is searched before moving on to the next child.
/// </summary>
/// <returns>The first match, or null when there is none.</returns>
private HtmlItem get(string type, Dictionary<string, string> attributes, bool recursive)
{
    foreach (HtmlItem child in Children)
    {
        // The child itself is tested before anything beneath it.
        if (child.IsType(type) && child.HasAttributes(attributes))
        {
            return child;
        }

        if (!recursive)
        {
            continue;
        }

        HtmlItem nested = child.get(type, attributes, recursive);
        if (nested != null)
        {
            return nested;
        }
    }

    return null;
}
/// <summary>
/// Executes this instruction against <paramref name="item"/> and, for the navigating
/// instructions, hands the result on to <c>Next</c>.
/// </summary>
/// <param name="item">The item the instruction operates on.</param>
/// <returns>The extracted string, or null when any step of the chain fails.</returns>
public string Get(HtmlItem item)
{
    switch (Instruction)
    {
        case InstructionType.Start:
            // Start only forwards to the following instruction.
            return Next == null ? null : Next.Get(item);

        case InstructionType.GetAnyItem:
        case InstructionType.GetNextItem:
        {
            // A navigating instruction without a follow-up is an error.
            if (Next == null)
            {
                return null;
            }

            // GetAnyItem searches all levels below the item; GetNextItem only one level.
            bool searchAllLevels = Instruction == InstructionType.GetAnyItem;
            HtmlItem target = item.Get(Type, AttributesToMatch, false, searchAllLevels);
            return target == null ? null : Next.Get(target);
        }

        case InstructionType.GetAttributeValue:
        {
            string fetched;
            bool present = item.Attributes.TryGetValue(AttributeToFetch, out fetched);
            return present ? fetched : null;
        }

        case InstructionType.GetValue:
            return item.GetText();

        case InstructionType.GetImmediateValue:
            return item.GetImmediateText();

        default:
            return null;
    }
}
/// <summary>
/// Tests one attribute requirement against <paramref name="item"/>. With a null
/// <c>AttributeValue</c> the attribute only has to be present; otherwise its stored
/// value must equal <c>AttributeValue</c>.
/// </summary>
/// <param name="item">The item whose attributes are inspected.</param>
/// <returns>True when the requirement is met.</returns>
public bool Matches(HtmlItem item)
{
    // Presence-only requirement.
    if (AttributeValue == null)
    {
        return item.Attributes.ContainsKey(AttributeName);
    }

    // Presence plus value equality (default string equality).
    string actual;
    if (!item.Attributes.TryGetValue(AttributeName, out actual))
    {
        return false;
    }

    return string.Equals(actual, AttributeValue);
}
/// <summary>
/// Builds a new dictionary holding every attribute name of <paramref name="item"/>,
/// each mapped to null instead of its value.
/// </summary>
/// <param name="item">The item whose attribute names are copied.</param>
/// <returns>A fresh dictionary of attribute names with null values.</returns>
protected Dictionary<string, string> copyAttributeKeys(HtmlItem item)
{
    Dictionary<string, string> keysOnly = new Dictionary<string, string>();

    foreach (string name in item.Attributes.Keys)
    {
        keysOnly[name] = null;
    }

    return keysOnly;
}
/// <summary>
/// Builds an instruction chain leading to the first item that <c>Found</c> accepts.
/// Starting at <paramref name="item"/>, each item on the same level is visited in turn;
/// for every one, <c>Found</c> is asked first and, if it declines, the search recurses
/// into the item reached via <c>NextType.Down</c>.
/// </summary>
/// <param name="item">First item to visit; may be null.</param>
/// <returns>
/// A GetNextItem instruction whose <c>Next</c> is the rest of the chain, or null when
/// no visited item leads to a result.
/// </returns>
private HtmlInstruction createInstructions(HtmlItem item)
{
    for (HtmlItem current = item; current != null; current = current.Next(HtmlItem.NextType.SameLevel))
    {
        // Either this item is the end point, or the end point lies somewhere below it.
        HtmlInstruction tail = Found(current);
        if (tail == null)
        {
            tail = createInstructions(current.Next(HtmlItem.NextType.Down));
        }

        // Nothing on this branch: try the next item on the same level.
        if (tail == null)
        {
            continue;
        }

        // Optionally constrain the step by the item's attribute names (and values).
        Dictionary<string, string> attributes = null;
        if (UseAttributeKeys && current.Attributes.Count > 0)
        {
            attributes = UseAttributeValues
                ? copyAttributesWithValues(current)
                : copyAttributeKeys(current);
        }

        HtmlInstruction step = HtmlInstruction.CreateGetNextItem(current.Type, attributes);
        step.Next = tail;
        return step;
    }

    return null;
}
/// <summary>
/// Reads the tag at the current parse position and builds an <c>HtmlItem</c> for it,
/// filling in its type and any <c>name="value"</c> attributes. Uses the parser's shared
/// state: the position <c>i</c> and the capture buffers <c>arr</c> and <c>items</c>.
/// </summary>
/// <param name="data">The text being parsed.</param>
/// <param name="parent">Item recorded as the new item's <c>Parent</c>.</param>
/// <returns>
/// The populated item. If the <c>"&lt;~&gt;"</c> grab fails, a bare <c>new HtmlItem()</c>
/// is returned and <c>i</c> is left at -1.
/// </returns>
private HtmlItem createItem(string data, HtmlItem parent)
{
    HtmlItem item = new HtmlItem()
    {
        Attributes = new Dictionary<string, string>(),
        Children = new List<HtmlItem>(),
        Parent = parent
    };

    // Move to the end of the tag while capturing the text between '<' and '>' into arr[0].
    i = StringTools.Grab(data, i, "<~>", arr);
    if (i == -1)
    {
        // NOTE(review): the caller receives an item without Type/Attributes/Children and a
        // position of -1 here; confirm callers cope with that.
        return(new HtmlItem());
    }

    int index = 0;
    string inner = arr[0];

    if (inner.Contains(" "))
    {
        // Everything up to the first space is the tag type.
        index = StringTools.Grab(inner, index, "~ ", arr);
        item.Type = arr[0].Trim();

        // The remainder is read as a sequence of name="value" pairs.
        while ((index = StringTools.Grab(inner, index, "~=\"~\"", items)) != -1)
        {
            item.Attributes[items[0].Trim()] = items[1];

            // Skip a single separating space before the next pair.
            if (index < inner.Length && inner[index] == ' ')
            {
                index++;
            }
        }
    }
    else
    {
        // No space, so the whole tag content is the type.
        item.Type = inner.Trim();
    }

    return(item);
}
/// <summary>
/// Finds the first item that satisfies <paramref name="match"/>. The item is located by
/// type and attribute names/values and then verified with <c>match.Matches</c>.
/// </summary>
/// <param name="match">The match describing the wanted item.</param>
/// <returns>The matching item, or null when no item with that type and attributes exists.</returns>
/// <exception cref="NotImplementedException">
/// The first item found by type and attributes fails <c>match.Matches</c> (for example on
/// the attribute count). Continuing to the next candidate is not supported yet.
/// </exception>
public HtmlItem Get(HtmlMatch match)
{
    // A match without attribute requirements keeps Attributes null (Matches allows that),
    // so only build the lookup dictionary when there is something to convert.
    Dictionary<string, string> required = null;
    if (match.Attributes != null)
    {
        required = match.Attributes.ToDictionary(a => a.AttributeName, a => a.AttributeValue);
    }

    HtmlItem item = Get(match.Type, required);
    if (item == null)
    {
        return(null);
    }

    if (match.Matches(item))
    {
        return(item);
    }

    // Type and attributes matched but the full match did not; searching onwards for the
    // next possible item is not implemented.
    throw new NotImplementedException("The first item found by type and attributes did not satisfy the match; searching for further candidates is not implemented.");
}
/// <summary>
/// Collects, in visiting order, every item of the given type with the requested attributes
/// that is reached by repeatedly calling <c>Next()</c> from this item, stopping when the
/// item returned by <c>Next(false)</c> is reached.
/// </summary>
/// <param name="type">Type each collected item must have.</param>
/// <param name="attributes">Attributes handed to <c>HasAttributes</c>.</param>
/// <returns>The matching items; empty when there are none.</returns>
public LinkedList<HtmlItem> GetRange(string type, Dictionary<string, string> attributes)
{
    // The walk ends as soon as it arrives at this item.
    HtmlItem boundary = Next(false);
    LinkedList<HtmlItem> matches = new LinkedList<HtmlItem>();

    for (HtmlItem cursor = Next(); cursor != boundary; cursor = cursor.Next())
    {
        if (!cursor.IsType(type) || !cursor.HasAttributes(attributes))
        {
            continue;
        }

        matches.AddLast(cursor);
    }

    return matches;
}
/// <summary>
/// Constructs a HtmlObject from the html text data. The text is parsed beneath a
/// temporary container item, and the container's first child becomes <c>Root</c>.
/// <c>Root</c> is left unassigned when parsing produces no children.
/// </summary>
/// <param name="data">Complete html document text; must end with <c>&lt;/html&gt;</c> (trailing whitespace is ignored).</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="data"/> does not end with <c>&lt;/html&gt;</c>.</exception>
public HtmlObject(string data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // Tag names are markup, not linguistic text, so compare ordinally.
    if (!data.TrimEnd().EndsWith("</html>", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("The provided string wasn't a full html document.\n\n" + data + "\n\n");
    }

    // Artificial top-level container that collects whatever the parser produces.
    HtmlItem king = new HtmlItem()
    {
        Type = "",
        Attributes = new Dictionary<string, string>(),
        Children = new List<HtmlItem>()
    };

    parse(data, king);

    if (king.Children.Count > 0)
    {
        Root = king.Children[0];
    }
}
/// <summary>
/// Reads the content of <paramref name="item"/> starting at the shared parse position
/// <c>i</c>: either nested tags (delegated to <c>parse</c>) or a plain text value, followed
/// by the closing tag.
/// </summary>
/// <param name="data">The text being parsed.</param>
/// <param name="parent">Not used by this method.</param>
/// <param name="item">The item whose content is being read.</param>
/// <returns>
/// True when the closing tag that was reached does not name <paramref name="item"/>'s type
/// (the position is then left at that tag); false once the position has been advanced past
/// the location reported by <c>isClose</c>, or the end of the data was reached.
/// </returns>
private bool parseInner(string data, HtmlItem parent, HtmlItem item)
{
    while (i + 1 < data.Length)
    {
        // Trim any leading space.
        trimSpace(data);

        if (isStart(data))
        {
            // Nested tag: read it (and what follows) as children of this item.
            parse(data, item);

            // Check for a closing tag at the CURRENT position. The original passed the
            // literal 1 here, which tested index 1 of the document instead of index i.
            if (i + 1 < data.Length && isEnd(data, i))
            {
                if (!item.IsType(StringTools.Grab(data, i, "</~>", 1)[0]))
                {
                    return(true);
                }
                break;
            }
        }
        else
        {
            // Plain value: everything up to the next closing tag is the item's value.
            for (int x = i; x < data.Length; x++)
            {
                if (isEnd(data, x))
                {
                    item.Value = data.Substring(i, x - i);
                    i = x;
                    break;
                }
            }

            if (!item.IsType(StringTools.Grab(data, i, "</~>", 1)[0]))
            {
                return(true);
            }
            break;
        }
    }

    // Advance past the end tag.
    for (; i < data.Length; i++)
    {
        if (isClose(data, i))
        {
            i++;
            break;
        }
    }

    return(false);
}
/// <summary>
/// Parses <paramref name="data"/> from the shared position <c>i</c>, attaching everything it
/// reads to <paramref name="parent"/> until a closing tag ends the current level or the
/// data runs out. Handles opening tags, closing tags, <c>&lt;!...&gt;</c> sections and text.
/// </summary>
/// <param name="data">The text being parsed.</param>
/// <param name="parent">The item that receives the parsed children.</param>
/// <returns>
/// Always null: no branch produces a value, so every <c>Value</c> assigned from this
/// method's result is null.
/// </returns>
private string parse(string data, HtmlItem parent)
{
    while (i < data.Length)
    {
        // Opening tag.
        if (isStart(data))
        {
            HtmlItem element = createItem(data, parent);

            // An <li> opened inside an <li> becomes a sibling of the open one, not a child.
            if (element.IsType("li") && parent.IsType("li"))
            {
                parent.Parent.Children.Add(element);
                element.Parent = parent.Parent;
                element.Value = parse(data, element);

                // Returning also ends the level of the enclosing <li>.
                return null;
            }

            parent.Children.Add(element);

            // Descend unless the tag closed itself ("/>") or is a br/img tag.
            bool selfClosing = data[i - 2] == '/';
            if (!selfClosing && !element.IsType("br") && !element.IsType("img"))
            {
                element.Value = parse(data, element);
            }

            continue;
        }

        // Closing tag.
        if (isEnd(data, i))
        {
            int afterTag = StringTools.Grab(data, i, "</~>", arr);
            if (afterTag != -1)
            {
                // A "</br>" is recorded as a br item and does not end this level.
                if (arr[0].Equals("br", StringComparison.CurrentCultureIgnoreCase))
                {
                    parent.Children.Add(new HtmlItem()
                    {
                        Type = arr[0],
                        Attributes = new Dictionary<string, string>(),
                        Children = new List<HtmlItem>(),
                        Parent = parent
                    });
                    i = afterTag;
                    continue;
                }

                // Only consume the tag when it closes the current parent; otherwise it is
                // left in place for an enclosing level.
                if (parent.IsType(arr[0]))
                {
                    i = afterTag;
                }
            }

            return null;
        }

        // "<!" section: skip up to and including the next '>'.
        if (i + 1 < data.Length && data[i] == '<' && data[i + 1] == '!')
        {
            while (i < data.Length && data[i] != '>')
            {
                i++;
            }
            i++;
            continue;
        }

        // Text: read up to the next '<', collapsing each whitespace run into one space.
        // NOTE(review): if data[i] is a '<' that none of the branches above accepted, this
        // branch consumes nothing; confirm isStart makes that impossible.
        StringBuilder text = new StringBuilder();
        bool previousWasSpace = false;
        while (i < data.Length && data[i] != '<')
        {
            char current = data[i];
            if (!char.IsWhiteSpace(current))
            {
                text.Append(current);
                previousWasSpace = false;
            }
            else if (!previousWasSpace)
            {
                text.Append(' ');
                previousWasSpace = true;
            }
            i++;
        }

        parent.Children.Add(new HtmlItem()
        {
            Type = "Text",
            Value = text.ToString(),
            Attributes = new Dictionary<string, string>(),
            Children = new List<HtmlItem>(),
            Parent = parent
        });
    }

    return null;
}
/// <summary>
/// Produces a copy of the attributes of <paramref name="item"/>, names and values alike.
/// Virtual so that subclasses can override it, for example to leave out selected values
/// such as a unique link value.
/// </summary>
/// <param name="item">The item whose attributes are copied.</param>
/// <returns>A new dictionary with the same entries as <c>item.Attributes</c>.</returns>
protected virtual Dictionary<string, string> copyAttributesWithValues(HtmlItem item)
{
    Dictionary<string, string> snapshot = new Dictionary<string, string>(item.Attributes);
    return snapshot;
}
/// <summary>
/// Builds the instruction chain for the content below <paramref name="fromStart"/>: the
/// search begins at the item reached from it via <c>NextType.Down</c>.
/// </summary>
/// <param name="fromStart">This would be the top item like the "p" item from the search results.</param>
/// <returns>The first instruction of the chain, or null when no chain could be built.</returns>
public HtmlInstruction CreateInstructions(HtmlItem fromStart)
{
    HtmlItem firstBelow = fromStart.Next(HtmlItem.NextType.Down);
    return createInstructions(firstBelow);
}
/// <summary>
/// Decides whether <paramref name="item"/> is the end point of the search performed by
/// <c>createInstructions</c>, which calls this for every item it visits.
/// </summary>
/// <param name="item">The item currently being visited.</param>
/// <returns>
/// The instruction to run once this item has been reached (it becomes the <c>Next</c> of the
/// GetNextItem instruction created for the item), or null to keep searching.
/// </returns>
protected abstract HtmlInstruction Found(HtmlItem item);