/// <summary>
/// Fills LocationDictionary (country -> state -> city -> href) from the div tags with
/// class "colmask" that are filtered out of <paramref name="parent"/>.
/// </summary>
/// <remarks>
/// Inside each "colmask" div three kinds of tags are processed in the order returned by the filter:
/// an "a" tag with a "name" attribute starts a new country, a "div" with class "state_delimiter"
/// starts a new state, and any other "a" tag is a city whose "href" is stored.
/// Malformed input no longer throws: a country whose anchor name is missing from SectionToName
/// is keyed by the raw anchor name, states and cities seen before any country are skipped, and a
/// city seen before the first state of its country is stored under the empty state name.
/// </remarks>
/// <param name="parent">Tag that is filtered for the "colmask" divs.</param>
private void ParseCountries(HtmlTag parent)
 {
     List<HtmlTag> tagList = null;
     parent.FilterForChildrenByNameAndAttribute("div", new KeyValuePair<string, string>("class", "colmask"), out tagList);
     if (tagList == null)
         return;

     Dictionary<string, List<KeyValuePair<string, string>>> searchTagList =
            new Dictionary<string, List<KeyValuePair<string, string>>>();
     searchTagList["a"] = new List<KeyValuePair<string, string>>()
                                  { new KeyValuePair<string, string>("name", "*"),
                                    new KeyValuePair<string, string>("href", "*") };
     searchTagList["div"] = new List<KeyValuePair<string, string>>() { new KeyValuePair<string, string>("class", "state_delimiter") };

     // Both values deliberately survive from one "colmask" div to the next, as before.
     string currentCountry = null;
     string currentState = "";

     foreach (HtmlTag child in tagList)
     {
         List<HtmlTag> stateList = null;
         child.FilterForChildrenByNameAndAttribute(searchTagList, out stateList);
         if (stateList == null)
             continue;

         foreach (HtmlTag stateChild in stateList)
         {
             if (stateChild.Name == "a")
             {
                 string anchorName = null;
                 if (stateChild.Attributes.TryGetValue("name", out anchorName))
                 {
                     //Country
                     string countryName = null;
                     if (anchorName != null)
                         SectionToName.TryGetValue(anchorName, out countryName);

                     // Unknown section: fall back to the anchor name instead of using a null key.
                     if (countryName == null)
                         countryName = anchorName;

                     currentCountry = countryName;
                     // A state name from the previous country must not leak into this one.
                     currentState = "";

                     if (currentCountry != null)
                         LocationDictionary[currentCountry] = new Dictionary<string, Dictionary<string, string>>();
                 }
                 else
                 {
                     //City/Entry
                     Dictionary<string, Dictionary<string, string>> countryStates = null;
                     if (currentCountry == null || stateChild.Value == null
                         || !LocationDictionary.TryGetValue(currentCountry, out countryStates))
                         continue;

                     Dictionary<string, string> cities = null;
                     if (!countryStates.TryGetValue(currentState, out cities))
                     {
                         cities = new Dictionary<string, string>();
                         countryStates[currentState] = cities;
                     }

                     String entry = null;
                     stateChild.Attributes.TryGetValue("href", out entry);
                     cities[stateChild.Value] = entry;
                 }
             }
             else if (stateChild.Name == "div")
             {
                 //State
                 Dictionary<string, Dictionary<string, string>> statesOfCountry = null;
                 if (currentCountry == null || !LocationDictionary.TryGetValue(currentCountry, out statesOfCountry))
                     continue;

                 // A delimiter without text gets a placeholder name; null is not a valid key.
                 currentState = stateChild.Value ?? "Unspecified";
                 statesOfCountry[currentState] = new Dictionary<string, string>();
             }
         }
     }
 }
Example #2
0
        /// <summary>
        /// Applies <paramref name="filter"/> to the first top-level node whose name equals filter.Name.
        /// </summary>
        /// <param name="filter">Filter whose Name selects the top-level node to start from.</param>
        /// <param name="parent">
        /// Null when no node matched. Otherwise starts out as a VanillaCopy of the matching node
        /// (when filter.isParent() is true) or as an empty HtmlTag, and is then handed by reference
        /// to _FilterNodes.
        /// </param>
        /// <returns>False when no top-level node matched; otherwise the result of _FilterNodes.</returns>
        public bool FilterNodes(ParseFilter filter, out HtmlTag parent)
        {
            parent = null;

            for (int position = 0; position < _nodes.Count; position++)
            {
                HtmlTag candidate = _nodes[position];
                if (candidate.Name != filter.Name)
                    continue;

                // Only the first node with a matching name is ever examined.
                parent = filter.isParent() ? candidate.VanillaCopy(null) : new HtmlTag();
                return _FilterNodes(ref filter, ref parent, candidate);
            }

            return false;
        }
Example #3
0
        /// <summary>
        /// Looks for the opening tag that matches <paramref name="parent"/> (same Name) and records
        /// every other tag met on the way as a child of <paramref name="parent"/>.
        /// </summary>
        /// <remarks>
        /// Children and miscellaneous text are always inserted at index 0, so the element found last
        /// ends up first in the list.
        /// NOTE(review): this suggests MoveToNextTag() walks the document from the end towards the
        /// start; that helper is not visible here - confirm.
        /// Tags named "script" are dropped (neither matched nor stored).
        /// </remarks>
        /// <param name="parent">Tag whose opening tag is searched for; receives the children found.</param>
        /// <returns>
        /// True as soon as an opening tag named parent.Name was found and its attributes were parsed;
        /// false when MoveToNextTag() fails or EOF() is reached first.
        /// </returns>
        protected bool ParseTag( ref HtmlTag parent )
        {
            //Never set to true below: a successful match returns directly from inside the loop.
            bool found = false;
            //Keep going while there is input left.
            while (!found && !EOF())
            {
                //No further tag available: give up.
                if (!MoveToNextTag())
                    break;

                //Temporary traversal index; _pos itself stays on the tag's '<'
                int index = _pos;
                SkipWhitespace(ref index);

                //A closing tag ("</name") that is not a comment: a nested child starts here
                if (_html[_pos + 1] == '/' && !NextTagIsComment())
                {
                    //Nested tag
                    parent.HadChildren = true;
                    HtmlTag child_tag = new HtmlTag();
                    //Closing tag start
                    child_tag.CloseTag_Start = _pos;
                    child_tag.Level = parent.Level + 1;
                    //This tag's parent is us.
                    child_tag.Parent = parent;
                    //Move past the '</'
                    index += 2;
                    //Skip to first character of name
                    SkipWhitespace(ref index);
                    //Parse name
                    child_tag.Name = ParseTagName(ref index);
                    //Find the closing of this close tag
                    SkipWhitespace(ref index);
                    child_tag.CloseTag_Close = index;
                    //We don't want scripts: the child built so far is discarded
                    if (child_tag.Name == "script")
                        continue;
                    //Valid tag: recurse to find the child's own opening tag (result is ignored)
                    ParseTag(ref child_tag);
                    //Check if there is something in between the end of this tag and our previous first child
                    index++;
                    SkipWhitespace(ref index);
                    if (parent.Children.Count > 0 && parent.Children[0].OpenTag_Start != index)
                        parent.MiscellaneousItems.Insert(0, _html.Substring(index, (parent.Children[0].OpenTag_Start - index)));
                    //Insert at head of children
                    parent.Children.Insert(0, child_tag);
                }
                else if (!NextTagIsComment())
                {
                    //Opening tag: step over the '<' and read its name
                    index++;
                    string name = ParseTagName(ref index);

                    //Scripts are ignored entirely
                    if(name == "script")
                        continue;

                    SkipWhitespace(ref index);
                    if (name == parent.Name)
                    {
                        //Found it: this is the opening tag we were asked to locate
                        parent.OpenTag_Start = _pos;
                        ParseTagAttributes(ref parent, ref index);
                        return true;
                    }
                    else
                    {
                        //An opening tag with a different name is stored as a child of parent
                        parent.HadChildren = true;

                        HtmlTag child_tag = new HtmlTag();
                        child_tag.Name = name;
                        child_tag.Level = parent.Level + 1;
                        child_tag.OpenTag_Start = _pos;
                        child_tag.Parent = parent;
                        ParseTagAttributes(ref child_tag, ref index);
                        //For a tag with a trailing slash, keep the text between it and the previous first child
                        if (child_tag.TrailingSlash && parent.Children.Count > 0 && parent.Children[0].OpenTag_Start != index)
                            parent.MiscellaneousItems.Insert(0, _html.Substring(index, (parent.Children[0].OpenTag_Start - index)));
                        parent.Children.Insert(0, child_tag);
                    }
                }
            }
            return false;
        }
Example #4
0
        /// <summary>
        /// Recursive worker behind FilterNodes: copies the children of <paramref name="current_parent"/>
        /// that <paramref name="filter"/> accepts and attaches the copies to <paramref name="new_parent"/>.
        /// </summary>
        /// <param name="filter">
        /// Filter for the current level. Replaced by the child filter when a filter reporting
        /// isParent() is reached and its subtree matched.
        /// </param>
        /// <param name="new_parent">
        /// Tag receiving the copied children. Replaced by a copy of the matching tag when a filter
        /// reporting isParent() is reached.
        /// </param>
        /// <param name="current_parent">Original tag whose children are examined.</param>
        /// <returns>
        /// False when the filter allows no children but <paramref name="current_parent"/> has some, or
        /// when the filter lists acceptable children and none ended up under <paramref name="new_parent"/>;
        /// true otherwise.
        /// </returns>
        private bool _FilterNodes(ref ParseFilter filter, ref HtmlTag new_parent, HtmlTag current_parent)
        {
            if (!filter.AllChildren)
            {
                // A filter without acceptable children only matches a tag without children.
                if (filter.AcceptableChildren.Count == 0)
                {
                    if (current_parent.Children.Count == 0)
                        return true;

                    return false;
                }
            }

            foreach (HtmlTag tag in current_parent.Children)
            {
                ParseFilter child_filter = null;
                // NOTE(review): when filter.AllChildren is true the lookup on the right is skipped,
                // so child_filter stays null and the isParent() call below looks like it would throw
                // a NullReferenceException - confirm against ParseFilter.
                if (filter.AllChildren || filter.AcceptableChildren.TryGetValue(tag.Name, out child_filter))
                {
                    if (child_filter.isParent())
                    {
                        // This child is the tag the caller asked for: restart the result at a copy of it.
                        new_parent = tag.VanillaCopy(null);
                        if (_FilterNodes(ref child_filter, ref new_parent, tag))
                        {
                            filter = child_filter;
                            return true;
                        }
                    }
                    else
                    {
                        HtmlTag new_child = tag.VanillaCopy(new_parent);
                        if (_FilterNodes(ref child_filter, ref new_child, tag))
                        {
                            // The recursive call may have replaced child_filter (passed by ref) with a
                            // parent filter found deeper down; in that case propagate it upwards.
                            if (child_filter.isParent())
                            {
                                filter = child_filter;
                                new_parent = new_child;
                                return true;
                            }
                            new_parent.Children.Add(new_child);
                        }
                    }
                }
            }

            // Children were required but none matched.
            if (filter.AcceptableChildren.Count > 0 && new_parent.Children.Count == 0)
                return false;

            return true;
        }
Example #5
0
        /// <summary>
        /// Looks for the opening tag that matches <paramref name="parent"/> (same Name) and records
        /// every other tag met on the way as a child of <paramref name="parent"/>.
        /// </summary>
        /// <remarks>
        /// Children are always inserted at index 0, so the tag found last ends up first in the list.
        /// NOTE(review): this suggests MoveToNextTag() walks the document from the end towards the
        /// start; that helper is not visible here - confirm.
        /// Tags named "script" are dropped (neither matched nor stored).
        /// </remarks>
        /// <param name="parent">Tag whose opening tag is searched for; receives the children found.</param>
        /// <returns>
        /// True as soon as an opening tag named parent.Name was found and its attributes were parsed;
        /// false when MoveToNextTag() fails or EOF() is reached first.
        /// </returns>
        protected bool ParseTag( ref HtmlTag parent )
        {
            // Never set to true below: a successful match returns directly from inside the loop.
            bool found = false;
            while (!found && !EOF())
            {
                // No further tag available: give up.
                if (!MoveToNextTag())
                    break;

                // Temporary traversal index; _pos itself stays on the tag's '<'.
                int index = _pos;
                SkipWhitespace(ref index);

                // A closing tag ("</name") that is not a comment: a nested child starts here.
                if (_html[_pos + 1] == '/' && !NextTagIsComment())
                {
                    //Nested tag
                    parent.HadChildren = true;
                    HtmlTag child_tag = new HtmlTag();
                    child_tag.CloseTag_Start = _pos;
                    child_tag.Parent = parent;
                    // Move past the "</" and read the name.
                    index += 2;
                    SkipWhitespace(ref index);
                    child_tag.Name = ParseTagName(ref index);

                    // Scripts are ignored: the child built so far is discarded.
                    if (child_tag.Name == "script")
                        continue;

                    // Recurse to find the child's own opening tag (result is ignored).
                    ParseTag(ref child_tag);
                    parent.Children.Insert(0, child_tag);
                }
                else if (!NextTagIsComment())
                {
                    // Opening tag: step over the '<' and read its name.
                    index++;
                    string name = ParseTagName(ref index);

                    if(name == "script")
                        continue;

                    SkipWhitespace(ref index);
                    if (name == parent.Name)
                    {
                        //Found it: this is the opening tag we were asked to locate
                        parent.OpenTag_Start = _pos;
                        ParseTagAttributes(ref parent, ref index);
                        return true;
                    }
                    else
                    {
                        // An opening tag with a different name is stored as a child of parent.
                        parent.HadChildren = true;

                        HtmlTag child_tag = new HtmlTag();
                        child_tag.Name = name;
                        child_tag.OpenTag_Start = _pos;
                        child_tag.Parent = parent;
                        ParseTagAttributes(ref child_tag, ref index);
                        parent.Children.Insert(0, child_tag);
                    }
                }

            }

            return false;
        }
Example #6
0
        /// <summary>
        /// Turns the complete HTML buffer into HtmlTag objects and stores the top-level ones in _nodes.
        /// </summary>
        /// <returns>Always true.</returns>
        public bool ParseHTML()
        {
            while (MoveToNextTag())
            {
                HtmlTag topLevel = new HtmlTag();
                topLevel.Parent = null;
                int cursor = _pos;

                bool isClosingTag = _html[_pos + 1] == '/';
                if (!isClosingTag)
                {
                    // Opening tag: read the name after '<' and then its attributes.
                    topLevel.OpenTag_Start = _pos;
                    cursor++;
                    topLevel.Name = ParseTagName(ref cursor);
                    ParseTagAttributes(ref topLevel, ref cursor);
                }
                else
                {
                    // Closing tag: read the name after "</", then search for the matching opening tag.
                    topLevel.CloseTag_Start = cursor;
                    cursor += 2;
                    topLevel.Name = ParseTagName(ref cursor);
                    _pos = topLevel.CloseTag_Start;
                    if (topLevel.Name != null && ParseTag(ref topLevel))
                        _pos = topLevel.OpenTag_Start;
                }

                // Newest node goes to the front of the list.
                _nodes.Insert(0, topLevel);
            }

            return true;
        }
Example #7
0
        /// <summary>
        /// Parses one tag: reads its name with ParseTagName() and then consumes its attributes up to
        /// and including the closing '>'.
        /// </summary>
        /// <remarks>
        /// The tag is consumed completely even when the caller did not request it, so that parsing
        /// continues behind it. As a side effect _scriptBegin is set when the tag is named "script"
        /// (compared ignoring case) and cleared again when a '/' appears inside the tag.
        /// </remarks>
        /// <param name="name">Name of the tag the caller is requesting, or "*" for any tag.</param>
        /// <param name="tag">Replaced by a new HtmlTag describing this tag when it was requested;
        /// left untouched otherwise.</param>
        /// <returns>True if this tag is one the caller requested, false otherwise.</returns>
        protected bool ParseTag(string name, ref HtmlTag tag)
        {
            string tagName = ParseTagName();

            // Special cases: a DOCTYPE declaration and the start of a script block.
            _scriptBegin = false;
            bool isDoctype = String.Compare(tagName, "!DOCTYPE", true) == 0;
            if (!isDoctype && String.Compare(tagName, "script", true) == 0)
                _scriptBegin = true;

            // Only a requested tag gets an HtmlTag object.
            bool requested = name == "*" || String.Compare(tagName, name, true) == 0;
            if (requested)
            {
                tag = new HtmlTag();
                tag.Name = tagName;
                tag.Attributes = new Dictionary<string, string>();
            }

            SkipWhitespace();
            while (Peek() != '>')
            {
                if (Peek() != '/')
                {
                    // Inside a DOCTYPE the pieces are read with the value parser.
                    string attributeName = isDoctype ? ParseAttributeValue() : ParseAttributeName();
                    SkipWhitespace();

                    string attributeValue = String.Empty;
                    if (Peek() == '=')
                    {
                        Move();
                        SkipWhitespace();
                        attributeValue = ParseAttributeValue();
                        SkipWhitespace();
                    }

                    if (requested)
                    {
                        // A repeated attribute replaces the earlier one.
                        tag.Attributes.Remove(attributeName);
                        tag.Attributes.Add(attributeName, attributeValue);
                    }
                    continue;
                }

                // Trailing forward slash: the tag closes itself.
                if (requested)
                    tag.TrailingSlash = true;
                Move();
                SkipWhitespace();
                // If this is a script tag, it was closed.
                _scriptBegin = false;
            }

            // Step over the closing '>'.
            Move();

            return requested;
        }
 /// <summary>
 /// Appends the "href" of the first child of each of the first five children of
 /// <paramref name="parent"/> (fewer if there are fewer) to <paramref name="LastFiveEntriesSearched"/>.
 /// </summary>
 /// <param name="LastFiveEntriesSearched">List that receives the href values.</param>
 /// <param name="parent">Tag whose leading children are inspected.</param>
 private void FillLastFive(ref List<string> LastFiveEntriesSearched, HtmlTag parent)
 {
     int limit = parent.Children.Count;
     if (limit > 5)
         limit = 5;

     for (int slot = 0; slot < limit; slot++)
     {
         HtmlTag entry = parent.Children[slot];
         if (entry.Children.Count == 0)
             continue;

         HtmlTag firstChild = entry.Children[0];
         string site = "";
         if (firstChild.Attributes.Count > 0 && firstChild.Attributes.TryGetValue("href", out site))
             LastFiveEntriesSearched.Add(site);
     }
 }
Example #9
0
        /// <summary>
        /// Walks all descendants of this tag depth-first and adds every one that matches
        /// <paramref name="tag_list"/> to parent.Children.
        /// </summary>
        /// <remarks>
        /// A descendant matches when its Name is a key of <paramref name="tag_list"/> and its
        /// attributes satisfy the pair stored for that name: key "*" means "any attribute with this
        /// value", value "*" means "this attribute with any value", otherwise the exact pair must be
        /// present. Descendants without a name are never matched, but their children are still visited.
        /// </remarks>
        /// <param name="tag_list">Maps a tag name to the attribute pair the tag must carry.</param>
        /// <param name="parent">Tag collecting the matches; nothing happens when it is null.</param>
        public void FilterForChildrenByNameAndAttribute(Dictionary<string, KeyValuePair<string, string>> tag_list, ref HtmlTag parent)
        {
            if (Children.Count == 0 || parent == null)
                return;

            for (int i = 0; i < Children.Count; i++)
            {
                HtmlTag child = Children[i];

                // A null name cannot be used as a dictionary key (TryGetValue would throw).
                KeyValuePair<string, string> valid_attribute = new KeyValuePair<string, string>();
                if (child.Name != null && tag_list.TryGetValue(child.Name, out valid_attribute))
                {
                    bool matches;
                    if (valid_attribute.Key == "*")
                        matches = child.Attributes.ContainsValue(valid_attribute.Value);
                    else if (valid_attribute.Value == "*")
                        matches = child.Attributes.ContainsKey(valid_attribute.Key);
                    else
                        matches = child.Attributes.Contains(valid_attribute);

                    if (matches)
                        parent.Children.Add(child);
                }

                // Matching or not, keep searching below this child.
                child.FilterForChildrenByNameAndAttribute(tag_list, ref parent);
            }
        }
Example #10
0
        /// <summary>
        /// Recursively adds every descendant of this tag whose Name equals <paramref name="name"/>
        /// to parent.Children.
        /// </summary>
        /// <param name="name">Tag name to look for.</param>
        /// <param name="parent">Collecting tag; when null it is replaced by a VanillaCopy of this tag.</param>
        private void _FilterForChildrenByName(string name, ref HtmlTag parent)
        {
            if (parent == null)
                parent = VanillaCopy(null);

            int position = 0;
            while (position < Children.Count)
            {
                HtmlTag child = Children[position];
                if (child.Name == name)
                    parent.Children.Add(child);

                // Descend regardless of whether the child itself matched.
                child._FilterForChildrenByName(name, ref parent);
                position++;
            }
        }
Example #11
0
        /// <summary>
        /// Recursively adds every descendant of this tag whose Name is contained in
        /// <paramref name="names"/> to parent.Children.
        /// </summary>
        /// <param name="names">Accepted tag names.</param>
        /// <param name="parent">Collecting tag; nothing happens when it is null.</param>
        public void FilterForChildrenByName(List<string> names, ref HtmlTag parent)
        {
            if (Children.Count == 0 || parent == null)
                return;

            int position = 0;
            while (position < Children.Count)
            {
                HtmlTag child = Children[position];
                bool wanted = names.Contains(child.Name);
                if (wanted)
                    parent.Children.Add(child);

                // Descend regardless of whether the child itself matched.
                child.FilterForChildrenByName(names, ref parent);
                position++;
            }
        }
Example #12
0
 /// <summary>
 /// Collects every descendant of this tag named <paramref name="name"/> under a fresh copy of this tag.
 /// </summary>
 /// <param name="name">Tag name to look for.</param>
 /// <param name="parent">Receives a VanillaCopy of this tag holding the matches as children.</param>
 public void FilterForChildrenByName(string name, out HtmlTag parent)
 {
     HtmlTag collector = VanillaCopy(null);
     _FilterForChildrenByName(name, ref collector);
     parent = collector;
 }
Example #13
0
        /// <summary>
        /// Parses the next tag that matches the specified tag name.
        /// </summary>
        /// <param name="name">Name of the tags to parse ("*" = parse all tags).</param>
        /// <param name="tag">Returns information on the next occurrence of the specified tag,
        /// or null if none was found.</param>
        /// <returns>True if a matching tag was parsed; false if <paramref name="name"/> is null or
        /// empty, or if no further tag is available.</returns>
        public bool ParseNext(string name, out HtmlTag tag)
        {
            tag = null;

            // Without a tag name there is nothing to look for.
            if (String.IsNullOrEmpty(name))
                return false;

            // Keep scanning until a match turns up or the tags run out.
            while (MoveToNextTag())
            {
                // Step over the opening '<'.
                Move();

                char first = Peek();
                bool isComment = first == '!' && Peek(1) == '-' && Peek(2) == '-';
                if (isComment)
                {
                    // Comments are skipped entirely.
                    const string commentTerminator = "-->";
                    _pos = _html.IndexOf(commentTerminator, _pos);
                    NormalizePosition();
                    Move(commentTerminator.Length);
                    continue;
                }

                if (first == '/')
                {
                    // Closing tags are skipped.
                    _pos = _html.IndexOf('>', _pos);
                    NormalizePosition();
                    Move();
                    continue;
                }

                bool matched = ParseTag(name, ref tag);

                // Script content may contain tag characters, so jump straight
                // to the end of the script block.
                if (_scriptBegin)
                {
                    const string scriptTerminator = "</script";
                    _pos = _html.IndexOf(scriptTerminator, _pos,
                                         StringComparison.OrdinalIgnoreCase);
                    NormalizePosition();
                    Move(scriptTerminator.Length);
                    SkipWhitespace();
                    if (Peek() == '>')
                        Move();
                }

                if (matched)
                    return true;
            }

            return false;
        }
Example #14
0
        /// <summary>
        /// Parses the attributes of <paramref name="tag"/> starting at <paramref name="index"/>, up to the
        /// tag's closing '>', and stores the text that follows the tag (up to the next '<') in tag.Value.
        /// </summary>
        /// <remarks>
        /// A '/' inside the tag sets TrailingSlash and CloseTag_Close. A repeated attribute replaces
        /// the earlier one. If the buffer ends before a '>' is found, parsing stops at the end of the
        /// buffer instead of indexing past it; OpenTag_Close is then the buffer length and Value is
        /// left unchanged.
        /// </remarks>
        /// <param name="tag">Tag that receives the attributes, positions and value.</param>
        /// <param name="index">Position in _html to start from; on return it points just behind the
        /// closing '>' (or at the end of the buffer for an unterminated tag).</param>
        protected void ParseTagAttributes(ref HtmlTag tag, ref int index)
        {
            while (index < _html.Length && _html[index] != '>')
            {
                if (_html[index] == '/')
                {
                    // Handle trailing forward slash
                    tag.TrailingSlash = true;
                    index++;
                    SkipWhitespace(ref index);
                    tag.CloseTag_Close = index;
                    continue;
                }

                int attribute_start = index;

                // Parse attribute name
                string attribute_name = ParseAttributeName(ref index);
                SkipWhitespace(ref index);

                // Parse attribute value
                string value = String.Empty;
                if (index < _html.Length && _html[index] == '=')
                {
                    index++;
                    SkipWhitespace(ref index);
                    value = ParseAttributeValue(ref index);
                    SkipWhitespace(ref index);
                }

                if (index == attribute_start)
                {
                    // Nothing was consumed, so the next pass would see the same character again
                    // and never terminate: skip the offending character instead.
                    index++;
                    continue;
                }

                // This attribute replaces an existing attribute with the same name
                tag.Attributes.Remove(attribute_name);
                tag.Attributes.Add(attribute_name, value);
            }

            tag.OpenTag_Close = index;
            if (index >= _html.Length)
                return; // unterminated tag: there is no '>' to step over and no value to read

            index++;
            int value_end = _html.IndexOf('<', index);
            if (value_end != -1)
                tag.Value = _html.Substring(index, value_end - index);
        }
Example #15
0
        //title = <html><head><title>
        //body = <html><body><div id="userbody">
        /// <summary>
        /// Downloads and parses <paramref name="entry_site"/> and reports the entry as a match when
        /// every keyword in Details_.Keywords_ occurs in its body (div with id "userbody") or its title.
        /// </summary>
        /// <remarks>
        /// Body and title are both lower-cased before the keyword test. When a keyword is missing, a
        /// diagnostic file named after the last path segment of the URL plus ".xml" is written
        /// (relative to the current directory) and the method returns. Exceptions raised while
        /// examining a node are logged through Logger and do not abort the loop.
        /// </remarks>
        /// <param name="entry_site">URL of the entry; null or empty is ignored.</param>
        /// <param name="parent">Tag whose string form is passed to Parent_.UpdateEntries on a match.</param>
        private void SearchEntry(string entry_site, HtmlTag parent)
        {
            if (String.IsNullOrEmpty(entry_site))
                return;

            HtmlParser.HtmlParser parser = new HtmlParser.HtmlParser();
            if (!parser.ParseURL(entry_site, true, new string[] {"<br>"}))
                return;

            List<HtmlParser.HtmlTag> nodes = parser._nodes;
            HtmlTag new_parent = new HtmlTag();
            new_parent.Name = "Artificial Parent";

            // NOTE(review): the results of these two filter runs are never read below; they look
            // like an unfinished replacement for the manual search in the loop - confirm and remove.
            HtmlParser.ParseFilter title_filter = HtmlParser.ParseFilter.Create("html(head(title[parent]))");
            HtmlParser.ParseFilter body_filter = HtmlParser.ParseFilter.Create("html(head(div[parent]))");

            HtmlParser.HtmlTag title_tag = null;
            parser.FilterNodes(title_filter, out title_tag);
            HtmlParser.HtmlTag body_tag = null;
            parser.FilterNodes(body_filter, out body_tag);

            for (int i = 0; i < nodes.Count; i++)
            {
                if (nodes[i].Name == "html")
                {
                    try
                    {
                        HtmlTag header_tag = null;
                        nodes[i].FilterForChildrenByName("title", out header_tag);
                        // Same normalisation as the body so the keyword test treats both alike.
                        string title = (header_tag.Children[0].Value ?? String.Empty).ToLower();

                        Dictionary<string, KeyValuePair<string, string>> tag_list = new Dictionary<string, KeyValuePair<string, string>>();
                        tag_list.Add("div", new KeyValuePair<string, string>("id", "userbody"));
                        nodes[i].FilterForChildrenByNameAndAttribute(tag_list, ref new_parent);

                        //Deleted by author, expired, etc.
                        if(new_parent.Children.Count == 0)
                            continue;

                        string body = new_parent.Children[0].Value;
                        if (!String.IsNullOrEmpty(body))
                        {
                            body = body.ToLower();
                            foreach (string keyword in Details_.Keywords_)
                            {
                                if (!body.Contains(keyword) && !title.Contains(keyword))
                                {
                                    // Last path segment without its leading '/', so the dump lands
                                    // next to the program instead of in the root of the drive.
                                    int start = entry_site.LastIndexOf('/');
                                    string output_file = entry_site.Substring(start + 1) + ".xml";

                                    // using guarantees the file is closed even if a write fails.
                                    using (System.IO.StreamWriter test_xml = new System.IO.StreamWriter(output_file, false))
                                    {
                                        test_xml.WriteLine("Couldn't find: '" + keyword + "' in body: '" + body + "'");
                                        test_xml.WriteLine(parser.ToString());
                                    }
                                    return;
                                }
                            }
                            matchingEntriesFound++;
                            Parent_.UpdateEntries(parent.ToString());
                        }
                    }
                    catch (Exception error)
                    {
                        Logger.Instance.Log(error.ToString(), Details_.City_, LogType.ltError);
                    }
                }
                entries_searched_++;
                Parent_.UpdateTotalSearched();
            }
        }
Example #16
0
        /// <summary>
        /// Turns the complete HTML buffer into HtmlTag objects and stores the top-level ones
        /// (Level 0, no parent) in _nodes.
        /// </summary>
        /// <returns>Always true.</returns>
        public bool ParseHTML()
        {
            // One pass per tag that MoveToNextTag() can still find.
            while (MoveToNextTag())
            {
                HtmlTag root = new HtmlTag();
                root.Parent = null;
                root.Level = 0;

                // Scratch index for reading ahead; _pos keeps pointing at the tag's '<'.
                int scan = _pos;

                if (_html[_pos + 1] != '/')
                {
                    // Opening tag: step over '<', read the name, then the attributes.
                    root.OpenTag_Start = _pos;
                    scan++;
                    root.Name = ParseTagName(ref scan);
                    ParseTagAttributes(ref root, ref scan);
                }
                else
                {
                    // Closing tag: step over "</" and read the name.
                    root.CloseTag_Start = scan;
                    scan += 2;
                    root.Name = ParseTagName(ref scan);

                    // Continue from the "</" and look for the matching opening tag.
                    _pos = root.CloseTag_Start;
                    bool openingFound = root.Name != null && ParseTag(ref root);
                    if (openingFound)
                        _pos = root.OpenTag_Start;
                }

                // Newest top-level tag goes to the front of the list.
                _nodes.Insert(0, root);
            }

            return true;
        }
Example #17
0
        /// <summary>
        /// Creates a copy of this tag without its children and attaches it to <paramref name="NewParent"/>.
        /// </summary>
        /// <remarks>
        /// Name, Value, TrailingSlash and the three copied positions are taken over. The Attributes
        /// collection is shared with this tag, not duplicated.
        /// </remarks>
        /// <param name="NewParent">Parent assigned to the copy; may be null.</param>
        /// <returns>The new tag.</returns>
        public HtmlTag VanillaCopy(HtmlTag NewParent)
        {
            return new HtmlTag
            {
                Attributes = this.Attributes,
                Name = this.Name,
                Parent = NewParent,
                TrailingSlash = this.TrailingSlash,
                Value = this.Value,
                OpenTag_Start = this.OpenTag_Start,
                OpenTag_Close = this.OpenTag_Close,
                CloseTag_Start = this.CloseTag_Start
            };
        }
 /// <summary>
 /// Fills SectionToName from the first div with class "jump_to_continents" filtered out of
 /// <paramref name="parent"/>: for every child carrying an "href", the href without its first
 /// character becomes the key and the child's Value the name.
 /// </summary>
 /// <remarks>
 /// Does nothing when no such div is found. Children with a missing or empty href are skipped,
 /// and a key that already exists is overwritten instead of raising an exception.
 /// </remarks>
 /// <param name="parent">Tag that is filtered for the "jump_to_continents" div.</param>
 private void ParseSectionNames(HtmlTag parent)
 {
     List<HtmlTag> tagList = null;
     parent.FilterForChildrenByNameAndAttribute("div", new KeyValuePair<string, string>("class", "jump_to_continents"), out tagList);
     if (tagList == null || tagList.Count == 0)
         return;

     HtmlTag locationsStuff = tagList[0];
     foreach (HtmlTag child in locationsStuff.Children)
     {
         String key = null;
         if (!child.Attributes.TryGetValue("href", out key) || String.IsNullOrEmpty(key))
             continue;

         // Drop the leading character of the href.
         // presumably the '#' of an in-page link - verify against the page markup
         SectionToName[key.Substring(1)] = child.Value;
     }
 }
Example #19
0
    /// <summary>
    /// Downloads the craigslist site list and fills LocationDictionary as
    /// area -> state -> city -> website.
    /// </summary>
    /// <remarks>
    /// The div with class "jump_to_continents" supplies the abbreviation-to-area names; every div
    /// with class "colmask" is then scanned for area headers (h1.continent_header), state
    /// delimiters (div.state_delimiter) and city links (a with href).
    /// Scanning of a "colmask" div stops at the first header whose area cannot be resolved.
    /// Malformed entries (missing attributes, city without text, state or city before any area) are
    /// skipped instead of raising exceptions, and a city that is seen again overwrites its entry.
    /// </remarks>
    public void DownloadLocations()
    {
        string site = "http://www.craigslist.org/about/sites";
        
        HtmlParser.HtmlParser parser = new HtmlParser.HtmlParser();

        if (!parser.ParseURL(site, false, new string[] {}))
            return;

        List<HtmlParser.HtmlTag> nodes = parser._nodes;
        HtmlParser.HtmlTag new_parent = null;

        for (int i = 0; i < nodes.Count; i++)
        {
            if (nodes[i].Name != "html")
                continue;

            nodes[i].FilterForChildrenByName("div", out new_parent);
            List<string> class_names = new List<string>();
            class_names.Add("jump_to_continents");
            class_names.Add("colmask");
            Dictionary<string, List<string>> filter_for = new Dictionary<string, List<string>>();
            filter_for.Add("class", class_names);
            new_parent.FilterOutChildrenByAttribute(filter_for);

            Dictionary<string, string> abbr_to_area = new Dictionary<string, string>();

            for (int z = 0; z < new_parent.Children.Count; z++)
            {
                HtmlTag Child = new_parent.Children[z];
                if(Child.Attributes.Contains(new KeyValuePair<string,string>("class", "jump_to_continents")))
                {
                    for(int a = 0; a < Child.Children.Count; a++)
                    {
                        HtmlTag link = Child.Children[a];
                        if (link.Attributes.Count == 0)
                            continue;

                        string abbreviation = link.Attributes.ElementAt(0).Value;
                        if (abbreviation == null || abbreviation.Length == 0)
                            continue;

                        // Drop the leading character of the link target.
                        abbreviation = abbreviation.Substring(1);
                        abbr_to_area[abbreviation] = link.Value;
                    }
                }
                else if(Child.Attributes.Contains(new KeyValuePair<string,string>("class", "colmask")))
                {
                    HtmlParser.HtmlTag NewerChild = new HtmlParser.HtmlTag();
                    Dictionary<string, KeyValuePair<string, string>> tag_list = new Dictionary<string, KeyValuePair<string, string>>();
                    tag_list.Add("h1", new KeyValuePair<string, string>("class", "continent_header"));
                    tag_list.Add("div", new KeyValuePair<string, string>("class", "state_delimiter"));
                    tag_list.Add("a", new KeyValuePair<string, string>("href", "*"));
                    Child.FilterForChildrenByNameAndAttribute(tag_list, ref NewerChild);

                    string current_area = "";
                    string current_state = "";
                    for (int count_newer_children = 0; count_newer_children < NewerChild.Children.Count; count_newer_children++ )
                    {
                        HtmlParser.HtmlTag temp_child = NewerChild.Children[count_newer_children];
                        bool val_found = true;
                        switch (temp_child.Name)
                        {
                                /*area*/
                            case "h1":
                                {
                                    string temp_area = null;
                                    if (temp_child.Children.Count > 0 && temp_child.Children[0].Attributes.Count > 0)
                                        temp_area = temp_child.Children[0].Attributes.ElementAt(0).Value;

                                    // A header that cannot be resolved to an area ends this column.
                                    if (temp_area == null
                                        || !abbr_to_area.TryGetValue(temp_area, out current_area)
                                        || current_area == null)
                                    {
                                        val_found = false;
                                        break;
                                    }

                                    if (!LocationDictionary.ContainsKey(current_area))
                                        LocationDictionary.Add(current_area, new Dictionary<string, Dictionary<string, string>>());
                                }
                                break;
                                /*state*/
                            case "div":
                                {
                                    current_state = temp_child.Value;
                                    if (current_state == null)
                                        current_state = "Unspecified";

                                    // A state seen before any area is remembered by name only.
                                    Dictionary<string, Dictionary<string, string>> states_of_area = null;
                                    if (current_area != null
                                        && LocationDictionary.TryGetValue(current_area, out states_of_area)
                                        && !states_of_area.ContainsKey(current_state))
                                        states_of_area.Add(current_state, new Dictionary<string, string>());
                                }
                                break;
                                /*city*/
                            case "a":
                                {
                                    string city = temp_child.Value;

                                    // The filter above selected this tag because of its href.
                                    string website = null;
                                    temp_child.Attributes.TryGetValue("href", out website);

                                    Dictionary<string, Dictionary<string, string>> area_states = null;
                                    if (city == null || current_area == null
                                        || !LocationDictionary.TryGetValue(current_area, out area_states))
                                        break;

                                    Dictionary<string, string> cities = null;
                                    if (!area_states.TryGetValue(current_state, out cities))
                                    {
                                        cities = new Dictionary<string, string>();
                                        area_states.Add(current_state, cities);
                                    }
                                    cities[city] = website;
                                }
                                break;
                        }
                        if( !val_found )
                            break;
                    }
                    new_parent.Children[z] = NewerChild;
                }
            }
        }
    }