/// <summary>
/// Downloads an edition page and hands every card link found in it to a
/// <c>McdiCardParser</c>.
/// </summary>
/// <remarks>
/// Scanning starts at the first occurrence of <c>class="even"</c> and picks up
/// every <c>&lt;td&gt;&lt;a href="URL"&gt;NAME&lt;/a&gt;</c> fragment after it.
/// If the start marker is missing, or a link fragment is truncated, scanning
/// stops quietly instead of throwing on a negative index.
/// </remarks>
/// <param name="url">
/// Page address; it is appended to <c>McdiSiteParser.PREFIX</c> before scraping.
/// </param>
public void ParseUrl(string url)
{
    // Markup tokens are matched byte-for-byte, never with culture rules.
    const System.StringComparison ordinal = System.StringComparison.Ordinal;
    const string startMarker = "class=\"even\"";
    const string leftToken = "<td><a href=\"";
    const string splitToken = "\">";
    const string rightToken = "</a>";

    McdiCardParser cardParser = new McdiCardParser(database);
    UrlScraper scraper = new UrlScraper();
    string html = scraper.Scrape(McdiSiteParser.PREFIX + url);

    // NOTE(review): whether Scrape can return null/empty is not visible here;
    // guard so a failed download yields "no cards" rather than an exception.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    int curpos = html.IndexOf(startMarker, ordinal);
    if (curpos < 0)
    {
        // No start marker on this page: nothing to process.
        return;
    }

    // Process all the cards
    while (true)
    {
        int leftIndex = html.IndexOf(leftToken, curpos, ordinal);
        if (leftIndex < 0)
        {
            break;
        }

        int urlStart = leftIndex + leftToken.Length;
        int splitIndex = html.IndexOf(splitToken, urlStart, ordinal);
        if (splitIndex < 0)
        {
            break; // truncated link: href never closed
        }

        int nameStart = splitIndex + splitToken.Length;
        int rightIndex = html.IndexOf(rightToken, nameStart, ordinal);
        if (rightIndex < 0)
        {
            break; // truncated link: anchor never closed
        }

        string cardUrl = html.Substring(urlStart, splitIndex - urlStart);

        // Process the card
        cardParser.ParseUrl(cardUrl);

        // Resume at the closing tag so the next search moves forward.
        curpos = rightIndex;
    }
}
/// <summary>
/// Scrapes <c>sitemap.html</c>, adds one <c>Edition</c> per link found in the
/// first <c>&lt;h2&gt;</c> section to <c>database.Editions</c>, then runs a
/// <c>McdiEditionParser</c> over every edition in <c>database.Editions</c>.
/// </summary>
/// <remarks>
/// The section scanned runs from the first <c>&lt;h2&gt;</c> up to the next
/// <c>&lt;h2&gt;</c>, or to the end of the document when there is no second
/// heading. Each entry is expected to look like
/// <c>&lt;li&gt;&lt;a href="URL"&gt;NAME&lt;/a&gt; &lt;small style="color: #aaa;"&gt;ABBR&lt;/small&gt;&lt;/li&gt;</c>.
/// Missing headings or truncated entries end the scan quietly instead of
/// throwing on a negative index.
/// </remarks>
public void Parse()
{
    // Markup tokens are matched byte-for-byte, never with culture rules.
    const System.StringComparison ordinal = System.StringComparison.Ordinal;
    const string url = "sitemap.html";
    const string sectionToken = "<h2>";
    const string leftToken = "<li><a href=\"";
    const string split1Token = "\">";
    const string split2Token = "</a> <small style=\"color: #aaa;\">";
    const string rightToken = "</small></li>";

    UrlScraper scraper = new UrlScraper();
    string html = scraper.Scrape(McdiSiteParser.PREFIX + url);

    // NOTE(review): whether Scrape can return null/empty is not visible here;
    // treat that the same as "no section found".
    int startpos = string.IsNullOrEmpty(html) ? -1 : html.IndexOf(sectionToken, ordinal);
    if (startpos >= 0)
    {
        int endpos = html.IndexOf(sectionToken, startpos + 1, ordinal);
        if (endpos < 0)
        {
            // Only one heading: the section extends to the end of the page.
            endpos = html.Length;
        }

        // While what we are about to process is english...
        int curpos = startpos;
        while (true)
        {
            int leftIndex = html.IndexOf(leftToken, curpos, ordinal);

            // Stop when there are no more links (-1) or the next link
            // belongs to a later section.
            if (leftIndex < 0 || leftIndex >= endpos)
            {
                break;
            }

            int urlStart = leftIndex + leftToken.Length;
            int split1Index = html.IndexOf(split1Token, urlStart, ordinal);
            if (split1Index < 0)
            {
                break; // truncated entry
            }

            int nameStart = split1Index + split1Token.Length;
            int split2Index = html.IndexOf(split2Token, nameStart, ordinal);
            if (split2Index < 0)
            {
                break; // truncated entry
            }

            int abbrStart = split2Index + split2Token.Length;
            int rightIndex = html.IndexOf(rightToken, abbrStart, ordinal);
            if (rightIndex < 0)
            {
                break; // truncated entry
            }

            Edition edition = new Edition();
            edition.SourceUrl = html.Substring(urlStart, split1Index - urlStart);
            edition.Name = html.Substring(nameStart, split2Index - nameStart);
            edition.Abbreviation = html.Substring(abbrStart, rightIndex - abbrStart);

            // Collect
            database.Editions.Add(edition);

            // Resume at the closing tag so the next search moves forward.
            curpos = rightIndex;
        }
    }

    // Process the sets
    foreach (Edition ed in database.Editions)
    {
        System.Console.WriteLine("====" + ed + ":" + ed.SourceUrl + "====");
        McdiEditionParser editionParser = new McdiEditionParser(database);
        editionParser.ParseUrl(ed.SourceUrl);
    }
}