/// <summary>
/// Retrieves all the links on a page (or in a frame,
/// in the case of Webtender.com).
/// </summary>
/// <param name="sourceCode">The raw HTML source code from getSourceCode().</param>
/// <returns>
/// The segments produced by splitting the markup that follows "&lt;UL&gt;" on
/// "&lt;/A&gt;". The leading segments (at most one per "&lt;LI&gt;" found in
/// <paramref name="sourceCode"/>) are replaced by the site root plus the first
/// double-quoted value found in the segment. Segments beyond that count, and
/// segments that contain no quoted value, are returned unchanged.
/// </returns>
public static string[] getLinksToScrape(string sourceCode)
{
    const string linkRoot = "http://webtender.com";

    // One link is expected per list item on the page.
    int numLinks = TextTool.CountStringOccurrences(sourceCode, "<LI>");

    // Each segment ends where an anchor closes, so each one carries the
    // opening <A ...> tag of a single link.
    string snipOfSourceCode = textFollowing(sourceCode, "<UL>");
    string[] snipValue = Regex.Split(snipOfSourceCode, "</A>");

    // The <LI> count is taken over the whole page while the segments come only
    // from the text after <UL>, so the two can disagree; never index past the
    // end of the segment array.
    int limit = numLinks < snipValue.Length ? numLinks : snipValue.Length;

    for (int count = 0; count < limit; count++)
    {
        // Splitting on the double quote puts the first quoted value
        // (the link target) at index 1.
        string[] temp = snipValue[count].Split('"');

        // A segment without any quoted value has nothing to extract;
        // leave it untouched instead of throwing.
        if (temp.Length > 1)
        {
            snipValue[count] = linkRoot + temp[1];
        }
    }

    return snipValue;
}
/// <summary>
/// Carves out the recipe information (title, measurement of the first
/// ingredient, name of the first ingredient, and mixing instructions) and
/// assembles it into an INSERT statement for TestTable.
/// </summary>
/// <param name="sourceCode">This parameter comes from the getSourceCode() method
/// and provides the raw HTML page source.</param>
/// <returns>
/// The SQL INSERT statement as a single string. If any extraction step fails,
/// a warning dialog is shown and the statement built up to that point
/// (which is then incomplete) is returned.
/// </returns>
/// <remarks>
/// Only the first ingredient is extracted; further ingredients and the
/// creator's comments are not yet part of the statement.
/// </remarks>
public static string getRecipeDetails(string sourceCode)
{
    string recipeSQLstatement = "INSERT INTO TestTable (recipeName, meas, ingrName, mixInstructions) VALUES (\"";

    try
    {
        // Recipe title: the text after <H1>, up to the next tag.
        string title = textFollowing(sourceCode, "<H1>").Split('<')[0];
        recipeSQLstatement += escapeSqlText(title) + "\", ";

        // Measurement of the first ingredient: the text after <LI>,
        // up to the ingredient's anchor.
        string afterFirstItem = textFollowing(sourceCode, "<LI>");
        string[] itemParts = Regex.Split(afterFirstItem, " <A");
        recipeSQLstatement += "\"" + escapeSqlText(itemParts[0]) + "\", ";

        // Name of the first ingredient: the anchor's inner text.
        // itemParts[1] does not exist when the page has no ingredient anchor;
        // that case is handled by the catch below.
        string anchorText = textFollowing(itemParts[1], ">");
        string ingredientName = Regex.Split(anchorText, "</A")[0];
        recipeSQLstatement += "\"" + escapeSqlText(ingredientName) + "\", ";

        // Mixing instructions: the paragraph after the ingredient list.
        string afterParagraph = textFollowing(afterFirstItem, "<P>");
        string instructions = Regex.Split(afterParagraph, "</P>")[0];
        recipeSQLstatement += "\"" + escapeSqlText(instructions) + "\"";
        recipeSQLstatement += ");";
    }
    catch
    {
        // Any parsing failure is treated as "this is not a recipe page".
        MessageBox.Show("It appears that you are scraping a links page \nwithout checking the box.", "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation, MessageBoxDefaultButton.Button1);
    }

    return recipeSQLstatement;
}

/// <summary>
/// Doubles every double quote so that scraped text cannot terminate the
/// double-quoted SQL literal it is embedded in.
/// </summary>
/// <param name="value">Text taken from the scraped page.</param>
/// <returns>The text with each double quote doubled.</returns>
/// <remarks>
/// NOTE(review): this is a stop-gap. The values come from untrusted HTML;
/// executing them through a parameterized command is the robust fix, but
/// that requires changing how callers consume this method's result.
/// </remarks>
private static string escapeSqlText(string value)
{
    return value.Replace("\"", "\"\"");
}