/// <summary>
/// Handles the Button1 click. Writes every postcode record returned for the "SN" area to the
/// response, downloads the Zoopla search page addressed by <c>wholeurl</c>, reports the highest
/// pagination number found in it, fills ListBox1-3 with the listing links found on the page and then,
/// for each listing, downloads its details page and extracts SVG references, bullet points, features,
/// the address summary, photos, the price and the description, handing the SVG and photo results to
/// the project's database helper classes.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// <c>wholeurl</c> is not declared in this method; it must be supplied by the enclosing class.
/// NOTE(review): the extracted fragments are written to the response without HTML encoding, exactly
/// as before. They come from a third-party page, so confirm whether rendering them raw is intended.
/// </remarks>
protected void Button1_Click(object sender, EventArgs e)
{
    const string website = "https://www.zoopla.co.uk";

    // ---- 1. List all postcode records for the "SN" area --------------------------------------
    Postcodes postcodes = new Postcodes();
    List<Postcode> allPostcodes = postcodes.Getallpostcodes("SN");
    foreach (var postcodeEntry in allPostcodes)
    {
        Response.Write("<br/> Place Name = " + postcodeEntry.nameofplace
            + "<br/> Index Of Post Code = " + postcodeEntry.postcodeindex
            + "<br/> Actual Post Code = " + postcodeEntry.postcode);
    }

    // ---- 2. Download the top-level search results page ---------------------------------------
    string htmlpage;
    using (WebClient searchClient = new CustomWebClient())
    {
        // Only the scheme is sent; no base64 credentials are appended.
        searchClient.Headers[HttpRequestHeader.Authorization] = "Basic ";
        htmlpage = searchClient.DownloadString(wholeurl);
    }

    // ---- 3. Find the highest page number among the pagination links --------------------------
    string pagepattern = "<a href=\"/for-sale/houses/sn/\\?beds_min=\\d{0,3}&price_max=\\d{0,10}&property_type=houses&price_min=\\d{0,10}&page_size=\\d{0,5}&q=sn&results_sort=lowest_price&search_source=refine&radius=\\d{0,3}&pn=(?<grouppagenumer>\\d{0,5})\"";
    Regex regexlastpagenumber = new Regex(pagepattern);
    int biggestnumber = 0;
    foreach (Match pageMatch in regexlastpagenumber.Matches(htmlpage))
    {
        string pageText = pageMatch.Groups["grouppagenumer"].Value;
        Response.Write("<br/><br/>Page Numbers That Match = " + pageText + "<br/>");

        // The group is \d{0,5}, so it can legally match an empty string; TryParse skips that
        // case instead of throwing FormatException as int.Parse did.
        int pagenumber;
        if (int.TryParse(pageText, out pagenumber) && pagenumber > biggestnumber)
        {
            biggestnumber = pagenumber;
        }
    }
    Response.Write("Maximum Page Number = " + biggestnumber.ToString());

    Response.Write("Start of href");
    Response.Write("<br/><br/><a href=\"https://www.zoopla.co.uk/for-sale/property/sn/?q=sn&search_source=home&radius=0&pn=3\">Next Page Number To Click</a><br/><br/>");
    Response.Write("End of href");

    // ---- 4. Collect the listing references on the search page --------------------------------
    // The result of this call was never consumed by the original code. The call is retained
    // because Getpropertyreferences is defined elsewhere and may have side effects.
    Getwebparts getpathref = new Getwebparts();
    getpathref.Getpropertyreferences(htmlpage, "<a href=\"/for-sale/details/", "\" ");

    Getwebparts getpathrefregex = new Getwebparts();

    // Matches: <a class="photo-hover" href="/for-sale/details/NNNNNNNN
    string mypattern = "(?<groupclass><a class=)\"(?<grouphover>photo-hover)\"(?<grouphref> href=)\"(?<groupmario>/for-sale/details/\\d{8})";
    Regex rgxgroup = new Regex(mypattern);

    // Run the regex once and reuse the result for all three list boxes.
    MatchCollection listingMatches = rgxgroup.Matches(htmlpage);

    foreach (Match listingMatch in listingMatches)
    {
        ListBox1.Items.Add(listingMatch.Groups["groupmario"].Value);
        ListBox1.Items.Add(listingMatch.Groups["grouphref"].Value);
        ListBox1.Items.Add(listingMatch.Groups["grouphover"].Value);
        ListBox1.Items.Add(listingMatch.Groups["groupclass"].Value);
        ListBox1.Items.Add(listingMatch.Groups[0].Value);
    }

    foreach (Match listingMatch in listingMatches)
    {
        ListBox2.Items.Add(website + listingMatch.Groups["groupmario"].Value);
    }

    // ---- 5. Visit every listing and extract its details --------------------------------------
    int hitc = 0;
    foreach (Match listingMatch in listingMatches)
    {
        string fullurl = website + listingMatch.Groups["groupmario"].Value;

        // Increment first so ListBox3 and the response show the same (1-based) hit number.
        hitc += 1;
        ListBox3.Items.Add("<a href=\"" + fullurl + "\"> Hit Number = " + hitc + "</a>");
        // A space now separates the href value from the target attribute.
        Response.Write("<br/><a href=\"" + fullurl + "\" target=\"_blank\">" + fullurl + " Hit Number = " + hitc + "</a>");

        Downloadpage mydetailshtmlpage = new Downloadpage();
        string myhtmlreturned = mydetailshtmlpage.Downloadhtmlpage(fullurl);

        // SVG image references: report them, then hand the list to the database helper.
        string mysvgimages = "<img\\s.*=\"(?<groupclass>.*svg)";
        List<MyRegextraction> Gotsvg = new MyRegextraction().GetDescription(myhtmlreturned, mysvgimages);
        foreach (var showsvg in Gotsvg)
        {
            Response.Write("<br/>Returned SVG Points = " + showsvg.PropertyDescription + "<br/>");
        }
        Insertsvgtodatabase Insimagedb = new Insertsvgtodatabase();
        Insimagedb.Insimagetodb(Gotsvg, false);

        // Bullet points.
        string myregexbulletpoints = "\\s{0,50}<li class=\"dp-features-list__item\">.*\\n\\s{0,100}.*\\n\\s{0,100}</li>";
        List<MyRegextraction> Gotbulletts = new MyRegextraction().GetDescription(myhtmlreturned, myregexbulletpoints);
        foreach (var showbulletts in Gotbulletts)
        {
            Response.Write("<br/>Returned Bullett Points = " + showbulletts.PropertyDescription + "<br/>");
        }

        // Features.
        string myregexfeatures = "\\s{0,50}<span class=\"dp-features-list__text\">.*";
        List<MyRegextraction> Gotfeatures = new MyRegextraction().GetDescription(myhtmlreturned, myregexfeatures);
        foreach (var showfeatures in Gotfeatures)
        {
            Response.Write("<br/>Returned features = " + showfeatures.PropertyDescription + "<br/>");
        }

        // Address summary (address and postcode captured as separate groups). It is extracted
        // here but written after the photos, which keeps the original output order.
        string myregexsummary = "\\s{0,50}<h2 class=\"ui-property-summary__address\">(?<groupaddress>.*),(?<grouppostcode>.*)</h2>";
        List<MyRegextraction> Gotsummary = new MyRegextraction().GetDescription(myhtmlreturned, myregexsummary);

        // Photos: only every second match is treated as the correct image path and stored.
        string myregeximages = "\\s{0,100}.*<img data-src=\"(?<groupimagesrc>.*.jpg)";
        List<MyRegextraction> Gotimages = new MyRegextraction().GetDescription(myhtmlreturned, myregeximages);
        insertimagetodatabase Insertimagetodbase = new insertimagetodatabase();
        int myindex = 0;
        foreach (var showimages in Gotimages)
        {
            myindex += 1;
            if (myindex % 2 != 0)
            {
                Response.Write("<br/>Number is odd");
                continue;
            }

            Response.Write("Number is even");
            Response.Write("<br/>Full Images Path = " + showimages.PropertyDescription);
            bool insertsuccess = Insertimagetodbase.Insertimagetodbase(showimages.PropertyDescription, "stuff");
            Response.Write(insertsuccess ? "Successful Database Update" : "Unsuccessful Database Update");
        }

        foreach (var showsummary in Gotsummary)
        {
            Response.Write("<br/>Returned Summary = " + showsummary.PropertyDescription + "<br/>");
        }

        // Price.
        string myregexprice = "\\s{0,50}<p class=\"ui-pricing__main-price ui-text-t4\">£\\d{0,10},\\d{0,10}</p>";
        List<MyRegextraction> Gotprice = new MyRegextraction().GetDescription(myhtmlreturned, myregexprice);
        foreach (var showprice in Gotprice)
        {
            Response.Write("<br/>Returned Price = " + showprice.PropertyDescription + "<br/>");
        }

        // Description.
        string myregpat = "\\n\\s{0,50}<div class=\"dp-description__text\">.*\\n\\s{0,50}.*\\n\\s{0,50}</div>";
        List<MyRegextraction> Gotdes = new MyRegextraction().GetDescription(myhtmlreturned, myregpat);
        foreach (var showdes in Gotdes)
        {
            Response.Write("<br/>Returned Descriptions = " + showdes.PropertyDescription + "<br/>");
        }

        // NOTE(review): the same four hard-coded sample images are saved for every listing;
        // this looks like test scaffolding - confirm before relying on it.
        String[] myarrayurlimage = new String[]
        {
            "https://lc.zoocdn.com/32d3e36d37e1b758b4fa096e9078fd3bd1742ade.jpg",
            "https://lc.zoocdn.com/c2a8a5af5cec2db187ae1a37d4f8d3965e9d5b87.jpg",
            "https://media.rightmove.co.uk/dir/crop/10:9-16:9/78k/77900/73713541/77900_MAR190232_IMG_06_0000_max_476x317.jpg",
            "https://pbprodimages.azureedge.net/images/medium/2a00f1ab-a7cb-4315-b247-c3d40636f041.jpg"
        };
        Savetodatabase additemtodatabase = new Savetodatabase();
        additemtodatabase.Savephototodatabase(myarrayurlimage, "abc", true);
    }

    // ---- 6. List the property references found by the pattern-match helper -------------------
    List<getpropref> patermatchpropertyreference = getpathrefregex.patermatchpropertyreference(htmlpage, mypattern);
    foreach (var mytotrefregex in patermatchpropertyreference)
    {
        Response.Write("<br/><br/>" + mytotrefregex.propertyreference.ToString());
    }
}
/// <summary>
/// Handles the Button3 click. Reads the 'SN' rows from the <c>postcodes</c> table, downloads the
/// first Zoopla results page for each postcode, extracts the image URLs that start with
/// <c>https://lid.zoocdn.com</c> and end in <c>.jpg</c> (and echoes the text following each " £"),
/// optionally saves the images under <c>C:\Compress\</c> when CheckBox1 is ticked, then downloads
/// every image and inserts it through the <c>spinsertwithblobwithinjson</c> stored procedure.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button3_Click(object sender, EventArgs e)
{
    // Template search URL: the postcode path segment (index 5 after splitting on '/') and the
    // trailing "pn=" page number are replaced for every request.
    const string wholeurl = "https://www.zoopla.co.uk/for-sale/property/sw19/?page_size=1&q=ab&results_sort=newest_listings&search_source=home&radius=0&pn=0";
    const string imageStartMarker = "src=\"https://lid.zoocdn.com";
    const string imageAttributePrefix = "src=\"";
    const string imageEndMarker = ".jpg";
    const string detaildescriptionsearch = " £";

    // ---- 1. Load the postcodes to process ----------------------------------------------------
    List<Postcode> postcodelist = new List<Postcode>();
    try
    {
        string myconnpostcodeStr = ConfigurationManager.ConnectionStrings["estateportalConnectionString"].ConnectionString;
        using (MySqlConnection connpostcode = new MySqlConnection(myconnpostcodeStr))
        using (MySqlCommand mypostcodecmd = new MySqlCommand("SELECT * FROM postcodes WHERE TRIM(POSTCODE) = 'SN'", connpostcode))
        {
            mypostcodecmd.CommandType = System.Data.CommandType.Text;
            connpostcode.Open();

            // The using blocks close the reader and connection even when a read or cast throws.
            using (MySqlDataReader rdrpostcode = mypostcodecmd.ExecuteReader())
            {
                while (rdrpostcode.Read())
                {
                    postcodelist.Add(new Postcode
                    {
                        postcodeindex = (int)rdrpostcode["indexpostcode"],
                        postcode = (string)rdrpostcode["postcode"],
                        nameofplace = (string)rdrpostcode["codeareadescription"]
                    });
                }
            }
        }
    }
    catch (Exception postcodeex)
    {
        // Best effort, as before: report the failure and continue with whatever was loaded.
        Response.Write("Error Message = " + postcodeex.Message);
    }

    foreach (var testpostname in postcodelist)
    {
        Response.Write("<br/>Post Code = " + testpostname.postcode + " Post Code Name Of Place = " + testpostname.nameofplace);
    }

    // ---- 2. Scrape the image URLs from each postcode's results page --------------------------
    List<imagedownloadpath> downloadpath = new List<imagedownloadpath>();

    // Everything up to and including "pn=" is constant; only the page number after it changes.
    int splitpos = wholeurl.IndexOf("pn=", StringComparison.Ordinal);
    string urlUpToPageNumber = wholeurl.Substring(0, splitpos + 3);

    foreach (var currentpostcode in postcodelist)
    {
        // Invariant lower-casing: this is a URL segment, not user-facing text.
        string postcodeSegment = currentpostcode.postcode.ToLowerInvariant().Trim();

        // Only the first page (pn=1) is fetched for now; widen the bound to walk more pages.
        for (int iq = 0; iq <= 0; iq++)
        {
            string[] urlparts = (urlUpToPageNumber + (iq + 1).ToString()).Split('/');
            urlparts[5] = postcodeSegment;
            string correctpostcodeurl = String.Join("/", urlparts);

            // CustomWebClient overrides the request headers; a plain WebClient ends up with a
            // network limitation error here (per the original author's note).
            string htmlpage;
            using (WebClient pageClient = new CustomWebClient())
            {
                pageClient.Headers[HttpRequestHeader.Authorization] = "Basic ";
                htmlpage = pageClient.DownloadString(correctpostcodeurl);
            }

            // Write the URL that was requested. The original wrote the whole downloaded HTML
            // document here, which injected a third-party page into this response.
            Response.Write("<br/>url used = " + Server.HtmlEncode(correctpostcodeurl) + "<br/>");

            // -- image URLs: src="https://lid.zoocdn.com ... .jpg
            int hitNumber = 0;
            int ihits = 0;
            while ((ihits = htmlpage.IndexOf(imageStartMarker, ihits, StringComparison.Ordinal)) != -1)
            {
                int ihitsendpos = htmlpage.IndexOf(imageEndMarker, ihits, StringComparison.Ordinal);
                if (ihitsendpos == -1)
                {
                    // No terminating ".jpg" after this hit, so no later hit can have one either.
                    // The original passed a negative length to Substring here and threw.
                    break;
                }

                hitNumber++;
                int urlStart = ihits + imageAttributePrefix.Length;      // skip src="
                int urlEnd = ihitsendpos + imageEndMarker.Length;        // keep the .jpg
                string retString = htmlpage.Substring(urlStart, urlEnd - urlStart);
                ihits++; // step past this hit so the next search finds the following one

                downloadpath.Add(new imagedownloadpath { downloadpath = retString, detaildescriptionpath = "" });
                Response.Write("<br/>Hit Number = " + hitNumber + " Hit Positions = " + ihits + " Text Returned = " + Server.HtmlEncode(retString));
            }

            // -- text following each " £" up to the end of that line (echoed only, not stored)
            int myiter = 0;
            int detailspos = 0;
            while ((detailspos = htmlpage.IndexOf(detaildescriptionsearch, detailspos, StringComparison.Ordinal)) != -1)
            {
                int textStart = detailspos + detaildescriptionsearch.Length;
                int lineEnd = htmlpage.IndexOf('\n', textStart);
                if (lineEnd == -1)
                {
                    // Last line has no trailing newline: take the rest of the page instead of
                    // passing a negative length to Substring.
                    lineEnd = htmlpage.Length;
                }

                string returndetails = htmlpage.Substring(textStart, lineEnd - textStart);
                detailspos++;
                myiter++;
                Response.Write("<br/>Hit Number = " + myiter + " Hit Positions = " + detailspos + " Text Returned = " + Server.HtmlEncode(returndetails));
            }

            Response.Write("<br/>Current Page Number = " + iq);
        }
    }

    // ---- 3. Optionally save every image to disk ----------------------------------------------
    if (CheckBox1.Checked)
    {
        int fileindex = 23456;
        using (WebClient client = new WebClient())
        {
            foreach (var pathlistitem in downloadpath)
            {
                fileindex += 1;
                string fileName = "C:\\Compress\\" + "downloaded " + fileindex.ToString() + ".jpg";

                // The stream must outlive the Bitmap, so the Bitmap is the inner using block.
                using (Stream streamdata = client.OpenRead(pathlistitem.downloadpath))
                using (Bitmap bitmap = new Bitmap(streamdata))
                {
                    bitmap.Save(fileName);
                }
            }
        }
    }

    // ---- 4. Download every image into memory -------------------------------------------------
    List<jsonlongblobarray> jsonblobarray = new List<jsonlongblobarray>();
    using (WebClient jsonclient = new WebClient())
    {
        foreach (var pathlistitem in downloadpath)
        {
            byte[] imagebytes = jsonclient.DownloadData(pathlistitem.downloadpath);

            // NOTE(review): Mypostalcodeplace receives the image URL, not a place name,
            // exactly as in the original - confirm that is what the stored procedure expects.
            jsonblobarray.Add(new jsonlongblobarray
            {
                Myindex = Guid.NewGuid(),
                Imagelongblob = imagebytes,
                Myguid = Guid.NewGuid(),
                Mypostalcodeplace = pathlistitem.downloadpath
            });
        }
    }

    // The original also serialised the whole list (all image bytes) to a JSON string that was
    // never used; that allocation has been removed. MySQL is called once per image instead.

    // ---- 5. Insert each image through the stored procedure -----------------------------------
    string connStr = ConfigurationManager.ConnectionStrings["estateportalConnectionString"].ConnectionString;
    if (jsonblobarray.Count > 0)
    {
        using (MySqlConnection myConn = new MySqlConnection(connStr))
        using (MySqlCommand cmd = new MySqlCommand("spinsertwithblobwithinjson", myConn))
        {
            cmd.CommandType = System.Data.CommandType.StoredProcedure;
            cmd.Parameters.Add("@myjson", MySqlDbType.LongBlob);
            cmd.Parameters.Add("@myguid", MySqlDbType.VarChar, 36);
            cmd.Parameters.Add("@imageindex", MySqlDbType.VarChar, 36);
            cmd.Parameters.Add("@postalcodeplace", MySqlDbType.VarChar, 255);

            // One connection for the whole batch instead of an open/close per row.
            myConn.Open();
            foreach (jsonlongblobarray imageRecord in jsonblobarray)
            {
                // Use the identifiers generated with the record. The original generated a
                // second, unrelated pair of GUIDs at insert time.
                cmd.Parameters["@myguid"].Value = imageRecord.Myguid;
                cmd.Parameters["@myjson"].Value = imageRecord.Imagelongblob;
                cmd.Parameters["@postalcodeplace"].Value = imageRecord.Mypostalcodeplace;
                cmd.Parameters["@imageindex"].Value = imageRecord.Myindex;

                // ExecuteNonQuery fits an insert; the original opened a data reader per row
                // and never closed it.
                cmd.ExecuteNonQuery();
            }
        }
    }
}