/// <summary>
/// Downloads the first result page as JSON from <paramref name="url"/>, lets
/// <c>RecursiveResults</c> collect the following pages, and combines those results with
/// the promotions, labels, total result count and search time reported by the first page.
/// </summary>
/// <param name="url">The URL of the first result page to download.</param>
/// <returns>
/// The combined search results. When the download or the deserialization fails, the
/// returned object has empty collections and zeroed totals.
/// </returns>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    GoogleJsonFormat jResult = null;
    try
    {
        using (var webClient = new WebClient())
        {
            webClient.Encoding = Encoding.UTF8;
            var json = webClient.DownloadString(url);
            jResult = JsonConvert.DeserializeObject<GoogleJsonFormat>(json);
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: this happens if you reach your daily or user limit.
        // The empty result set built above is returned in that case.
    }

    if (jResult == null)
    {
        return ret;
    }

    // Handle the first page.
    if (jResult.items != null)
    {
        ret.Results = jResult.items.Select(r => new GoogleSearchResult
        {
            Mime = r.mime,
            Url = r.link,
            Title = r.title,
            Description = r.snippet
        }).ToList();
    }

    if (jResult.queries != null && jResult.queries.nextPage != null && jResult.queries.nextPage.Any())
    {
        // There is a next page: temporarily point the "start" option at it so that
        // QueryUrl() builds the URL of that page.
        string oldstart = null;
        if (Options.ContainsKey("start"))
        {
            oldstart = Options["start"];
        }

        Options["start"] = jResult.queries.nextPage[0].startIndex.ToString();
        try
        {
            // Let the simpler function handle the next pages.
            var nextUrl = QueryUrl();
            var theRestOfThePages = RecursiveResults(nextUrl, 2);
            ret.Results.AddRange(theRestOfThePages);
        }
        catch (ArgumentOutOfRangeException)
        {
            // It won't search past 100 results.
        }
        finally
        {
            // Always put the caller's "start" option back, even when paging fails
            // with an exception that is not handled here.
            if (oldstart != null)
            {
                Options["start"] = oldstart;
            }
            else
            {
                Options.Remove("start");
            }
        }
    }

    if (jResult.promotions != null)
    {
        ret.Promotions = jResult.promotions.Select(r => new GoogleSearchResult
        {
            Mime = "text/html",
            Url = r.link,
            Title = r.title,
            // A promotion without body lines simply has no description.
            Description = r.bodyLines != null && r.bodyLines.Any() ? r.bodyLines[0].title : null
        }).ToList();
    }

    if (jResult.context != null && jResult.context.facets != null)
    {
        foreach (var labelGroup in jResult.context.facets)
        {
            if (labelGroup == null)
            {
                continue;
            }

            foreach (var label in labelGroup)
            {
                if (label == null || label.label == null)
                {
                    continue;
                }

                // Indexer instead of Add: a label that appears twice must not throw.
                ret.Labels[label.label] = label.anchor;
            }
        }
    }

    if (jResult.searchInformation != null)
    {
        // The count arrives as text; parse it culture-independently and keep 0 when
        // it is missing or malformed.
        long totalResults;
        if (long.TryParse(
                jResult.searchInformation.totalResults,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out totalResults))
        {
            ret.TotalResults = totalResults;
        }

        ret.SearchTime = jResult.searchInformation.searchTime;
    }

    return ret;
}
/// <summary>
/// Loads the first result page as XML from <paramref name="url"/> and combines its
/// search information, labels, promotions and entries with the results of the
/// following pages, which are collected by <c>RecursiveResults</c>.
/// </summary>
/// <param name="url">The URL of the first result page to load.</param>
/// <returns>
/// The combined search results. Nothing is loaded and an empty result set is returned
/// when <c>Method</c> is not <c>GoogleSearchMethod.CSE</c>; an empty result set is also
/// returned when the loaded document has no root element.
/// </returns>
/// <exception cref="ArgumentException">
/// <c>Method</c> is <c>GoogleSearchMethod.XML</c>, which is no longer available.
/// </exception>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    if (Method == GoogleSearchMethod.XML)
    {
        throw new ArgumentException("The XML API is no longer available as it was only for Google Site Search");
    }

    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    if (Method != GoogleSearchMethod.CSE)
    {
        return ret;
    }

    var xResults = XDocument.Load(url);
    var data = xResults.Root;
    if (data == null)
    {
        return ret;
    }

    var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
    var nsCse = data.GetNamespaceOfPrefix("cse");
    var nsAtom = data.GetDefaultNamespace();

    // Get the search information.
    var searchInformation = data.Element(nsCse + "searchInformation");
    if (searchInformation != null)
    {
        // The numbers are machine-formatted, so parse them independently of the
        // culture (and regional overrides) of the current user.
        var culture = CultureInfo.InvariantCulture;

        var totalResults = searchInformation.Element(nsCse + "totalResults");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, culture);
        }

        var searchTime = searchInformation.Element(nsCse + "searchTime");
        if (searchTime != null)
        {
            ret.SearchTime = double.Parse(searchTime.Value, culture);
        }
    }

    // Get labels.
    foreach (var facet in data.Descendants(nsCse + "facet"))
    {
        var item = facet.Element(nsCse + "item");
        var key = item?.Attribute("label")?.Value;
        if (key == null)
        {
            // A dictionary key cannot be null; skip items without a label attribute.
            continue;
        }

        ret.Labels[key] = item.Attribute("anchor")?.Value;
    }

    // Get promotions.
    foreach (var promotion in data.Descendants(nsCse + "promotion"))
    {
        var tmpResult = new GoogleSearchResult();

        var title = promotion.Element(nsAtom + "title");
        if (title != null)
        {
            tmpResult.Title = title.Value;
        }

        var href = promotion.Element(nsAtom + "link")?.Attribute("href");
        if (href != null)
        {
            tmpResult.Url = href.Value;
        }

        var description = promotion.Element(nsCse + "bodyLine");
        if (description != null)
        {
            tmpResult.Description = description.Attribute("title")?.Value;
        }

        ret.Promotions.Add(tmpResult);
    }

    // Get search results of the first page.
    ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

    if (1 < _maxPages)
    {
        // Query elements without a role attribute are ignored instead of crashing.
        var nextPage = data.Elements(nsOpenSearch + "Query")
            .FirstOrDefault(e => e.Attribute("role")?.Value == "cse:nextPage");
        var nextStart = nextPage?.Attribute("startIndex")?.Value;

        if (nextStart != null)
        {
            // Temporarily point the "start" option at the next page so that
            // QueryUrl() builds the URL of that page.
            string oldstart = null;
            if (Options.ContainsKey("start"))
            {
                oldstart = Options["start"];
            }

            Options["start"] = nextStart;
            try
            {
                var nextUrl = QueryUrl();
                var theRestOfThePages = RecursiveResults(nextUrl, 2);
                ret.Results.AddRange(theRestOfThePages);
            }
            catch (ArgumentOutOfRangeException)
            {
                // It won't search past 100 results.
            }
            finally
            {
                // Always put the caller's "start" option back, even when paging
                // fails with an exception that is not handled here.
                if (oldstart != null)
                {
                    Options["start"] = oldstart;
                }
                else
                {
                    Options.Remove("start");
                }
            }
        }
    }

    return ret;
}
/// <summary>
/// Loads the first result page as XML from <paramref name="url"/> and combines its
/// total count, labels, promotions and results with the results of the following
/// pages, which are collected by <c>RecursiveResults</c>. The document layout that is
/// read depends on <c>Method</c> (<c>GoogleSearchMethod.CSE</c> or
/// <c>GoogleSearchMethod.XML</c>).
/// </summary>
/// <param name="url">The URL of the first result page to load.</param>
/// <returns>
/// The combined search results. The result set stays empty when <c>Method</c> is
/// neither CSE nor XML, or when the CSE document has no root element.
/// </returns>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    // The numbers in the response are machine-formatted; parsing them with the
    // current culture breaks on systems that use ',' as the decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (Method == GoogleSearchMethod.CSE)
    {
        var xResults = XDocument.Load(url);
        var data = xResults.Root;
        if (data == null)
        {
            return ret;
        }

        var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
        var nsCse = data.GetNamespaceOfPrefix("cse");
        var nsAtom = data.GetDefaultNamespace();

        // Get the search information.
        var searchInformation = data.Element(nsCse + "searchInformation");
        if (searchInformation != null)
        {
            var totalResults = searchInformation.Element(nsCse + "totalResults");
            if (totalResults != null)
            {
                ret.TotalResults = long.Parse(totalResults.Value, invariant);
            }

            var searchTime = searchInformation.Element(nsCse + "searchTime");
            if (searchTime != null)
            {
                ret.SearchTime = double.Parse(searchTime.Value, invariant);
            }
        }

        // Get labels. Casting an XAttribute to string yields null when the
        // attribute is missing, so no attribute is dereferenced blindly.
        foreach (var facet in data.Descendants(nsCse + "facet"))
        {
            var item = facet.Element(nsCse + "item");
            if (item == null)
            {
                continue;
            }

            var key = (string)item.Attribute("label");
            if (key != null)
            {
                ret.Labels[key] = (string)item.Attribute("anchor");
            }
        }

        // Get promotions.
        foreach (var promotion in data.Descendants(nsCse + "promotion"))
        {
            var tmpResult = new GoogleSearchResult();

            var title = promotion.Element(nsAtom + "title");
            if (title != null)
            {
                tmpResult.Title = title.Value;
            }

            var link = promotion.Element(nsAtom + "link");
            if (link != null && link.Attribute("href") != null)
            {
                tmpResult.Url = link.Attribute("href").Value;
            }

            var description = promotion.Element(nsCse + "bodyLine");
            if (description != null)
            {
                tmpResult.Description = (string)description.Attribute("title");
            }

            ret.Promotions.Add(tmpResult);
        }

        // Get search results of the first page.
        ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

        if (1 < _maxPages)
        {
            var nextPage = data.Elements(nsOpenSearch + "Query")
                .FirstOrDefault(e => (string)e.Attribute("role") == "cse:nextPage");
            var nextStart = nextPage != null ? (string)nextPage.Attribute("startIndex") : null;

            if (nextStart != null)
            {
                // Temporarily point the "start" option at the next page so that
                // QueryUrl() builds the URL of that page.
                string oldstart = null;
                if (Options.ContainsKey("start"))
                {
                    oldstart = Options["start"];
                }

                Options["start"] = nextStart;
                try
                {
                    var nextUrl = QueryUrl();
                    var theRestOfThePages = RecursiveResults(nextUrl, 2);
                    ret.Results.AddRange(theRestOfThePages);
                }
                finally
                {
                    // Put the caller's "start" option back even when paging throws;
                    // the exception itself still propagates as before.
                    if (oldstart != null)
                    {
                        Options["start"] = oldstart;
                    }
                    else
                    {
                        Options.Remove("start");
                    }
                }
            }
        }
    }

    if (Method == GoogleSearchMethod.XML)
    {
        var xResults = XDocument.Load(url);

        // Get labels.
        foreach (var facetItem in xResults.Descendants("FacetItem"))
        {
            var key = facetItem.Element("label");
            var lblDesc = facetItem.Element("anchor_text");
            if (key != null && lblDesc != null)
            {
                ret.Labels[key.Value] = lblDesc.Value;
            }
        }

        var data = xResults.Descendants("RES").FirstOrDefault();
        if (data == null)
        {
            return ret;
        }

        var totalResults = data.Element("M");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, invariant);
        }

        foreach (var r in data.Descendants("R"))
        {
            var tmpResults = ParseXmlResult(r);
            var slR = r.Element("SL_RESULTS");
            if (slR == null)
            {
                // A plain search result.
                ret.Results.Add(tmpResults);
                continue;
            }

            // An entry with SL_RESULTS is a promotion; it is only kept when it has
            // a BODY_LINE element.
            var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
            if (bodyLine != null)
            {
                var t = bodyLine.Descendants("T").FirstOrDefault();
                if (t != null)
                {
                    tmpResults.Description = t.Value;
                }

                ret.Promotions.Add(tmpResults);
            }
        }

        // NB/NU carries the relative URL of the next page, when there is one.
        var nb = data.Element("NB");
        if (nb == null)
        {
            return ret;
        }

        var nu = nb.Element("NU");
        if (nu == null)
        {
            return ret;
        }

        if (1 < _maxPages)
        {
            ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
        }
    }

    return ret;
}
/// <summary>
/// Loads the first result page as XML from <paramref name="url"/>, reads its total
/// count, labels, promotions and results, and appends the results of the following
/// pages as returned by <c>RecursiveResults</c>. Which document layout is read is
/// decided by <c>Method</c> (<c>GoogleSearchMethod.CSE</c> or
/// <c>GoogleSearchMethod.XML</c>).
/// </summary>
/// <param name="url">The URL of the first result page to load.</param>
/// <returns>
/// The combined search results. The result set stays empty when <c>Method</c> is
/// neither CSE nor XML, or when the CSE document has no root element.
/// </returns>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    // Response numbers are machine-formatted. Parsing them with the current culture
    // misreads or rejects values such as "0.23" where ',' is the decimal separator.
    var numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    if (Method == GoogleSearchMethod.CSE)
    {
        var xResults = XDocument.Load(url);
        var data = xResults.Root;
        if (data == null)
        {
            return ret;
        }

        var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
        var nsCse = data.GetNamespaceOfPrefix("cse");
        var nsAtom = data.GetDefaultNamespace();

        // Get the search information.
        var searchInformation = data.Element(nsCse + "searchInformation");
        if (searchInformation != null)
        {
            var totalResults = searchInformation.Element(nsCse + "totalResults");
            if (totalResults != null)
            {
                ret.TotalResults = long.Parse(totalResults.Value, numberFormat);
            }

            var searchTime = searchInformation.Element(nsCse + "searchTime");
            if (searchTime != null)
            {
                ret.SearchTime = double.Parse(searchTime.Value, numberFormat);
            }
        }

        // Get labels.
        foreach (var label in data.Descendants(nsCse + "facet"))
        {
            var item = label.Element(nsCse + "item");
            if (item != null)
            {
                // The string cast returns null for a missing attribute instead of
                // throwing; items without a label cannot be used as a key.
                var key = (string)item.Attribute("label");
                var value = (string)item.Attribute("anchor");
                if (key != null)
                {
                    ret.Labels[key] = value;
                }
            }
        }

        // Get promotions.
        foreach (var promotion in data.Descendants(nsCse + "promotion"))
        {
            var tmpResult = new GoogleSearchResult();

            var title = promotion.Element(nsAtom + "title");
            if (title != null)
            {
                tmpResult.Title = title.Value;
            }

            var link = promotion.Element(nsAtom + "link");
            if (link != null)
            {
                var href = link.Attribute("href");
                if (href != null)
                {
                    tmpResult.Url = href.Value;
                }
            }

            var description = promotion.Element(nsCse + "bodyLine");
            if (description != null)
            {
                tmpResult.Description = (string)description.Attribute("title");
            }

            ret.Promotions.Add(tmpResult);
        }

        // Get search results of the first page.
        ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

        if (1 < _maxPages)
        {
            var nextPage = data.Elements(nsOpenSearch + "Query")
                .FirstOrDefault(e => (string)e.Attribute("role") == "cse:nextPage");
            if (nextPage != null)
            {
                var startIndex = (string)nextPage.Attribute("startIndex");
                if (startIndex != null)
                {
                    // Temporarily point the "start" option at the next page so that
                    // QueryUrl() builds the URL of that page.
                    string oldstart = null;
                    if (Options.ContainsKey("start"))
                    {
                        oldstart = Options["start"];
                    }

                    Options["start"] = startIndex;
                    try
                    {
                        var nextUrl = QueryUrl();
                        var theRestOfThePages = RecursiveResults(nextUrl, 2);
                        ret.Results.AddRange(theRestOfThePages);
                    }
                    finally
                    {
                        // Restore the caller's "start" option even when paging
                        // throws; the exception itself still propagates as before.
                        if (oldstart != null)
                        {
                            Options["start"] = oldstart;
                        }
                        else
                        {
                            Options.Remove("start");
                        }
                    }
                }
            }
        }
    }

    if (Method == GoogleSearchMethod.XML)
    {
        var xResults = XDocument.Load(url);

        // Get labels.
        foreach (var label in xResults.Descendants("FacetItem"))
        {
            var key = label.Element("label");
            if (key != null)
            {
                var lblDesc = label.Element("anchor_text");
                if (lblDesc != null)
                {
                    ret.Labels[key.Value] = lblDesc.Value;
                }
            }
        }

        var data = xResults.Descendants("RES").FirstOrDefault();
        if (data == null)
        {
            return ret;
        }

        var totalResults = data.Element("M");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, numberFormat);
        }

        foreach (var r in data.Descendants("R"))
        {
            var tmpResults = ParseXmlResult(r);
            var slR = r.Element("SL_RESULTS");
            if (slR != null)
            {
                // An entry with SL_RESULTS is a promotion; it is only kept when it
                // has a BODY_LINE element.
                var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
                if (bodyLine != null)
                {
                    var t = bodyLine.Descendants("T").FirstOrDefault();
                    if (t != null)
                    {
                        tmpResults.Description = t.Value;
                    }

                    ret.Promotions.Add(tmpResults);
                }
            }
            else
            {
                ret.Results.Add(tmpResults);
            }
        }

        // NB/NU carries the relative URL of the next page, when there is one.
        var nb = data.Element("NB");
        var nu = nb != null ? nb.Element("NU") : null;
        if (nu == null)
        {
            return ret;
        }

        if (1 < _maxPages)
        {
            ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
        }
    }

    return ret;
}