/// <summary>
/// Copies the fields of one XML-format result element into a new <see cref="GoogleSearchResult"/>.
/// </summary>
/// <param name="r">
/// Result element to read. Its <c>MIME</c> attribute and its <c>U</c>, <c>T</c> and <c>S</c>
/// child elements are consulted.
/// </param>
/// <returns>
/// A new result object. A property is only assigned when the corresponding attribute or element exists.
/// </returns>
private GoogleSearchResult ParseXmlResult(XElement r)
{
    // Look everything up first; the lookups are independent of each other.
    var mimeAttribute = r.Attribute("MIME");
    var urlElement = r.Element("U");
    var titleElement = r.Element("T");
    // Per the original author's note: "S" won't work for promoted results,
    // but they are just shown inline with normal results.
    var snippetElement = r.Element("S");

    var result = new GoogleSearchResult();

    if (mimeAttribute != null)
    {
        result.Mime = mimeAttribute.Value;
    }

    if (urlElement != null)
    {
        result.Url = urlElement.Value;
    }

    if (titleElement != null)
    {
        result.Title = titleElement.Value;
    }

    if (snippetElement != null)
    {
        result.Description = snippetElement.Value;
    }

    return result;
}
/// <summary>
/// Converts one CSE (Atom) result entry into a <see cref="GoogleSearchResult"/>.
/// </summary>
/// <param name="searchResult">
/// Entry element to read. <c>title</c>, <c>link/@href</c> and <c>summary</c> are looked up in the
/// element's default namespace; <c>mime</c> is looked up in the namespace bound to the <c>cse</c> prefix.
/// </param>
/// <returns>
/// A new result object. A property is only assigned when the corresponding node exists.
/// </returns>
private GoogleSearchResult ParseCseResult(XElement searchResult)
{
    var nsAtom = searchResult.GetDefaultNamespace();
    // GetNamespaceOfPrefix yields null when no "cse" prefix is in scope; adding a local name
    // to a null XNamespace throws, so the mime lookup below is guarded.
    var nsCse = searchResult.GetNamespaceOfPrefix("cse");

    var tmpResult = new GoogleSearchResult();

    var title = searchResult.Element(nsAtom + "title");
    if (title != null)
    {
        tmpResult.Title = title.Value;
    }

    // Resolve the href attribute once instead of querying it twice.
    var href = searchResult.Element(nsAtom + "link")?.Attribute("href");
    if (href != null)
    {
        tmpResult.Url = href.Value;
    }

    var description = searchResult.Element(nsAtom + "summary");
    if (description != null)
    {
        tmpResult.Description = description.Value;
    }

    var mime = nsCse == null ? null : searchResult.Element(nsCse + "mime");
    if (mime != null)
    {
        tmpResult.Mime = mime.Value;
    }

    return tmpResult;
}
/// <summary>
/// Used to search multiple pages and combine them with details: loads the response at
/// <paramref name="url"/>, collects results, promotions and labels (plus total count and search
/// time for CSE), and appends whatever <c>RecursiveResults</c> returns for the next page when
/// more than one page is allowed.
/// </summary>
/// <param name="url">Address of the first response document; passed to <c>XDocument.Load</c>.</param>
/// <returns>
/// The combined data. The collections are always non-null; they stay empty when the document
/// contains nothing usable.
/// </returns>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    // The responses are machine-generated, so numbers must not be parsed with the
    // current (possibly comma-decimal) culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (Method == GoogleSearchMethod.CSE)
    {
        var xResults = XDocument.Load(url);
        var data = xResults.Root;
        if (data == null)
        {
            return ret;
        }

        var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
        var nsCse = data.GetNamespaceOfPrefix("cse");
        var nsAtom = data.GetDefaultNamespace();

        // Search information: total result count and search time.
        var searchInformation = data.Element(nsCse + "searchInformation");
        if (searchInformation != null)
        {
            var totalResults = searchInformation.Element(nsCse + "totalResults");
            if (totalResults != null)
            {
                ret.TotalResults = long.Parse(totalResults.Value, invariant);
            }

            var searchTime = searchInformation.Element(nsCse + "searchTime");
            if (searchTime != null)
            {
                ret.SearchTime = double.Parse(searchTime.Value, invariant);
            }
        }

        // Labels: facet/item "label" attribute -> "anchor" attribute.
        foreach (var facet in data.Descendants(nsCse + "facet"))
        {
            var item = facet.Element(nsCse + "item");
            if (item == null)
            {
                continue;
            }

            // The XAttribute -> string conversion yields null for a missing attribute
            // instead of throwing.
            var key = (string)item.Attribute("label");
            if (key == null)
            {
                // A dictionary key cannot be null; an item without a label is skipped.
                continue;
            }

            ret.Labels[key] = (string)item.Attribute("anchor");
        }

        // Promotions.
        foreach (var promotion in data.Descendants(nsCse + "promotion"))
        {
            var tmpResult = new GoogleSearchResult();

            var title = promotion.Element(nsAtom + "title");
            if (title != null)
            {
                tmpResult.Title = title.Value;
            }

            var href = promotion.Element(nsAtom + "link")?.Attribute("href");
            if (href != null)
            {
                tmpResult.Url = href.Value;
            }

            // The description is read from the bodyLine element's "title" attribute.
            var bodyTitle = promotion.Element(nsCse + "bodyLine")?.Attribute("title");
            if (bodyTitle != null)
            {
                tmpResult.Description = bodyTitle.Value;
            }

            ret.Promotions.Add(tmpResult);
        }

        // Search results of this page.
        ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

        if (1 < _maxPages)
        {
            var nextPage = data.Elements(nsOpenSearch + "Query")
                .FirstOrDefault(e => (string)e.Attribute("role") == "cse:nextPage");
            var nextStart = nextPage == null ? null : (string)nextPage.Attribute("startIndex");
            if (nextStart != null)
            {
                // Temporarily point the "start" option at the next page, then put it back.
                string oldstart;
                Options.TryGetValue("start", out oldstart);
                Options["start"] = nextStart;
                try
                {
                    var nextUrl = QueryUrl();
                    ret.Results.AddRange(RecursiveResults(nextUrl, 2));
                }
                finally
                {
                    // Restore the caller's option even when fetching the next page throws.
                    if (oldstart != null)
                    {
                        Options["start"] = oldstart;
                    }
                    else
                    {
                        Options.Remove("start");
                    }
                }
            }
        }
    }

    if (Method == GoogleSearchMethod.XML)
    {
        var xResults = XDocument.Load(url);

        // Labels: FacetItem "label" element -> "anchor_text" element.
        foreach (var facetItem in xResults.Descendants("FacetItem"))
        {
            var key = facetItem.Element("label");
            if (key == null)
            {
                continue;
            }

            var lblDesc = facetItem.Element("anchor_text");
            if (lblDesc != null)
            {
                ret.Labels[key.Value] = lblDesc.Value;
            }
        }

        var data = xResults.Descendants("RES").FirstOrDefault();
        if (data == null)
        {
            return ret;
        }

        var totalResults = data.Element("M");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, invariant);
        }

        foreach (var r in data.Descendants("R"))
        {
            var tmpResults = ParseXmlResult(r);
            var slR = r.Element("SL_RESULTS");
            if (slR == null)
            {
                ret.Results.Add(tmpResults);
                continue;
            }

            // An SL_RESULTS child marks a promotion.
            var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
            if (bodyLine == null)
            {
                // NOTE(review): a promotion without BODY_LINE is added to neither list
                // (unchanged from the original behavior) - confirm this is intended.
                continue;
            }

            var t = bodyLine.Descendants("T").FirstOrDefault();
            if (t != null)
            {
                tmpResults.Description = t.Value;
            }

            ret.Promotions.Add(tmpResults);
        }

        // NB/NU is appended to the Google host and handed to RecursiveResults
        // (presumably the relative URL of the next page).
        var nu = data.Element("NB")?.Element("NU");
        if (nu != null && 1 < _maxPages)
        {
            ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
        }
    }

    return ret;
}
/// <summary>
/// Creates a <see cref="GoogleSearchResult"/> from a single XML-format result element.
/// </summary>
/// <param name="r">
/// Element to read: the <c>MIME</c> attribute plus the <c>U</c> (URL), <c>T</c> (title) and
/// <c>S</c> (description) child elements.
/// </param>
/// <returns>
/// The populated result. Properties whose source node is missing are not assigned.
/// </returns>
private GoogleSearchResult ParseXmlResult(XElement r)
{
    var parsed = new GoogleSearchResult();

    var mime = r.Attribute("MIME");
    if (mime != null)
    {
        parsed.Mime = mime.Value;
    }

    var url = r.Element("U");
    if (url != null)
    {
        parsed.Url = url.Value;
    }

    var title = r.Element("T");
    if (title != null)
    {
        parsed.Title = title.Value;
    }

    // Original author's note: this won't work for promoted results,
    // but they are just shown inline with normal results.
    var summary = r.Element("S");
    if (summary != null)
    {
        parsed.Description = summary.Value;
    }

    return parsed;
}
/// <summary>
/// Maps a single CSE (Atom) entry element onto a new <see cref="GoogleSearchResult"/>.
/// </summary>
/// <param name="searchResult">
/// Entry element. Title, link (<c>href</c> attribute) and summary are read from the element's
/// default namespace; the MIME type is read from the <c>mime</c> element in the <c>cse</c> namespace.
/// </param>
/// <returns>
/// The populated result. Properties whose source node is missing are not assigned.
/// </returns>
private GoogleSearchResult ParseCseResult(XElement searchResult)
{
    var nsCse = searchResult.GetNamespaceOfPrefix("cse");
    var nsAtom = searchResult.GetDefaultNamespace();
    var tmpResult = new GoogleSearchResult();

    var title = searchResult.Element(nsAtom + "title");
    if (title != null)
    {
        tmpResult.Title = title.Value;
    }

    var link = searchResult.Element(nsAtom + "link");
    if (link != null)
    {
        // Fetch the attribute once and reuse it rather than looking it up twice.
        var href = link.Attribute("href");
        if (href != null)
        {
            tmpResult.Url = href.Value;
        }
    }

    var description = searchResult.Element(nsAtom + "summary");
    if (description != null)
    {
        tmpResult.Description = description.Value;
    }

    // The "cse" prefix may not be declared in scope of this element, in which case
    // GetNamespaceOfPrefix returns null and "nsCse + name" would throw.
    if (nsCse != null)
    {
        var mime = searchResult.Element(nsCse + "mime");
        if (mime != null)
        {
            tmpResult.Mime = mime.Value;
        }
    }

    return tmpResult;
}
/// <summary>
/// Used to search multiple pages and combine them with details: loads the CSE response at
/// <paramref name="url"/>, collects search information, labels, promotions and results, and
/// appends whatever <c>RecursiveResults</c> returns for the next page when more than one page
/// is allowed.
/// </summary>
/// <param name="url">Address of the first response document; passed to <c>XDocument.Load</c>.</param>
/// <returns>
/// The combined data. The collections are always non-null; they stay empty when the document
/// contains nothing usable.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when the search method is <c>GoogleSearchMethod.XML</c>, which is no longer available.
/// </exception>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    // Fail fast: the CSE branch never runs for the XML method, so nothing is lost
    // by rejecting it before any work is done.
    if (Method == GoogleSearchMethod.XML)
    {
        throw new ArgumentException("The XML API is no longer available as it was only for Google Site Search");
    }

    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    if (Method != GoogleSearchMethod.CSE)
    {
        return ret;
    }

    var xResults = XDocument.Load(url);
    var data = xResults.Root;
    if (data == null)
    {
        return ret;
    }

    var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
    var nsCse = data.GetNamespaceOfPrefix("cse");
    var nsAtom = data.GetDefaultNamespace();

    // Search information: total result count and search time.
    var searchInformation = data.Element(nsCse + "searchInformation");
    if (searchInformation != null)
    {
        // Machine-generated numbers: the invariant culture is not affected by
        // per-user overrides of a named culture's number format.
        var culture = CultureInfo.InvariantCulture;

        var totalResults = searchInformation.Element(nsCse + "totalResults");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, culture);
        }

        var searchTime = searchInformation.Element(nsCse + "searchTime");
        if (searchTime != null)
        {
            ret.SearchTime = double.Parse(searchTime.Value, culture);
        }
    }

    // Labels: facet/item "label" attribute -> "anchor" attribute.
    foreach (var label in data.Descendants(nsCse + "facet"))
    {
        var item = label.Element(nsCse + "item");
        var key = item?.Attribute("label")?.Value;
        if (key == null)
        {
            // No item, or an item without a label: a null key would make the
            // dictionary indexer throw, so the facet is skipped.
            continue;
        }

        ret.Labels[key] = item.Attribute("anchor")?.Value;
    }

    // Promotions.
    foreach (var promotion in data.Descendants(nsCse + "promotion"))
    {
        var tmpResult = new GoogleSearchResult();

        var title = promotion.Element(nsAtom + "title");
        if (title != null)
        {
            tmpResult.Title = title.Value;
        }

        var href = promotion.Element(nsAtom + "link")?.Attribute("href");
        if (href != null)
        {
            tmpResult.Url = href.Value;
        }

        // The description is read from the bodyLine element's "title" attribute
        // (null when the attribute is absent, as before).
        var description = promotion.Element(nsCse + "bodyLine");
        if (description != null)
        {
            tmpResult.Description = description.Attribute("title")?.Value;
        }

        ret.Promotions.Add(tmpResult);
    }

    // Search results of this page.
    ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

    if (1 < _maxPages)
    {
        var nextPage = data.Elements(nsOpenSearch + "Query")
            .FirstOrDefault(e => e.Attribute("role")?.Value == "cse:nextPage");
        var nextStart = nextPage?.Attribute("startIndex")?.Value;
        if (nextStart != null)
        {
            // Temporarily point the "start" option at the next page, then put it back.
            string oldstart;
            Options.TryGetValue("start", out oldstart);
            Options["start"] = nextStart;
            try
            {
                var nextUrl = QueryUrl();
                var theRestOfThePages = RecursiveResults(nextUrl, 2);
                ret.Results.AddRange(theRestOfThePages);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Deliberately ignored: it won't search past 100 results.
            }
            finally
            {
                // Restore the caller's option even when an unexpected exception escapes.
                if (oldstart != null)
                {
                    Options["start"] = oldstart;
                }
                else
                {
                    Options.Remove("start");
                }
            }
        }
    }

    return ret;
}
/// <summary>
/// Used to search multiple pages and combine them with details. Depending on the configured
/// search method, the response at <paramref name="url"/> is read as a CSE (Atom) document or
/// as an XML-API document. Results, promotions and labels are collected, and the output of
/// <c>RecursiveResults</c> for the next page is appended when more than one page is allowed.
/// </summary>
/// <param name="url">Address of the first response document; passed to <c>XDocument.Load</c>.</param>
/// <returns>
/// The combined data. The collections are always non-null; they stay empty when the document
/// contains nothing usable.
/// </returns>
private GoogleSearchResults RecursiveResultDetailed(string url)
{
    var ret = new GoogleSearchResults
    {
        Results = new List<GoogleSearchResult>(),
        Promotions = new List<GoogleSearchResult>(),
        Labels = new Dictionary<string, string>(),
        TotalResults = 0,
        SearchTime = 0
    };

    // Numbers in the response are machine-generated; parsing them with the current
    // culture would misread e.g. "0.25" on comma-decimal systems.
    var numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    if (Method == GoogleSearchMethod.CSE)
    {
        var xResults = XDocument.Load(url);
        var data = xResults.Root;
        if (data == null)
        {
            return ret;
        }

        var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
        var nsCse = data.GetNamespaceOfPrefix("cse");
        var nsAtom = data.GetDefaultNamespace();

        // Search information: total result count and search time.
        var searchInformation = data.Element(nsCse + "searchInformation");
        if (searchInformation != null)
        {
            var totalResults = searchInformation.Element(nsCse + "totalResults");
            if (totalResults != null)
            {
                ret.TotalResults = long.Parse(totalResults.Value, numberFormat);
            }

            var searchTime = searchInformation.Element(nsCse + "searchTime");
            if (searchTime != null)
            {
                ret.SearchTime = double.Parse(searchTime.Value, numberFormat);
            }
        }

        // Labels: facet/item "label" attribute -> "anchor" attribute.
        foreach (var label in data.Descendants(nsCse + "facet"))
        {
            var item = label.Element(nsCse + "item");
            var key = item?.Attribute("label")?.Value;
            if (key == null)
            {
                // No item, or an item without a label: nothing to key the entry by.
                continue;
            }

            ret.Labels[key] = item.Attribute("anchor")?.Value;
        }

        // Promotions.
        foreach (var promotion in data.Descendants(nsCse + "promotion"))
        {
            var tmpResult = new GoogleSearchResult();

            var title = promotion.Element(nsAtom + "title");
            if (title != null)
            {
                tmpResult.Title = title.Value;
            }

            var href = promotion.Element(nsAtom + "link")?.Attribute("href");
            if (href != null)
            {
                tmpResult.Url = href.Value;
            }

            // The description is read from the bodyLine element's "title" attribute;
            // a bodyLine without that attribute no longer raises a NullReferenceException.
            var bodyTitle = promotion.Element(nsCse + "bodyLine")?.Attribute("title");
            if (bodyTitle != null)
            {
                tmpResult.Description = bodyTitle.Value;
            }

            ret.Promotions.Add(tmpResult);
        }

        // Search results of this page.
        ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));

        if (1 < _maxPages)
        {
            var nextPage = data.Elements(nsOpenSearch + "Query")
                .FirstOrDefault(e => e.Attribute("role")?.Value == "cse:nextPage");
            var nextStart = nextPage?.Attribute("startIndex")?.Value;
            if (nextStart != null)
            {
                // Temporarily point the "start" option at the next page, then put it back.
                string oldstart;
                Options.TryGetValue("start", out oldstart);
                Options["start"] = nextStart;
                try
                {
                    var nextUrl = QueryUrl();
                    var theRestOfThePages = RecursiveResults(nextUrl, 2);
                    ret.Results.AddRange(theRestOfThePages);
                }
                finally
                {
                    // Restore the caller's option even when fetching the next page throws.
                    if (oldstart != null)
                    {
                        Options["start"] = oldstart;
                    }
                    else
                    {
                        Options.Remove("start");
                    }
                }
            }
        }
    }

    if (Method == GoogleSearchMethod.XML)
    {
        var xResults = XDocument.Load(url);

        // Labels: FacetItem "label" element -> "anchor_text" element.
        foreach (var label in xResults.Descendants("FacetItem"))
        {
            var key = label.Element("label");
            var lblDesc = label.Element("anchor_text");
            if (key != null && lblDesc != null)
            {
                ret.Labels[key.Value] = lblDesc.Value;
            }
        }

        var data = xResults.Descendants("RES").FirstOrDefault();
        if (data == null)
        {
            return ret;
        }

        var totalResults = data.Element("M");
        if (totalResults != null)
        {
            ret.TotalResults = long.Parse(totalResults.Value, numberFormat);
        }

        foreach (var r in data.Descendants("R"))
        {
            var tmpResults = ParseXmlResult(r);
            var slR = r.Element("SL_RESULTS");
            if (slR == null)
            {
                // Ordinary result.
                ret.Results.Add(tmpResults);
                continue;
            }

            // An SL_RESULTS child marks a promotion.
            var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
            if (bodyLine == null)
            {
                // NOTE(review): a promotion without BODY_LINE is added to neither list
                // (unchanged from the original behavior) - confirm this is intended.
                continue;
            }

            var t = bodyLine.Descendants("T").FirstOrDefault();
            if (t != null)
            {
                tmpResults.Description = t.Value;
            }

            ret.Promotions.Add(tmpResults);
        }

        var nb = data.Element("NB");
        if (nb == null)
        {
            return ret;
        }

        var nu = nb.Element("NU");
        if (nu == null)
        {
            return ret;
        }

        if (1 < _maxPages)
        {
            // NU is appended to the Google host and handed to RecursiveResults
            // (presumably the relative URL of the next page).
            ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
        }
    }

    return ret;
}