/// <summary>
        /// Builds a <see cref="GoogleSearchResult"/> from a single result element.
        /// </summary>
        /// <remarks>
        /// Reads the <c>MIME</c> attribute and the <c>U</c>, <c>T</c> and <c>S</c> child
        /// elements. A property is only assigned when its source node exists, so
        /// anything missing keeps the value a new <see cref="GoogleSearchResult"/> starts with.
        /// </remarks>
        /// <param name="r">The result element to read.</param>
        /// <returns>The populated search result.</returns>
        private GoogleSearchResult ParseXmlResult(XElement r)
        {
            var parsed = new GoogleSearchResult();

            // The explicit string conversions yield null when the node is absent.
            var mime = (string)r.Attribute("MIME");
            if (mime != null)
            {
                parsed.Mime = mime;
            }

            var url = (string)r.Element("U");
            if (url != null)
            {
                parsed.Url = url;
            }

            var title = (string)r.Element("T");
            if (title != null)
            {
                parsed.Title = title;
            }

            // won't work for promoted results but they are just shown inline with normal results
            var snippet = (string)r.Element("S");
            if (snippet != null)
            {
                parsed.Description = snippet;
            }

            return parsed;
        }
        /// <summary>
        /// Builds a <see cref="GoogleSearchResult"/> from a single CSE feed entry.
        /// </summary>
        /// <remarks>
        /// Title, link (<c>href</c> attribute) and summary are looked up in the element's
        /// default namespace; the mime type is looked up in the namespace bound to the
        /// <c>cse</c> prefix. Properties whose source node is missing are not assigned.
        /// </remarks>
        /// <param name="searchResult">The entry element to read.</param>
        /// <returns>The populated search result.</returns>
        private GoogleSearchResult ParseCseResult(XElement searchResult)
        {
            var cseNamespace  = searchResult.GetNamespaceOfPrefix("cse");
            var atomNamespace = searchResult.GetDefaultNamespace();
            var parsed        = new GoogleSearchResult();

            var titleElement = searchResult.Element(atomNamespace + "title");
            if (titleElement != null)
            {
                parsed.Title = titleElement.Value;
            }

            var linkElement = searchResult.Element(atomNamespace + "link");
            var hrefAttribute = linkElement == null ? null : linkElement.Attribute("href");
            if (hrefAttribute != null)
            {
                parsed.Url = hrefAttribute.Value;
            }

            var summaryElement = searchResult.Element(atomNamespace + "summary");
            if (summaryElement != null)
            {
                parsed.Description = summaryElement.Value;
            }

            var mimeElement = searchResult.Element(cseNamespace + "mime");
            if (mimeElement != null)
            {
                parsed.Mime = mimeElement.Value;
            }

            return parsed;
        }
        /// <summary>
        /// Used to search multiple pages and combine them with details
        /// </summary>
        /// <remarks>
        /// Loads the document at <paramref name="url"/> and, depending on <c>Method</c>,
        /// reads it as a CSE feed or as an XML result document. Labels, promotions,
        /// the total result count and (CSE only) the search time are taken from this
        /// first page. When <c>_maxPages</c> is greater than 1 and the page points to a
        /// following page, the results of <c>RecursiveResults(..., 2)</c> are appended.
        /// For CSE, <c>Options["start"]</c> is changed while the following pages are
        /// fetched and is always put back afterwards, even if fetching throws.
        /// </remarks>
        /// <param name="url">Address of the first result page; passed to <c>XDocument.Load</c>.</param>
        /// <returns>The combined results; an empty container when the document has no usable root.</returns>
        private GoogleSearchResults RecursiveResultDetailed(string url)
        {
            // The response is machine generated, so numbers must not be parsed with
            // whatever culture the current thread happens to run under.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var ret = new GoogleSearchResults
            {
                Results = new List<GoogleSearchResult>(),
                Promotions = new List<GoogleSearchResult>(),
                Labels = new Dictionary<string, string>(),
                TotalResults = 0,
                SearchTime = 0
            };
            if (Method == GoogleSearchMethod.CSE)
            {
                var xResults = XDocument.Load(url);
                var data = xResults.Root;
                if (data == null)
                {
                    return ret;
                }
                var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
                var nsCse = data.GetNamespaceOfPrefix("cse");
                var nsAtom = data.GetDefaultNamespace();

                //get the search information
                var searchInformation = data.Element(nsCse + "searchInformation");
                if (searchInformation != null)
                {
                    var totalResults = searchInformation.Element(nsCse + "totalResults");
                    if (totalResults != null)
                    {
                        ret.TotalResults = long.Parse(totalResults.Value, invariant);
                    }
                    var searchTime = searchInformation.Element(nsCse + "searchTime");
                    if (searchTime != null)
                    {
                        ret.SearchTime = double.Parse(searchTime.Value, invariant);
                    }
                }

                //Get labels
                foreach (var label in data.Descendants(nsCse + "facet"))
                {
                    var item = label.Element(nsCse + "item");
                    if (item == null)
                    {
                        continue;
                    }
                    // The string conversion yields null for a missing attribute. A label is
                    // only recorded when both parts exist, as in the XML branch below.
                    var key = (string)item.Attribute("label");
                    var value = (string)item.Attribute("anchor");
                    if (key != null && value != null)
                    {
                        ret.Labels[key] = value;
                    }
                }

                //Get Promotions
                foreach (var promotion in data.Descendants(nsCse + "promotion"))
                {
                    var tmpResult = new GoogleSearchResult();
                    var title = promotion.Element(nsAtom + "title");
                    if (title != null)
                    {
                        tmpResult.Title = title.Value;
                    }
                    var link = promotion.Element(nsAtom + "link");
                    var href = link == null ? null : (string)link.Attribute("href");
                    if (href != null)
                    {
                        tmpResult.Url = href;
                    }
                    var bodyLine = promotion.Element(nsCse + "bodyLine");
                    var bodyTitle = bodyLine == null ? null : (string)bodyLine.Attribute("title");
                    if (bodyTitle != null)
                    {
                        tmpResult.Description = bodyTitle;
                    }
                    ret.Promotions.Add(tmpResult);
                }

                //get search Results
                ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));
                if (1 < _maxPages)
                {
                    // Query elements without a role attribute are simply not a match.
                    var nextPage = data.Elements(nsOpenSearch + "Query")
                                       .FirstOrDefault(e => (string)e.Attribute("role") == "cse:nextPage");
                    var nextStart = nextPage == null ? null : (string)nextPage.Attribute("startIndex");
                    if (nextStart != null)
                    {
                        // Remember the caller's paging option so it can be put back afterwards.
                        var hadStart = Options.ContainsKey("start");
                        var oldstart = hadStart ? Options["start"] : null;
                        Options["start"] = nextStart;
                        try
                        {
                            var nextUrl = QueryUrl();
                            var theRestOfThePages = RecursiveResults(nextUrl, 2);
                            ret.Results.AddRange(theRestOfThePages);
                        }
                        finally
                        {
                            // Restore even when fetching the following pages failed.
                            if (hadStart)
                            {
                                Options["start"] = oldstart;
                            }
                            else
                            {
                                Options.Remove("start");
                            }
                        }
                    }
                }
            }
            if (Method == GoogleSearchMethod.XML)
            {
                var xResults = XDocument.Load(url);
                //get labels
                foreach (var label in xResults.Descendants("FacetItem"))
                {
                    var key = label.Element("label");
                    var lblDesc = label.Element("anchor_text");
                    if (key != null && lblDesc != null)
                    {
                        ret.Labels[key.Value] = lblDesc.Value;
                    }
                }

                var data = xResults.Descendants("RES").FirstOrDefault();
                if (data == null)
                {
                    return ret;
                }
                var totalResults = data.Element("M");
                if (totalResults != null)
                {
                    ret.TotalResults = long.Parse(totalResults.Value, invariant);
                }
                foreach (var r in data.Descendants("R"))
                {
                    var tmpResults = ParseXmlResult(r);
                    var slR = r.Element("SL_RESULTS");
                    if (slR == null)
                    {
                        ret.Results.Add(tmpResults);
                        continue;
                    }
                    //promotion
                    // NOTE(review): an R that has SL_RESULTS but no BODY_LINE ends up in
                    // neither list - confirm this is intended.
                    var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
                    if (bodyLine != null)
                    {
                        var t = bodyLine.Descendants("T").FirstOrDefault();
                        if (t != null)
                        {
                            tmpResults.Description = t.Value;
                        }
                        ret.Promotions.Add(tmpResults);
                    }
                }
                var nb = data.Element("NB");
                if (nb == null)
                {
                    return ret;
                }
                var nu = nb.Element("NU");
                if (nu == null)
                {
                    return ret;
                }
                if (1 < _maxPages)
                {
                    // NU holds a host-relative address, so the host is prepended.
                    ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
                }
            }
            return ret;
        }
 /// <summary>
 /// Builds a <see cref="GoogleSearchResult"/> from a single result element.
 /// </summary>
 /// <remarks>
 /// Copies the <c>MIME</c> attribute and the text of the <c>U</c>, <c>T</c> and
 /// <c>S</c> child elements into the result. Nodes that are missing are skipped,
 /// leaving the corresponding property unassigned.
 /// </remarks>
 /// <param name="r">The result element to read.</param>
 /// <returns>The populated search result.</returns>
 private GoogleSearchResult ParseXmlResult(XElement r)
 {
     var parsed = new GoogleSearchResult();

     var mimeAttribute = r.Attribute("MIME");
     if (mimeAttribute != null)
     {
         parsed.Mime = mimeAttribute.Value;
     }

     var urlElement = r.Element("U");
     if (urlElement != null)
     {
         parsed.Url = urlElement.Value;
     }

     var titleElement = r.Element("T");
     if (titleElement != null)
     {
         parsed.Title = titleElement.Value;
     }

     // won't work for promoted results but they are just shown inline with normal results
     var snippetElement = r.Element("S");
     if (snippetElement != null)
     {
         parsed.Description = snippetElement.Value;
     }

     return parsed;
 }
 /// <summary>
 /// Builds a <see cref="GoogleSearchResult"/> from a single CSE feed entry.
 /// </summary>
 /// <remarks>
 /// <c>title</c>, <c>link</c> (its <c>href</c> attribute) and <c>summary</c> are read
 /// from the element's default namespace, <c>mime</c> from the namespace bound to the
 /// <c>cse</c> prefix. Properties whose source node is missing are not assigned.
 /// </remarks>
 /// <param name="searchResult">The entry element to read.</param>
 /// <returns>The populated search result.</returns>
 private GoogleSearchResult ParseCseResult(XElement searchResult)
 {
     var cseNamespace = searchResult.GetNamespaceOfPrefix("cse");
     var atomNamespace = searchResult.GetDefaultNamespace();
     var parsed = new GoogleSearchResult();

     var titleElement = searchResult.Element(atomNamespace + "title");
     if (titleElement != null)
     {
         parsed.Title = titleElement.Value;
     }

     var linkElement = searchResult.Element(atomNamespace + "link");
     if (linkElement != null)
     {
         // Look the attribute up once and reuse it.
         var hrefAttribute = linkElement.Attribute("href");
         if (hrefAttribute != null)
         {
             parsed.Url = hrefAttribute.Value;
         }
     }

     var summaryElement = searchResult.Element(atomNamespace + "summary");
     if (summaryElement != null)
     {
         parsed.Description = summaryElement.Value;
     }

     var mimeElement = searchResult.Element(cseNamespace + "mime");
     if (mimeElement != null)
     {
         parsed.Mime = mimeElement.Value;
     }

     return parsed;
 }
        /// <summary>
        /// Used to search multiple pages and combine them with details
        /// </summary>
        /// <remarks>
        /// Only the CSE method is supported: the document at <paramref name="url"/> is
        /// loaded and its search information, labels, promotions and entries are read.
        /// When <c>_maxPages</c> is greater than 1 and the feed names a next page, the
        /// results of <c>RecursiveResults(..., 2)</c> are appended. <c>Options["start"]</c>
        /// is changed while those pages are fetched and is always put back afterwards.
        /// </remarks>
        /// <param name="url">Address of the first result page; passed to <c>XDocument.Load</c>.</param>
        /// <returns>The combined results; an empty container when the document has no root.</returns>
        /// <exception cref="ArgumentException">Thrown when <c>Method</c> is <c>GoogleSearchMethod.XML</c>.</exception>
        private GoogleSearchResults RecursiveResultDetailed(string url)
        {
            var ret = new GoogleSearchResults
            {
                Results      = new List <GoogleSearchResult>(),
                Promotions   = new List <GoogleSearchResult>(),
                Labels       = new Dictionary <string, string>(),
                TotalResults = 0,
                SearchTime   = 0
            };

            if (Method == GoogleSearchMethod.CSE)
            {
                var xResults = XDocument.Load(url);
                var data     = xResults.Root;
                if (data == null)
                {
                    return(ret);
                }
                var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
                var nsCse        = data.GetNamespaceOfPrefix("cse");
                var nsAtom       = data.GetDefaultNamespace();

                //get the search information
                var searchInformation = data.Element(nsCse + "searchInformation");
                if (searchInformation != null)
                {
                    // The feed is machine generated: parse with the invariant culture rather
                    // than a per-call "en-us" instance that honours user overrides.
                    var culture      = CultureInfo.InvariantCulture;
                    var totalResults = searchInformation.Element(nsCse + "totalResults");
                    if (totalResults != null)
                    {
                        ret.TotalResults = long.Parse(totalResults.Value, culture);
                    }
                    var searchTime = searchInformation.Element(nsCse + "searchTime");
                    if (searchTime != null)
                    {
                        ret.SearchTime = double.Parse(searchTime.Value, culture);
                    }
                }
                //Get labels
                foreach (var label in data.Descendants(nsCse + "facet"))
                {
                    var item = label.Element(nsCse + "item");
                    var key  = item?.Attribute("label")?.Value;
                    if (key == null)
                    {
                        // A dictionary cannot hold a null key; skip unlabeled facets.
                        continue;
                    }
                    ret.Labels[key] = item.Attribute("anchor")?.Value;
                }
                //Get Promotions
                foreach (var promotion in data.Descendants(nsCse + "promotion"))
                {
                    var tmpResult = new GoogleSearchResult();
                    var title     = promotion.Element(nsAtom + "title");
                    if (title != null)
                    {
                        tmpResult.Title = title.Value;
                    }
                    var href = promotion.Element(nsAtom + "link")?.Attribute("href");
                    if (href != null)
                    {
                        tmpResult.Url = href.Value;
                    }
                    var description = promotion.Element(nsCse + "bodyLine");
                    if (description != null)
                    {
                        tmpResult.Description = description.Attribute("title")?.Value;
                    }
                    ret.Promotions.Add(tmpResult);
                }

                //get search Results
                ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));
                if (1 < _maxPages)
                {
                    // Query elements without a role attribute are simply not a match.
                    var nextPage = data.Elements(nsOpenSearch + "Query")
                                       .FirstOrDefault(e => e.Attribute("role")?.Value == "cse:nextPage");
                    var nextStart = nextPage?.Attribute("startIndex")?.Value;
                    if (nextStart != null)
                    {
                        // Remember the caller's paging option so it can be put back afterwards.
                        var hadStart = Options.ContainsKey("start");
                        var oldstart = hadStart ? Options["start"] : null;
                        Options["start"] = nextStart;
                        try
                        {
                            var nextUrl           = QueryUrl();
                            var theRestOfThePages = RecursiveResults(nextUrl, 2);
                            ret.Results.AddRange(theRestOfThePages);
                        }
                        catch (ArgumentOutOfRangeException)
                        {
                            //it won't search past 100 results
                        }
                        finally
                        {
                            // Restore for every outcome, not only the expected ones.
                            if (hadStart)
                            {
                                Options["start"] = oldstart;
                            }
                            else
                            {
                                Options.Remove("start");
                            }
                        }
                    }
                }
            }
            if (Method == GoogleSearchMethod.XML)
            {
                throw new ArgumentException("The XML API is no longer available as it was only for Google Site Search");
            }
            return(ret);
        }
Exemple #7
0
        /// <summary>
        /// Used to search multiple pages and combine them with details
        /// </summary>
        /// <remarks>
        /// Loads the document at <paramref name="url"/> and, depending on <c>Method</c>,
        /// reads it as a CSE feed or as an XML result document. Labels, promotions,
        /// the total result count and (CSE only) the search time are taken from this
        /// first page. When <c>_maxPages</c> is greater than 1 and the page points to a
        /// following page, the results of <c>RecursiveResults(..., 2)</c> are appended.
        /// For CSE, <c>Options["start"]</c> is changed while the following pages are
        /// fetched and is always put back afterwards, even if fetching throws.
        /// </remarks>
        /// <param name="url">Address of the first result page; passed to <c>XDocument.Load</c>.</param>
        /// <returns>The combined results; an empty container when the document has no usable root.</returns>
        private GoogleSearchResults RecursiveResultDetailed(string url)
        {
            // The response is machine generated, so numbers must not be parsed with
            // whatever culture the current thread happens to run under.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var ret = new GoogleSearchResults
            {
                Results      = new List <GoogleSearchResult>(),
                Promotions   = new List <GoogleSearchResult>(),
                Labels       = new Dictionary <string, string>(),
                TotalResults = 0,
                SearchTime   = 0
            };

            if (Method == GoogleSearchMethod.CSE)
            {
                var xResults = XDocument.Load(url);
                var data     = xResults.Root;
                if (data == null)
                {
                    return(ret);
                }
                var nsOpenSearch = data.GetNamespaceOfPrefix("opensearch");
                var nsCse        = data.GetNamespaceOfPrefix("cse");
                var nsAtom       = data.GetDefaultNamespace();

                //get the search information
                var searchInformation = data.Element(nsCse + "searchInformation");
                if (searchInformation != null)
                {
                    var totalResults = searchInformation.Element(nsCse + "totalResults");
                    if (totalResults != null)
                    {
                        ret.TotalResults = long.Parse(totalResults.Value, invariant);
                    }
                    var searchTime = searchInformation.Element(nsCse + "searchTime");
                    if (searchTime != null)
                    {
                        ret.SearchTime = double.Parse(searchTime.Value, invariant);
                    }
                }
                //Get labels
                foreach (var label in data.Descendants(nsCse + "facet"))
                {
                    var item = label.Element(nsCse + "item");
                    if (item == null)
                    {
                        continue;
                    }
                    // The string conversion yields null for a missing attribute. A label is
                    // only recorded when both parts exist, as in the XML branch below.
                    var key   = (string)item.Attribute("label");
                    var value = (string)item.Attribute("anchor");
                    if (key != null && value != null)
                    {
                        ret.Labels[key] = value;
                    }
                }
                //Get Promotions
                foreach (var promotion in data.Descendants(nsCse + "promotion"))
                {
                    var tmpResult = new GoogleSearchResult();
                    var title     = promotion.Element(nsAtom + "title");
                    if (title != null)
                    {
                        tmpResult.Title = title.Value;
                    }
                    var link = promotion.Element(nsAtom + "link");
                    var href = link == null ? null : (string)link.Attribute("href");
                    if (href != null)
                    {
                        tmpResult.Url = href;
                    }
                    var bodyLine  = promotion.Element(nsCse + "bodyLine");
                    var bodyTitle = bodyLine == null ? null : (string)bodyLine.Attribute("title");
                    if (bodyTitle != null)
                    {
                        tmpResult.Description = bodyTitle;
                    }
                    ret.Promotions.Add(tmpResult);
                }

                //get search Results
                ret.Results.AddRange(data.Descendants(nsAtom + "entry").Select(ParseCseResult));
                if (1 < _maxPages)
                {
                    // Query elements without a role attribute are simply not a match.
                    var nextPage = data.Elements(nsOpenSearch + "Query")
                                       .FirstOrDefault(e => (string)e.Attribute("role") == "cse:nextPage");
                    var nextStart = nextPage == null ? null : (string)nextPage.Attribute("startIndex");
                    if (nextStart != null)
                    {
                        // Remember the caller's paging option so it can be put back afterwards.
                        var hadStart = Options.ContainsKey("start");
                        var oldstart = hadStart ? Options["start"] : null;
                        Options["start"] = nextStart;
                        try
                        {
                            var nextUrl           = QueryUrl();
                            var theRestOfThePages = RecursiveResults(nextUrl, 2);
                            ret.Results.AddRange(theRestOfThePages);
                        }
                        finally
                        {
                            // Restore even when fetching the following pages failed.
                            if (hadStart)
                            {
                                Options["start"] = oldstart;
                            }
                            else
                            {
                                Options.Remove("start");
                            }
                        }
                    }
                }
            }
            if (Method == GoogleSearchMethod.XML)
            {
                var xResults = XDocument.Load(url);
                //get labels
                foreach (var label in xResults.Descendants("FacetItem"))
                {
                    var key     = label.Element("label");
                    var lblDesc = label.Element("anchor_text");
                    if (key != null && lblDesc != null)
                    {
                        ret.Labels[key.Value] = lblDesc.Value;
                    }
                }

                var data = xResults.Descendants("RES").FirstOrDefault();
                if (data == null)
                {
                    return(ret);
                }
                var totalResults = data.Element("M");
                if (totalResults != null)
                {
                    ret.TotalResults = long.Parse(totalResults.Value, invariant);
                }
                foreach (var r in data.Descendants("R"))
                {
                    var tmpResults = ParseXmlResult(r);
                    var slR        = r.Element("SL_RESULTS");
                    if (slR == null)
                    {
                        ret.Results.Add(tmpResults);
                        continue;
                    }
                    //promotion
                    // NOTE(review): an R that has SL_RESULTS but no BODY_LINE ends up in
                    // neither list - confirm this is intended.
                    var bodyLine = slR.Descendants("BODY_LINE").FirstOrDefault();
                    if (bodyLine != null)
                    {
                        var t = bodyLine.Descendants("T").FirstOrDefault();
                        if (t != null)
                        {
                            tmpResults.Description = t.Value;
                        }
                        ret.Promotions.Add(tmpResults);
                    }
                }
                var nb = data.Element("NB");
                if (nb == null)
                {
                    return(ret);
                }
                var nu = nb.Element("NU");
                if (nu == null)
                {
                    return(ret);
                }
                if (1 < _maxPages)
                {
                    // NU holds a host-relative address, so the host is prepended.
                    ret.Results.AddRange(RecursiveResults("http://www.google.com" + nu.Value, 2));
                }
            }
            return(ret);
        }