/// <summary>
/// Downloads the listing page at <paramref name="url"/>, visits every listed item whose link
/// passes <c>checkURL</c>, and builds one record per item (name, url, image, description,
/// parts, domain, date). Each record is round-tripped through <c>JavaScriptSerializer</c>
/// before being stored.
/// </summary>
/// <param name="url">Address of the listing page; its host is stored in each record's <c>domain</c> field.</param>
/// <param name="ht">
/// Table of already known records. Every record created here is added to it under the
/// lower-cased item name (suffixed with <c>_N</c> on collision when <c>conf.nameAsUnique</c> is false).
/// </param>
/// <returns>
/// A table holding only the records added by this call; empty when the listing XPath matches nothing.
/// </returns>
/// <remarks>
/// A failure while processing a single item is written to the console and that item is skipped;
/// the remaining items are still processed.
/// </remarks>
private Hashtable extract(string url, ref Hashtable ht)
{
    Hashtable newHt = new Hashtable();
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(http.get(url));

    // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
    var itemNodes = doc.DocumentNode.SelectNodes(Build(conf.itemsCollectionIndices));
    if (itemNodes == null)
    {
        return newHt;
    }

    // Snapshot the fragments as strings: doc is reloaded many times inside the loop.
    List<string> collection = itemNodes.Select(p => p.InnerHtml).ToList();

    foreach (string item in collection)
    {
        try
        {
            doc.LoadHtml(item);

            string itemName = HttpUtility.HtmlDecode(
                doc.DocumentNode.SelectSingleNode(Build(conf.itemsNameIndices)).InnerText);
            string itemUrl = doc.DocumentNode
                .SelectSingleNode(Build(conf.itemsUrlIndices))
                .Attributes["href"].Value;

            string itemImage = string.Empty;
            if (conf.itemsImageIndices != string.Empty)
            {
                itemImage = doc.DocumentNode
                    .SelectSingleNode(Build(conf.itemsImageIndices))
                    .Attributes["src"].Value;
            }

            if (!checkURL(itemUrl))
            {
                continue;
            }

            // From here on doc holds the item's own page.
            doc.LoadHtml(http.get(itemUrl));

            string firstSourceUrl = HttpUtility.HtmlDecode(readSourceUrl(doc));
            List<object> partsList = new List<object>();
            partsList.Add(new { name = "Source 1", url = firstSourceUrl });

            string itemDescription = doc.DocumentNode
                .SelectSingleNode(Build(conf.itemDescriptionIndices))
                .InnerText;

            string key = itemName.ToLower();
            if (!conf.nameAsUnique)
            {
                key = nextFreeKey(ht, key);
            }
            // When conf.nameAsUnique is true and the key already exists, ht.Add below throws
            // ArgumentException; the catch at the end of this iteration logs it and the item is skipped.

            if (conf.itemPartCollectionIndices != string.Empty)
            {
                appendParts(doc, partsList, firstSourceUrl);
            }

            object record = new
            {
                key = key,
                name = itemName,
                url = HttpUtility.HtmlDecode(itemUrl),
                image = HttpUtility.HtmlDecode(itemImage),
                description = clearStartDescription(HttpUtility.HtmlDecode(itemDescription)),
                parts = partsList,
                countParts = partsList.Count.ToString(),
                domain = new Uri(url).Host,
                date = DateTime.Now.ToString()
            };

            // Round-trip through the serializer so the stored value is the serializer's
            // deserialized object graph rather than the anonymous type instance.
            JavaScriptSerializer ser = new JavaScriptSerializer();
            object stored = ser.DeserializeObject(ser.Serialize(record));

            ht.Add(key, stored);
            // New entries only.
            newHt.Add(key, stored);
        }
        catch (Exception e)
        {
            Console.WriteLine(e.ToString());
        }
    }

    return newHt;
}

/// <summary>
/// Reads the <c>src</c> attribute of the node addressed by <c>conf.itemUrlSourceIndices</c>
/// in the document currently loaded into <paramref name="doc"/>.
/// </summary>
/// <returns>
/// The raw (not HTML-decoded) attribute value, or an empty string when the built XPath has no segments.
/// Throws if the node or its <c>src</c> attribute is missing; callers handle that in their catch blocks.
/// </returns>
private string readSourceUrl(HtmlAgilityPack.HtmlDocument doc)
{
    string[] xp = Build(conf.itemUrlSourceIndices)
        .Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries);
    if (xp.Length == 0)
    {
        return string.Empty;
    }

    // The first two ';'-separated segments form the XPath. A single segment is used as-is
    // (previously that case indexed xp[1] and threw IndexOutOfRangeException).
    string sourceXpath = xp.Length > 1 ? xp[0] + xp[1] : xp[0];
    return doc.DocumentNode.SelectSingleNode(sourceXpath).Attributes["src"].Value;
}

/// <summary>
/// Returns <paramref name="baseKey"/> if it is not present in <paramref name="table"/>,
/// otherwise the first of <c>baseKey_1</c>, <c>baseKey_2</c>, ... that is not present.
/// </summary>
private static string nextFreeKey(Hashtable table, string baseKey)
{
    string candidate = baseKey;
    int suffix = 1;
    while (table.ContainsKey(candidate))
    {
        candidate = baseKey + "_" + suffix;
        suffix++;
    }
    return candidate;
}

/// <summary>
/// Follows every part link found on the item page loaded in <paramref name="doc"/>, appends one
/// <c>{ name, url }</c> entry per part to <paramref name="partsList"/>, and then renames the first
/// entry so it follows the parts' naming scheme (first word of the last part's name + " 1").
/// Any failure is written to the console; entries appended before the failure are kept.
/// </summary>
/// <param name="doc">Document holding the item page; it is reloaded with each part page.</param>
/// <param name="partsList">List whose element 0 is the item's first source entry.</param>
/// <param name="firstSourceUrl">Decoded URL of the first source, preserved when element 0 is renamed.</param>
private void appendParts(HtmlAgilityPack.HtmlDocument doc, List<object> partsList, string firstSourceUrl)
{
    try
    {
        // NOTE(review): conf.itemPartCollectionIndices only switches this step on; the XPath
        // itself is hard-coded here, as it was before. Confirm whether the configured value
        // was meant to be used instead.
        var partLinks = doc.DocumentNode.SelectNodes("//div[@class='keremiya_part']/a");
        if (partLinks == null)
        {
            return;
        }

        // Snapshot (link text, href) pairs before doc is reloaded with each part page.
        List<KeyValuePair<string, string>> parts = partLinks
            .Select(p => new KeyValuePair<string, string>(p.InnerText, p.Attributes["href"].Value))
            .ToList();

        string lastPartName = null;
        foreach (KeyValuePair<string, string> part in parts)
        {
            doc.LoadHtml(http.get(part.Value));
            lastPartName = HttpUtility.HtmlDecode(part.Key);
            partsList.Add(new
            {
                name = lastPartName,
                url = HttpUtility.HtmlDecode(readSourceUrl(doc))
            });
        }

        if (lastPartName != null)
        {
            string nameScheme = lastPartName.TrimStart().Split(' ')[0] + " 1";
            // Keep the first source's own URL; only its display name changes.
            partsList[0] = new { name = nameScheme, url = firstSourceUrl };
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Resolves the "get_command" route through <c>Router.url</c> and returns whatever
/// <c>hr.get</c> yields for that address.
/// </summary>
public string Get()
{
    string commandUrl = Router.url("get_command");
    return hr.get(commandUrl);
}