/// <summary>
/// Builds a lookup from the <c>value</c> attribute to the inner HTML for the
/// first three nodes of a collection.
/// </summary>
/// <param name="htmlNode">
/// Nodes to inspect. May be null (HtmlAgilityPack's <c>SelectNodes</c> returns
/// null when nothing matches), in which case an empty dictionary is returned.
/// </param>
/// <returns>
/// A dictionary with at most three entries. Nodes among the first three that
/// carry no <c>value</c> attribute are skipped.
/// </returns>
/// <exception cref="ArgumentException">
/// Two of the inspected nodes share the same <c>value</c> attribute.
/// </exception>
public static Dictionary<string, string> StNode(HtmlNodeCollection htmlNode)
{
    var keyValue = new Dictionary<string, string>();

    // A null collection means "no matches"; treat it like an empty one.
    if (htmlNode == null)
    {
        return keyValue;
    }

    foreach (var item in htmlNode.Take(3))
    {
        // The attribute indexer yields null when the attribute is absent.
        var valueAttribute = item.Attributes["value"];
        if (valueAttribute == null)
        {
            continue;
        }

        keyValue.Add(valueAttribute.Value, item.InnerHtml);
    }

    return keyValue;
}
/// <summary>
/// Loads <c>content</c> into <c>doc</c> and collects the inner text of the
/// nodes matched by <c>xPathUrl</c>, limited to <c>preferences.count</c> nodes.
/// </summary>
/// <returns>
/// The inner text of each matched node in document order; an empty list when
/// <c>content</c> is null or the XPath matches nothing.
/// </returns>
public List<String> Parse()
{
    List<String> texts = new List<String>();

    // Nothing to parse.
    if (content == null)
    {
        return texts;
    }

    doc.LoadHtml(content);
    HtmlNodeCollection matches = doc.DocumentNode.SelectNodes(xPathUrl);

    // No match is reported as null rather than as an empty collection.
    if (matches == null)
    {
        return texts;
    }

    texts.AddRange(matches.Take(preferences.count).Select(match => match.InnerText));
    return texts;
}
/// <summary>
/// Loads the search page for a term and returns up to <paramref name="amount"/>
/// results as (title, absolute URL) pairs.
/// </summary>
/// <param name="search">Search term passed to <c>GetSearchURL</c>.</param>
/// <param name="amount">Maximum number of results to return.</param>
/// <returns>
/// The collected results, or <c>null</c> when the page contains no results
/// list or the list contains no <c>article</c> elements.
/// </returns>
public List<KeyValuePair<string, string>> Search(string search, int amount)
{
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(GetSearchURL(search));

    // SelectSingleNode yields the first match or null; the previous
    // SelectNodes(...).First() threw when the list was missing.
    HtmlNode resultsList = doc.DocumentNode.SelectSingleNode("//ul[@class='results-list no-bullet']");
    if (resultsList == null)
    {
        return null;
    }

    // The leading '.' keeps the query relative to the results list. A bare
    // "//article" is an absolute path and matches every article in the document.
    HtmlNodeCollection articles = resultsList.SelectNodes(".//article");
    if (articles == null)
    {
        return null;
    }

    List<KeyValuePair<string, string>> results = new List<KeyValuePair<string, string>>();
    foreach (HtmlNode item in articles.Take(amount))
    {
        // NOTE(review): assumes the second child of each article is the link
        // element, with its target as first attribute and the title as the
        // text of its first child - confirm against the site markup.
        HtmlNode link = item.ChildNodes[1];
        string title = link.FirstChild.InnerText;
        string url = GetObsidianURL() + link.Attributes.First().Value;
        results.Add(new KeyValuePair<string, string>(title, url));
    }

    return results;
}
/// <summary>
/// Searches btsow.club for <paramref name="content"/>, parses the result rows
/// into <see cref="SeedMagnetSearchModel"/> items and then visits each result
/// page to fill in its magnet link.
/// </summary>
/// <param name="content">Search term appended to the search URL.</param>
/// <param name="cc">Cookie container passed to the per-result detail requests.</param>
/// <returns>
/// The results parsed so far. This is best-effort: any failure is traced and
/// whatever was collected before it is returned, so the list may be empty or
/// have items without <c>MagUrl</c>.
/// </returns>
public static List<SeedMagnetSearchModel> SerachListView(string content, CookieContainer cc)
{
    List<SeedMagnetSearchModel> ret = new List<SeedMagnetSearchModel>();

    try
    {
        // NOTE(review): the term is concatenated unescaped; confirm whether
        // HtmlManager.GetHtmlWebClient encodes the URL itself.
        var serachContent = "https://btsow.club/search/" + content;
        var htmlRet = HtmlManager.GetHtmlWebClient("https://btsow.club", serachContent, null, true);
        if (!htmlRet.Success)
        {
            return ret;
        }

        HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
        htmlDocument.LoadHtml(htmlRet.Content);

        HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='row']");

        // SelectNodes returns null when nothing matches; previously this path
        // relied on a swallowed NullReferenceException.
        if (nodes == null)
        {
            return ret;
        }

        // The last matching row is deliberately left out (presumably a
        // non-result row such as pagination - confirm against the markup).
        foreach (var node in nodes.Take(nodes.Count - 1))
        {
            var text = node.ChildNodes[1].ChildNodes[1].InnerText.Trim();
            var size = FileUtility.GetFileSizeFromString(node.ChildNodes[3].InnerText.Trim());
            var date = node.ChildNodes[5].InnerText.Trim();

            // The URL is taken as the first double-quoted value in the
            // element's outer HTML.
            var a = node.ChildNodes[1].OuterHtml;
            var url = a.Substring(a.IndexOf("\"") + 1);
            url = url.Substring(0, url.IndexOf("\""));

            SeedMagnetSearchModel temp = new SeedMagnetSearchModel
            {
                Title = text,
                Size = size,
                Date = DateTime.Parse(date),
                Url = url,
                Source = SearchSeedSiteEnum.Btsow
            };

            ret.Add(temp);
        }

        // Second pass: open each result page and read its magnet link.
        foreach (var r in ret)
        {
            var subHtmlRet = HtmlManager.GetHtmlWebClient("https://btsow.club", r.Url, cc, false);
            if (!subHtmlRet.Success)
            {
                continue;
            }

            htmlDocument = new HtmlAgilityPack.HtmlDocument();
            htmlDocument.LoadHtml(subHtmlRet.Content);

            HtmlNode magnetNode = htmlDocument.DocumentNode.SelectSingleNode("//textarea[@class='magnet-link hidden-xs']");
            if (magnetNode != null)
            {
                r.MagUrl = magnetNode.InnerText;
            }
        }
    }
    catch (Exception ee)
    {
        // Still best-effort, but no longer silent: record why parsing stopped.
        System.Diagnostics.Trace.TraceError("SerachListView failed: " + ee);
    }

    return ret;
}
/// <summary>
/// Resets the DOM node pools and the query result cache, then refills the
/// pools from the loaded documents: training documents, the remaining
/// (unseen) documents, and the test versions of the training documents.
/// </summary>
/// <param name="trainingDocs">
/// Names of the documents to train on. When null, a copy of
/// <c>allDocsNames</c> is used. A non-null set is stored by reference.
/// </param>
public static void Initiate(HashSet<String> trainingDocs = null)
{
    if (trainingDocs == null)
    {
        trainingDocsNames = new HashSet<String>(allDocsNames);
    }
    else
    {
        //Set the set of training documents names
        trainingDocsNames = trainingDocs;
    }

    //Reset the Dom Pool Vars
    TargetNodes = new HashSet<HtmlNode>();
    TargetNodesPrecision = new HashSet<HtmlNode>();
    NonTargetNodes = new HashSet<HtmlNode>();
    TESTTargetNodes = new HashSet<HtmlNode>();
    TESTTargetNodesPrecision = new HashSet<HtmlNode>();
    TESTNonTargetNodes = new HashSet<HtmlNode>();
    TESTSeenTargetNodes = new HashSet<HtmlNode>();
    TESTSeenTargetNodesPrecision = new HashSet<HtmlNode>();
    TESTSeenNonTargetNodes = new HashSet<HtmlNode>();

    //Reset the query result cache
    queryResultCache = new Dictionary<string, HashSet<HtmlNode>>();

    // Smallest number of selected nodes over the training documents, capped
    // at 100. Every training document contributes at most this many targets.
    int minSelected = 100;
    foreach (String srcDomName in trainingDocsNames)
    {
        HtmlNode srcDom = null;
        if (!docsAndNames.TryGetValue(srcDomName, out srcDom) || srcDom == null)
        {
            // Unknown document name: nothing to measure.
            continue;
        }

        HtmlNodeCollection selected = srcDom.SelectNodes("//*[@" + selectionAttribute + "]");
        if (selected != null && selected.Count > 0 && selected.Count < minSelected)
        {
            minSelected = selected.Count;
        }
    }

    // Training documents.
    foreach (String srcDomName in trainingDocsNames)
    {
        HtmlNode srcDom = null;
        docsAndNames.TryGetValue(srcDomName, out srcDom);
        CollectDomPoolNodes(srcDom, minSelected, TargetNodes, TargetNodesPrecision, NonTargetNodes);
    }

    // Documents that are not part of the training set.
    foreach (String srcDomName in allDocsNames.Except(trainingDocsNames))
    {
        HtmlNode srcDom = null;
        docsAndNames.TryGetValue(srcDomName, out srcDom);
        CollectDomPoolNodes(srcDom, int.MaxValue, TESTTargetNodes, TESTTargetNodesPrecision, TESTNonTargetNodes);
    }

    // Test versions of the documents that were trained on.
    if (testDocsAndNames.Count() > 0)
    {
        foreach (String srcDomName in testDocsAndNames.Keys.Intersect(trainingDocsNames))
        {
            HtmlNode srcDom = null;
            testDocsAndNames.TryGetValue(srcDomName, out srcDom);
            CollectDomPoolNodes(srcDom, int.MaxValue, TESTSeenTargetNodes, TESTSeenTargetNodesPrecision, TESTSeenNonTargetNodes);
        }
    }
}

/// <summary>
/// Adds one document's nodes to a (targets, precision targets, non-targets)
/// pool triple. Documents that are null or contain no node carrying
/// <c>selectionAttribute</c> are skipped entirely; the test pools previously
/// threw on such documents while the training pool skipped them.
/// </summary>
/// <param name="srcDom">Document to scan; may be null.</param>
/// <param name="targetLimit">Maximum number of selected nodes added to <paramref name="targets"/>.</param>
/// <param name="targets">Receives the selected nodes, limited to <paramref name="targetLimit"/>.</param>
/// <param name="targetsPrecision">Receives all selected nodes plus their descendants and the optionally selected nodes with their descendants.</param>
/// <param name="nonTargets">Receives every other element of the document.</param>
private static void CollectDomPoolNodes(HtmlNode srcDom, int targetLimit, ISet<HtmlNode> targets, ISet<HtmlNode> targetsPrecision, ISet<HtmlNode> nonTargets)
{
    if (srcDom == null)
    {
        return;
    }

    // SelectNodes returns null, not an empty collection, when nothing matches.
    HtmlNodeCollection selected = srcDom.SelectNodes("//*[@" + selectionAttribute + "]");
    if (selected == null)
    {
        return;
    }

    targets.UnionWith(selected.Take(targetLimit));
    targetsPrecision.UnionWith(selected);

    HtmlNodeCollection selectedChildren = srcDom.SelectNodes("//*[@" + selectionAttribute + "]//* | //*[@" + optionalSelectionAttribute + "] | //*[@" + optionalSelectionAttribute + "]//*");
    if (selectedChildren != null)
    {
        targetsPrecision.UnionWith(selectedChildren);
    }

    // Everything that is neither selected nor a precision target is a
    // non-target. "//*" cannot be null here because it matches at least the
    // selected nodes.
    HtmlNodeCollection all = srcDom.SelectNodes("//*");
    HashSet<HtmlNode> nonTarget = new HashSet<HtmlNode>(all);
    nonTarget.ExceptWith(selected);
    if (selectedChildren != null)
    {
        nonTarget.ExceptWith(selectedChildren);
    }

    nonTargets.UnionWith(nonTarget);
}