Пример #1
0
        /// <summary>
        /// Maps the <c>value</c> attribute of each of the first three nodes in
        /// <paramref name="htmlNode"/> to that node's inner HTML.
        /// </summary>
        /// <param name="htmlNode">
        /// Nodes to inspect. May be <c>null</c> (for example the result of a
        /// <c>SelectNodes</c> call that matched nothing); an empty dictionary is returned then.
        /// </param>
        /// <returns>
        /// A dictionary with at most three entries. Nodes among the first three that carry
        /// no <c>value</c> attribute are skipped (they still count towards the three).
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// Two of the inspected nodes share the same <c>value</c> attribute.
        /// </exception>
        public static Dictionary <string, string> StNode(HtmlNodeCollection htmlNode)
        {
            var keyValue = new Dictionary <string, string>();

            // Nothing to read from: return the empty map instead of throwing inside Take().
            if (htmlNode == null)
            {
                return(keyValue);
            }

            foreach (var item in htmlNode.Take(3))
            {
                // The attribute indexer yields null when the attribute is absent.
                var valueAttribute = item.Attributes["value"];
                if (valueAttribute == null)
                {
                    continue;
                }

                keyValue.Add(valueAttribute.Value, item.InnerHtml);
            }

            return(keyValue);
        }
Пример #2
0
        /// <summary>
        /// Loads <c>content</c> into <c>doc</c> and collects the inner text of the nodes
        /// matched by <c>xPathUrl</c>, limited to the first <c>preferences.count</c> matches.
        /// </summary>
        /// <returns>
        /// The collected texts in document order; an empty list when <c>content</c> is
        /// <c>null</c> or the XPath query matches nothing.
        /// </returns>
        public List <String> Parse()
        {
            List <String> texts = new List <String>();

            // Without content there is nothing to load or query.
            if (content == null)
            {
                return(texts);
            }

            doc.LoadHtml(content);

            HtmlNodeCollection matches = doc.DocumentNode.SelectNodes(xPathUrl);

            // A null collection means the query matched nothing.
            if (matches == null)
            {
                return(texts);
            }

            texts.AddRange(matches.Take(preferences.count).Select(match => match.InnerText));
            return(texts);
        }
Пример #3
0
        /// <summary>
        /// Downloads the search page for <paramref name="search"/> and extracts up to
        /// <paramref name="amount"/> results from the <c>results-list</c> element.
        /// </summary>
        /// <param name="search">Search term handed to <c>GetSearchURL</c>.</param>
        /// <param name="amount">Maximum number of results to return.</param>
        /// <returns>
        /// Pairs whose key is the inner text of the first child of each article's second
        /// child node, and whose value is <c>GetObsidianURL()</c> followed by that second
        /// child's first attribute value. Returns <c>null</c> when the page has no results
        /// list or the list contains no articles.
        /// </returns>
        public List <KeyValuePair <string, string> > Search(string search, int amount)
        {
            HtmlWeb      web = new HtmlWeb();
            HtmlDocument doc = web.Load(GetSearchURL(search));

            // SelectSingleNode yields null when the list is absent, which lets us return
            // the same "no results" value instead of throwing from First() on a null collection.
            HtmlNode resultsList = doc.DocumentNode.SelectSingleNode("//ul[@class='results-list no-bullet']");

            if (resultsList == null)
            {
                return(null);
            }

            // The leading '.' keeps the query relative to the results list; a bare
            // "//article" would be evaluated against the whole document.
            HtmlNodeCollection articles = resultsList.SelectNodes(".//article");

            if (articles == null)
            {
                return(null);
            }

            List <KeyValuePair <string, string> > results = new List <KeyValuePair <string, string> >();

            foreach (HtmlNode item in articles.Take(amount))
            {
                HtmlNode link = item.ChildNodes[1];

                results.Add(new KeyValuePair <string, string>(link.FirstChild.InnerText, GetObsidianURL() + link.Attributes.First().Value));
            }

            return(results);
        }
Пример #4
0
        /// <summary>
        /// Searches btsow.club for <paramref name="content"/>, converts every result row
        /// except the last one into a <c>SeedMagnetSearchModel</c>, and then visits each
        /// result's detail page to fill in its magnet link.
        /// </summary>
        /// <param name="content">Search text appended to the site's search URL.</param>
        /// <param name="cc">Cookie container passed to the detail-page requests.</param>
        /// <returns>
        /// The models collected so far. The method is best-effort: any exception stops
        /// processing, is written to the trace listeners, and whatever was already
        /// collected is returned (possibly an empty list, possibly without magnet links).
        /// </returns>
        public static List <SeedMagnetSearchModel> SerachListView(string content, CookieContainer cc)
        {
            List <SeedMagnetSearchModel> ret = new List <SeedMagnetSearchModel>();

            try
            {
                var serachContent = "https://btsow.club/search/" + content;
                var htmlRet       = HtmlManager.GetHtmlWebClient("https://btsow.club", serachContent, null, true);

                if (!htmlRet.Success)
                {
                    return(ret);
                }

                HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
                htmlDocument.LoadHtml(htmlRet.Content);

                HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='row']");

                // No rows matched: report an empty result explicitly rather than
                // relying on the catch block to absorb a null dereference.
                if (nodes == null)
                {
                    return(ret);
                }

                // The last matched row is deliberately left out.
                foreach (var node in nodes.Take(nodes.Count - 1))
                {
                    var text = node.ChildNodes[1].ChildNodes[1].InnerText.Trim();
                    var size = FileUtility.GetFileSizeFromString(node.ChildNodes[3].InnerText.Trim());
                    var date = node.ChildNodes[5].InnerText.Trim();

                    // The detail URL is the first double-quoted value in the row's second child markup.
                    var a   = node.ChildNodes[1].OuterHtml;
                    var url = a.Substring(a.IndexOf("\"") + 1);
                    url = url.Substring(0, url.IndexOf("\""));

                    ret.Add(new SeedMagnetSearchModel
                    {
                        Title  = text,
                        Size   = size,
                        Date   = DateTime.Parse(date),
                        Url    = url,
                        Source = SearchSeedSiteEnum.Btsow
                    });
                }

                // Second pass: fetch each detail page and read the magnet link from it.
                foreach (var r in ret)
                {
                    var subHtmlRet = HtmlManager.GetHtmlWebClient("https://btsow.club", r.Url, cc, false);

                    if (!subHtmlRet.Success)
                    {
                        continue;
                    }

                    htmlDocument = new HtmlAgilityPack.HtmlDocument();
                    htmlDocument.LoadHtml(subHtmlRet.Content);

                    HtmlNode magnetNode = htmlDocument.DocumentNode.SelectSingleNode("//textarea[@class='magnet-link hidden-xs']");

                    if (magnetNode != null)
                    {
                        r.MagUrl = magnetNode.InnerText;
                    }
                }
            }
            catch (Exception ee)
            {
                // Still best-effort, but leave a trace so failures are diagnosable
                // instead of disappearing silently.
                System.Diagnostics.Trace.TraceError("SerachListView failed for '" + content + "': " + ee);
            }

            return(ret);
        }
Пример #5
0
        /// <summary>
        /// Resets the node pools and the query result cache, then refills the pools from
        /// the loaded documents.
        /// </summary>
        /// <remarks>
        /// <para>Training documents (<c>trainingDocsNames</c>) fill <c>TargetNodes</c>,
        /// <c>TargetNodesPrecision</c> and <c>NonTargetNodes</c>. The number of target nodes
        /// taken per training document is capped at the smallest non-zero selection count
        /// found across the training documents, and never exceeds 100.</para>
        /// <para>Documents in <c>allDocsNames</c> that are not training documents fill the
        /// <c>TEST*</c> pools. Entries of <c>testDocsAndNames</c> whose names are also
        /// training documents fill the <c>TESTSeen*</c> pools.</para>
        /// <para>A document whose name cannot be resolved, or that contains no node carrying
        /// <c>selectionAttribute</c>, is skipped in every pass.</para>
        /// </remarks>
        /// <param name="trainingDocs">
        /// Names of the documents to train on. When <c>null</c>, a copy of
        /// <c>allDocsNames</c> is used. A non-null set is stored as-is, not copied.
        /// </param>
        public static void Initiate(HashSet <String> trainingDocs = null)
        {
            // Set the set of training document names (every known document by default).
            trainingDocsNames = trainingDocs ?? new HashSet <String>(allDocsNames);

            // Reset the DOM pool variables.
            TargetNodes          = new HashSet <HtmlNode>();
            TargetNodesPrecision = new HashSet <HtmlNode>();
            NonTargetNodes       = new HashSet <HtmlNode>();

            TESTTargetNodes          = new HashSet <HtmlNode>();
            TESTTargetNodesPrecision = new HashSet <HtmlNode>();
            TESTNonTargetNodes       = new HashSet <HtmlNode>();

            TESTSeenTargetNodes          = new HashSet <HtmlNode>();
            TESTSeenTargetNodesPrecision = new HashSet <HtmlNode>();
            TESTSeenNonTargetNodes       = new HashSet <HtmlNode>();

            // Reset the query result cache.
            queryResultCache = new Dictionary <string, HashSet <HtmlNode> >();

            // Both queries are identical for every document, so build them once.
            string targetXPath  = "//*[@" + selectionAttribute + "]";
            string relatedXPath = targetXPath + "//* | //*[@" + optionalSelectionAttribute + "] | //*[@" + optionalSelectionAttribute + "]//*";

            // Smallest non-empty selection across the training documents, capped at 100.
            int minSelected = 100;

            foreach (String srcDomName in trainingDocsNames)
            {
                HtmlNode srcDom;
                if (!docsAndNames.TryGetValue(srcDomName, out srcDom) || srcDom == null)
                {
                    continue;
                }

                HtmlNodeCollection selected = srcDom.SelectNodes(targetXPath);
                if (selected != null && selected.Count > 0)
                {
                    minSelected = Math.Min(minSelected, selected.Count);
                }
            }

            // Training documents: only the first minSelected targets of each document are kept.
            foreach (String srcDomName in trainingDocsNames)
            {
                HtmlNode srcDom;
                docsAndNames.TryGetValue(srcDomName, out srcDom);
                CollectLabelledNodes(srcDom, targetXPath, relatedXPath, minSelected,
                                     TargetNodes, TargetNodesPrecision, NonTargetNodes);
            }

            // Held-out documents: everything that is not a training document.
            foreach (String srcDomName in allDocsNames.Except(trainingDocsNames))
            {
                HtmlNode srcDom;
                docsAndNames.TryGetValue(srcDomName, out srcDom);
                CollectLabelledNodes(srcDom, targetXPath, relatedXPath, int.MaxValue,
                                     TESTTargetNodes, TESTTargetNodesPrecision, TESTNonTargetNodes);
            }

            // "Seen" test documents: test entries that share a name with a training document.
            if (testDocsAndNames.Count() > 0)
            {
                foreach (String srcDomName in testDocsAndNames.Keys.Intersect(trainingDocsNames))
                {
                    HtmlNode srcDom;
                    testDocsAndNames.TryGetValue(srcDomName, out srcDom);
                    CollectLabelledNodes(srcDom, targetXPath, relatedXPath, int.MaxValue,
                                         TESTSeenTargetNodes, TESTSeenTargetNodesPrecision, TESTSeenNonTargetNodes);
                }
            }
        }

        /// <summary>
        /// Splits the element nodes of one document into target, precision-target and
        /// non-target nodes and merges them into the supplied pools.
        /// </summary>
        /// <param name="srcDom">Document root to query; <c>null</c> is ignored.</param>
        /// <param name="targetXPath">Query selecting the target nodes.</param>
        /// <param name="relatedXPath">Query selecting the additional precision-target nodes.</param>
        /// <param name="targetLimit">Maximum number of target nodes added to <paramref name="targets"/>.</param>
        /// <param name="targets">Pool receiving the first <paramref name="targetLimit"/> target nodes.</param>
        /// <param name="targetsPrecision">Pool receiving all target nodes and all related nodes.</param>
        /// <param name="nonTargets">Pool receiving every remaining element node.</param>
        private static void CollectLabelledNodes(HtmlNode srcDom, string targetXPath, string relatedXPath, int targetLimit,
                                                 HashSet <HtmlNode> targets, HashSet <HtmlNode> targetsPrecision, HashSet <HtmlNode> nonTargets)
        {
            if (srcDom == null)
            {
                return;
            }

            // SelectNodes returns null when nothing matches; such a document contributes nothing.
            HtmlNodeCollection selected = srcDom.SelectNodes(targetXPath);
            if (selected == null)
            {
                return;
            }

            targets.UnionWith(selected.Take(targetLimit));
            targetsPrecision.UnionWith(selected);

            HtmlNodeCollection selectedChildren = srcDom.SelectNodes(relatedXPath);
            if (selectedChildren != null)
            {
                targetsPrecision.UnionWith(selectedChildren);
            }

            // Everything that is neither a target nor a related node is a non-target.
            HashSet <HtmlNode> remaining = new HashSet <HtmlNode>(srcDom.SelectNodes("//*"));
            remaining.ExceptWith(selected);
            if (selectedChildren != null)
            {
                remaining.ExceptWith(selectedChildren);
            }

            nonTargets.UnionWith(remaining);
        }