Exemple #1
0
        /// <summary>
        /// Loads the schedule page and turns every event listed under each day block into a lecture.
        /// </summary>
        /// <returns>
        /// The parsed lectures in document order; an empty array when the page contains no
        /// day blocks under the element with id "schedule".
        /// </returns>
        public LectureCont[] GetLectures()
        {
            List <LectureCont> lectures = new List <LectureCont>();
            HtmlDocument       doc      = new HtmlDocument();

            doc.LoadHtml(GetPage());

            HtmlNodeCollection days = doc.DocumentNode.SelectNodes("//div[@id='schedule']/div[@class='day']");

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so iterating it unguarded would throw on a page without a schedule.
            if (days == null)
            {
                return(lectures.ToArray());
            }

            foreach (HtmlNode day in days)
            {
                HtmlNodeCollection events = day.SelectNodes("div[@class='event']");

                if (events == null || events.Count == 0)
                {
                    continue;
                }

                // The date is only read for days that actually carry events.
                string   dateString = day.SelectSingleNode("div[@class='date']").InnerText;
                DateTime date       = DateTime.ParseExact(dateString, "dd/MM/yyyy", provider);

                foreach (HtmlNode ev in events)
                {
                    lectures.Add(ParseLecture(ev, date));
                }
            }

            return(lectures.ToArray());
        }
Exemple #2
0
        /// <summary>
        /// Downloads the page at <c>url + i</c>, tallies how often each distinct link markup
        /// appears inside its comment blocks, and hands the tally and the comment count to
        /// <c>SaveItems</c> / <c>SaveUser</c>.
        /// </summary>
        /// <param name="i">Numeric suffix appended to the base url.</param>
        /// <param name="now">Passed through unchanged to <c>SaveItems</c> and <c>SaveUser</c>.</param>
        private static void DowloadItemsFromWeb(int i, string now)
        {
            string urlas = url + i;
            var    html  = GetHtmlString(urlas);

            if (String.IsNullOrEmpty(html))
            {
                Console.WriteLine($"{i} not found");
                return;
            }

            var doc = new HtmlAgilityPack.HtmlDocument();

            doc.LoadHtml(html);

            HtmlNodeCollection commentNodes = doc.DocumentNode.SelectNodes("//div[@class='comment']");

            int commentsFound = 0;

            if (commentNodes != null)
            {
                // 'list' keeps first-seen order for SaveItems; 'byKey' gives O(1) lookup of the
                // entry to increment instead of rescanning the list for every comment.
                List <Items> list = new List <Items>();
                Dictionary <string, Items> byKey = new Dictionary <string, Items>();

                // Every comment block is counted, including those without a link.
                commentsFound = commentNodes.Count;

                foreach (HtmlNode node in commentNodes)
                {
                    HtmlNode anchor = node.SelectSingleNode(".//a");

                    // A comment without any link has nothing to tally; dereferencing it would throw.
                    if (anchor == null)
                    {
                        continue;
                    }

                    string linkhtml = anchor.OuterHtml;

                    Items item;
                    if (byKey.TryGetValue(linkhtml, out item))
                    {
                        item.Count = item.Count + 1;
                    }
                    else
                    {
                        item = new Items()
                        {
                            Key = linkhtml, Count = 1
                        };
                        byKey.Add(linkhtml, item);
                        list.Add(item);
                    }
                }

                SaveItems(list, urlas, now);
            }

            // Recorded even when the page has no comment blocks (count 0).
            SaveUser(urlas, commentsFound, now);
        }
        }//end of ValidNumbers method

        /// <summary>
        /// Reads every subject block (divs whose id contains "div0event") from the document and
        /// appends one <c>WebpageSubject</c> per block to <c>subjectContainer</c>.
        /// </summary>
        /// <param name="doc">Parsed page to scan.</param>
        public void Subject(HtmlDocument doc)
        {
            HtmlNodeCollection subjectStyleNode   = doc.DocumentNode.SelectNodes("//div[contains(@id,'div0event')]");
            HtmlNodeCollection subjectDetailsNode = doc.DocumentNode.SelectNodes("//div[contains(@id,'div0event')]//b");

            // SelectNodes returns null when nothing matches; a page without subjects adds nothing.
            if (subjectStyleNode == null || subjectDetailsNode == null)
            {
                return;
            }

            int readElementInTotal = 3;

            // NOTE(review): the two collections are indexed in parallel, which assumes every
            // subject div contains exactly one <b> element — confirm against the page markup.
            for (int readElement = 0; readElement < subjectStyleNode.Count; readElement++)
            {
                HtmlNode subjectSingleNode = subjectStyleNode[readElement];
                HtmlNode detailsNode       = subjectDetailsNode[readElement];

                string subjectStyle = subjectSingleNode.Attributes["style"].Value;

                // The short name is the <b> content; the full name and the room/size text are
                // the first and second text nodes that follow the <b>.
                string subjectShortName   = detailsNode.InnerHtml;
                string subjectFullName    = detailsNode.SelectSingleNode("following-sibling::text()[1]").InnerText;
                string subjectRoomAndSize = detailsNode.SelectSingleNode("following-sibling::text()[2]").InnerText;

                string subjectRoomName      = "";
                int    subjectRoomSize      = 0;
                int    subjectRoomSizeTaken = 0;

                SeperateRoomAndSize(subjectRoomAndSize, ref subjectRoomName, ref subjectRoomSize, ref subjectRoomSizeTaken);

                // ValidNumbers yields (start, length) pairs used below to cut the numeric
                // values out of the style string.
                int[] validNum = ValidNumbers(subjectStyle, readElementInTotal);

                WebpageSubject subjectList = new WebpageSubject();

                subjectList.LeftPX  = int.Parse(subjectStyle.Substring(validNum[0], validNum[1]));
                subjectList.TopPX   = int.Parse(subjectStyle.Substring(validNum[2], validNum[3]));
                subjectList.WidthPX = int.Parse(subjectStyle.Substring(validNum[4], validNum[5]));

                subjectList.SubjectFullName  = subjectFullName;
                subjectList.SubjectShortName = subjectShortName;

                subjectList.RoomName      = subjectRoomName;
                subjectList.RoomSize      = subjectRoomSize;
                subjectList.RoomSizeTaken = subjectRoomSizeTaken;

                subjectContainer.Add(subjectList);
            }
        }
Exemple #4
0
        /// <summary>
        /// Collects the href values of anchors in a rich-text property that the CMS indexer
        /// accepts as files and whose extension is listed in the configured rich-text file types.
        /// </summary>
        /// <param name="propertyValue">HTML content of the property.</param>
        /// <param name="propertyAlias">Property alias, used only in log messages.</param>
        /// <returns>The accepted href values in document order; empty when there are none.</returns>
        private List <string> ExtractDocumentLinks(string propertyValue, string propertyAlias)
        {
            List <string> links    = new List <string>();
            HtmlDocument  document = new HtmlDocument();

            document.LoadHtml(propertyValue);

            string[] allowedExtensions = CurrentConfiguration.SearchSettings.RichTextFileTypes.Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            HtmlNodeCollection anchors = document.DocumentNode.SelectNodes("//a[@href]");

            if (anchors == null || anchors.Count == 0)
            {
                this.log.AddLogentry(SolisSearch.Log.Enum.LogLevel.Debug, string.Format("No href items found in property {0}", propertyAlias), (Exception)null);
                return(links);
            }

            this.log.AddLogentry(SolisSearch.Log.Enum.LogLevel.Debug, string.Format("Found {0} href items in property {1}", anchors.Count, propertyAlias), (Exception)null);

            foreach (HtmlNode anchor in anchors)
            {
                // Each anchor is handled on its own: a failure is logged and the scan continues.
                try
                {
                    string href        = anchor.Attributes["href"].Value;
                    string friendlyUrl = this.cmsIndexer.GetMediaFriendlyUrl(href);

                    // Drop the query string, if any, before the extension is inspected.
                    int queryStart = string.IsNullOrEmpty(friendlyUrl) ? -1 : friendlyUrl.IndexOf("?", StringComparison.Ordinal);
                    if (queryStart >= 0)
                    {
                        friendlyUrl = friendlyUrl.Substring(0, queryStart);
                    }

                    if (!this.cmsIndexer.IsFileToIndex(href, friendlyUrl))
                    {
                        continue;
                    }

                    string extension = Path.GetExtension(friendlyUrl).Trim('.');
                    if (allowedExtensions.Contains(extension))
                    {
                        links.Add(href);
                        this.log.AddLogentry(SolisSearch.Log.Enum.LogLevel.Info, string.Format("Extracting RichText document links, found {0}", href), (Exception)null);
                    }
                    else
                    {
                        this.log.AddLogentry(SolisSearch.Log.Enum.LogLevel.Debug, string.Format("File extension {0} is not configured as media type to index, skipping", extension), (Exception)null);
                    }
                }
                catch (Exception ex)
                {
                    this.log.AddLogentry(SolisSearch.Log.Enum.LogLevel.Error, "Error resolving if media should be indexed for anchor " + anchor.InnerText, ex);
                }
            }

            return(links);
        }
Exemple #5
0
        /// <summary>
        /// Fetches the page at the hard-coded address and writes to <c>textBox1</c> the number of
        /// matching &lt;pre&gt; blocks followed by every date-like token (digits-digits-digits)
        /// found in them, one per line.
        /// </summary>
        public void Parser()
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            var url = "http://10.8.0.110:8080/";

            doc.LoadHtml(getRequest(url));
            HtmlNodeCollection c = doc.DocumentNode.SelectNodes("//div[@class='tab-pane fade in active']/pre");

            // SelectNodes returns null when nothing matches; report zero blocks instead of crashing.
            int blockCount = c == null ? 0 : c.Count;

            // The pattern never changes, so the regex is built once rather than per block.
            var re = new Regex("[0-9]+-[0-9]+-[0-9]+");

            // Accumulate the text and assign the textbox once instead of re-reading and
            // re-writing its Text property for every match.
            var output = new System.Text.StringBuilder();
            output.Append(blockCount.ToString()).Append(Environment.NewLine);

            for (var i = 0; i < blockCount; i++)
            {
                var st = c[i].InnerText.Normalize();

                foreach (Match mat in re.Matches(st))
                {
                    output.Append(mat.ToString()).Append(Environment.NewLine);
                }
            }

            textBox1.Text = output.ToString();
        }
Exemple #6
0
        /// <summary>
        /// Walks the paginated watch list of a user and collects the names linked from it.
        /// </summary>
        /// <param name="user">User whose watch list is read.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="user"/> is null or blank; otherwise the names collected
        /// so far (possibly partial if a page failed to load or parse — the error is written to
        /// the console).
        /// </returns>
        public override List <string> getWatchList(string user)
        {
            // Also covers a null user, which previously threw before the try block was entered.
            if (string.IsNullOrWhiteSpace(user))
            {
                return(null);
            }

            List <string> watchList = new List <string>();

            try
            {
                string watchlistUrl = string.Format("{0}watchlist/by/{1}/", FABase, user);

                // Pages are requested in order until one yields fewer than two table rows.
                for (int pageN = 1; ; pageN++)
                {
                    string page = webHandler.getPage(watchlistUrl + pageN);

                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(page);

                    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//table[@id='userpage-budlist']/tr");

                    if (nodes == null || nodes.Count < 2)
                    {
                        break;
                    }

                    foreach (HtmlNode node in nodes)
                    {
                        HtmlNode subNode = node.SelectSingleNode(".//a");
                        if (subNode == null)
                        {
                            continue;
                        }

                        string href = subNode.GetAttributeValue("href", null);

                        // The first six characters are dropped as a fixed prefix (presumably
                        // "/user/" — confirm). An anchor without href, or one too short to hold
                        // that prefix, is skipped instead of aborting the whole crawl.
                        if (href == null || href.Length < 6)
                        {
                            continue;
                        }

                        watchList.Add(href.Remove(0, 6).TrimEnd('/'));
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: whatever was collected before the failure is still returned.
                Console.WriteLine(e);
            }

            return(watchList);
        }
Exemple #7
0
        /// <summary>
        /// Extracts cast and crew entries from a film page.
        /// </summary>
        /// <param name="html">HTML of the page.</param>
        /// <param name="filmname">Film name copied onto every entry.</param>
        /// <param name="FilmID">Film id copied onto every entry.</param>
        /// <returns>
        /// Entries built from the "actor_tit" blocks followed by entries built from the
        /// "credits_list" blocks; blocks that do not have the expected structure are skipped.
        /// </returns>
        public List <Actor> AnalysisActor(string html, string filmname, int FilmID)
        {
            // Author's XPath notes for the page: /html/body/div[5]/div/div/div[1]/div/dl[1]
            // and /html/body/div[5]/div/div/div[1]/div/dl[1]/dd[1]/div[1]
            var          MovieList    = new List <Actor>();
            HtmlDocument htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(html);

            // Actors.
            // SelectNodes returns null when nothing matches; that is treated as "no actors".
            // NOTE(review): iteration starts at index 1, so the first match is skipped — confirm
            // this is intentional (e.g. a header block).
            HtmlNodeCollection yanyuan = htmlDocument.DocumentNode.SelectNodes("//div[@class='actor_tit']");

            if (yanyuan != null)
            {
                for (int i = 1; i < yanyuan.Count; i++)
                {
                    try
                    {
                        var aflag = yanyuan[i].ChildNodes[1].SelectSingleNode("a");
                        var act   = new Actor();
                        act.ActorHref = aflag.GetAttributeValue("href", "").Replace("\\\"", "").Transferred();
                        act.FilmName  = filmname.Transferred();
                        act.ActorType = "演员";
                        act.ActorName = (yanyuan[i].ChildNodes[1].ChildNodes[3]).ChildNodes[0].InnerHtml.Transferred();
                        act.FilmID    = FilmID;
                        MovieList.Add(act);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a block with unexpected structure is skipped, but the
                        // failure is no longer completely silent.
                        System.Diagnostics.Debug.WriteLine(ex);
                    }
                }
            }

            // Directors and other credits; the role label is read from the block itself.
            // NOTE(review): as above, index 0 is skipped — confirm.
            HtmlNodeCollection daoyan = htmlDocument.DocumentNode.SelectNodes("//div[@class='credits_list']");

            if (daoyan != null)
            {
                for (int i = 1; i < daoyan.Count; i++)
                {
                    try
                    {
                        var act = new Actor();
                        act.ActorHref = daoyan[i].ChildNodes[3].ChildNodes[1].GetAttributeValue("href", "").Replace("\\\"", "");
                        act.FilmName  = filmname;
                        act.ActorType = daoyan[i].ChildNodes[1].InnerText.Trim();
                        act.ActorName = daoyan[i].ChildNodes[3].InnerText.Trim();
                        act.FilmID    = FilmID;
                        MovieList.Add(act);
                    }
                    catch (Exception ex)
                    {
                        System.Diagnostics.Debug.WriteLine(ex);
                    }
                }
            }

            return(MovieList);
        }
Exemple #8
0
        /// <summary>
        /// Scans HTML files for paragraphs of class "Рисуноккартинка" that contain no image tag
        /// and logs a message for every file that has at least one such paragraph.
        /// </summary>
        /// <param name="htmlFiles">Paths of the HTML files to inspect.</param>
        public static void CheckEmptyPictures(List <string> htmlFiles)
        {
            foreach (string filePath in htmlFiles)
            {
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.Load(filePath, true);

                HtmlNodeCollection pictureNodes = htmlDoc.DocumentNode.SelectNodes(".//p[@class='Рисуноккартинка']");

                // No picture paragraphs at all: nothing to report for this file.
                if (pictureNodes == null)
                {
                    continue;
                }

                bool hasPictureWithoutImage = pictureNodes.Any(node => !Regex.IsMatch(node.InnerHtml, @"\<img"));

                if (!hasPictureWithoutImage)
                {
                    continue;
                }

                Program.Log($"В файле '{filePath}' содержится HTML элемент 'Рисуноккартинка' без изображения.");
                Program.Log();
            }
        }
Exemple #9
0
        /// <summary>
        /// Grades a department by which article collections were found.
        /// </summary>
        /// <param name="departmentArticles">Department article nodes, or null when none were found.</param>
        /// <param name="filterArticles">Filter article nodes, or null when none were found.</param>
        /// <param name="mainArticles">Not used by the calculation.</param>
        /// <returns>
        /// 2 when filter articles exist and there is at least one department article;
        /// 0 when both collections are null; 1 in every other case.
        /// </returns>
        public double GetDepartmentMark(HtmlNodeCollection departmentArticles, HtmlNodeCollection filterArticles, HtmlNodeCollection mainArticles)
        {
            // Checked for null first: previously a null departmentArticles combined with a
            // non-null filterArticles threw instead of yielding 1.
            bool hasDepartmentArticles = departmentArticles != null && departmentArticles.Count > 0;

            if (filterArticles != null && hasDepartmentArticles)
            {
                return(2);
            }

            if (filterArticles == null && departmentArticles == null)
            {
                return(0);
            }

            return(1);
        }
Exemple #10
0
        /// <summary>
        /// Retrieves the page at <paramref name="URL"/> and returns the href of every node
        /// matched by <paramref name="DownloadPath"/>, one per line.
        /// </summary>
        /// <param name="URL">Address of the page to download.</param>
        /// <param name="DownloadPath">XPath expression selecting the link nodes.</param>
        /// <param name="DownCollection">Not used; retained so existing callers keep compiling.</param>
        /// <returns>
        /// For each matched node its href (empty when the attribute is missing) followed by
        /// "\n"; an empty string when nothing matches.
        /// </returns>
        public static string GetBinaries(string URL, string DownloadPath, HtmlNodeCollection DownCollection)
        {
            var           testDoc = new HtmlDocument();
            Task <string> task    = AsyncUrlToTask(URL);

            // Blocks until the download completes; Result waits on its own, so no separate
            // Wait() call is needed.
            testDoc.LoadHtml(task.Result);

            HtmlNodeCollection links = testDoc.DocumentNode.SelectNodes(DownloadPath);

            // SelectNodes returns null when nothing matches.
            if (links == null)
            {
                return("");
            }

            var binaries = new System.Text.StringBuilder();

            foreach (HtmlNode link in links)
            {
                binaries.Append(link.GetAttributeValue("href", string.Empty)).Append("\n");
            }

            return(binaries.ToString());
        }
Exemple #11
0
        /// <summary>
        /// Obtain pricing information from Digikey website: reads the rows of the
        /// "product-dollars" table in <c>WebPageData</c> and builds one price break per row.
        /// </summary>
        /// <returns>
        /// One <c>PricingInfo</c> per table row that has more than two cells, after
        /// <c>FixMaximumQtys</c> has been applied to the list.
        /// </returns>
        /// <remarks>
        /// Exceptions propagate to the caller unchanged. The former catch block rethrew with
        /// <c>throw ex;</c>, which only discarded the original stack trace, so it was removed.
        /// NOTE(review): a page without the "product-dollars" table still fails with a
        /// NullReferenceException, as before — confirm whether callers rely on that.
        /// </remarks>
        public override PricingInfo[] GetPricingInfo()
        {
            List <PricingInfo> priceslist = new List <PricingInfo>(50);

            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(WebPageData);

            HtmlNode           node      = htmlDoc.GetElementbyId("product-dollars");
            HtmlNodeCollection tablerows = node.SelectNodes("tbody//tr");

            // The code strips "," from quantities, i.e. it treats the page as US-formatted;
            // parse with the invariant culture so the result does not depend on the machine's
            // regional settings (presumably prices use "." as decimal separator — confirm).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (HtmlNode row in tablerows)
            {
                HtmlNodeCollection tablecolumns = row.SelectNodes("td");

                // Rows without data cells (e.g. header rows) or with too few cells are ignored.
                if (tablecolumns == null || tablecolumns.Count <= 2)
                {
                    continue;
                }

                string minqtystr    = tablecolumns[0].InnerText.Replace(",", "");
                int    minqty       = Int32.Parse(minqtystr, invariant);
                double srcunitprice = Double.Parse(tablecolumns[1].InnerText, invariant);
                double destprice    = Currency.Convert("USD", DefDestCurrency, srcunitprice);

                // 999999 is a placeholder maximum quantity; FixMaximumQtys is called on the
                // list afterwards.
                PricingInfo p = new PricingInfo("USD", DefDestCurrency, srcunitprice, destprice, minqty, 999999);
                priceslist.Add(p);
            }

            FixMaximumQtys(ref priceslist);

            return(priceslist.ToArray());
        }
Exemple #12
0
        /// <summary>
        /// Parses a country/region listing page into movie entries.
        /// </summary>
        /// <param name="page">HTML of the listing (expected to match the path /ul/li/div).</param>
        /// <param name="pageIndex">Page number stored on every returned movie.</param>
        /// <returns>
        /// One movie per list item that contains both a link and an image; an empty list when
        /// nothing matches.
        /// </returns>
        public List <Movie> AnalysisCountry(string page, int pageIndex)
        {
            var          MovieList    = new List <Movie>();
            HtmlDocument htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(page);
            HtmlNodeCollection collection = htmlDocument.DocumentNode.SelectNodes("/ul/li/div");

            // SelectNodes returns null when nothing matches.
            if (collection == null)
            {
                return(MovieList);
            }

            foreach (HtmlNode item in collection)
            {
                var detail = item.SelectSingleNode("div/div[1]/a");
                var img    = item.SelectSingleNode("div/div[1]/a/img");

                // Items without the expected link/image structure are skipped rather than
                // aborting the whole page.
                if (detail == null || img == null)
                {
                    continue;
                }

                // The \" sequences are stripped from every attribute value (presumably the
                // markup arrives embedded in an escaped string — confirm).
                var movie = new Movie();
                movie.FilmName  = img.GetAttributeValue("alt", "").Replace("\\\"", "");
                movie.PicUrl    = img.GetAttributeValue("src", "").Replace("\\\"", "");
                movie.DetailUrl = detail.GetAttributeValue("href", "").Replace("\\\"", "");
                movie.Page      = pageIndex;
                MovieList.Add(movie);
            }

            return(MovieList);
        }
        /// <summary>
        /// Tells whether the first direct &lt;span&gt; child of the node carries the text
        /// "background-color:#d9d9d9;" in its first attribute.
        /// </summary>
        /// <param name="hn">Node whose direct span children are inspected.</param>
        /// <returns>
        /// <c>true</c> when the marker is present; <c>false</c> when it is absent, when the node
        /// has no span child, or when the first span has no attributes.
        /// </returns>
        public bool iscontent(HtmlNode hn)
        {
            HtmlNodeCollection spans = hn.SelectNodes("./span");

            if (spans == null || spans.Count == 0)
            {
                return(false);
            }

            var attributes = spans[0].Attributes;

            // A span without attributes cannot carry the marker; indexing [0] would throw.
            if (attributes.Count == 0)
            {
                return(false);
            }

            // NOTE(review): only the FIRST attribute is inspected, whatever its name — this
            // presumably relies on "style" always coming first; confirm.
            return(attributes[0].Value.Contains("background-color:#d9d9d9;"));
        }
        /// <summary>
        /// Follows a chain of (XPath, position) steps starting at <paramref name="doc"/>: each step
        /// selects the nodes matching the XPath under the current node and descends into the one
        /// at the given 1-based position.
        /// </summary>
        /// <param name="doc">Node to start from.</param>
        /// <param name="confKeys">
        /// Remaining steps. The first step is dequeued from this queue; the steps after it are
        /// consumed from a copy.
        /// </param>
        /// <returns>
        /// The node reached after the last step; <paramref name="doc"/> itself when there are no
        /// steps; <c>null</c> when a step matches fewer nodes than its position or the position
        /// is below 1.
        /// </returns>
        private HtmlNode tailRecursiveSearch(HtmlNode doc, Queue <KeyValuePair <string, int> > confKeys)
        {
            // Base case: no more steps, the current node is the answer.
            if (confKeys.Count == 0)
            {
                return(doc);
            }

            KeyValuePair <string, int> currentConf = confKeys.Dequeue();
            string xpath    = currentConf.Key;
            int    position = currentConf.Value;

            HtmlNodeCollection collection = doc.SelectNodes(xpath);

            // A position below 1 selects nothing; previously it passed a null node into the
            // next step, which crashed as soon as further steps remained.
            if (collection == null || position < 1 || collection.Count < position)
            {
                return(null);
            }

            // Deeper levels work on a copy so they never consume more of the caller's queue.
            Queue <KeyValuePair <string, int> > tailQueue = new Queue <KeyValuePair <string, int> >(confKeys);

            return(tailRecursiveSearch(collection[position - 1], tailQueue));
        }
        /// <summary>
        /// Walks the paragraph nodes (and, for MM3, the h2 headings) selected from the monster
        /// node, dispatches the recognised blocks (abilities, alignment, equipment, description,
        /// published, update) to their handlers, and adds one <c>CreaturePower</c> to
        /// <c>c.CreaturePowers</c> for every remaining power paragraph.
        /// </summary>
        /// <param name="c">Creature that receives the parsed data.</param>
        /// <param name="monsterNode">Node the paragraphs are selected from.</param>
        /// <param name="versionStat">Stat block layout; only Old and MM3 are handled.</param>
        /// <exception cref="ArgumentException">Thrown for any other <paramref name="versionStat"/> value.</exception>
        private void ReadPowers(Creature c, HtmlNode monsterNode, MonsterStatType versionStat)
        {
            bool               IsTrait = false;
            HtmlNode           currentPar;
            HtmlNodeCollection paragraphs = null;

            // NOTE(review): both XPath expressions below start with "//", which in XPath selects
            // from the document root rather than relative to monsterNode — confirm intended.
            switch (versionStat)
            {
            case MonsterStatType.Old:
                paragraphs = monsterNode.SelectNodes("//p");    //[@class='flavor alt']");

                break;

            case MonsterStatType.MM3:
                paragraphs = monsterNode.SelectNodes(("//h2|//p"));
                // Equipment is cut out of the parent's plain text: everything between
                // "Equipment:" and the next "Published".
                int pos = monsterNode.ParentNode.InnerText.IndexOf("Equipment:");
                if (pos > -1)
                {
                    pos += "Equipment:".Length;
                    // NOTE(review): if "Published" does not occur after the equipment marker,
                    // pos2 is -1 and the Substring length below is negative — confirm the text
                    // always contains it.
                    int pos2 = monsterNode.ParentNode.InnerText.IndexOf("Published", pos);
                    c.Equipment = CleanHtml(monsterNode.ParentNode.InnerText.Substring(pos, pos2 - pos));
                }
                // currentPar = monsterNode.SelectSingleNode(("//h2"));
                //currentPar = currentPar.NextSibling;
                //  Debug.Assert(currentPar.Name == "h2");
                break;

            default:
                throw new ArgumentException("Invalid monsterStatType " + versionStat);
            }
            // For MM3, the action type announced by the most recent h2 heading.
            ActionType currentActionType = ActionType.None;

            // NOTE(review): paragraphs is null when SelectNodes matched nothing; the loop
            // condition would then throw — confirm a match is guaranteed.
            for (int posParagraph = 0; posParagraph < paragraphs.Count(); posParagraph++)
            {
                // currentPar is re-read from the collection on every iteration, so any
                // assignment made to it just before a 'continue' has no lasting effect.
                currentPar = paragraphs[posParagraph];
                string innerHtmlPar = currentPar.InnerHtml.Trim();
                if (String.IsNullOrEmpty(innerHtmlPar.Trim()))
                {
                    continue;
                }
                // The initiative paragraph is skipped here.
                if (innerHtmlPar.StartsWith("<b>Initiative</b>"))
                {
                    continue;
                }
                if (innerHtmlPar.StartsWith("<b>Str</b>"))
                {
                    ReadAbilities(c, innerHtmlPar);
                    continue;
                }

                // Paragraphs starting with Skills are handed to the same handler as Alignment.
                if (innerHtmlPar.StartsWith("<b>Skills</b>"))
                {
                    ManageBlockAlignement(c, currentPar, versionStat);
                    continue;
                }
                if (innerHtmlPar.StartsWith("<b>Alignment") || (currentPar.Name == "b" && innerHtmlPar.StartsWith("Alignment")))
                {
                    ManageBlockAlignement(c, currentPar, versionStat);
                    continue;
                }
                if (innerHtmlPar.StartsWith("<b>Equipment"))// || (currentPar.Name == "b" && innerHtmlPar.StartsWith("Equipment")))
                {
                    ManageBlockEquipment(c, currentPar, versionStat);
                    continue;
                }
                if (innerHtmlPar.StartsWith("<b>Description"))
                {
                    ManageBlockDescription(c, currentPar);
                    currentPar = currentPar.NextSibling;
                    continue;
                }
                if (innerHtmlPar.StartsWith("Published in"))
                {
                    ManageBlockPublished(c, currentPar, versionStat);
                    currentPar = currentPar.NextSibling;
                    continue;
                }
                if (innerHtmlPar.StartsWith("Update"))
                {
                    string update = innerHtmlPar;
                    // NOTE(review): node names elsewhere in this method are compared without
                    // angle brackets ("p", "h2", "img"), so the "<p>" test presumably never
                    // stops the loop before the last sibling; the loop also appends the same
                    // innerHtmlPar each time rather than the sibling's content — confirm.
                    while (currentPar.NextSibling != null && currentPar.NextSibling.Name != "<p>")
                    {
                        currentPar = currentPar.NextSibling;
                        update    += Environment.NewLine + innerHtmlPar;
                    }
                    ManageblockUpdate(c, update, versionStat);
                    continue;
                }
                if (versionStat == MonsterStatType.MM3)
                {
                    // MM3 headings switch the trait flag / action type applied to the powers
                    // that follow; headings themselves produce no power.
                    if (currentPar.Name == "h2")
                    {
                        IsTrait = innerHtmlPar.Trim() == "Traits";
                        if (innerHtmlPar == "Standard Actions")
                        {
                            currentActionType = ActionType.Standard;
                        }
                        if (innerHtmlPar == "Minor Actions")
                        {
                            currentActionType = ActionType.Minor;
                        }
                        if (innerHtmlPar == "Move Actions")
                        {
                            currentActionType = ActionType.Move;
                        }
                        if (innerHtmlPar == "Triggered Actions")
                        {
                            currentActionType = ActionType.Reaction;
                        }
                        continue;
                    }
                }
                // NOTE(review): Attributes["class"] is dereferenced without the null check used
                // further down in the Old branch; a <p> without a class attribute would
                // presumably throw here — confirm.
                if (currentPar.Name != "p" || (currentPar.Attributes["class"].Value == "flavorIndent") || currentPar.InnerHtml.StartsWith("<i>")) //power details
                {
                    continue;
                }
                if (currentPar.InnerHtml.Contains("<b>Aura</b>"))
                {
                    continue;
                }
                // From here on the paragraph is treated as the title line of a power.
                CreaturePower cp = new CreaturePower();
                cp.Name = GetTextUntilTag(innerHtmlPar, "<b>");
                string afterName = GetTextUntilTag(innerHtmlPar, "</b>");
                cp.Keywords = GetTextUntilTag(innerHtmlPar, "<b>", 2) ?? String.Empty;
                // A leading <img> supplies the icon source that the checks below inspect.
                string src = String.Empty;
                if (currentPar.FirstChild.Name == "img")
                {
                    src = currentPar.FirstChild.Attributes["src"].Value.ToString();
                }
                cp.Action = new PowerAction();
                if (_regBasicAttack.Match(src).Success)
                {
                    cp.Action.Use = PowerUseType.Basic;
                }
                if (src.Contains("symbol/Z2a.gif"))
                {
                    cp.Range = "Melee";
                }

                string powerTitleHtml = innerHtmlPar;
                // Step to the node after the title; the version-specific readers start there.
                currentPar = currentPar.NextSibling;
                switch (versionStat)
                {
                case MonsterStatType.Old:
                    ReadOldPower(currentPar, cp, String.IsNullOrEmpty(src)     //trait should have no action image
                                 , afterName, powerTitleHtml);
                    // NOTE(review): currentPar is null here when the title paragraph was the
                    // last sibling, in which case this dereference would throw — confirm.
                    currentPar = currentPar.NextSibling;
                    // Following "flavorindent" paragraphs are appended to the power's details;
                    // posParagraph is advanced once per appended paragraph.
                    while ((currentPar != null) && (currentPar.Name == "p") && (currentPar.Attributes["class"] != null) && (currentPar.Attributes["class"].Value.ToLower() == "flavorindent"))
                    {     // more details for action
                        cp.Details = cp.Details + Environment.NewLine + CleanHtml(currentPar.InnerText);
                        posParagraph++;
                        currentPar = currentPar.NextSibling;
                    }
                    break;

                case MonsterStatType.MM3:
                    if (IsTrait)
                    {
                        cp.Action = null;
                    }
                    else
                    {
                        cp.Action.Action = currentActionType;
                    }
                    // The returned node is stored in currentPar but overwritten at the top of
                    // the next iteration.
                    currentPar = ReadMM3Power(currentPar, cp, IsTrait, powerTitleHtml.Replace("  ", " "));
                    break;

                default:
                    throw new ArgumentException("Invalid monsterStatType " + versionStat);
                }

                c.CreaturePowers.Add(cp);
            }
        }
Exemple #16
0
        /// <summary>
        /// Scrapes the observation tables of the page at <c>_urlPast24hr</c> and converts each
        /// data row into an <c>EnvironmentCanadaData</c> record.
        /// </summary>
        /// <param name="fromCache">
        /// <c>true</c> to obtain the HTML through <c>GetHtmlFromCacheOrWeb</c>, <c>false</c> to
        /// use <c>GetHtmlFromWeb</c>.
        /// </param>
        /// <returns>
        /// The rows that parsed successfully; empty when the page is missing, shorter than
        /// 25000 characters, or contains no table body. Rows that fail to parse are logged
        /// and skipped.
        /// </returns>
        public static List <EnvironmentCanadaData> Past24hr(bool fromCache)
        {
            string html = fromCache ?
                          WebScraper.GetHtmlFromCacheOrWeb(_urlPast24hr) :
                          WebScraper.GetHtmlFromWeb(_urlPast24hr);

            List <EnvironmentCanadaData> ecdList = new List <EnvironmentCanadaData>();

            // Anything shorter than 25000 characters is treated as not being a usable page.
            if (html == null || html.Length < 25000)
            {
                return(ecdList);
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html); //note: var doc = new HtmlWeb().Load(file); allows  "//table/tbody/tr"

            HtmlNodeCollection tableBodies = doc.DocumentNode.SelectNodes("//table/tbody");

            // SelectNodes returns null when nothing matches.
            if (tableBodies == null)
            {
                return(ecdList);
            }

            // Date taken from the most recent 3-node row; it is combined with the time of the
            // data rows that follow it.
            string sDate = "";

            foreach (HtmlNode tbody in tableBodies)
            {
                foreach (HtmlNode r in tbody.ChildNodes.Where(n => n.ChildNodes.Any()))
                {
                    HtmlNodeCollection c = r.ChildNodes;
                    int cnt = c.Count;

                    int i = 0;
                    foreach (HtmlNode t in c)
                    {
                        Debug.Write($" {i++}:'{t.InnerText.Replace("\n", "").Replace("\t", "").Replace("    ", " ").Replace("   ", " ").Replace("  ", " ").Trim()}' ");
                    }

                    if (cnt == 3)
                    {
                        sDate = c[1].InnerText.Trim();
                        continue;
                    }

                    if (cnt < 17)
                    {
                        continue;
                    }

                    // NOTE(review): only odd child indexes hold values below; presumably the
                    // even ones are whitespace text nodes between the cells — confirm.
                    try
                    {
                        // -999 marks "temperature not parsed"; such records are not added.
                        EnvironmentCanadaData e4 = new EnvironmentCanadaData {
                            TempAir = -999
                        };
                        e4.TakenAt = Convert.ToDateTime(sDate + ' ' + c[1].InnerText);

                        e4.Conditions = c[3].InnerText;

                        // When a cell spans several lines, the value is read from the second one.
                        string[] c5 = c[5].InnerText.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
                        if (c5.Length > 1)
                        {
                            e4.TempAir = double.Parse(c5[1].Trim(' ').Trim('\n').Trim(' ').Trim('\n').Trim('↑').Trim('↓').Trim('(').Trim(')').Replace("(", "").Trim());
                        }
                        else
                        {
                            e4.TempAir = double.Parse(c[5].InnerText.Trim(' ').Trim('\n').Trim(' ').Trim('\n').Trim('↑').Trim('↓').Trim());
                        }

                        string[] c7 = c[7].InnerText.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
                        if (c7.Length > 1)
                        {
                            e4.Humidity = double.Parse(c7[1].Trim(' ').Trim('\n').Trim(' ').Trim('\n').Trim('↑').Trim('↓').Trim('(').Trim(')').Replace("(", "").Trim());
                        }
                        else
                        {
                            e4.Humidity = double.Parse(c[7].InnerText);
                        }

                        // Pressure and visibility are addressed from the end of the row.
                        e4.Pressure   = double.Parse(c[cnt - 8].InnerText);
                        e4.Visibility = double.Parse(c[cnt - 4].InnerText);
                        if (cnt != 27 && !c[13].InnerText.Contains("*"))
                        {
                            e4.Humidex = double.Parse(c[13].InnerText);
                        }

                        // Wind cell: 1 token = direction only, 2 = direction + speed,
                        // 4 = direction + speed + (one token) + gust. Other counts are ignored.
                        string   s = c[11].InnerText.Trim();
                        string[] w = s.Split(new char[] { ' ', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                        switch (w.Length)
                        {
                        case 1: e4.WindDir = w[0]; e4.WindKmH = 0; break;

                        case 2: e4.WindDir = w[0]; e4.WindKmH = Convert.ToDouble(w[1]); break;

                        case 4: e4.WindDir = w[0]; e4.WindKmH = Convert.ToDouble(w[1]); e4.WindGust = Convert.ToDouble(w[3]); break;

                        default: break;
                        }

                        if (e4.TempAir != -999)
                        {
                            ecdList.Add(e4);
                        }
                    }
                    catch (Exception ex) { ex.Log(); }
                }
            }

            return(ecdList);
        }
        /// <summary>
        /// Click handler that adds the items sold by each merchant to the merchant XML export.
        /// </summary>
        /// <remarks>
        /// For every <c>row</c> element of the export, the merchant's page is loaded from
        /// <c>http://wiki.project1999.com/</c>, the first list following the <c>Items_Sold</c> heading is read,
        /// and one <c>field</c> element with <c>name="item_name"</c> is appended to the row per link text.
        /// The enriched document is saved under a second file name and a message box signals completion.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            var document = XDocument.Load(@"C:\Users\LENOVO\Desktop\export_merchants.xml");

            // Snapshot the rows first: the loop below adds elements to the very tree this query walks.
            var rows    = document.Descendants("row").ToList();
            var htmlWeb = new HtmlWeb();

            foreach (var row in rows)
            {
                // The cast yields null (instead of throwing) for a field without a "name" attribute.
                var merchant_name = row.Elements("field")
                                    .Where(a => (string)a.Attribute("name") == "name")
                                    .Select(p => p.Value)
                                    .FirstOrDefault();
                if (string.IsNullOrEmpty(merchant_name))
                {
                    // Nothing to look up for a row without a merchant name.
                    continue;
                }

                var htmlDoc = htmlWeb.Load(string.Concat(
                                               "http://wiki.project1999.com/",
                                               merchant_name));

                // <span class="mw-headline" id="Items_Sold"> sits inside the section heading element.
                var heading = htmlDoc.DocumentNode.SelectSingleNode("//span[@id='Items_Sold']");
                if (heading == null || heading.ParentNode == null)
                {
                    continue;
                }

                // Relative axis from the heading element. The former "//ul" was an absolute path and
                // therefore matched the first list of the whole page, whatever the context node was.
                HtmlNode itemList = heading.ParentNode.SelectSingleNode("following-sibling::ul[1]");
                if (itemList == null)
                {
                    continue;
                }

                foreach (var li in itemList.ChildNodes)
                {
                    // NOTE(review): "None" presumably marks a merchant that sells nothing - confirm.
                    if (li.InnerHtml.Contains(@"None"))
                    {
                        break;
                    }

                    foreach (var item in li.Descendants("a").Select(a => a.InnerText))
                    {
                        // Add each item to the current row.
                        var item_row = new XElement("field", new XAttribute("name", "item_name"));
                        item_row.Value = item;
                        row.Add(item_row);
                    }
                }
            }

            document.Save(@"C:\Users\LENOVO\Desktop\export_merchants_P99_FIX1.xml");
            MessageBox.Show("Done !");
        }
Exemple #18
0
        /// <summary>
        /// Synchronously loads the page at <paramref name="url"/> and extracts station names and stream URLs.
        /// </summary>
        /// <remarks>
        /// Names come from <c>h4</c> elements whose class attribute contains "a" and are stored as
        /// "N). name". URLs are cut out of the markup of the "PLS Playlist File" anchors: the text between
        /// <c>playpls', '</c> and the next single quote. A URL containing "listen.pls" is cut at that point
        /// and ends in "m3u" instead. Results are written to <c>radiostationsName</c> and
        /// <c>radiostationsUrl</c>.
        /// </remarks>
        /// <param name="url">Address of the page to load and parse.</param>
        public void GetHtmlNotAsync(string url)
        {
            HtmlWeb      web          = new HtmlWeb();
            HtmlDocument htmlDocument = web.Load(url);

            // Materialise once; the lazy query used to be enumerated twice (count + loop).
            List <HtmlNode> nameNodes = htmlDocument.DocumentNode.Descendants("h4")
                                        .Where(node => node.Attributes.Contains("class") &&
                                               node.Attributes["class"].Value.Contains("a"))
                                        .ToList();

            // SelectNodes returns null, not an empty collection, when nothing matches.
            HtmlNodeCollection urlNodes = htmlDocument.DocumentNode.SelectNodes("//small[@class='hidden-xs']/a[@title='PLS Playlist File']");
            int urlCount = urlNodes == null ? 0 : urlNodes.Count;

            string[] arrWithNames = new string[nameNodes.Count];
            string[] arrWithUrls  = new string[urlCount];

            for (int i = 0; i < nameNodes.Count; i++)
            {
                arrWithNames[i] = string.Format("{0}). {1}", i + 1, nameNodes[i].InnerText);
            }

            for (int j = 0; j < urlCount; j++)
            {
                arrWithUrls[j] = string.Format("{0}). :{1}", j + 1, urlNodes[j].OuterHtml);
            }

            // Everything after each marker starts with a URL that is terminated by a single quote.
            String   SplitStr          = "playpls', '";
            String   UrlString         = string.Join(",", arrWithUrls);
            String[] splittedUrlString = UrlString.Split(SplitStr).Skip(1).ToArray();

            int k = 0;

            foreach (var piece in splittedUrlString)
            {
                int quote = piece.IndexOf('\'');

                // The bound check protects against markup holding more markers than anchors.
                if (quote >= 0 && k < arrWithUrls.Length)
                {
                    arrWithUrls[k] = piece.Substring(0, quote);
                    k = k + 1;
                }
            }

            for (int l = 0; l < arrWithUrls.Length; l++)
            {
                if (arrWithUrls[l].Contains("listen.pls"))
                {
                    // '^' is a temporary cut marker: keep everything before it, i.e. "...m3u".
                    string marked = arrWithUrls[l].Replace("listen.pls", "m3u^");
                    arrWithUrls[l] = marked.Substring(0, marked.IndexOf('^'));
                }
            }

            radiostationsName = arrWithNames;
            radiostationsUrl  = arrWithUrls;
        }
Exemple #19
0
        /// <summary>
        /// Parses the 24-hour forecast page into a list of <see cref="EnvironmentCanadaData"/> entries.
        /// </summary>
        /// <remarks>
        /// Every child of each <c>//table/tbody</c> node is treated as a row. A row with exactly three child
        /// nodes supplies the date (child 1) for the rows that follow. A row with eleven or more child nodes
        /// is a data row: child 1 is the time, 3 the air temperature, 5 the conditions, 9 the wind and, when
        /// present and numeric, 11 the humidex. Rows that fail to parse are traced and skipped.
        /// </remarks>
        /// <param name="fromCache">
        /// When true the HTML comes from <c>WebScraper.GetHtmlFromCacheOrWeb</c>, otherwise from
        /// <c>WebScraper.GetHtmlFromWeb</c>.
        /// </param>
        /// <returns>
        /// The parsed entries; empty when the HTML is null, shorter than 25000 characters, or contains no
        /// table body.
        /// </returns>
        public static List <EnvironmentCanadaData> Fore24hourAtButtonville(bool fromCache)
        {
            string sDate = "";
            string html  = fromCache ?
                           WebScraper.GetHtmlFromCacheOrWeb(_urlFore24hr) :
                           WebScraper.GetHtmlFromWeb(_urlFore24hr);

            List <EnvironmentCanadaData> ecdList = new List <EnvironmentCanadaData>();

            if (html == null || html.Length < 25000)
            {
                return(ecdList);
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html); //note: var doc = new HtmlWeb().Load(file); allows  "//table/tbody/tr"

            // SelectNodes returns null, not an empty collection, when nothing matches.
            HtmlNodeCollection tableBodies = doc.DocumentNode.SelectNodes("//table/tbody");
            if (tableBodies == null)
            {
                return(ecdList);
            }

            foreach (HtmlNode tbody in tableBodies)
            {
                foreach (HtmlNode r in tbody.ChildNodes)
                {
                    HtmlNodeCollection c = r.ChildNodes;

                    // Debug-only dump of the row's cells.
                    int i = 0;
                    foreach (HtmlNode t in c)
                    {
                        Debug.Write(string.Format("{0,3}:{1} \t", i++, t.InnerText.Trim()));
                    }

                    if (c.Count == 3)
                    {
                        sDate = c[1].InnerText.Trim();
                    }
                    else if (c.Count >= 11)
                    {
                        try
                        {
                            // -999 is the "not parsed" sentinel checked before the entry is added.
                            EnvironmentCanadaData e4 = new EnvironmentCanadaData {
                                TempAir = -999
                            };

                            // NOTE(review): date and number parsing use the current culture - confirm it matches the page.
                            e4.TakenAt = Convert.ToDateTime(sDate + ' ' + c[1].InnerText);

                            e4.TempAir = double.Parse(c[3].InnerText.Trim(' ').Trim('\n').Trim(' ').Trim('\n').Trim('↑').Trim('↓').Trim());
                            if (c.Count > 11 && double.TryParse(c[11].InnerText, out double dbl))
                            {
                                e4.Humidex = dbl;
                            }
                            e4.Conditions = c[5].InnerText.Trim();

                            // The original separator list held the space character twice; the second one is
                            // written here as an explicit non-breaking space (NOTE(review): confirm intent).
                            string   wind = c[9].InnerText.Trim();
                            string[] w    = wind.Split(new char[] { ' ', '\u00A0', '\t', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                            switch (w.Length)
                            {
                            case 1: e4.WindDir = w[0]; e4.WindKmH = 0; break;

                            case 2: e4.WindDir = w[0]; e4.WindKmH = Convert.ToDouble(w[1]); break;

                            // Four tokens: direction, speed, a word that is ignored, gust speed.
                            case 4: e4.WindDir = w[0]; e4.WindKmH = Convert.ToDouble(w[1]); e4.WindGust = Convert.ToDouble(w[3]); break;

                            default: break;
                            }

                            if (e4.TempAir != -999)
                            {
                                ecdList.Add(e4);
                            }
                        }
                        catch (Exception ex) { Trace.WriteLine(ex.Message, ">>> " + System.Reflection.MethodInfo.GetCurrentMethod().DeclaringType.Name + "." + System.Reflection.MethodInfo.GetCurrentMethod().Name); }
                    }
                }
            }

            return(ecdList);
        }
Exemple #20
0
        /// <summary>
        /// Downloads the HTML of every recipe not yet marked as downloaded and exports it to Word.
        /// </summary>
        /// <remarks>
        /// For each URL returned by the <c>IsDownload = 0</c> query the page is fetched and its title read;
        /// a folder named after the title is created under <c>outPath</c>; the cover and step images are
        /// downloaded and replaced in the markup by "图片N" placeholders; the title is stored in
        /// <c>IcookMenu</c>; and header plus main markup are passed to <c>myUtils.TransToWord</c> and
        /// <c>myUtils.InsertPictureToWord</c>. When a step image or the conversion fails, the folder is
        /// deleted again. Exceptions are logged per recipe and do not stop the loop.
        /// </remarks>
        public void DownLoadHtml()
        {
            string sqlStr = "select Url from IcookMenu where IsDownload = 0";

            object[] menuUrlObj = sh.GetField(sqlStr);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

            // The result is not used; the request is kept in case the helper has side effects
            // (NOTE(review): presumably it establishes the session for the cookie - confirm).
            hh.GetHtmlData(mainUrl, cookie);
            int htmlCount = 0;

            foreach (var menuUrl in menuUrlObj)
            {
                try
                {
                    ArrayList menuResList = hh.GetHtmlData(menuUrl.ToString(), cookie);
                    doc.LoadHtml(menuResList[1].ToString());

                    // Title
                    HtmlNode h1Node = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header-title']/h1[@class='title']");
                    string   title  = myUtils.StringConvert(h1Node.InnerText).Trim();
                    title = myUtils.FilterPath(title);

                    string fullFoldPath = outPath + title + @"\";
                    if (!Directory.Exists(fullFoldPath))         // check whether the folder exists
                    {
                        Directory.CreateDirectory(fullFoldPath); // create the new folder
                    }

                    HtmlNode headerNode  = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']");
                    HtmlNode headerChild = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']");
                    HtmlNode rightChild  = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']/div[@class='header-col right-col']");

                    // Remove the right-hand column; a page without one is left as it is.
                    if (rightChild != null)
                    {
                        headerChild.RemoveChild(rightChild);
                    }

                    // Download the cover picture and put a "图片1" placeholder in its place.
                    HtmlNode headerImgParentNode = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']/div[@class='header-col left-col']/div[@class='recipe-cover']");
                    HtmlNode headerImgChild      = doc.DocumentNode.SelectSingleNode("//img[@class='main-pic']");
                    string   headerImgSrc        = headerImgChild.GetAttributeValue("src", "");
                    myUtils.DownLoadImage(headerImgSrc, fullFoldPath + @"图片1.jpg", cookie);
                    headerImgParentNode.RemoveAllChildren();

                    HtmlNode newheaderImgNode = doc.CreateElement("div");
                    newheaderImgNode.InnerHtml = "图片1";
                    headerImgParentNode.AppendChild(newheaderImgNode);

                    string headerHtml = headerNode.InnerHtml; // header content

                    HtmlNode mainNode  = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-main']");
                    HtmlNode mainChild = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-main']/div[@class='recipe-ad-placeholder']");

                    // The ad placeholder is dropped when present; its absence must not fail the recipe.
                    if (mainChild != null)
                    {
                        mainNode.RemoveChild(mainChild);
                    }

                    // Step pictures are numbered from 2 because the cover is picture 1.
                    HtmlNodeCollection imgParentNodeList = doc.DocumentNode.SelectNodes("//div[@class='step-cover']");
                    int totalImgCount = 0, index = 0;
                    if (imgParentNodeList != null)
                    {
                        totalImgCount = imgParentNodeList.Count;
                        for (int n = 0; n < imgParentNodeList.Count; n++)
                        {
                            try
                            {
                                HtmlNode stepCover    = imgParentNodeList[n];
                                HtmlNode imgChildNode = stepCover.SelectSingleNode("a");
                                string   imgUrl       = imgChildNode.GetAttributeValue("href", "").Replace("medium_", "large_");
                                myUtils.DownLoadImage(imgUrl, fullFoldPath + $"图片{n + 2}.jpg", cookie);
                                stepCover.RemoveAllChildren();
                                HtmlNode newImgNode = doc.CreateElement("div");
                                newImgNode.InnerHtml = $"图片{n + 2}";
                                stepCover.AppendChild(newImgNode);
                                index++; // counts successfully processed pictures
                            }
                            catch (Exception ex)
                            {
                                myUtils.WriteLog(ex);
                            }
                        }
                    }

                    string mainStr = mainNode.InnerHtml; // main content
                    string allStr  = headerHtml + mainStr;

                    // Title and URL are scraped text, so single quotes are doubled before they are embedded
                    // in the statement. NOTE(review): switch to parameters if the SQL helper supports them.
                    string sqlTitle = title.Replace("'", "''");
                    string sqlUrl   = menuUrl.ToString().Replace("'", "''");

                    sqlStr = $"UPDATE IcookMenu SET Title = '{sqlTitle}' WHERE Url = '{sqlUrl}'";
                    sh.RunSql(sqlStr);

                    // The export only counts when every step picture was processed and both Word steps succeed.
                    bool isOk = totalImgCount == index &&
                                myUtils.TransToWord(allStr, title, fullFoldPath) &&
                                myUtils.InsertPictureToWord(outPath, title);

                    if (isOk)
                    {
                        sqlStr = $"UPDATE IcookMenu SET IsDownload = 1 WHERE Url = '{sqlUrl}'";
                        sh.RunSql(sqlStr);
                        htmlCount++;
                        myUtils.UpdateLabel(label3, htmlCount);
                        myUtils.UpdateListBox(listBox1, title);
                    }
                    else if (Directory.Exists(fullFoldPath))
                    {
                        // Discard the partial download.
                        Directory.Delete(fullFoldPath, true);
                    }
                }
                catch (Exception e)
                {
                    myUtils.WriteLog(e);
                }
            }
        }
Exemple #21
0
        /// <summary>
        /// Tries to build a feed for a web page using the given feed definition.
        /// </summary>
        /// <remarks>
        /// The page download is currently commented out, so <c>loadSuccess</c> never becomes true and the
        /// method returns with <paramref name="feed"/> still null. The parsing code that follows is kept
        /// for when the download is restored.
        /// </remarks>
        /// <param name="pageUrl">Address of the page; it must contain the definition's <c>BaseURL</c> (compared in lower case).</param>
        /// <param name="feed">Set to null on entry; receives the new feed once a page has been loaded.</param>
        /// <param name="webPageFeedDef">
        /// Definition supplying the base URL and the XPath expressions for titles and update times.
        /// When null the method returns immediately.
        /// </param>
        public void Read(string pageUrl, ref Feed feed, WebPageFeedDef webPageFeedDef)
        {
            feed = null;

            // No definition, or a page that does not belong to the definition's site: nothing to do.
            if (webPageFeedDef == null || !pageUrl.ToLower().Contains(webPageFeedDef.BaseURL.ToLower()))
            {
                return;
            }

            bool   loadSuccess = false;
            string content     = "";

            /*
             * Disabled download of the main page:
             *
             * try
             * {
             *  content = new System.Net.WebClient().DownloadString(pageUrl);
             *  loadSuccess = true;
             * }
             * catch (Exception ex)
             * {
             *  Console.WriteLine("Error while getting feed-webpage: " + Environment.NewLine + ex.Message);
             * }
             */
            if (!loadSuccess)
            {
                return;
            }

            var htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.OptionAutoCloseOnEnd = true;
            htmlDoc.OptionFixNestedTags  = true;
            htmlDoc.LoadHtml(content);

            try
            {
                Clipboard.SetText(content);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }

            if (htmlDoc.DocumentNode == null)
            {
                return;
            }

            feed         = new Feed();
            feed.FeedURL = pageUrl;

            // Page title, when the document has one.
            HtmlNode pageTitleNode = htmlDoc.DocumentNode.Descendants("title").FirstOrDefault();
            if (pageTitleNode == null)
            {
                Console.WriteLine("titlenode is null!");
            }
            else
            {
                feed.Title = System.Web.HttpUtility.HtmlDecode(pageTitleNode.InnerText);
            }

            // Feed items: one per title/time node pair.
            string classID_Title      = webPageFeedDef.ClassID_Title;
            string classID_UpdateTime = webPageFeedDef.ClassID_UpdateTime;

            HtmlNodeCollection titleNodes = htmlDoc.DocumentNode.SelectNodes(classID_Title);
            HtmlNodeCollection timeNodes  = htmlDoc.DocumentNode.SelectNodes(classID_UpdateTime);

            try
            {
                if (titleNodes == null || timeNodes == null)
                {
                    Console.WriteLine("FeedURL: " + feed.FeedURL + "  Title = " + feed.Title + ": no Chapter-Nodes detected. DetectionText: '" + classID_Title + "'");

                    Console.WriteLine(timeNodes == null
                                      ? "TimeNodes is null."
                                      : ("Number of timenodes = " + timeNodes.Count()));
                }
                else if (timeNodes.Count > 0 && titleNodes.Count > 0)
                {
                    feed.Items = new List <FeedItem>();

                    // Pair titles with update times up to the shorter of the two lists.
                    int pairCount = Math.Min(timeNodes.Count, titleNodes.Count);
                    for (int n = 0; n < pairCount; n++)
                    {
                        HtmlNode entryNode = titleNodes[n];
                        string   outer     = entryNode.OuterHtml;

                        // Relative link: everything from the first '/' of the markup ...
                        string suburl = outer;
                        if (suburl.Contains("/"))
                        {
                            suburl = suburl.Remove(0, outer.IndexOf("/"));
                        }

                        // ... up to the next double quote, appended to the base URL.
                        string url = webPageFeedDef.BaseURL;
                        if (suburl.Contains("\""))
                        {
                            url += suburl.Remove(suburl.IndexOf("\""));
                        }

                        // Item title: the text after the last marker found, up to the next double quote.
                        string title = System.Web.HttpUtility.HtmlDecode(entryNode.InnerHtml);
                        foreach (string titleMarker in titleMarkers)
                        {
                            if (!title.ToLower().Contains(titleMarker))
                            {
                                continue;
                            }

                            title = title.Remove(0, title.ToLower().LastIndexOf(titleMarker) + titleMarker.Length);
                            title = title.Remove(title.IndexOf("\""));
                        }

                        DateTime updateTime = getDateTime(timeNodes[n].InnerHtml);

                        feed.Items.Add(new FeedItem
                        {
                            Id             = url,
                            Link           = url,
                            PublishingDate = updateTime,
                            Read           = false,
                            Title          = title
                        });
                    }

                    feed.Updated = true;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Innerexception 1: " + ex.Message);
            }
        }
Exemple #22
0
        /// <summary>
        /// Rebuilds the static node pools (training, unseen-test and seen-test) and clears the query cache.
        /// </summary>
        /// <remarks>
        /// Training documents fill <c>TargetNodes</c>, <c>TargetNodesPrecision</c> and <c>NonTargetNodes</c>.
        /// Each training document contributes at most as many target nodes as the smallest selection found
        /// among the training documents (capped at 100). Documents outside the training set fill the
        /// <c>TEST*</c> pools, and entries of <c>testDocsAndNames</c> whose name is in the training set fill
        /// the <c>TESTSeen*</c> pools. Documents that cannot be found, or that contain no node carrying the
        /// selection attribute, are skipped in every pool.
        /// </remarks>
        /// <param name="trainingDocs">Names of the training documents; when null, all document names are used.</param>
        public static void Initiate(HashSet <String> trainingDocs = null)
        {
            // Set the names of the training documents.
            trainingDocsNames = trainingDocs ?? new HashSet <String>(allDocsNames);

            // Reset the DOM pool variables.
            TargetNodes          = new HashSet <HtmlNode>();
            TargetNodesPrecision = new HashSet <HtmlNode>();
            NonTargetNodes       = new HashSet <HtmlNode>();

            TESTTargetNodes          = new HashSet <HtmlNode>();
            TESTTargetNodesPrecision = new HashSet <HtmlNode>();
            TESTNonTargetNodes       = new HashSet <HtmlNode>();

            TESTSeenTargetNodes          = new HashSet <HtmlNode>();
            TESTSeenTargetNodesPrecision = new HashSet <HtmlNode>();
            TESTSeenNonTargetNodes       = new HashSet <HtmlNode>();

            // Reset the query result cache.
            queryResultCache = new Dictionary <string, HashSet <HtmlNode> >();

            // Smallest number of selected nodes in any training document, capped at 100.
            int minSelected = 100;

            foreach (String srcDomName in trainingDocsNames)
            {
                HtmlNode srcDom;
                if (!docsAndNames.TryGetValue(srcDomName, out srcDom) || srcDom == null)
                {
                    continue;
                }

                HtmlNodeCollection selected = srcDom.SelectNodes("//*[@" + selectionAttribute + "]");
                if (selected != null && selected.Count > 0 && selected.Count < minSelected)
                {
                    minSelected = selected.Count;
                }
            }

            // Training pools: the target pool takes the same number of nodes from every document.
            foreach (String srcDomName in trainingDocsNames)
            {
                HtmlNode srcDom;
                docsAndNames.TryGetValue(srcDomName, out srcDom);
                CollectLabeledNodes(srcDom, minSelected, TargetNodes, TargetNodesPrecision, NonTargetNodes);
            }

            // Test pools: documents that are not part of the training set.
            foreach (String srcDomName in allDocsNames.Except(trainingDocsNames))
            {
                HtmlNode srcDom;
                docsAndNames.TryGetValue(srcDomName, out srcDom);
                CollectLabeledNodes(srcDom, int.MaxValue, TESTTargetNodes, TESTTargetNodesPrecision, TESTNonTargetNodes);
            }

            // Seen-test pools: test documents whose name is also in the training set.
            foreach (String srcDomName in testDocsAndNames.Keys.Intersect(trainingDocsNames))
            {
                HtmlNode srcDom;
                testDocsAndNames.TryGetValue(srcDomName, out srcDom);
                CollectLabeledNodes(srcDom, int.MaxValue, TESTSeenTargetNodes, TESTSeenTargetNodesPrecision, TESTSeenNonTargetNodes);
            }
        }

        /// <summary>
        /// Adds the nodes of one document to a target pool, a precision pool and a non-target pool.
        /// </summary>
        /// <param name="srcDom">Document node to query; a null document is ignored.</param>
        /// <param name="maxTargets">Maximum number of selected nodes added to <paramref name="targets"/>.</param>
        /// <param name="targets">Receives the nodes carrying the selection attribute.</param>
        /// <param name="targetsPrecision">
        /// Receives the selected nodes, their descendants, and the nodes carrying the optional selection
        /// attribute together with their descendants.
        /// </param>
        /// <param name="nonTargets">Receives every other element of the document.</param>
        private static void CollectLabeledNodes(HtmlNode srcDom, int maxTargets, HashSet <HtmlNode> targets, HashSet <HtmlNode> targetsPrecision, HashSet <HtmlNode> nonTargets)
        {
            if (srcDom == null)
            {
                return;
            }

            // SelectNodes returns null, not an empty collection, when nothing matches. A document
            // without selected nodes contributes nothing, as in the original training loop.
            HtmlNodeCollection selected = srcDom.SelectNodes("//*[@" + selectionAttribute + "]");
            if (selected == null)
            {
                return;
            }

            targets.UnionWith(selected.Take(maxTargets));
            targetsPrecision.UnionWith(selected);

            HtmlNodeCollection selectedChildren = srcDom.SelectNodes("//*[@" + selectionAttribute + "]//* | //*[@" + optionalSelectionAttribute + "] | //*[@" + optionalSelectionAttribute + "]//*");
            if (selectedChildren != null)
            {
                targetsPrecision.UnionWith(selectedChildren);
            }

            // Everything that is neither selected nor a selected child is a non-target node.
            HtmlNodeCollection all       = srcDom.SelectNodes("//*");
            HashSet <HtmlNode> nonTarget = new HashSet <HtmlNode>(all);
            nonTarget.ExceptWith(selected);
            if (selectedChildren != null)
            {
                nonTarget.ExceptWith(selectedChildren);
            }
            nonTargets.UnionWith(nonTarget);
        }
        public static void GetPoolData()
        {
            string  response  = string.Empty;
            Boolean header    = true;
            string  sheetName = "Sheet_17_04_2018_12_25";

            var service = CreateGoogleSheets(sheetName);

            List <KeyValuePair <string, string> > competitionList = new List <KeyValuePair <string, string> >();

            try
            {
                HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create("http://websites.sportstg.com/comp_info.cgi?round=1&a=ROUND&client=0-11971-0-487287-0&pool=1");
                webRequest.Method    = "GET";
                webRequest.Host      = webRequest.RequestUri.Host;
                webRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";

                HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();
                StreamReader    stream      = new StreamReader(webResponse.GetResponseStream());
                response = stream.ReadToEnd();
                stream.Dispose();

                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(response);

                HtmlNodeCollection competitionNodeList = htmlDoc.DocumentNode.SelectNodes("//select[@id='compselectbox']");

                if (competitionNodeList != null)
                {
                    if (competitionNodeList.Count() > 0)
                    {
                        for (int x = 0; x < competitionNodeList.Count(); x++)
                        {
                            HtmlNodeCollection subCompetitionNodeList = competitionNodeList[x].SelectNodes(".//option");

                            for (int y = 0; y < subCompetitionNodeList.Count(); y++)
                            {
                                string competitionName = subCompetitionNodeList[y].InnerText.Replace("&nbsp;", "").Trim().ToString();
                                string competitionCode = subCompetitionNodeList[y].Attributes["value"].Value.Trim().ToString();
                                if (competitionCode != "")
                                {
                                    string teamUrl = "http://websites.sportstg.com/comp_info.cgi?c=" + competitionCode + "&a=ROUND#";
                                    competitionList.Add(new KeyValuePair <string, string>(competitionName, teamUrl));
                                }
                            }
                        }
                    }
                }

                if (competitionList.Count() > 0)
                {
                    foreach (var item in competitionList)
                    {
                        if (item.Value != "")
                        {
                            List <KeyValuePair <string, string> > poolList  = new List <KeyValuePair <string, string> >();
                            List <KeyValuePair <string, string> > roundList = new List <KeyValuePair <string, string> >();
                            string poolResponse = PoolHandler.GetResponseFromUrl(item.Value);

                            HtmlAgilityPack.HtmlDocument htmlnewDoc = new HtmlAgilityPack.HtmlDocument();
                            htmlnewDoc.LoadHtml(poolResponse);

                            HtmlNodeCollection htmlPools = htmlnewDoc.DocumentNode.SelectNodes("//div[@class='fixoptions']//div[@class='nonactpool-wrap']//a");

                            #region Retrive Pools List
                            if (htmlPools != null)
                            {
                                if (htmlPools.Count() > 0)
                                {
                                    for (int j = 0; j < htmlPools.Count(); j++)
                                    {
                                        string poolTitle = htmlPools[j].InnerText.Trim().ToString();
                                        string poolUrl   = htmlPools[j].Attributes["href"].Value.ToString();
                                        if (poolTitle != "Final" && poolUrl != "#")
                                        {
                                            poolList.Add(new KeyValuePair <string, string>(poolTitle, "http://websites.sportstg.com/" + poolUrl.Replace("amp;", "").Trim().ToString()));
                                        }
                                    }
                                }
                            }
                            #endregion

                            if (poolList.Count() > 0)
                            {
                                foreach (var poolItem in poolList)
                                {
                                    string pool_Name = poolItem.Key;
                                    if (!string.IsNullOrEmpty(poolItem.Value))
                                    {
                                        string subPoolResponse = PoolHandler.GetResponseFromUrl(poolItem.Value);

                                        HtmlAgilityPack.HtmlDocument htmlnewDocA = new HtmlAgilityPack.HtmlDocument();
                                        htmlnewDocA.LoadHtml(subPoolResponse);

                                        HtmlNodeCollection currentRoundNode = htmlnewDocA.DocumentNode.SelectNodes("//div[@class='roundlist']//span[@data-rd]");
                                        HtmlNodeCollection roundsNodes      = htmlnewDocA.DocumentNode.SelectNodes("//div[@class='roundlist']//a[@data-rd]");

                                        if (currentRoundNode != null)
                                        {
                                            if (currentRoundNode.Count() > 0)
                                            {
                                                string roundValue = currentRoundNode[0].Attributes["data-rd"].Value.Trim().ToString();
                                                string roundUrl   = poolItem.Value;
                                                if (roundValue != "")
                                                {
                                                    roundList.Add(new KeyValuePair <string, string>("Round " + roundValue, roundUrl.Replace("round=0", "round=" + roundValue).Replace("action=ROUND", "a=ROUND")));
                                                }
                                            }
                                        }

                                        if (roundsNodes != null)
                                        {
                                            if (roundsNodes.Count() > 0)
                                            {
                                                for (int rndCount = 0; rndCount < roundsNodes.Count(); rndCount++)
                                                {
                                                    if (roundsNodes[rndCount].Attributes.Contains("data-rd"))
                                                    {
                                                        string roundValue = roundsNodes[rndCount].Attributes["data-rd"].Value.Trim().ToString();
                                                        string roundUrl   = roundsNodes[rndCount].Attributes["href"].Value.Trim().Replace("amp;", "").Trim().ToString();
                                                        if (!string.IsNullOrEmpty(roundValue))
                                                        {
                                                            roundList.Add(new KeyValuePair <string, string>("Round " + roundValue, "http://websites.sportstg.com/" + roundUrl));
                                                        }
                                                    }
                                                }
                                            }
                                        }

                                        if (roundList.Count > 0)
                                        {
                                            foreach (var roundItem in roundList.OrderBy(x => x.Key))
                                            {
                                                string roundResponse = PoolHandler.GetResponseFromUrl(roundItem.Value);

                                                int startIndex = roundResponse.IndexOf("var matches =");
                                                int endIndex   = roundResponse.LastIndexOf("];</script>");

                                                if (startIndex > 0 && endIndex > 0)
                                                {
                                                    string jsonString = roundResponse.Substring(startIndex, endIndex - startIndex + 5);
                                                    jsonString = jsonString.Replace("var matches =", "").Replace(";</s", "").Trim().ToString();

                                                    if (!string.IsNullOrEmpty(jsonString))
                                                    {
                                                        List <RootObject>       teamsList  = new List <RootObject>();
                                                        List <MatchMasterModel> masterList = new List <MatchMasterModel>();
                                                        teamsList = JsonConvert.DeserializeObject <List <RootObject> >(jsonString);

                                                        if (teamsList.Count() > 0)
                                                        {
                                                            masterList = teamsList.Select(x => new MatchMasterModel()
                                                            {
                                                                matchDate    = x.TimeDateRaw,
                                                                roundno      = x.Round,
                                                                fieldno      = x.VenueName.Replace("Field", "").Replace("&nbsp;", "").Trim().ToString(),
                                                                divisionName = x.CompName,
                                                                poolName     = pool_Name.Replace("Pool", "").Trim().ToString(),
                                                                teamA        = x.HomeName,
                                                                versus       = "V",
                                                                teamB        = x.AwayName,
                                                                halveA       = string.Empty,
                                                                halveB       = string.Empty
                                                            }).ToList();

                                                            List <IList <Object> > objNewRecords = new List <IList <Object> >();

                                                            IList <Object> obj = new List <Object>();

                                                            if (header == true)
                                                            {
                                                                obj.Add("Time");
                                                                obj.Add("Round");
                                                                obj.Add("Field");
                                                                obj.Add("Division");
                                                                obj.Add("Pool");
                                                                obj.Add("Team");
                                                                obj.Add("v");
                                                                obj.Add("Team");
                                                                obj.Add("Halve 1");
                                                                obj.Add("Halve 2");
                                                                objNewRecords.Add(obj);
                                                            }
                                                            foreach (var a in masterList)
                                                            {
                                                                obj = new List <Object>();
                                                                obj = GenerateData(a, service);
                                                                if (obj != null)
                                                                {
                                                                    objNewRecords.Add(obj);
                                                                }
                                                            }

                                                            string newRange = GetRange(service, sheetName);

                                                            AppendGoogleSheetinBatch(objNewRecords, spreadSheetId, newRange, service);

                                                            if (header == true)
                                                            {
                                                                //The formatHeaderField method will format the header as user wants to.
                                                                formatHeaderField(service, sheetName);
                                                                header = false;
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
            }
        }
Exemple #24
0
        /// <summary>
        /// Downloads the page for <paramref name="day"/>, reads the column belonging to that day from the
        /// "list hours responsive" table of each tab (<c>tab_fr</c>, <c>tab_de</c>, <c>tab_ch</c>) and
        /// writes one XML file per tab into <c>_basePath</c>.
        /// </summary>
        /// <param name="day">Day to extract; only its date part is used to locate the table column.</param>
        /// <remarks>
        /// Best effort: exceptions raised while downloading, parsing or writing are sent to the trace
        /// listeners and swallowed. Nothing is written when the requested day is not among the table
        /// header dates.
        /// </remarks>
        public void Run(DateTime day)
        {
            // The table carries an extra row on the 25-hour day (last Sunday of October).
            bool is25hours = (day.Month == 10 && isLastSunday(day));

            // Machine-readable stamps: keep them independent of the current culture/calendar.
            string dayStamp = day.ToString("yyyyMMdd", CultureInfo.InvariantCulture);
            string URL      = _baseURL + day.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture) + "/FR";

            try
            {
                _htmlDoc.LoadHtml(_webClient.DownloadString(URL));

                // Collect the dates shown in the table header.
                HtmlNode           dateRow   = _htmlDoc.DocumentNode.SelectSingleNode("//div[@id='tab_fr']//table[@class='list hours responsive']//tr");
                HtmlNodeCollection dateCells = dateRow != null ? dateRow.SelectNodes("th") : null;
                if (dateCells == null)
                {
                    System.Diagnostics.Trace.TraceError("Run(" + dayStamp + "): date header row not found at " + URL);
                    return;
                }

                // The header only shows "ddd, dd/MM". Around New Year some columns belong to the adjacent
                // year; TryParseExact rejects a date whose weekday does not match, so at most one of the
                // candidate years is accepted for a given cell.
                CultureInfo     headerCulture  = new CultureInfo("en-US");
                int[]           candidateYears = new int[] { day.Year, day.Year - 1, day.Year + 1 };
                List <DateTime> days           = new List <DateTime>();
                foreach (HtmlNode col in dateCells)
                {
                    foreach (int year in candidateYears)
                    {
                        DateTime d;
                        if (DateTime.TryParseExact(col.InnerText + " " + year, "ddd, dd/MM yyyy", headerCulture, DateTimeStyles.None, out d))
                        {
                            days.Add(d);
                            break;
                        }
                    }
                }

                // Parsed header dates have no time component, so compare on the date part only.
                int index = days.IndexOf(day.Date);
                if (index < 0)
                {
                    // Without a matching column we would read a header cell and write all-zero rows.
                    System.Diagnostics.Trace.TraceError("Run(" + dayStamp + "): requested day is not among the table header dates");
                    return;
                }

                KeyValuePair <string, int>[] tabIDs = new KeyValuePair <string, int>[]
                {
                    new KeyValuePair <string, int>("tab_fr", 987),
                    new KeyValuePair <string, int>("tab_de", 924),
                    new KeyValuePair <string, int>("tab_ch", 988)
                };

                foreach (KeyValuePair <string, int> tabID in tabIDs)
                {
                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    HtmlNodeCollection tab = _htmlDoc.DocumentNode.SelectNodes("//div[@id='" + tabID.Key + "']//table[@class='list hours responsive']//tr[@class='no-border']");
                    if (tab == null)
                    {
                        System.Diagnostics.Trace.TraceError("Run(" + dayStamp + "): no hourly rows found for " + tabID.Key);
                        continue;
                    }

                    // The requested day has 24 hours but the table also contains the row of the 25th hour:
                    // drop the row at index 3 (presumably the repeated hour - confirm against the page).
                    if (!is25hours && tab.Count == 25)
                    {
                        tab.RemoveAt(3);
                    }

                    DataTable dt = initTable();

                    int i = 0;
                    foreach (HtmlNode row in tab)
                    {
                        // Pick the cell of interest: index is 0-based and the first 2 columns are headers
                        // (XPath positions are 1-based, hence 3 + index).
                        HtmlNode mgpVal = row.SelectSingleNode("td[" + (3 + index) + "]");
                        DataRow  newRow = dt.NewRow();

                        i++;
                        newRow["Zona"] = tabID.Value;
                        newRow["Data"] = dayStamp + (i < 10 ? "0" : "") + i;
                        newRow["Mgp"]  = 0;

                        // Accept either '.' or ',' as decimal separator regardless of the machine culture;
                        // a missing or non-numeric cell keeps the default of 0.
                        decimal tmp;
                        if (mgpVal != null &&
                            Decimal.TryParse(mgpVal.InnerText.Replace(',', '.'), NumberStyles.Number, CultureInfo.InvariantCulture, out tmp))
                        {
                            newRow["MGP"] = tmp;
                        }

                        dt.Rows.Add(newRow);
                    }

                    if (dt.Rows.Count > 0)
                    {
                        // Write the table into the loader folder.
                        string path = Path.Combine(_basePath, dayStamp + "_" + tabID.Value + ".xml");
                        dt.WriteXml(path);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best effort (callers never see an exception), but no longer silent.
                System.Diagnostics.Trace.TraceError("Run(" + dayStamp + ") failed for " + URL + ": " + ex);
            }
        }
Exemple #25
0
        /// <summary>
        /// Scrapes the listing page at <paramref name="urlToScrape"/>: collects the profile links found
        /// under the single "list-items" container, then for every profile fetches the page through
        /// <c>browser.Get</c>, writes the extracted text to <c>Profiles/&lt;name&gt;/profile.txt</c> under
        /// <c>AppPath</c> and downloads the main and gallery images.
        /// </summary>
        /// <param name="urlToScrape">Absolute URL of the listing page; its host is used to build profile URLs.</param>
        /// <returns>Always <c>true</c>; failures are reported through <c>LogError</c> only.</returns>
        /// <remarks>
        /// The first profile that fails stops the remaining profiles (unchanged behaviour).
        /// An existing profile directory with the same name is deleted and recreated.
        /// </remarks>
        private bool ScrapeEuroGirlsEscort(string urlToScrape)
        {
            // Strips line breaks, trims, and collapses one level of doubled spaces.
            string Flatten(string text)
            {
                return text.Replace("\n", "").Replace("\r", "").Trim().Replace("  ", " ");
            }

            // Turns a missing mandatory node into an error that names what was missing.
            HtmlNode Require(HtmlNode node, string what)
            {
                if (node == null)
                {
                    throw new InvalidOperationException("Expected element not found: " + what);
                }

                return node;
            }

            // Scraped text must never steer a path: replace invalid file-name characters (this includes
            // directory separators) and drop trailing dots/spaces so "." / ".." cannot survive.
            string ToSafeName(string raw)
            {
                char[] invalid = Path.GetInvalidFileNameChars();
                string cleaned = new string(raw.Select(c => invalid.Contains(c) ? '_' : c).ToArray()).Trim().TrimEnd('.', ' ');
                return cleaned.Length == 0 ? "_" : cleaned;
            }

            try
            {
                using WebClient client = new WebClient();
                client.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0");
                client.Headers.Add(HttpRequestHeader.Cookie, "over18=1");

                string htmlCode = client.DownloadString(urlToScrape);

                var doc = new HtmlDocument();
                doc.LoadHtml(htmlCode);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection parentItem  = doc.DocumentNode.SelectNodes("//div[contains(@class, 'list-items')]");
                int                parentCount = parentItem == null ? 0 : parentItem.Count;

                if (parentCount != 1)
                {
                    LogError($"Invalid parent item count, expected 1 got {parentCount}");
                    return(true);
                }

                // Nested divs expose the same anchor once per ancestor div, so keep first-seen order
                // but visit every profile URL only once.
                string            urlPrefix   = "https://" + new Uri(urlToScrape).Host;
                List <string>     ProfileURLs = new List <string>();
                HashSet <string>  seenUrls    = new HashSet <string>();
                HtmlNodeCollection childDivs  = parentItem[0].SelectNodes(".//div");

                if (childDivs != null)
                {
                    foreach (HtmlNode node in childDivs)
                    {
                        HtmlNodeCollection profileURLNodes = node.SelectNodes(".//a[@href]");

                        if (profileURLNodes == null)
                        {
                            continue;
                        }

                        foreach (HtmlNode profileNode in profileURLNodes.Where(x => x.InnerHtml != null))
                        {
                            string profileUrl = urlPrefix + profileNode.GetAttributeValue("href", string.Empty);

                            if (seenUrls.Add(profileUrl))
                            {
                                ProfileURLs.Add(profileUrl);
                            }
                        }
                    }
                }

                foreach (string url in ProfileURLs)
                {
                    try
                    {
                        string finalInfo = "";

                        htmlCode = browser.Get(url, true, Properties.Resources.EuroGirlsEscort);

                        finalInfo += url + newLine;

                        doc = new HtmlDocument();
                        doc.LoadHtml(htmlCode);

                        HtmlNode parentProfile   = Require(doc.GetElementbyId("main-content"), "#main-content");
                        HtmlNode descriptionNode = Require(parentProfile.SelectSingleNode(".//div[@class='description']"), "div.description");
                        HtmlNode nameNode        = Require(descriptionNode.SelectSingleNode(".//h1"), "div.description h1");

                        string nameStr = Flatten(nameNode.InnerText);

                        // "Name, Affiliation" headings are split on the comma; anything else is the name.
                        if (nameStr.Contains(","))
                        {
                            string[] nameParts = nameStr.Split(',');

                            finalInfo += "Name:" + nameParts[0] + newLine;
                            finalInfo += "Affiliation:" + nameParts[1] + newLine;
                        }
                        else
                        {
                            finalInfo += "Name:" + nameStr + newLine;
                        }

                        HtmlNode profileParent = Require(parentProfile.SelectSingleNode(".//a[contains(@class, 'js-gallery')]"), "a.js-gallery");

                        string imageURL = profileParent.GetAttributeValue("href", string.Empty);
                        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

                        // DownloadFile does not create missing directories; CreateDirectory is a no-op
                        // when the folder already exists.
                        string imagesPath = Path.Combine(AppPath, "Images");
                        Directory.CreateDirectory(imagesPath);
                        client.DownloadFile(new Uri(imageURL), Path.Combine(imagesPath, ToSafeName(imageURL.Split('/').Last())));

                        HtmlNode           infoNodes       = Require(parentProfile.SelectSingleNode(".//div[@class='params']"), "div.params");
                        HtmlNodeCollection singleInfoNodes = infoNodes.SelectNodes(".//div");

                        if (singleInfoNodes != null)
                        {
                            foreach (HtmlNode lineNode in singleInfoNodes)
                            {
                                finalInfo += Flatten(lineNode.InnerText) + newLine;
                            }
                        }

                        HtmlNode phoneNode = Require(parentProfile.SelectSingleNode(".//a[contains(@class, 'js-phone')]"), "a.js-phone");

                        string phoneNumber = phoneNode.InnerText.Replace("&nbsp;", " ");
                        finalInfo += "Phone number:" + phoneNumber + newLine;

                        // Rates are optional: the section is emitted only when table rows exist, so a
                        // missing table can no longer leave a dangling "Rates:" header behind.
                        HtmlNodeCollection ratesLines = parentProfile
                            .SelectSingleNode(".//div[@class='rates']")
                            ?.SelectSingleNode(".//tbody")
                            ?.SelectNodes(".//tr");

                        if (ratesLines != null)
                        {
                            IEnumerable <string> rates = ratesLines.Select(rateLine =>
                                rateLine.InnerText.Replace("&nbsp;", " ").Replace("\n", "-").Replace("\r", "-").Trim()
                                        .Replace("--", "-").Replace("--", "-"));

                            finalInfo += "Rates:" + string.Join(" | ", rates) + newLine;
                        }

                        // Services are optional as well. Rows are taken from the whole section, which is
                        // why the table's own heading row has to be filtered out.
                        HtmlNodeCollection servicesLines = parentProfile
                            .SelectSingleNode(".//div[@class='services']")
                            ?.SelectNodes(".//tr");

                        if (servicesLines != null)
                        {
                            IEnumerable <string> services = servicesLines
                                .Select(serviceLine =>
                                    serviceLine.InnerText.Replace("&nbsp;", " ").Replace("\n", "-").Replace("\r", "-").Trim()
                                               .Replace("--", "-").Replace("--", "-").Replace("--", "-"))
                                .Where(service => service != "-Services-Included-Extra-");

                            finalInfo += "Services:" + string.Join(" | ", services) + newLine;
                        }

                        finalInfo = finalInfo.Trim();

                        string profilesPath = Path.Combine(AppPath, "Profiles");
                        Directory.CreateDirectory(profilesPath);

                        // The directory name comes from scraped text and the directory is deleted
                        // recursively below, so it must be confined to profilesPath.
                        string ProfilePath = Path.Combine(profilesPath, ToSafeName(nameStr));

                        if (Directory.Exists(ProfilePath))
                        {
                            Directory.Delete(ProfilePath, true);
                        }

                        Directory.CreateDirectory(ProfilePath);

                        string profileTxtPath = Path.Combine(ProfilePath, "profile.txt");

                        // Dispose the writer even if the write fails, so the file handle is released.
                        using (StreamWriter sw = new StreamWriter(profileTxtPath))
                        {
                            sw.WriteLine(finalInfo);
                        }

                        HtmlNode           imgNode   = Require(doc.GetElementbyId("js-gallery"), "#js-gallery");
                        HtmlNodeCollection imgsNodes = imgNode.SelectNodes(".//a[@class='js-gallery']");

                        if (imgsNodes != null)
                        {
                            int i = 0;

                            foreach (HtmlNode imgElm in imgsNodes)
                            {
                                string imgURL = imgElm.GetAttributeValue("href", string.Empty);

                                client.DownloadFile(imgURL, Path.Combine(ProfilePath, $"{i}.jpg"));
                                i++;
                            }
                        }
                    }
                    catch (ThreadAbortException)
                    {
                        break;
                    }
                    catch (Exception ex)
                    {
                        // Any failure on one profile stops the remaining ones.
                        LogError("Url invalid: " + url + ". Error: " + ex.ToString());
                        break;
                    }
                }
            }
            catch (ThreadAbortException)
            {
                //nothing, stopped
            }
            catch (Exception ex)
            {
                LogError(ex.ToString());
            }
            finally
            {
                // NOTE(review): forced collection kept from the original; confirm whether it is still
                // needed now that the profile writer is disposed deterministically.
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }

            return(true);
        }
Exemple #26
0
        private bool ScrapeTopEscortBabes(string urlToScrape)
        {
            try
            {
                List <string> ProfileURLs = new List <string>();

                using WebClient client = new WebClient();
                client.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0");
                client.Headers.Add(HttpRequestHeader.Cookie, "plus18=1");

                string htmlCode = client.DownloadString(urlToScrape);

                var doc = new HtmlDocument();
                doc.LoadHtml(htmlCode);

                HtmlNode           parentNode       = doc.GetElementbyId("homepage_right");
                HtmlNodeCollection itemParentsNodes = parentNode.SelectNodes(".//div[contains(@class, 'items')]");

                if (itemParentsNodes.Count() == 2)
                {
                    foreach (HtmlNode mainNode in itemParentsNodes)
                    {
                        HtmlNodeCollection profilesNodes = mainNode.SelectNodes(".//li");

                        if (profilesNodes != null)
                        {
                            foreach (HtmlNode singleProfileNode in profilesNodes)
                            {
                                HtmlNode hrefNode  = singleProfileNode.SelectSingleNode(".//a[@href]");
                                string   hrefValue = hrefNode.GetAttributeValue("href", string.Empty);

                                if (string.IsNullOrWhiteSpace(hrefValue))
                                {
                                    continue;
                                }

                                ProfileURLs.Add(hrefValue);
                            }
                        }
                    }

                    foreach (string url in ProfileURLs)
                    {
                        try
                        {
                            string finalInfo = "";

                            htmlCode = browser.Get(url, true, Properties.Resources.TopEscortBabes);

                            Log(url);

                            finalInfo += url + newLine;

                            doc = new HtmlDocument();
                            doc.LoadHtml(htmlCode);

                            HtmlNode mainParentNode = doc.GetElementbyId("homepage");

                            HtmlNode headerNode = mainParentNode.SelectSingleNode(".//div[@class='profile-cover']");
                            HtmlNode titleNode  = headerNode.SelectSingleNode(".//h2[@class='header-title']");

                            string profileName = titleNode.InnerText.Replace("\n", "").Replace("\r", "").Trim().Replace("  ", " ");

                            finalInfo += "Name:" + profileName + newLine;

                            try
                            {
                                HtmlNode afiliationNode = doc.GetElementbyId("accord-agency");

                                if (afiliationNode == null)
                                {
                                    finalInfo += "Affiliation:Independent" + newLine;
                                }
                                else
                                {
                                    HtmlNode afiliationBodyNode = afiliationNode.SelectSingleNode(".//div");
                                    HtmlNode afiliationNameNode = afiliationBodyNode.SelectSingleNode(".//h4");

                                    if (afiliationNameNode == null)
                                    {
                                        finalInfo += "Affiliation:Independent" + newLine;
                                    }
                                    else
                                    {
                                        string afiliation = afiliationNameNode.InnerText.Replace("\n", "").Replace("\r", "").Trim().Replace("  ", " ");
                                        finalInfo += "Affiliation:" + afiliation + newLine;
                                    }
                                }
                            }
                            catch { }

                            HtmlNode personalNode = doc.GetElementbyId("accord-personal-data");
                            HtmlNode detailsNode  = personalNode.SelectSingleNode(".//div[contains(@class, 'detail-block-body')]");

                            HtmlNodeCollection detailLineNode = detailsNode.SelectNodes(".//div[@class='personal-data-item']");

                            foreach (HtmlNode lineNode in detailLineNode)
                            {
                                string infoLine = lineNode.InnerText.Replace("\n", "").Replace("\r", "").Trim().Replace("  ", " ");

                                RegexOptions options = RegexOptions.None;
                                Regex        regex   = new Regex("[ ]{2,}", options);
                                infoLine = regex.Replace(infoLine, ":");

                                infoLine = infoLine.Replace(":•:", ",");

                                finalInfo += infoLine + newLine;
                            }

                            try
                            {
                                HtmlNode           priceNode     = doc.GetElementbyId("prices");
                                HtmlNodeCollection priceListNode = priceNode.SelectNodes(".//div[@class='price-item']");

                                if (priceListNode != null)
                                {
                                    finalInfo += "Prices:";

                                    foreach (HtmlNode lineNode in priceListNode)
                                    {
                                        string infoLine = lineNode.InnerText.Replace("\n", "").Replace("\r", "").Trim().Replace("  ", " ");

                                        RegexOptions options = RegexOptions.None;
                                        Regex        regex   = new Regex("[ ]{2,}", options);
                                        infoLine = regex.Replace(infoLine, ":");

                                        infoLine = "-" + infoLine.Replace(" Price:", "-").Replace(":", " ") + "-";

                                        finalInfo += infoLine;

                                        if (lineNode != priceListNode.Last())
                                        {
                                            finalInfo += " | ";
                                        }
                                    }

                                    finalInfo += newLine;
                                }
                            }
                            catch { }

                            finalInfo = finalInfo.Trim();

                            string profilesPath = Path.Combine(AppPath, "Profiles");

                            if (!Directory.Exists(profilesPath))
                            {
                                Directory.CreateDirectory(profilesPath);
                            }

                            string ProfilePath = Path.Combine(profilesPath, profileName);

                            if (Directory.Exists(ProfilePath))
                            {
                                Directory.Delete(ProfilePath, true);
                            }

                            Directory.CreateDirectory(ProfilePath);

                            string profileTxtPath = Path.Combine(ProfilePath, "profile.txt");

                            StreamWriter sw = new StreamWriter(profileTxtPath);
                            sw.WriteLine(finalInfo);
                            sw.Close();

                            Log(ProfilePath);

                            //try
                            //{
                            //    HtmlNode mainImageNode =  doc.DocumentNode.SelectSingleNode(".//div[@class='profile-details-right']");
                            //    HtmlNode imagesParentNode = mainImageNode.SelectSingleNode(".//div[@class='photos-wrapper']");

                            //    if (imagesParentNode != null)
                            //    {
                            //        HtmlNodeCollection imagesNode = imagesParentNode.SelectNodes(".//a[@class='ilightbox']");

                            //        if (imagesNode != null)
                            //        {
                            //            int i = 0;
                            //            foreach (HtmlNode singleImageNode in imagesNode)
                            //            {
                            //                string imgURL = singleImageNode.GetAttributeValue("href", string.Empty);

                            //                Log(imgURL);
                            //                //Clipboard.SetText(imgURL);

                            //                client.DownloadFile(imgURL, Path.Combine(ProfilePath, $"{i}.jpg"));

                            //                i++;
                            //            }
                            //        }
                            //    }
                            //}
                            //catch { }
                        }
                        catch (ThreadAbortException)
                        {
                            break;
                        }
                        catch (Exception ex)
                        {
                            LogError("Url invalid: " + url + ". Error: " + ex.ToString());
                            break;
                        }
                    }
                }
                else
                {
                    LogError($"Invalid parent item count, expected 2 got {itemParentsNodes.Count()}");
                }
            }
            catch (ThreadAbortException)
            {
                //nothing, stopped
            }
            catch (Exception ex)
            {
                LogError(ex.ToString());
            }
            finally
            {
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }

            return(true);
        }
Exemple #27
0
        /// <summary>
        /// Downloads the list page at <paramref name="url"/>, reads every list entry from it and stores
        /// each entry that is neither filtered nor already stored, together with its detail record.
        /// </summary>
        /// <param name="url">
        /// Address of the page to download. When <c>_IsFromIQB</c> is false, the href of the first
        /// "最新技术" ("latest technology") link on that page is appended to it and that page is parsed instead.
        /// </param>
        /// <param name="code">Encoding used for the first download; also stored in <c>_EnCode</c>.</param>
        /// <remarks>
        /// Exceptions are not handled here; they propagate to the caller with their original stack trace.
        /// </remarks>
        public void run(string url, Encoding code)
        {
            _EnCode = code;

            string htmlstr = GetHtmlStr(url, code);
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(htmlstr);
            HtmlNode rootnode = doc.DocumentNode;

            if (!_IsFromIQB)
            {
                // Follow the first "最新技术" link and parse that page instead of the landing page.
                HtmlNodeCollection listAddr = rootnode.SelectNodes("//a[text() ='最新技术']");
                url    += listAddr[0].Attributes["href"].Value;
                htmlstr = GetHtmlStr(url, Encoding.UTF8);
                doc     = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(htmlstr);
                rootnode = doc.DocumentNode;
            }

            // Each query returns all matching nodes as one collection; the collections are read in
            // parallel below, entry i of the list being element i of each of them.
            HtmlNodeCollection titles = rootnode.SelectNodes("//span[@class='list-title-size']");
            HtmlNodeCollection ess    = rootnode.SelectNodes("//div[@class='cell-flat-content list-content-size']");
            HtmlNodeCollection img    = rootnode.SelectNodes("//div[@class='image-container undefined ']");
            HtmlNodeCollection date   = rootnode.SelectNodes("//div[@class='cell-flat-time']");
            HtmlNodeCollection rc     = rootnode.SelectNodes("//div[@class='cell-flat-eye']");
            HtmlNodeCollection detail = rootnode.SelectNodes("//a[@class='cell-flat']");

            // NOTE(review): the cover image of entry i is read at a fixed offset into the image
            // containers; presumably that many unrelated containers precede the list — confirm
            // against the page markup.
            int coverOffset = _IsFromIQB ? 9 : 7;

            using (Html5Content db = new Html5Content())
            {
                for (int i = 0; i < titles.Count(); i++)
                {
                    string href      = detail[i].Attributes["href"].Value;
                    string detailUrl = "http://hjc025.xiaoyun.com" + href;

                    if (this.IsFilterUrls(href))
                    {
                        WriteOut("Url Filted -- " + detailUrl);
                        continue;
                    }

                    if (db.IsExistSummery(href))
                    {
                        WriteOut("Url Existed -- " + detailUrl);
                        continue;
                    }

                    EInfoSummery es = new EInfoSummery();
                    // The read counter text carries a "阅读" label that is stripped before parsing.
                    es.ReadCount      = Convert.ToInt32(rc[i].InnerText.Replace("阅读", ""));
                    es.Title          = titles[i].InnerText;
                    es.Summery        = ess[i].InnerText;
                    es.PublishDate    = this.FormatPublishDate(date[i].InnerText);
                    es.CoverImg       = img[i + coverOffset].Attributes["src"].Value;
                    es.OrigInfoId     = href;
                    es.CreateDateTime = DateTime.Now;
                    db.InfoSummery.Add(es);

                    // The summary is saved before the detail is analysed (presumably so the detail can
                    // refer to the saved summary — TODO confirm); both saves complete in one scope.
                    using (TransactionScope sc = new TransactionScope())
                    {
                        db.SaveChanges();
                        EInfoDetail ed = this.AnalyDetail(detailUrl, es);
                        db.InfoDetail.Add(ed);
                        db.SaveChanges();
                        sc.Complete();
                    }

                    WriteOut("Analy Url -- " + detailUrl + " Done");
                }
            }
        }
Exemple #28
0
        /// <summary>
        /// Looks for forms in the stored <c>response</c> HTML, fills in their input values and submits
        /// them to their action URL, using POST when the form declares it and GET otherwise.
        /// </summary>
        /// <returns>
        /// The body of the reply to the first GET, or to the first POST that returns a success status.
        /// <c>null</c> when the response has no form, when a form without an <c>action</c> attribute is
        /// reached, when no POST succeeds, or when any exception occurs.
        /// </returns>
        public string Submit()
        {
            try
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(response);
                HtmlNodeCollection formNodes = doc.DocumentNode.SelectNodes("//form");

                // SelectNodes yields null, not an empty collection, when nothing matches.
                if (formNodes == null || formNodes.Count == 0)
                {
                    return(null);
                }

                foreach (var node in formNodes)
                {
                    string formHtml = node.OuterHtml;

                    // Parse the form into its own document. Re-loading `doc` here would replace the
                    // document that the remaining nodes of formNodes still belong to.
                    HtmlDocument formDoc = new HtmlDocument();
                    formDoc.LoadHtml(formHtml);

                    HtmlNode formWithAction = formDoc.DocumentNode.Descendants()
                                              .FirstOrDefault(form => form.Name == "form" && form.Attributes["action"] != null);
                    if (formWithAction == null)
                    {
                        return(null);
                    }

                    string actionUrl = Utilities.Helper.FixLink(formWithAction.Attributes["action"].Value, targetUrl);

                    // Read the method from this form's own markup; matching against the whole response
                    // would pick up the first method attribute on the page for every form.
                    Match  methodMatch = Regex.Match(formHtml, "method=[\"](.+?)[\"]");
                    string method      = methodMatch.Success ? methodMatch.Groups[1].Value.ToLowerInvariant() : "get";

                    Dictionary <string, string> param = new Dictionary <string, string>();
                    HtmlNodeCollection inputNodes = formDoc.DocumentNode.SelectNodes("//input");

                    // A form without inputs is still submitted, just without parameters.
                    if (inputNodes != null)
                    {
                        foreach (var inputNode in inputNodes)
                        {
                            var names  = Regex.Match(inputNode.OuterHtml, "name=[\"](.+?)[\"]");
                            var values = Regex.Match(inputNode.OuterHtml, "value=[\"](.+?)[\"]");
                            var type   = Regex.Match(inputNode.OuterHtml, "type=[\"](.+?)[\"]").Groups[1].Value.ToLowerInvariant();

                            // Buttons and unnamed inputs are never posted.
                            if (type.Equals("button") || type.Equals("reset") || !names.Success)
                            {
                                continue;
                            }

                            // Inputs may share a name (radio groups, for example); the first one wins
                            // instead of aborting the whole submission on a duplicate key.
                            string name = names.Groups[1].Value;
                            if (!param.ContainsKey(name))
                            {
                                param.Add(name, values.Groups[1].Value);
                            }
                        }
                    }

                    // Fill fields that carry no value in the markup. The keys are copied first so the
                    // dictionary is not modified while it is being enumerated.
                    foreach (var key in param.Keys.ToList())
                    {
                        if (string.IsNullOrEmpty(param[key]))
                        {
                            param[key] = GetFieldValue(key);
                        }
                    }

                    if (method.Equals("post"))
                    {
                        var content = new FormUrlEncodedContent(param);
                        var res     = httpClient.PostAsync(actionUrl, content).Result;
                        if (res.IsSuccessStatusCode)
                        {
                            return(res.Content.ReadAsStringAsync().Result);
                        }

                        // An unsuccessful POST falls through to the next form, if any.
                    }
                    else // get request
                    {
                        string uri    = AttachParameters(actionUrl, param);
                        var    result = httpClient.GetAsync(uri).Result;
                        return(result.Content.ReadAsStringAsync().Result);
                    }
                }
            }
            catch
            {
                // Best effort by design: any failure is reported to the caller as "nothing submitted".
                return(null);
            }

            return(null);
        }
Exemple #29
0
        /// <summary>
        /// Runs the portal lookup for the given row: requests the search page, posts the search,
        /// requests the details page behind each result link and combines the collected results.
        /// </summary>
        /// <param name="dr">Row passed to <c>Initialize</c> before the lookup starts.</param>
        /// <returns>
        /// The value of <c>ProcessResults(output, message)</c>. <c>output</c> is only filled when at
        /// least one result was collected; otherwise <c>message</c> carries an error text.
        /// </returns>
        /// <remarks>
        /// Exceptions thrown during the lookup are caught and their message is passed on as
        /// <c>message</c>.
        /// </remarks>
        public override string Fetch(DataRow dr)
        {
            Initialize(dr, true);
            // NOTE(review): url and code are assigned here but never read in this method.
            string url         = string.Empty;
            string output      = string.Empty;
            string message     = string.Empty;
            // NOTE(review): detailsPage is never reassigned in this method, so the PDF step below
            // receives an empty string — confirm whether the fetched details HTML was intended.
            var    detailsPage = "";
            string code        = string.Empty;

            try
            {
                var fields = Validate();
                if (fields.IsValid)
                {
                    // Step 1: the search form page; the "*Required Fields" text marks a usable page.
                    string firstpage = SiteCalling_HTTPPOSTANDGET("https://portalclient.echo-cloud.com/98059portal/VerifPortal/msltop.asp?id=", "GET", cookiesresponse, "");
                    if (firstpage != null && firstpage.Contains("*Required Fields"))
                    {
                        _htmlDocResults.LoadHtml(firstpage);
                        // Step 2: post the search built by GetParamm().
                        var secondpage = SiteCalling_HTTPPOSTANDGET("https://portalclient.echo-cloud.com/98059portal/VerifPortal/doclist.asp", "POST", cookiesresponse, GetParamm());
                        if (secondpage != null && !secondpage.Contains("No physicians"))
                        {
                            _htmlDocResults.LoadHtml(secondpage);
                            // NOTE(review): the last name is concatenated into the XPath without quotes,
                            // so XPath does not read it as a string literal — confirm the intended filter.
                            HtmlNodeCollection Temptrs = _htmlDocResults.DocumentNode.SelectNodes("//tr/td/a[contains(.," + provider.LastName + ")]");
                            if (Temptrs != null && Temptrs.Count() > 0)
                            {
                                // NOTE(review): TotalRecords is not used afterwards.
                                int TotalRecords = Temptrs.Count();
                                // Step 3: follow every result link.
                                // NOTE(review): every message set inside this loop is overwritten by the
                                // if/else that follows the loop.
                                foreach (HtmlNode Temptr in Temptrs)
                                {
                                    if (Temptr != null)
                                    {
                                        HtmlAttribute att = Temptr.Attributes["href"];
                                        if (att != null)
                                        {
                                            string GetUrlString = att.Value;
                                            if (!string.IsNullOrEmpty(GetUrlString))
                                            {
                                                // Reduce "JavaScript:SubmitVerif('a','b','c','d')" to "a,b,c,d".
                                                GetUrlString = GetUrlString.Replace("JavaScript:SubmitVerif(", "").Replace(")", "").Replace("'", "");
                                                string[] Urlcode = GetUrlString.Split(',');
                                                // Links with fewer than four arguments are skipped silently.
                                                if (Urlcode != null && Urlcode.Count() >= 4)
                                                {
                                                    // The four arguments become dr_id, standing, reclink and recstat.
                                                    var details = SiteCalling_HTTPPOSTANDGET("https://portalclient.echo-cloud.com/98059portal/VerifPortal/docset.asp?dr_id=" + HttpUtility.UrlEncode(Urlcode[0]) + "&standing=" + HttpUtility.UrlEncode(Urlcode[1]) + "&uname=Test&title=Test&org=Test&addr=Test&addr2=Test&reclink=" + HttpUtility.UrlEncode(Urlcode[2]) + "&recstat=" + HttpUtility.UrlEncode(Urlcode[3]), "GET", cookiesresponse, "");
                                                    if (details != null && details.Contains("Sentara Medical Staff Services"))
                                                    {
                                                        _TemphtmlDocResults.LoadHtml(details);
                                                        // Presumably appends to _sbResponseHtmlCollection and
                                                        // _sbResponseTextCollection — verify in GetpageDetails.
                                                        GetpageDetails();
                                                    }
                                                    else
                                                    {
                                                        message = ErrorMsg.CannotAccessDetailsPage;
                                                    }
                                                }
                                            }
                                            else
                                            {
                                                message = ErrorMsg.Custom("JavaScript element not found");
                                            }
                                        }
                                        else
                                        {
                                            message = ErrorMsg.Custom("Error occurred while retrieving the Anchor Node");
                                        }
                                    }
                                    else
                                    {
                                        message = ErrorMsg.Custom("Error occurred while retrieving the data");
                                    }
                                }
                                // Step 4: combine the results; both collections must be non-empty and
                                // of equal length.
                                if (_sbResponseHtmlCollection != null && _sbResponseHtmlCollection.Count() > 0 && _sbResponseHtmlCollection.Count() == _sbResponseTextCollection.Count())
                                {
                                    int SelectedRecords = _sbResponseHtmlCollection.Count();
                                    // With more than one result, a header row announces the total.
                                    if (SelectedRecords > 1)
                                    {
                                        _sbResponseHtml.Append("<tr><td> --- Multiple Results Found </td><td>(Total: ");
                                        _sbResponseHtml.Append(SelectedRecords.ToString());
                                        _sbResponseHtml.Append(") --- </td></tr>");
                                        _sbResponseText.Append(" --- Multiple Result Found (Total: " + SelectedRecords + ") --- ");
                                        _sbResponseText.Append('\r');
                                        _sbResponseText.Append('\n');
                                    }
                                    // Count is the 1-based number shown in the "Result (x Of y)" separators.
                                    int Count = 1;
                                    for (int i = 0; i < SelectedRecords; i++)
                                    {
                                        if (_sbResponseHtmlCollection[i] != null && _sbResponseTextCollection[i] != null)
                                        {
                                            if (SelectedRecords > 1)
                                            {
                                                _sbResponseHtml.Append("<tr><td>--- Result </td><td>(" + Count + " Of " + SelectedRecords + ") --- </td></tr>");
                                                _sbResponseText.Append("--- Result (" + Count + " Of " + SelectedRecords + ") --- ");
                                                _sbResponseText.Append('\r');
                                                _sbResponseText.Append('\n');
                                            }
                                            _sbResponseHtml.Append(_sbResponseHtmlCollection[i]);
                                            _sbResponseText.Append(_sbResponseTextCollection[i]);
                                            Count++;
                                        }
                                    }
                                    output  = _sbResponseHtml.ToString();
                                    message = _sbResponseText.ToString();
                                    // The image conversion is best effort: any failure is ignored.
                                    try
                                    {
                                        pdf.Html = detailsPage;
                                        pdf.ConvertToABCImage(new ImageParameters {
                                            BaseUrl = "https://portalclient.echo-cloud.com/98059portal/VerifPortal/"
                                        });
                                    }
                                    catch { }
                                }
                                else
                                {
                                    message = ErrorMsg.Custom("Facility name not matching with user name");
                                }
                            }
                            else
                            {
                                message = ErrorMsg.NoResultsFound;
                            }
                        }
                        else
                        {
                            message = ErrorMsg.NoResultsFound;
                        }
                    }
                    else
                    {
                        message = ErrorMsg.CannotAccessSite;
                    }
                }
                else
                {
                    message = fields.Error.Message;
                }
            }
            catch (Exception ex)
            {
                message = ex.Message;
            }

            return(ProcessResults(output, message));
        }
        /// <summary>
        /// Builds an <c>HttpParams</c> from the <c>content</c> element whose <c>id</c> equals
        /// <paramref name="url"/> in the XML file <paramref name="xmlPath"/> under <c>\XML\</c>.
        /// </summary>
        /// <param name="xmlPath">File name, appended to the fixed <c>\XML\</c> base path.</param>
        /// <param name="url">Value of the <c>id</c> attribute that selects the <c>content</c> element.</param>
        /// <param name="list">
        /// Replacement values: consumed in order by <c>ReplaceSpecialChar</c>, and directly for
        /// properties whose text is exactly "@".
        /// </param>
        /// <returns>The populated parameters. Its <c>httpParams</c> builder is set only when the element has properties.</returns>
        public static HttpParams getParams(string xmlPath, string url, string[] list)
        {
            HttpParams httpParams = new HttpParams();

            // The base path has no drive or current-directory prefix (the original kept a
            // commented-out variant that used Directory.GetCurrentDirectory()).
            string basePath = @"\XML\";

            var document = new HtmlAgilityPack.HtmlDocument();
            document.Load(basePath + xmlPath);

            // Position in `list` of the next replacement value; shared by all substitutions below.
            int listIndex = 0;

            // Throws when no content element matches, because SelectNodes then yields null.
            foreach (HtmlNode content in document.DocumentNode.SelectNodes("/params/content[@id='" + url + "'][1]"))
            {
                // Mandatory attributes: a missing one throws. The url is substituted before the
                // referer, so it consumes replacement values first.
                string httpurl = content.Attributes["url"].Value.Replace("&amp;", "&");
                httpurl = ReplaceSpecialChar(ref httpurl, ref listIndex, list);
                string referer = content.Attributes["referer"].Value.Replace("&amp;", "&");
                referer = ReplaceSpecialChar(ref referer, ref listIndex, list);

                httpParams.HttpUrl = httpurl;
                httpParams.Method  = content.Attributes["method"].Value;
                httpParams.Referer = referer;
                httpParams.Host    = content.Attributes["host"].Value;

                // Optional attributes: copied only when present.
                var accept = content.Attributes["accept"];
                if (accept != null)
                {
                    httpParams.Accept = accept.Value;
                }

                var userAgent = content.Attributes["userAgent"];
                if (userAgent != null)
                {
                    httpParams.UserAgent = userAgent.Value;
                }

                var acceptEncoding = content.Attributes["acceptEncoding"];
                if (acceptEncoding != null)
                {
                    httpParams.AcceptEncoding = acceptEncoding.Value;
                }

                var acceptLanguage = content.Attributes["acceptLanguage"];
                if (acceptLanguage != null)
                {
                    httpParams.AcceptLanguage = acceptLanguage.Value;
                }

                var xRequestedWith = content.Attributes["xRequestedWith"];
                if (xRequestedWith != null)
                {
                    httpParams.XRequestedWith = xRequestedWith.Value;
                }

                var xPrototypeVersion = content.Attributes["xPrototypeVersion"];
                if (xPrototypeVersion != null)
                {
                    httpParams.XPrototypeVersion = xPrototypeVersion.Value;
                }

                var contentType = content.Attributes["contentType"];
                if (contentType != null)
                {
                    httpParams.ContentType = contentType.Value;
                }

                var cacheControl = content.Attributes["cacheControl"];
                if (cacheControl != null)
                {
                    httpParams.CacheControl = cacheControl.Value;
                }

                // Only the exact texts "true" and "false" change KeepAlive; anything else leaves it as is.
                var keepAlive = content.Attributes["keepAlive"];
                if (keepAlive != null)
                {
                    switch (keepAlive.Value)
                    {
                        case "true":
                            httpParams.KeepAlive = true;
                            break;

                        case "false":
                            httpParams.KeepAlive = false;
                            break;
                    }
                }

                var responseEncode = content.Attributes["responseEncode"];
                if (responseEncode != null)
                {
                    httpParams.ResponseEncode = responseEncode.Value;
                }
            }

            HtmlNodeCollection properties = document.DocumentNode.SelectNodes("/params/content[@id='" + url + "']/propery");
            if (properties == null)
            {
                return(httpParams);
            }

            StringBuilder body = new StringBuilder();
            foreach (HtmlNode property in properties)
            {
                // NOTE(review): the name and the substituted value are computed but never appended
                // to the builder, so httpParams.httpParams is always empty; the "encode" attribute
                // is not used either. Confirm the intended body format before filling it in.
                string key   = property.Attributes["name"].Value;
                string value = property.InnerText;

                if (value == "@")
                {
                    // A bare "@" takes the next replacement value as is.
                    value = list[listIndex];
                    listIndex++;
                }
                else if (value.Contains("@"))
                {
                    value = ReplaceSpecialChar(ref value, ref listIndex, list);
                }
            }

            httpParams.httpParams = body;
            return(httpParams);
        }