Beispiel #1
0
		/// <summary>
		/// Narrows an element collection to the elements whose tag matches <c>TagName</c>.
		/// </summary>
		/// <param name="elements">Collection to filter; may be null.</param>
		/// <returns>
		/// <paramref name="elements"/> unchanged when it is null or when <c>TagName</c> is null;
		/// otherwise the result of <c>elements.tags(TagName)</c>.
		/// </returns>
		public IHTMLElementCollection GetElementCollection(IHTMLElementCollection elements)
		{
			// Short-circuit keeps TagName unevaluated when there is no collection at all.
			if (elements == null || TagName == null)
			{
				return elements;
			}

			object filtered = elements.tags(TagName);
			return (IHTMLElementCollection) filtered;
		}
        /// <summary>
        /// Returns the subset of <paramref name="elements"/> whose tag equals <c>TagName</c>.
        /// </summary>
        /// <param name="elements">Collection to filter; may be null.</param>
        /// <returns>
        /// The input itself (possibly null) when there is no collection or no <c>TagName</c>
        /// to filter by; otherwise the collection produced by <c>elements.tags(TagName)</c>.
        /// </returns>
        public IHTMLElementCollection GetElementCollection(IHTMLElementCollection elements)
        {
            // TagName is only consulted when a collection was actually supplied.
            bool nothingToFilter = elements == null || TagName == null;

            return nothingToFilter
                ? elements
                : (IHTMLElementCollection)elements.tags(TagName);
        }
        /// <summary>
        /// Builds the <c>fieldElements</c> list on first use. The list receives every element
        /// of the document whose tag is <c>ElementsSupport.InputTagName</c>, followed by every
        /// <c>textarea</c> element. Calls made after the list exists do nothing.
        /// </summary>
        /// <param name="document">Document whose <c>all</c> collection is scanned.</param>
        private void verifyFieldElements(IHTMLDocument2 document)
        {
            // Already built by an earlier call.
            if (fieldElements != null)
            {
                return;
            }

            fieldElements = new ArrayList();

            IHTMLElementCollection everything = document.all;

            // Both tag queries are resolved up front, then appended in this order.
            IHTMLElementCollection[] sources = new IHTMLElementCollection[]
            {
                (IHTMLElementCollection)everything.tags(ElementsSupport.InputTagName),
                (IHTMLElementCollection)everything.tags("textarea")
            };

            foreach (IHTMLElementCollection source in sources)
            {
                for (int index = 0; index < source.length; index++)
                {
                    fieldElements.Add((IHTMLElement)source.item(index, null));
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Loads an HTML string into an in-memory mshtml document and returns the concatenated
        /// inner HTML of every element inside the body that matches <paramref name="tag"/>.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <param name="tag">
        /// Tag name to collect. When null or empty, "TABLE" is used, which is the tag this
        /// method matched before the parameter was honoured.
        /// </param>
        /// <returns>
        /// The inner HTML of all matching elements, concatenated in collection order. Elements
        /// with null or empty inner HTML contribute nothing; the result is an empty string when
        /// nothing matches.
        /// </returns>
        public string ParserHtml(string html, string tag)
        {
            // Fall back to the historical hard-coded tag so callers that passed nothing useful keep working.
            string tagName = string.IsNullOrEmpty(tag) ? "TABLE" : tag;

            // Write the markup into a fresh document so mshtml builds a DOM for it.
            IHTMLDocument2 document = new HTMLDocumentClass();

            document.write(new object[] { html });
            document.close();

            // body is itself an element; its "all" collection exposes every element beneath it.
            IHTMLElementCollection bodyElements = (IHTMLElementCollection)document.body.all;

            // tags() filters that collection down to the requested tag.
            IHTMLElementCollection elements = (IHTMLElementCollection)bodyElements.tags(tagName);

            System.Text.StringBuilder result = new System.Text.StringBuilder();

            for (int i = 0; i < elements.length; i++)
            {
                // The first argument of item() is used here as the position in the collection;
                // the second is passed as null (the original author noted its role was unclear).
                IHTMLElement element = (IHTMLElement)elements.item(i, null);

                // Read the property once: each access is a COM call.
                string inner = element.innerHTML;

                if (string.IsNullOrEmpty(inner))
                {
                    continue;
                }

                result.Append(inner);
            }

            return result.ToString();
        }
Beispiel #5
0
        /// <summary>
        /// Processes the data table of the page currently loaded in the embedded browser.
        /// Each data row is parsed and inserted into <c>tbl_download</c> when no row with the
        /// same <c>f_name</c> and <c>SDATE</c> exists yet. Afterwards every entry of
        /// <c>VPN_files</c> that is not in <c>Local_files</c> is queued for download, and the
        /// method either finishes, moves to the next page, or starts the download timer.
        /// </summary>
        private void Vpn_PageData()
        {
            log.Info($"Entered Vpn_PageData.");
            log.Info($"  Currently on page {current_page}/{total_pages}.");

            #region write in sql table

            // Fetch the element that holds the actual data rows.
            HTMLDocument d          = (HTMLDocument)m.vpnweb.Document;
            IHTMLElement gvDownLoad = d.getElementById(DOM_FOR_ACTUAL_DATA);
            log.Info($"[{DOM_FOR_ACTUAL_DATA}] is read.");

            // Read the rows.
            // 2020-05-03: Html Agility Pack cannot click elements, so the first level has to
            // stay an IHTMLElement; only the cells of each row are parsed with Html Agility Pack.
            IHTMLElementCollection trs_ = gvDownLoad.all;
            IHTMLElementCollection trs  = trs_.tags("tr");
            DateTime current_time       = DateTime.Now;

            VPN_files.Clear();

            // Row 0 is the header row, so iteration starts at 1.
            // item() is called with the row position as first argument and null as second.
            for (current_line = 1; current_line < trs.length; current_line++)
            {
                IHTMLElement tr = trs.item(current_line, null);
                HtmlDocument h_ = new HtmlDocument();
                h_.LoadHtml(tr.innerHTML);
                HtmlNodeCollection tds = h_.DocumentNode.SelectNodes("//td");

                // NOTE(review): _td_parcer presumably also records the row in VPN_files
                // (keyed by current_line) - confirm against its definition.
                tbl_download result = _td_parcer(tds);

                using (NHIDataContext dc = new NHIDataContext())
                {
                    var q = from p in dc.tbl_download
                            where (p.f_name == result.f_name) && (p.SDATE == result.SDATE)
                            select p;

                    // Any() only asks whether a match exists instead of counting all matches.
                    if (!q.Any())
                    {
                        result.QDATE = current_time;
                        dc.tbl_download.InsertOnSubmit(result);
                        dc.SubmitChanges();
                        log.Info($"    [{result.f_name}] added to SQL server");
                    }
                }
            }

            #endregion write in sql table

            #region download files

            // Build the download queue from the files that are not available locally.
            foreach (KeyValuePair <int, string> v in VPN_files)
            {
                if (!Local_files.Contains(v.Value))
                {
                    queue_files.Enqueue(v.Key);
                    log.Info($"    {v.Key}: {v.Value} enqueued.");
                }
            }

            if ((queue_files.Count == 0) && (current_page == total_pages))
            {
                // This page is done and it was the last page.
                m.Refresh_Table();
                tb.ShowBalloonTip("結束", "完成所有頁面讀取", BalloonIcon.Info);
            }
            else if (queue_files.Count == 0)
            {
                log.Info($"    Nothing enqueued on page {current_page}/{total_pages}");
                Goto_next_page();
            }
            else
            {
                // Only start the timer when there is something to download.

                tb.ShowBalloonTip("計時器開始", $"一共{queue_files.Count}個檔案要下載", BalloonIcon.Info);

                // The first tick handles the first queued row.
                current_line = queue_files.Dequeue();

                // Release the timer left over from an earlier call so that it cannot keep
                // firing next to the new one and its handler subscription does not linger.
                if (this._timer1 != null)
                {
                    this._timer1.Stop();
                    this._timer1.Elapsed -= TimersTimer_Elapsed;
                    this._timer1.Dispose();
                }

                this._timer1 = new System.Timers.Timer
                {
                    Interval = 6000
                };
                this._timer1.Elapsed += new System.Timers.ElapsedEventHandler(TimersTimer_Elapsed);

                log.Info($"    _timer1 started.");
                this._timer1.Start();
            }

            #endregion download files

            log.Info("Exited Vpn_PageData.");
        }
Beispiel #6
0
        /// <summary>
        /// Elapsed handler of <c>_timer1</c>. On the dispatcher of <c>m</c> it either moves on
        /// to the next page (when <c>GOTO_NEXT_PAGE</c> was set by the previous tick) or clicks
        /// the download link of row <c>current_line</c>, starts the <c>Work_todo</c> background
        /// thread, and then decides what the next tick should do.
        /// </summary>
        /// <param name="sender">Timer that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void TimersTimer_Elapsed(object sender, ElapsedEventArgs e)
        {
            log.Info($"  Entered TimersTimer_Elapsed.");

            // The browser document is touched inside the dispatcher callback.
            m.Dispatcher.Invoke((Action)(() =>
            {
                HTMLDocument d = (HTMLDocument)m.vpnweb.Document;

                if (GOTO_NEXT_PAGE)
                {
                    GOTO_NEXT_PAGE = false;

                    log.Info($"    _timer1 stopped. this pages finished.");
                    this._timer1.Stop();

                    Goto_next_page();

                    // This return only leaves the lambda; the exit message is logged once,
                    // by the enclosing method, after Invoke returns.
                    return;
                }

                log.Info($"    Now dealing with page {current_page}/{total_pages}.");
                log.Info($"    Now dealing with line {current_line}.");

                if (d == null)
                {
                    // The timer is not stopped here, so the next Elapsed event tries again.
                    log.Error($"    Browser document is not available; skipping this tick.");
                    return;
                }

                IHTMLElement gvDownLoad = d.getElementById(DOM_FOR_ACTUAL_DATA);
                if (gvDownLoad == null)
                {
                    log.Error($"    [{DOM_FOR_ACTUAL_DATA}] not found; skipping this tick.");
                    return;
                }

                IHTMLElementCollection trs_ = gvDownLoad.all;
                IHTMLElementCollection trs = trs_.tags("tr");
                IHTMLElement tr = trs.item(current_line, null);
                if (tr == null)
                {
                    log.Error($"    Row {current_line} not found; skipping this tick.");
                    return;
                }

                // The clickable element is the first child of the row's fifth child.
                IHTMLElement a = tr.children[4].children[0];
                a.click();
                log.Info($"    button click: {a.innerHTML}");

                System.Threading.ThreadStart th_begin = new System.Threading.ThreadStart(Work_todo);
                System.Threading.Thread thr = new System.Threading.Thread(th_begin)
                {
                    IsBackground = true,
                    Name = "PressS"
                };
                thr.Start();

                // Is this page finished? Was it the last page?
                if ((queue_files.Count == 0) && (current_page == total_pages))
                {
                    // This page is done and it was the last page.
                    log.Info($"    _timer1 stopped. all pages finished.");
                    m.Refresh_Table();
                    tb.ShowBalloonTip("結束", "完成所有頁面讀取", BalloonIcon.Info);
                    this._timer1.Stop();
                }
                else if (queue_files.Count == 0)
                {
                    // This page is done but another page follows; the next tick navigates.
                    GOTO_NEXT_PAGE = true;
                }
                else
                {
                    current_line = queue_files.Dequeue();
                    log.Info($"    go to next line: {current_line}.");
                }
            }));

            log.Info($"  Exited TimersTimer_Elapsed.");
        }
Beispiel #7
0
        /// <summary>
        /// Rewrites the <c>src</c> attribute of every <c>img</c> element whose source contains
        /// "_files/" so that it points at an absolute URL, and returns the rebuilt document.
        /// </summary>
        /// <param name="htmlToParse">HTML markup to process.</param>
        /// <returns>
        /// "&lt;html&gt;" + the outer HTML of the first <c>head</c> element (if any) + the outer
        /// HTML of the body + "&lt;/html&gt;". When any exception occurs it is logged and
        /// <paramref name="htmlToParse"/> is returned unchanged.
        /// </returns>
        string FixHTML(string htmlToParse)
        {
            try
            {
                // Load the markup into an in-memory document so it can be edited through the DOM.
                IHTMLDocument2 htmlDocument = new HTMLDocumentClass();

                htmlDocument.write(htmlToParse);
                htmlDocument.close();

                IHTMLElementCollection bodyElements = (IHTMLElementCollection)htmlDocument.body.all;
                IHTMLElementCollection images       = (IHTMLElementCollection)bodyElements.tags("img");
                IHTMLElementCollection heads        = (IHTMLElementCollection)htmlDocument.all.tags("head");
                string MyDoc = "<html>";

                // Only the first head element is carried over.
                foreach (IHTMLElement headElement in heads)
                {
                    MyDoc += headElement.outerHTML;
                    break;
                }

                foreach (IHTMLElement image in images)
                {
                    // Skip images without a string src instead of throwing, which would make
                    // the catch below discard every rewrite already applied.
                    string src = image.getAttribute("src", 0) as string;
                    if (src == null)
                    {
                        continue;
                    }

                    int pos = src.IndexOf("_files/", StringComparison.Ordinal);
                    if (pos == -1)
                    {
                        continue;
                    }

                    // NOTE(review): pos + 1 drops the underscore and keeps "files/..." - confirm
                    // that GetWebDirectory() is expected to supply the part before it.
                    string url = GetSecurePDF()
                                 + Request.Url.Authority
                                 + GetWebDirectory()
                                 + src.Substring(pos + 1);
                    image.setAttribute("src", url, 0);
                }

                // The body is serialized after the edits so the new src values are included.
                MyDoc += htmlDocument.body.outerHTML;

                MyDoc += "</html>";
                return(MyDoc);
            }
            catch (Exception e)
            {
                logger.Error("Exception in HTML fix: " + e.Message);
            }
            return(htmlToParse);
        }