예제 #1
0
        /// <summary>
        /// Starts loading the given address on a new background thread.
        /// </summary>
        /// <remarks>
        /// When the address is not a valid absolute URI, IsError and workingdownload are
        /// both set to true and no thread is started. Otherwise any navigation thread that
        /// is still alive is aborted and awaited, the shared navigation state is reset and
        /// DoNavigate is started on a fresh thread.
        /// </remarks>
        /// <param name="_Url">Absolute URL of the page to load.</param>
        public void Navigate(string _Url)
        {
            Uri _uri;

            // TryCreate rejects null as well as malformed input; `new Uri(null)` would have
            // thrown an ArgumentNullException that the old UriFormatException handler missed.
            if (!Uri.TryCreate(_Url, UriKind.Absolute, out _uri))
            {
                IsError = true;
                workingdownload = true;
                // Throw Incorrect Url
                return;
            }

            // ThreadState is a flags enum whose Running member is 0, so an equality test
            // against it is false for a thread blocked in I/O (WaitSleepJoin). IsAlive
            // covers every started-but-not-finished state.
            Thread previous = curNavigate;
            if (previous != null && previous.IsAlive)
            {
                // NOTE(review): Thread.Abort is only supported on .NET Framework; a
                // cooperative cancellation flag would be needed on newer runtimes.
                previous.Abort();

                // Block until the old thread has really ended so that it cannot overwrite
                // the state that is reset below.
                previous.Join();
            }

            IsError = false;
            workingUrl = _uri;
            workingPage = null;
            workingdownload = false;
            retry = 0;
            curNavigate = new Thread(new ThreadStart(DoNavigate));
            curNavigate.Start();
        }
예제 #2
0
        /// <summary>
        /// Thread entry point: requests workingUrl and publishes the result.
        /// </summary>
        /// <remarks>
        /// An HTML response is read fully, parsed with HtmlReader.GetDocument and stored in
        /// workingPage. Any other content type is handed to DownloadQueue. Every failure
        /// path sets IsError and workingdownload to true. The method calls itself again when
        /// the response was disposed before its stream could be obtained, up to five
        /// attempts in total (counted in retry).
        /// </remarks>
        static void DoNavigate()
        {
            if (++retry > 5)
            {
                // Retry budget exhausted: report it like the other failure paths instead of
                // returning with workingdownload still false.
                IsError = true;
                workingdownload = true;
                return;
            }

            HttpWebRequest.DefaultWebProxy = null;
            HttpWebRequest wq = (HttpWebRequest)HttpWebRequest.Create(workingUrl);
            HttpWebResponse wr = null;

            try
            {
                wr = (HttpWebResponse)wq.GetResponse();
            }
            catch
            {
                IsError = true;
                workingdownload = true;
                // Throw Unable to Connect
                return;
            }

            //If it is not a html page, try to download as file.
            if (!wr.ContentType.Contains("text/html"))
            {
                // The response is passed on still open, so it is not closed here.
                // NOTE(review): workingdownload is left unchanged on this path - confirm
                // that callers do not wait on it for downloads.
                DownloadQueue.Enqueue(wr);
                return;
            }

            try
            {
                Stream wrs = null;
                try
                {
                    wrs = wr.GetResponseStream();
                }
                catch (ObjectDisposedException)
                {
                    // If the connection was disconnected, Do Again.
                    DoNavigate();
                    return;
                }

                // Disposing the reader also closes the underlying response stream.
                using (StreamReader sr = new StreamReader(wrs))
                {
                    string html = sr.ReadToEnd();

                    workingPage = HtmlReader.GetDocument(html);
                    workingdownload = true;
                }
            }
            catch
            {
                IsError = true;
                workingdownload = true;
                // Error occured.
            }
            finally
            {
                // Release the connection on every exit from the HTML path.
                wr.Close();
            }
        }
예제 #3
0
        /// <summary>
        /// Tokenize an html document.
        /// </summary>
        /// <remarks>
        /// Scans the string once and splits it into text, tag and comment tokens, each
        /// converted with GetElement and appended to the returned document. While inside a
        /// quoted string within a tag, '&lt;' and '&gt;' are ignored. Whatever remains after
        /// the last completed token is emitted as text.
        /// </remarks>
        /// <param name="HtmlString">A html document string</param>
        /// <returns>Tokenized html document</returns>
        public static Document Tokenize(string HtmlString)
        {
            HtmlTag.Initialize();

            TokenType prevtype = TokenType.Text;    // state to restore when a quoted string ends
            Document doc = new Document();
            TokenType type = TokenType.Text;        // current scanner state
            bool quote = false; //True:" , False:'
            int sp = 0;                             // start index of the token being collected
            for (int pos = 0; pos < HtmlString.Length; ++pos)
            {
                switch (HtmlString[pos])
                {
                    case '<':
                        // If it is in a string or comment, ignore
                        if (type == TokenType.String || type == TokenType.Comment) break;
                        // If it is in a tag, actually previous one is not a tag. so change previous one as Text
                        if (type == TokenType.Tag)
                        {
                            type = TokenType.Text;
                            // Step back so the loop increment re-examines this same '<'
                            // in the Text state.
                            --pos;
                            break;
                        }
                        //If it is possible to be a tag, add previous one.
                        if (pos + 1 < HtmlString.Length && IsTagStartPossibleCharacter(HtmlString[pos + 1]))
                        {
                            if (sp <= pos - 1)
                            {
                                doc.Items.Add(GetElement(TokenType.Text, HtmlString.Substring(sp, pos - sp)));
                            }

                            type = TokenType.Tag;
                            sp = pos;
                        }
                        //If it is a comment
                        else if (pos + 4 < HtmlString.Length && HtmlString[pos + 1] == '!' && HtmlString[pos + 2] == '-' && HtmlString[pos + 3] == '-')
                        {
                            if (sp <= pos - 1)
                            {
                                doc.Items.Add(GetElement(TokenType.Text, HtmlString.Substring(sp, pos - sp)));
                            }

                            type = TokenType.Comment;
                            sp = pos;
                        }
                        // If it is possible to be an end or special tag, add previous one.
                        // The bound must be '<': with '!=' a '<' in the very last position
                        // passed the check and indexed one past the end of the string.
                        else if (pos + 2 < HtmlString.Length && (HtmlString[pos + 1] == '/' || HtmlString[pos + 1] == '!') && IsTagStartPossibleCharacter(HtmlString[pos + 2]))
                        {
                            if (sp <= pos - 1)
                            {
                                doc.Items.Add(GetElement(TokenType.Text, HtmlString.Substring(sp, pos - sp)));
                            }

                            type = TokenType.Tag;
                            sp = pos;
                        }
                        // Otherwise, Process as Text
                        else
                        {
                            type = TokenType.Text;
                        }
                        break;
                    case '>':
                        if (type == TokenType.Tag)
                        {
                            doc.Items.Add(GetElement(TokenType.Tag, HtmlString.Substring(sp, pos - sp + 1)));
                            sp = pos + 1;
                            type = TokenType.Text;
                        }
                        else if (type == TokenType.Comment)
                        {
                            // If it is the end of a comment
                            if (pos - 2 > 0 && HtmlString[pos - 2] == '-' && HtmlString[pos - 1] == '-')
                            {
                                doc.Items.Add(GetElement(TokenType.Comment, HtmlString.Substring(sp, pos - sp + 1)));
                                sp = pos + 1;
                                type = TokenType.Text;
                            }
                        }
                        break;
                    case '\'':
                    case '"':
                        switch (type)
                        {
                            case TokenType.String:
                                // The other kind of quote inside a string is plain content;
                                // only the matching quote closes it.
                                if ((quote && HtmlString[pos] == '\'') || (!quote && HtmlString[pos] == '"')) break;
                                else
                                    type = prevtype;
                                break;
                            case TokenType.Tag:
                                // Opening quote of a string inside a tag.
                                prevtype = type;
                                quote = (HtmlString[pos] == '"');
                                type = TokenType.String;
                                break;
                        }
                        break;
                }
            }
            // Flush whatever follows the last completed token as text.
            if (sp != HtmlString.Length)
            {
                doc.Items.Add(GetElement(TokenType.Text, HtmlString.Substring(sp, HtmlString.Length - sp)));
            }

            return doc;
        }
예제 #4
0
파일: Core.cs 프로젝트: forcom/y-browser
 /// <summary>
 /// Serializes the elements of a document back into an HTML string.
 /// </summary>
 /// <remarks>
 /// Text elements are written as their Name. Special and Object tags are followed by a
 /// line break, Structure tags are surrounded by line breaks, and Markup tags are
 /// written inline. Elements of any other type produce no output.
 /// </remarks>
 /// <param name="doc">Document whose Items are written in order.</param>
 /// <returns>The generated HTML text.</returns>
 public static string MakeHtml(Document doc)
 {
     StringBuilder html = new StringBuilder();
     foreach (var element in doc.Items)
     {
         var kind = element.Type;

         // Plain text carries its content in Name and has no tag syntax.
         if (kind == Element.ElementType.Text)
         {
             html.Append(element.Name);
             continue;
         }

         // The tag kinds differ only in the whitespace placed around the tag.
         string opening;
         string closing;
         switch (kind)
         {
             case Element.ElementType.Special:
             case Element.ElementType.Object:
                 opening = "<";
                 closing = ">\n";
                 break;
             case Element.ElementType.Structure:
                 opening = "\n<";
                 closing = ">\n";
                 break;
             case Element.ElementType.Markup:
                 opening = "<";
                 closing = ">";
                 break;
             default:
                 // Nothing is written for other element types.
                 continue;
         }

         html.Append(opening);
         if (!element.IsStartTag)
         {
             html.Append('/');
         }
         html.Append(element.Name);

         foreach (var attribute in element.Attributes.Items.Values)
         {
             html.Append(' ');
             html.Append(attribute.Name);

             // An attribute with an empty value is written as a bare name.
             if (attribute.Value != "")
             {
                 html.Append("=\"");
                 html.Append(attribute.Value);
                 html.Append("\"");
             }
         }

         html.Append(closing);
     }
     return html.ToString();
 }
예제 #5
0
        /// <summary>
        /// Parse a tokenized html document.
        /// </summary>
        /// <remarks>
        /// Copies elements from the tokenized input into a new document, dropping those that
        /// HtmlTag.TagMap does not list as children of the currently open structure element,
        /// as well as all Special and Unknown elements. End tags are kept only when they
        /// match the most recently opened tag of their kind. Tags still open at the end of
        /// the input are closed automatically, inline markup first and then structure
        /// elements.
        /// </remarks>
        /// <param name="html">A tokenized html document</param>
        /// <returns>Parsed html document</returns>
        public static Document ParseHtml(Document html)
        {
            Document doc = new Document();
            HtmlTag.Initialize();

            // Open structure elements, innermost on top.
            Stack<Element> stk = new Stack<Element>();
            // Open inline markup tags, innermost on top.
            Stack<HtmlTag.Entity> markup = new Stack<HtmlTag.Entity>();
            Element top = null;

            foreach (Element i in html.Items)
            {
                top = stk.Count > 0 ? stk.Peek() : null;
                switch (i.Type)
                {
                    case Element.ElementType.Special:
                        break;
                    case Element.ElementType.Structure:
                        if (i.IsStartTag)
                        {
                            // Drop a tag that is not a permitted child of the open element.
                            if (top != null && !HtmlTag.TagMap[top.Name].Children.Contains(HtmlTag.TagMap[i.Name]))
                                break;

                            // NOTE(review): Entity.IsStartTag presumably marks tags that
                            // take a matching end tag - confirm against HtmlTag.
                            if (HtmlTag.TagMap[i.Name].IsStartTag)
                                stk.Push(i);
                            doc.Items.Add(i);
                        }
                        else
                        {
                            if (!HtmlTag.TagMap[i.Name].IsStartTag)
                                break;

                            // Only an end tag matching the innermost open element is kept.
                            if (top != null && top.Name == i.Name)
                            {
                                stk.Pop();
                                doc.Items.Add(i);
                            }
                        }
                        break;
                    case Element.ElementType.Markup:
                        if (i.IsStartTag)
                        {
                            if (top != null && !HtmlTag.TagMap[top.Name].Children.Contains(HtmlTag.TagMap[i.Name]))
                                break;

                            markup.Push(HtmlTag.TagMap[i.Name]);
                            doc.Items.Add(i);
                        }
                        else
                        {
                            // A stray end tag with no open markup is ignored; Peek on an
                            // empty stack would throw InvalidOperationException.
                            if (markup.Count > 0 && markup.Peek() == HtmlTag.TagMap[i.Name])
                            {
                                markup.Pop();
                                doc.Items.Add(i);
                            }
                        }
                        break;
                    case Element.ElementType.Object:
                        if (top != null && !HtmlTag.TagMap[top.Name].Children.Contains(HtmlTag.TagMap[i.Name]))
                            break;
                        doc.Items.Add(i);
                        break;
                    case Element.ElementType.Text:
                        // Text is allowed only where the open element accepts "CDATA".
                        if (top != null && !HtmlTag.TagMap[top.Name].Children.Contains(HtmlTag.TagMap["CDATA"]))
                            break;
                        doc.Items.Add(i);
                        break;
                    case Element.ElementType.Unknown:
                        break;
                }
            }

            // Auto recovery: emit end tags for inline markup that was never closed.
            while (markup.Count != 0)
            {
                HtmlTag.Entity i = markup.Pop();
                doc.Items.Add(new Element(i.Name, Element.ElementType.Markup, false));
            }

            // Then close the structure elements that are still open, innermost first.
            while (stk.Count != 0)
            {
                Element i = stk.Pop();
                doc.Items.Add(new Element(i.Name, i.Type, false));
            }

            return doc;
        }