Example #1
0
        /// <summary>
        /// Walks the attributes of a tag chunk and either advances the parse state when a
        /// marker attribute is seen, or stores link data on the current result item.
        /// </summary>
        /// <param name="oChunk">Tag chunk whose attribute names (sParams) and values (sValues) are inspected.</param>
        /// <param name="state">Current parse state; updated in place when a marker attribute matches.</param>
        private void HandleParam(HTMLchunk oChunk, ref int state)
        {
            // The loop is a no-op when iParams is zero, so no separate guard is needed.
            // Every attribute takes the same path regardless of its quote character.
            for (int i = 0; i < oChunk.iParams; i++)
            {
                string value = oChunk.sValues[i];
                string name  = oChunk.sParams[i];

                if (value == "bodyContent" && name == "id" && state == 1)
                {
                    state = 2;
                }
                else if (value == "mw-search-results" && name == "class" && state == 3)
                {
                    state = 4;
                }
                else if (state == 6)
                {
                    if (name == "href")
                    {
                        // The host is prepended to the attribute value to form the item URL.
                        item.Url = "http://en.wikipedia.org" + value;
                    }
                    else if (name == "title")
                    {
                        item.Title = value;
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Consumes every remaining chunk from the shared parser and picks up the value of
        /// attribute <paramref name="pn"/> from open tags named <paramref name="nn"/> whose
        /// "name" attribute equals <paramref name="tv"/>.
        /// </summary>
        /// <param name="nn">Tag name to match.</param>
        /// <param name="tv">Required value of the tag's "name" attribute.</param>
        /// <param name="pn">Attribute whose value is returned.</param>
        /// <returns>The value taken from the last matching tag; null when no tag matched.</returns>
        static string getNPVByNm(string nn, string tv, string pn)
        {
            string result = null;

            for (HTMLchunk current = parser.ParseNext(); current != null; current = parser.ParseNext())
            {
                // Only open tags are of interest; all other chunk types are skipped.
                if (current.oType != HTMLchunkType.OpenTag)
                {
                    continue;
                }

                if (current.sTag == nn && current.GetParamValue("name") == tv)
                {
                    // No early exit: a later match replaces an earlier one.
                    result = current.GetParamValue(pn);
                }
            }

            return result;
        }
Example #3
0
 /// <summary>
 /// Appends the chunk's raw text to the current item's description while the
 /// parse state is 7; text seen in any other state is ignored.
 /// </summary>
 /// <param name="oChunk">Text chunk whose oHTML content is appended.</param>
 /// <param name="state">Current parse state; read but not modified here.</param>
 private void HandleText(HTMLchunk oChunk, ref int state)
 {
     if (state != 7)
     {
         return;
     }

     item.Description += oChunk.oHTML;
 }
Example #4
0
 /// <summary>
 /// Processes one parsed tag chunk. Opening and self-closing tags are added to the
 /// open-tag list and parsed; a plain closing tag is handed to CloseOneTag.
 /// </summary>
 /// <param name="chunk">Parsed tag chunk.</param>
 /// <param name="atom">Element that receives the current style; may be null.</param>
 public void ParseTag(HTMLchunk chunk, AElement atom)
 {
     // A closing tag that is not self-closing only needs to be closed.
     if (chunk.bClosure && !chunk.bEndClosure)
     {
         CloseOneTag(chunk);
         return;
     }

     // Record the tag as open, then let the OpenTag overload parse it.
     OpenTag opened = new OpenTag(chunk);
     m_OpenTags.Add(opened);
     ParseTag(opened, atom);

     // Hand the current style to the atom when one was supplied.
     if (atom != null)
     {
         atom.Style = Style;
     }

     // Self-closing tags such as <br/> are closed again straight away.
     if (chunk.bEndClosure)
     {
         CloseOneTag(chunk);
     }
 }
Example #5
0
        /// <summary>
        /// Appends the text of a chunk to the dictionary-result field that corresponds
        /// to the current parse state. States without a mapping ignore the text.
        /// </summary>
        /// <param name="oChunk">Text chunk whose oHTML content is appended.</param>
        /// <param name="state">Current parse state; read but not modified here.</param>
        private void HandleText(HTMLchunk oChunk, ref int state)
        {
            // Characters stripped from both ends of the text in every state except 4.
            char[] blanks = new char[] { ' ', '\t', '\r', '\n' };

            switch (state)
            {
            case 2:
                dictResult.Word += oChunk.oHTML.Trim(blanks);
                break;

            case 4:
                // Pronunciation text is HTML-decoded rather than trimmed.
                dictResult.Pronunciation += System.Web.HttpUtility.HtmlDecode(oChunk.oHTML);
                break;

            case 6:
                dictResult.ChineseExplanations += oChunk.oHTML.Trim(blanks);
                break;

            case 9:
                dictResult.Examples += oChunk.oHTML.Trim(blanks);
                break;

            case 12:
                dictResult.Variations += oChunk.oHTML.Trim(blanks);
                break;

            case 14:
                dictResult.EnglishExplanations += oChunk.oHTML.Trim(blanks);
                break;

            case 16:
                dictResult.FromEncyclopedia += oChunk.oHTML.Trim(blanks);
                break;
            }
        }
        /// <summary>
        /// Disposes the helper objects held in fields and clears the references.
        /// Only the first call does any work.
        /// </summary>
        /// <param name="bDisposing">Not consulted by this implementation.</param>
        private void Dispose(bool bDisposing)
        {
            if (bDisposed)
            {
                return;
            }

            // Mark first so a repeated call returns immediately.
            bDisposed = true;

            if (oChunk != null)
            {
                oChunk.Dispose();
                oChunk = null;
            }

            if (sText != null)
            {
                sText.Dispose();
                sText = null;
            }

            // The byte buffer is simply dropped.
            bHTML = null;

            if (oE != null)
            {
                oE.Dispose();
                oE = null;
            }

            if (oTP != null)
            {
                oTP.Dispose();
                oTP = null;
            }
        }
Example #7
0
        /// <summary>
        /// Reads chunks from the parser until one bookmark item has been assembled.
        /// An "a" tag starts a link, a following "dd" tag supplies its description, and an
        /// "h3" tag produces a folder that is returned immediately. An opening "dt" tag or
        /// any "dl" tag ends the item and steps the parser back to the previous chunk.
        /// </summary>
        /// <param name="parser">Parser positioned inside the bookmark list.</param>
        /// <returns>The folder or link that was read, or null when neither was found.</returns>
        private IBookmarkItem ParseItem(HTMLparser parser)
        {
            BookmarkLink link     = null;
            HTMLchunk    previous = parser.CurrentChunk;

            while (true)
            {
                HTMLchunk current = parser.ParseNext();
                if (current == null)
                {
                    break;
                }

                var opens = current.IsOpenTag;
                var tag   = current.Tag;

                if (opens && tag == "a")
                {
                    link = new BookmarkLink();
                    AssignLinkAttributes(link, current.oParams);
                    link.Title = GetTextOrDontMove(parser);
                }
                else if (opens && tag == "dd" && link != null)
                {
                    link.Description = ParseDescription(parser);
                }
                else if (opens && tag == "h3")
                {
                    var folder = new BookmarkFolder();
                    AssignFolderAttributes(folder, current.oParams);
                    folder.Title = GetTextOrDontMove(parser);
                    return folder;
                }
                else if ((opens && tag == "dt") || tag == "dl")
                {
                    // This chunk belongs to the next item or ends the list.
                    parser.StepBack(previous);
                    break;
                }

                previous = current;
            }

            return link;
        }
Example #8
0
        /// <summary>
        /// Looks for HTML attributes whose value begins with user-controlled data. Every
        /// attribute of the element except event handlers (on*) is compared against every
        /// user-controlled parameter value, and an alert is assembled for each match.
        /// </summary>
        /// <param name="parms">User-controlled parameters; each value is compared against the attribute values.</param>
        /// <param name="chunk">Parsed HTML element whose attributes are inspected.</param>
        private void CheckTags(NameValueCollection parms, HTMLchunk chunk)
        {
            // Loop through all attributes of the current HTML element
            foreach (DictionaryEntry dictEntry in chunk.oParams)
            {
                string attributeName = dictEntry.Key.ToString();

                // Ignore action events (onmouseover, onclick, on*) but keep checking the
                // other attributes: leaving the method here would skip every attribute
                // that happens to be enumerated after the event handler.
                if (attributeName.StartsWith("on", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // An attribute without a value cannot start with user data.
                if (dictEntry.Value == null)
                {
                    continue;
                }
                string attributeValue = dictEntry.Value.ToString();

                // Loop through all values in the user-controlled parameters
                foreach (string param in parms)
                {
                    string paramValue = Utility.ToSafeLower(parms.Get(param));

                    // Only look at user-controlled values longer than one character;
                    // this is crude but reduces false positives. A null value is skipped.
                    if (paramValue != null
                        && paramValue.Length > 1
                        && attributeValue.StartsWith(paramValue, StringComparison.InvariantCultureIgnoreCase))
                    {
                        AssembleAlert(chunk.sTag, attributeName, param, paramValue, attributeValue);
                    }
                }
            }
        }
        /// <summary>
        /// Parses the tag that starts at the current position (the point where '&lt;' was
        /// found) and returns the resulting chunk.
        /// </summary>
        /// <returns>
        /// The parsed tag chunk, or the chunk returned by the script parser when the tag
        /// is an opening script tag.
        /// </returns>
        HTMLchunk GetNextTag()
        {
            oChunk = oTP.ParseTag(ref iCurPos);

            // Backwards compatibility: a closing tag that carries params is reported as open.
            bool bClosedWithParams = oChunk.iParams > 0
                                     && bAutoMarkClosedTagsWithParamsAsOpen
                                     && oChunk.oType == HTMLchunkType.CloseTag;
            if (bClosedWithParams)
            {
                oChunk.oType = HTMLchunkType.OpenTag;
            }

            // Opening script tag: the length and first-character tests run before the
            // full string comparison.
            if (oChunk.sTag.Length == 6 && oChunk.sTag[0] == 's' && oChunk.sTag == "script" && !oChunk.bClosure)
            {
                oChunk.oType = HTMLchunkType.Script;
                oChunk       = oTP.ParseScript(ref iCurPos);
                return oChunk;
            }

            // The chunk spans from its offset up to the position the tag parser stopped at.
            oChunk.iChunkLength = iCurPos - oChunk.iChunkOffset;

            if (bKeepRawHTML)
            {
                oChunk.oHTML = oEnc.GetString(bHTML, oChunk.iChunkOffset, oChunk.iChunkLength);
            }

            return oChunk;
        }
Example #10
0
        /// <summary>
        /// Advances the parse state when a closing tag ends the section that the current
        /// state is tracking.
        /// </summary>
        /// <param name="oChunk">Closing tag chunk; only its tag name is read.</param>
        /// <param name="state">Current parse state; updated in place.</param>
        private void HandleCloseTag(HTMLchunk oChunk, ref int state)
        {
            switch (state)
            {
            case 2:
                // Any closing tag moves on from state 2.
                state = 3;
                break;

            case 4:
                // Any closing tag moves on from state 4.
                state = 5;
                break;

            case 9:
                if (oChunk.sTag == "ol")
                {
                    state = 10;
                }
                break;

            case 12:
                if (oChunk.sTag == "table")
                {
                    state = 13;
                }
                break;

            case 6:
            case 14:
            case 16:
                // Each closing div lowers the counter; the state advances once it reaches zero.
                if (oChunk.sTag == "div" && --divCount == 0)
                {
                    state += 1;
                }
                break;
            }
        }
Example #11
0
 /// <summary>
 /// Creates an open-tag record from a parsed chunk by copying its tag name, its
 /// closure flags and, entry by entry, its parameter table.
 /// </summary>
 /// <param name="chunk">Parsed chunk to copy from.</param>
 public OpenTag(HTMLchunk chunk)
 {
     // The parameters go into a table owned by this instance.
     Params = new Hashtable();
     foreach (DictionaryEntry pair in chunk.Params)
     {
         Params.Add(pair.Key, pair.Value);
     }

     EndClosure = chunk.EndClosure;
     Closure    = chunk.Closure;
     Tag        = chunk.Tag;
 }
Example #12
0
 /// <summary>
 /// Walks the attributes of a tag chunk. "class" attributes drive the parse state;
 /// "href" attributes supply the URL, similar-pages URL or cache URL of the current
 /// result item, depending on the state.
 /// </summary>
 /// <param name="oChunk">Tag chunk whose attribute names (sParams) and values (sValues) are inspected.</param>
 /// <param name="state">Current parse state; updated in place when a marker class matches.</param>
 private void HandleParam(HTMLchunk oChunk, ref int state)
 {
     // The loop is a no-op when iParams is zero, so no separate guard is needed.
     // Every attribute takes the same path regardless of its quote character.
     for (int i = 0; i < oChunk.iParams; i++)
     {
         string value = oChunk.sValues[i];
         string name  = oChunk.sParams[i];

         if (name == "class")
         {
             if (value == "g" && state == 2)
             {
                 state = 3;

                 // A new result block starts: store the item collected so far, if it has a URL.
                 if (!string.IsNullOrEmpty(item.Url))
                 {
                     searchResult.Results.Add(item);
                     item = new SearchEngineResult.ResultItem();
                 }
             }
             else if (value == "r" && state == 3)
             {
                 state = 4;
             }
             else if (value == "s" && state == 6)
             {
                 state = 7;
             }
             else if (value == "gl" && state == 7)
             {
                 state = 8;
             }
         }
         else if (name == "href")
         {
             if (state == 5)
             {
                 item.Url = value;
             }
             else if (state == 9 || state == 11)
             {
                 // URL fragments are matched ordinally (string.Contains), not by culture rules.
                 if (value.Contains("q=related"))
                 {
                     item.SimilarUrl = value;
                 }
                 else if (value.Contains("q=cache"))
                 {
                     item.CacheUrl = value;
                 }
             }
         }
     }
 }
Example #13
0
 /// <summary>
 /// Appends the chunk's raw text to the current item's title (state 4) or
 /// description (state 5); text seen in any other state is ignored.
 /// </summary>
 /// <param name="oChunk">Text chunk whose oHTML content is appended.</param>
 /// <param name="state">Current parse state; read but not modified here.</param>
 private void HandleText(HTMLchunk oChunk, ref int state)
 {
     switch (state)
     {
     case 4:
         item.Title += oChunk.oHTML;
         break;

     case 5:
         item.Description += oChunk.oHTML;
         break;
     }
 }
Example #14
0
        /// <summary>
        /// Creates an open-tag record from a parsed chunk by copying its tag name, its
        /// closure flags and, entry by entry, its parameter table.
        /// </summary>
        /// <param name="chunk">Parsed chunk to copy from.</param>
        public OpenTag(HTMLchunk chunk)
        {
            // The parameters go into a table owned by this instance.
            oParams = new Hashtable();
            foreach (DictionaryEntry pair in chunk.oParams)
            {
                oParams.Add(pair.Key, pair.Value);
            }

            bEndClosure = chunk.bEndClosure;
            bClosure    = chunk.bClosure;
            sTag        = chunk.sTag;
        }
Example #15
0
        /// <summary>
        /// Walks the attributes of a tag chunk and moves the parse state according to the
        /// value of its "id" attribute. Attributes with any other name are ignored.
        /// </summary>
        /// <param name="oChunk">Tag chunk whose attribute names (sParams) and values (sValues) are inspected.</param>
        /// <param name="state">Current parse state; updated in place when a known id matches.</param>
        private void HandleParam(HTMLchunk oChunk, ref int state)
        {
            // The loop is a no-op when iParams is zero, so no separate guard is needed.
            // Every attribute takes the same path regardless of its quote character.
            for (int i = 0; i < oChunk.iParams; i++)
            {
                string value = oChunk.sValues[i];
                string name  = oChunk.sParams[i];

                // Every transition below is keyed on an "id" attribute.
                if (name != "id")
                {
                    continue;
                }

                switch (value)
                {
                case "main_right_left":
                    if (state == 0)
                    {
                        state = 1;
                    }
                    break;

                case "word":
                    if (state > 0)
                    {
                        state = 2;
                    }
                    break;

                case "pron":
                    if (state == 3)
                    {
                        state = 4;
                    }
                    break;

                case "exp_exp":
                    if (state > 2)
                    {
                        state    = 6;
                        divCount = 1;   // restart the div counter for this section
                    }
                    break;

                case "exp_eg":
                    if (state > 2)
                    {
                        state = 8;
                    }
                    break;

                case "exp_tran":
                    if (state > 2)
                    {
                        state = 11;
                    }
                    break;

                case "exp_eee":
                    if (state > 2)
                    {
                        state    = 14;
                        divCount = 1;   // restart the div counter for this section
                    }
                    break;

                case "exp_baike":
                    if (state > 2)
                    {
                        state    = 16;
                        divCount = 1;   // restart the div counter for this section
                    }
                    break;
                }
            }
        }
Example #16
0
 /// <summary>
 /// Walks the attributes of a tag chunk. A "class" of "f" starts a new result,
 /// "href" attributes supply the item's URL, cache URL or similar-pages URL depending
 /// on the state, and "id" attributes with known prefixes select which of the latter
 /// two is expected next.
 /// </summary>
 /// <param name="oChunk">Tag chunk whose attribute names (sParams) and values (sValues) are inspected.</param>
 /// <param name="state">Current parse state; updated in place when a marker attribute matches.</param>
 private void HandleParam(HTMLchunk oChunk, ref int state)
 {
     // The loop is a no-op when iParams is zero, so no separate guard is needed.
     // Every attribute takes the same path regardless of its quote character.
     for (int i = 0; i < oChunk.iParams; i++)
     {
         string value = oChunk.sValues[i];
         string name  = oChunk.sParams[i];

         if (name == "class")
         {
             if (value == "f" && state == 2)
             {
                 state = 3;

                 // A new result block starts: store the item collected so far, if it has a URL.
                 if (!string.IsNullOrEmpty(item.Url))
                 {
                     searchResult.Results.Add(item);
                     item = new SearchEngineResult.ResultItem();
                 }
             }
         }
         else if (name == "href")
         {
             if (state == 4)
             {
                 item.Url = value;
             }
             else if (state == 7)
             {
                 item.CacheUrl = value;
             }
             else if (state == 10)
             {
                 item.SimilarUrl = value;
             }
         }
         else if (name == "id" && (state == 6 || state == 9))
         {
             // Element ids are matched ordinally, not by culture rules.
             if (value.StartsWith("sogou_snapshot", System.StringComparison.Ordinal))
             {
                 state = 7;
             }
             else if (value.StartsWith("sogou_sis", System.StringComparison.Ordinal))
             {
                 state = 10;
             }
         }
     }
 }
Example #17
0
 /// <summary>
 /// Updates the parse state for a closing tag: a closing "ol" resets the state to 0,
 /// and a closing "a" advances states 5, 9 and 11 by one.
 /// </summary>
 /// <param name="oChunk">Closing tag chunk; only its tag name is read.</param>
 /// <param name="state">Current parse state; updated in place.</param>
 private void HandleCloseTag(HTMLchunk oChunk, ref int state)
 {
     switch (oChunk.sTag)
     {
     case "ol":
         state = 0;
         break;

     case "a":
         if (state == 5 || state == 9 || state == 11)
         {
             state += 1;
         }
         break;
     }
 }
Example #18
0
 /// <summary>
 /// Updates the parse state for an opening tag: "ol" moves state 8 to 9, "table"
 /// moves state 11 to 12, and a "div" opened in state 6, 14 or 16 raises the div counter.
 /// </summary>
 /// <param name="oChunk">Opening tag chunk; only its tag name is read.</param>
 /// <param name="state">Current parse state; updated in place.</param>
 private void HandleOpenTag(HTMLchunk oChunk, ref int state)
 {
     switch (state)
     {
     case 8:
         if (oChunk.sTag == "ol")
         {
             state = 9;
         }
         break;

     case 11:
         if (oChunk.sTag == "table")
         {
             state = 12;
         }
         break;

     case 6:
     case 14:
     case 16:
         // Count the nested div so the matching close can be recognised later.
         if (oChunk.sTag == "div")
         {
             ++divCount;
         }
         break;
     }
 }
Example #19
0
        /// <summary>
        /// Check the EMBED tag for its enableHtmlAccess value.
        /// Silverlight can be embedded using the EMBED element tag.
        /// See http://www.informit.com/articles/article.aspx?p=1078181
        /// </summary>
        /// <param name="chunk">Parsed EMBED element; its attribute table and raw HTML are used.</param>
        private void CheckEmbedTag(HTMLchunk chunk)
        {
            // One lookup instead of ContainsKey plus indexer. A missing attribute and an
            // attribute stored with a null value both yield null and are skipped.
            object rawValue = chunk.oParams["enablehtmlaccess"];
            if (rawValue != null)
            {
                CheckEnableHtmlAccessValue(rawValue.ToString(), chunk.oHTML);
            }
        }
Example #20
0
        /// <summary>
        /// Parses a result page into a <c>SearchEngineResult</c> tagged as Baidu by feeding
        /// every chunk of the HTML to the open-tag, param, close-tag and text handlers.
        /// </summary>
        /// <param name="html">Page markup.</param>
        /// <param name="encoding">Encoding used to turn the markup into bytes and to configure the parser.</param>
        /// <returns>The result object populated by the handlers.</returns>
        public SearchEngineResult Parse(string html, Encoding encoding)
        {
            HTMLparser oP = HtmlParserFactory.GetInstance();

            // Fresh result container and working item for this run.
            searchResult = new SearchEngineResult();
            searchResult.SearchEngineType = SearchEngineType.Baidu;
            item = new SearchEngineResult.ResultItem();

            oP.Init(encoding.GetBytes(html));
            oP.SetEncoding(encoding);

            int  state        = 0;
            bool bEncodingSet = false;

            for (HTMLchunk oChunk = oP.ParseNext(); oChunk != null; oChunk = oP.ParseNext())
            {
                HTMLchunkType chunkType = oChunk.oType;

                if (chunkType == HTMLchunkType.OpenTag)
                {
                    HandleOpenTag(oChunk, ref state);

                    // META tags may announce the page encoding.
                    if (oChunk.sTag == "meta")
                    {
                        HandleMetaEncoding(oP, oChunk, ref bEncodingSet);
                    }

                    HandleParam(oChunk, ref state);
                }
                else if (chunkType == HTMLchunkType.CloseTag)
                {
                    HandleCloseTag(oChunk, ref state);
                }
                else if (chunkType == HTMLchunkType.Text)
                {
                    HandleText(oChunk, ref state);
                }
            }

            // NOTE(review): the working item still held when the loop ends is not added
            // to the results in this method - confirm that is intended.
            return searchResult;
        }
Example #21
0
        /// <summary>
        /// Parses a dictionary page into a <c>DictResult</c> tagged as Dict_cn by feeding
        /// every chunk of the HTML to the open-tag, param, close-tag and text handlers.
        /// </summary>
        /// <param name="html">Page markup.</param>
        /// <param name="encoding">Encoding used to turn the markup into bytes and to configure the parser.</param>
        /// <returns>The result object populated by the handlers.</returns>
        public SmartMe.Core.Data.DictResult Parse(string html, Encoding encoding)
        {
            dictResult = new DictResult();
            HTMLparser oP = HtmlParserFactory.GetInstance();

            dictResult.DictionaryType = DictionaryType.Dict_cn;

            oP.Init(encoding.GetBytes(html));
            oP.SetEncoding(encoding);

            int  state        = 0;
            bool bEncodingSet = false;

            for (HTMLchunk oChunk = oP.ParseNext(); oChunk != null; oChunk = oP.ParseNext())
            {
                HTMLchunkType chunkType = oChunk.oType;

                if (chunkType == HTMLchunkType.OpenTag)
                {
                    HandleOpenTag(oChunk, ref state);

                    // META tags may announce the page encoding.
                    if (oChunk.sTag == "meta")
                    {
                        HandleMetaEncoding(oP, oChunk, ref bEncodingSet);
                    }

                    HandleParam(oChunk, ref state);
                }
                else if (chunkType == HTMLchunkType.CloseTag)
                {
                    HandleCloseTag(oChunk, ref state);
                }
                else if (chunkType == HTMLchunkType.Text)
                {
                    HandleText(oChunk, ref state);
                }
            }

            return dictResult;
        }
Example #22
0
        /// <summary>
        /// Removes the most recently opened tag whose name matches the chunk's tag and
        /// recalculates the style. Nothing happens when no open tag matches.
        /// </summary>
        /// <param name="chunk">Closing tag chunk; only its tag name is read.</param>
        public void CloseOneTag(HTMLchunk chunk)
        {
            // Search from the newest open tag backwards.
            var index = _openTags.Count - 1;
            while (index >= 0 && _openTags[index].Tag != chunk.Tag)
            {
                index--;
            }

            if (index < 0)
            {
                return;
            }

            _openTags.RemoveAt(index);
            RecalculateStyle();
        }
Example #23
0
 /// <summary>
 /// Handles an anchor tag. The style is recalculated in every case; an opening
 /// anchor is additionally added to the open-tag list and parsed.
 /// </summary>
 /// <param name="chunk">Parsed anchor tag chunk.</param>
 /// <param name="atom">Element passed on to ParseTag for an opening anchor.</param>
 public void InterpretHREF(HTMLchunk chunk, AElement atom)
 {
     // NOTE(review): self-closing anchors (EndClosure) get no special treatment here
     // even though a solo anchor element is meaningless - confirm whether they
     // should be skipped.
     var closing = chunk.Closure;

     RecalculateStyle();

     if (closing)
     {
         // Closing a hyperlink: recalculating the style is all that is needed.
         return;
     }

     // Opening a hyperlink.
     var tag = new OpenTag(chunk);
     _openTags.Add(tag);
     ParseTag(tag, atom);
 }
Example #24
0
        /// <summary>
        /// Lets the parser pick up the page encoding from a META tag, unless an encoding
        /// has already been set, and writes a console message when the attempt fails.
        /// </summary>
        /// <param name="oP">Parser whose encoding may be changed.</param>
        /// <param name="oChunk">Tag chunk to examine.</param>
        /// <param name="bEncodingSet">Whether an encoding has already been set; passed on by reference.</param>
        private void HandleMetaEncoding(HTMLparser oP, HTMLchunk oChunk, ref bool bEncodingSet)
        {
            // The first encoding wins, which is the logic major browsers follow,
            // so nothing is attempted once one is in place.
            if (bEncodingSet)
            {
                return;
            }

            bool bHandled = HTMLparser.HandleMetaEncoding(oP, oChunk, ref bEncodingSet);

            // Handled as an encoding META, yet the flag is still unset.
            if (bHandled && !bEncodingSet)
            {
                Console.WriteLine("Failed to set encoding from META: {0}", oChunk.GenerateHTML());
            }
        }
Example #25
0
 /// <summary>
 /// Updates the parse state for a closing tag: "tr" resets it to 0, "td" sets it
 /// to 1, and "a" advances states 4, 6 and 8 by one.
 /// </summary>
 /// <param name="oChunk">Closing tag chunk; only its tag name is read.</param>
 /// <param name="state">Current parse state; updated in place.</param>
 private void HandleCloseTag(HTMLchunk oChunk, ref int state)
 {
     switch (oChunk.sTag)
     {
     case "tr":
         state = 0;
         break;

     case "td":
         state = 1;
         break;

     case "a":
         if (state == 4 || state == 6 || state == 8)
         {
             state += 1;
         }
         break;
     }
 }
Example #26
0
        /// <summary>
        /// Removes the most recently opened tag whose name matches the chunk's tag and
        /// recalculates the style. Nothing happens when no open tag matches.
        /// </summary>
        /// <param name="chunk">Closing tag chunk; only its tag name is read.</param>
        public void CloseOneTag(HTMLchunk chunk)
        {
            // Walk from the newest open tag to the oldest; only the first match is removed.
            for (int iTag = m_OpenTags.Count - 1; iTag >= 0; iTag--)
            {
                if (m_OpenTags[iTag].sTag != chunk.sTag)
                {
                    continue;
                }

                m_OpenTags.RemoveAt(iTag);
                RecalculateStyle();
                return;
            }
        }
Example #27
0
 /// <summary>
 /// Advances the parse state when the opening tag expected in the current state is
 /// seen: "div" in state 0, "ul" in state 2, "li" in state 4 and "a" in state 5.
 /// </summary>
 /// <param name="oChunk">Opening tag chunk; only its tag name is read.</param>
 /// <param name="state">Current parse state; updated in place.</param>
 private void HandleOpenTag(HTMLchunk oChunk, ref int state)
 {
     switch (state)
     {
     case 0:
         if (oChunk.sTag == "div")
         {
             state = 1;
         }
         break;

     case 2:
         if (oChunk.sTag == "ul")
         {
             state = 3;
         }
         break;

     case 4:
         if (oChunk.sTag == "li")
         {
             state = 5;
         }
         break;

     case 5:
         if (oChunk.sTag == "a")
         {
             state = 6;
         }
         break;
     }
 }
Example #28
0
 /// <summary>
 /// Walks the attributes of a tag chunk. A "class" of "f" starts a new result, and
 /// "href" attributes supply the item's URL, cache URL or similar-pages URL depending
 /// on the state and on the link text.
 /// </summary>
 /// <param name="oChunk">Tag chunk whose attribute names (sParams) and values (sValues) are inspected.</param>
 /// <param name="state">Current parse state; updated in place when a marker attribute matches.</param>
 private void HandleParam(HTMLchunk oChunk, ref int state)
 {
     // The loop is a no-op when iParams is zero, so no separate guard is needed.
     // Every attribute takes the same path regardless of its quote character.
     for (int i = 0; i < oChunk.iParams; i++)
     {
         string value = oChunk.sValues[i];
         string name  = oChunk.sParams[i];

         if (name == "class")
         {
             if (value == "f" && state == 2)
             {
                 state = 3;

                 // A new result block starts: store the item collected so far, if it has a URL.
                 if (!string.IsNullOrEmpty(item.Url))
                 {
                     searchResult.Results.Add(item);
                     item = new SearchEngineResult.ResultItem();
                 }
             }
         }
         else if (name == "href")
         {
             if (state == 4)
             {
                 item.Url = value;
             }
             else if (state == 6 || state == 8)
             {
                 // URL text is matched ordinally, not by culture rules.
                 if (value.Contains("cache"))
                 {
                     item.CacheUrl = value;
                 }
                 else if (value.StartsWith("s?cl=2", System.StringComparison.Ordinal))
                 {
                     item.SimilarUrl = value;
                 }
             }
         }
     }
 }
        /// <summary>
        /// Checks the allowScriptAccess setting of an OBJECT element that embeds Flash.
        /// The element is recognised by its "classid" attribute or by a "type" of
        /// application/x-shockwave-flash; the setting comes from the element's own
        /// attribute or from GetAllowScriptAccessValue.
        /// </summary>
        /// <param name="chunk">Parsed OBJECT element.</param>
        /// <param name="parser">Parser handed on by reference to GetAllowScriptAccessValue.</param>
        private void CheckObjectTag(HTMLchunk chunk, ref UtilityHtmlParser parser)
        {
            String html = null;
            String allowScriptAccessValue = null;
            bool   flag = false;

            // Raw HTML of the element as it is on entry.
            string objectHtml = chunk.oHTML;

            // Check the param elements of an object element
            if (chunk.oParams.ContainsKey("classid"))
            {
                String classId = chunk.oParams["classid"].ToString();
                bool isFlash = (classId == "clsid:d27cdb6e-ae6d-11cf-96b8-444553540000") // flash clsid
                               || (classId == "x-shockwave-flash");
                if (isFlash)
                {
                    allowScriptAccessValue = GetAllowScriptAccessValue(ref parser, ref flag, allowScriptAccessValue, ref html);

                    if (flag)
                    {
                        CheckAllowScriptAccessValue(allowScriptAccessValue, objectHtml);
                    }
                }
            }

            // Otherwise check the attributes of the object element
            if (!chunk.oParams.ContainsKey("type"))
            {
                return;
            }

            string type = chunk.oParams["type"].ToString();
            if (Utility.ToSafeLower(type) != "application/x-shockwave-flash")
            {
                return;
            }

            if (chunk.oParams.ContainsKey("allowscriptaccess"))
            {
                allowScriptAccessValue = chunk.oParams["allowscriptaccess"].ToString();
                CheckAllowScriptAccessValue(allowScriptAccessValue, chunk.oHTML);
            }
            else
            {
                // Start looking through the param elements.
                // NOTE(review): unlike the classid path, flag is not consulted before the
                // check here - confirm that is intended.
                allowScriptAccessValue = GetAllowScriptAccessValue(ref parser, ref flag, allowScriptAccessValue, ref html);
                CheckAllowScriptAccessValue(allowScriptAccessValue, html);
            }
        }
Example #30
0
 /// <summary>
 /// Updates the parse state for a closing tag. A closing "a" in state 6 moves to 7,
 /// a closing "li" in state 7 returns to 4 and stores the current item when it has a
 /// URL, and a closing "ul" in state 4 sets the state to -1.
 /// </summary>
 /// <param name="oChunk">Closing tag chunk; only its tag name is read.</param>
 /// <param name="state">Current parse state; updated in place.</param>
 private void HandleCloseTag(HTMLchunk oChunk, ref int state)
 {
     switch (state)
     {
     case 6:
         if (oChunk.sTag == "a")
         {
             state = 7;
         }
         break;

     case 7:
         if (oChunk.sTag == "li")
         {
             state = 4;

             // Store the finished item and start a new one.
             if (!string.IsNullOrEmpty(item.Url))
             {
                 searchResult.Results.Add(item);
                 item = new SearchEngineResult.ResultItem();
             }
         }
         break;

     case 4:
         if (oChunk.sTag == "ul")
         {
             state = -1;
         }
         break;
     }
 }
Example #31
0
		/// <summary>
		/// Fills the oHTML field of a chunk with the raw HTML that was parsed for it,
		/// decoded from the parser's byte buffer with the current encoding.
		/// </summary>
		/// <param name="oChunk">Chunk returned by ParseNext; it must belong to the same HTMLparser,
		/// initialised with the same HTML data, because its offset and length index that data</param>
		public void SetRawHTML(HTMLchunk oChunk)
		{
			// note: a byte array would have been the more faithful representation of the
			// raw data than a decoded string
			int iOffset=oChunk.iChunkOffset;
			int iLength=oChunk.iChunkLength;

			oChunk.oHTML=oEnc.GetString(bHTML,iOffset,iLength);
		}
Example #32
0
		/// <summary>
		/// Disposes the helper objects held in fields and clears the references.
		/// Only the first call does any work.
		/// </summary>
		/// <param name="bDisposing">Not consulted by this implementation.</param>
		private void Dispose(bool bDisposing)
		{
			if(bDisposed)
			{
				return;
			}

			// Mark first so a repeated call returns immediately.
			bDisposed=true;

			if(oChunk!=null)
			{
				oChunk.Dispose();
				oChunk=null;
			}

			if(sText!=null)
			{
				sText.Dispose();
				sText=null;
			}

			// The byte buffer is simply dropped.
			bHTML=null;

			if(oE!=null)
			{
				oE.Dispose();
				oE=null;
			}

			if(oTP!=null)
			{
				oTP.Dispose();
				oTP=null;
			}
		}
Example #33
0
		/// <summary>
		/// Handles META tags that set page encoding
		/// </summary>
		/// <param name="oP">HTML parser object that is used for parsing</param>
		/// <param name="oChunk">Parsed chunk that should contain tag META</param>
		/// <param name="bEncodingSet">Your own flag that shows whether encoding was already set or not, if set
		/// once then it should not be changed - this is the logic applied by major browsers. It is switched
		/// to true when this call sets the encoding successfully</param>
		/// <returns>True if this was META tag setting Encoding, false otherwise</returns>
		public static bool HandleMetaEncoding(HTMLparser oP,HTMLchunk oChunk,ref bool bEncodingSet)
		{
			if(oChunk.sTag!="meta")
			{
				return false;
			}

			// if we do not use hashmode already then we call conversion explicitly
			// this is slow, but METAs are very rare so performance penalty is low
			if(!oChunk.bHashMode)
			{
				oChunk.ConvertParamsToHash();
			}

			string sKey=oChunk.oParams["http-equiv"] as string;
			if(sKey==null)
			{
				return false;
			}

			// Compared without lower-casing: no temporary string and no dependence on the
			// current culture. "content-category" is a rare variant (appears to work in IE)
			// reported to exist in some pages by Martin Bächtold.
			bool bEncodingMeta=
				string.Equals(sKey,"content-type",System.StringComparison.OrdinalIgnoreCase) ||
				string.Equals(sKey,"content-category",System.StringComparison.OrdinalIgnoreCase);

			if(!bEncodingMeta)
			{
				return false;
			}

			// once encoding is set it should not be changed, even though there are
			// web pages out there that try to do that
			if(!bEncodingSet)
			{
				// it is possible we have broken META tag without Content part
				string sData=oChunk.oParams["content"] as string;

				// SetEncoding returning false means the encoding string could not be
				// applied; the flag then stays unset so the caller can react to it
				if(sData!=null && oP.SetEncoding(sData))
				{
					// any text found so far (most likely just TITLE) may need re-encoding
					// by the caller
					bEncodingSet=true;
				}
			}

			return true;
		}
Example #34
0
		/// <summary>
		/// Parses the tag that starts at the current position (the point where '&lt;' was
		/// found) and returns the resulting chunk.
		/// </summary>
		/// <returns>
		/// The parsed tag chunk, or the chunk returned by the script parser when the tag
		/// is an opening script tag.
		/// </returns>
		HTMLchunk GetNextTag()
		{
			oChunk=oTP.ParseTag(ref iCurPos);

			// Backwards compatibility: a closing tag that carries params is reported as open.
			bool bClosedWithParams=oChunk.iParams>0
				&& bAutoMarkClosedTagsWithParamsAsOpen
				&& oChunk.oType==HTMLchunkType.CloseTag;
			if(bClosedWithParams)
			{
				oChunk.oType=HTMLchunkType.OpenTag;
			}

			// Opening script tag: the length and first-character tests run before the
			// full string comparison.
			if(oChunk.sTag.Length==6 && oChunk.sTag[0]=='s' && oChunk.sTag=="script" && !oChunk.bClosure)
			{
				oChunk.oType=HTMLchunkType.Script;
				oChunk=oTP.ParseScript(ref iCurPos);
				return oChunk;
			}

			// The chunk spans from its offset up to the position the tag parser stopped at.
			oChunk.iChunkLength=iCurPos-oChunk.iChunkOffset;

			if(bKeepRawHTML)
			{
				oChunk.oHTML=oEnc.GetString(bHTML,oChunk.iChunkOffset,oChunk.iChunkLength);
			}

			return oChunk;
		}