Esempio n. 1
0
        /// <summary>
        /// Runs the HTML parser over <paramref name="html"/> chunk by chunk and returns
        /// the result object, which is tagged as a Baidu result.
        /// </summary>
        /// <remarks>
        /// Every call replaces the <c>searchResult</c> and <c>item</c> members with fresh
        /// instances before parsing starts. The handler methods are presumably what fill
        /// them in; their bodies are not visible from here, so confirm against them.
        /// </remarks>
        /// <param name="html">Markup of the page to parse.</param>
        /// <param name="encoding">
        /// Encoding used to convert <paramref name="html"/> to bytes; it is also handed
        /// to the parser via <c>SetEncoding</c>.
        /// </param>
        /// <returns>The <c>searchResult</c> member after the last chunk has been handled.</returns>
        public SearchEngineResult Parse(string html, Encoding encoding)
        {
            HTMLparser parser = HtmlParserFactory.GetInstance();

            searchResult = new SearchEngineResult();
            searchResult.SearchEngineType = SearchEngineType.Baidu;
            item = new SearchEngineResult.ResultItem();

            // The parser is initialised from raw bytes, so the markup is re-encoded first.
            byte[] rawPage = encoding.GetBytes(html);
            parser.Init(rawPage);
            parser.SetEncoding(encoding);

            // Both values are threaded through the handlers by reference.
            int parseState = 0;
            bool encodingApplied = false;

            for (HTMLchunk chunk = parser.ParseNext(); chunk != null; chunk = parser.ParseNext())
            {
                // Read the chunk type once, before any handler gets to touch the chunk.
                HTMLchunkType kind = chunk.oType;

                if (kind == HTMLchunkType.OpenTag)
                {
                    HandleOpenTag(chunk, ref parseState);

                    // NOTE(review): presumably lets a <meta> tag adjust the parser's
                    // encoding - confirm against HandleMetaEncoding.
                    if (chunk.sTag == "meta")
                    {
                        HandleMetaEncoding(parser, chunk, ref encodingApplied);
                    }

                    HandleParam(chunk, ref parseState);
                }
                else if (kind == HTMLchunkType.CloseTag)
                {
                    HandleCloseTag(chunk, ref parseState);
                }
                else if (kind == HTMLchunkType.Text)
                {
                    HandleText(chunk, ref parseState);
                }
                // Any other chunk type is ignored.
            }

            return searchResult;
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the HTML parser over <paramref name="html"/> chunk by chunk and returns
        /// the dictionary result, which is tagged as a Dict_cn result.
        /// </summary>
        /// <remarks>
        /// Every call replaces the <c>dictResult</c> member with a fresh instance before
        /// parsing starts. The handler methods are presumably what fill it in; their
        /// bodies are not visible from here, so confirm against them.
        /// </remarks>
        /// <param name="html">Markup of the page to parse.</param>
        /// <param name="encoding">
        /// Encoding used to convert <paramref name="html"/> to bytes; it is also handed
        /// to the parser via <c>SetEncoding</c>.
        /// </param>
        /// <returns>The <c>dictResult</c> member after the last chunk has been handled.</returns>
        public SmartMe.Core.Data.DictResult Parse(string html, Encoding encoding)
        {
            dictResult = new DictResult();
            HTMLparser parser = HtmlParserFactory.GetInstance();

            dictResult.DictionaryType = DictionaryType.Dict_cn;

            // The parser is initialised from raw bytes, so the markup is re-encoded first.
            byte[] rawPage = encoding.GetBytes(html);
            parser.Init(rawPage);
            parser.SetEncoding(encoding);

            // Both values are threaded through the handlers by reference.
            int parseState = 0;
            bool encodingApplied = false;

            for (HTMLchunk chunk = parser.ParseNext(); chunk != null; chunk = parser.ParseNext())
            {
                // Read the chunk type once, before any handler gets to touch the chunk.
                HTMLchunkType kind = chunk.oType;

                if (kind == HTMLchunkType.OpenTag)
                {
                    HandleOpenTag(chunk, ref parseState);

                    // NOTE(review): presumably lets a <meta> tag adjust the parser's
                    // encoding - confirm against HandleMetaEncoding.
                    if (chunk.sTag == "meta")
                    {
                        HandleMetaEncoding(parser, chunk, ref encodingApplied);
                    }

                    HandleParam(chunk, ref parseState);
                }
                else if (kind == HTMLchunkType.CloseTag)
                {
                    HandleCloseTag(chunk, ref parseState);
                }
                else if (kind == HTMLchunkType.Text)
                {
                    HandleText(chunk, ref parseState);
                }
                // Any other chunk type is ignored.
            }

            return dictResult;
        }