Ejemplo n.º 1
0
        /// <summary>
        /// Decodes a string into a list of atoms, optionally interpreting HTML-style markup.
        /// </summary>
        /// <remarks>
        /// When <paramref name="parseHTML"/> is false every character of <paramref name="inText"/> is
        /// handed to addCharacter unchanged. Otherwise the text is tokenized with Parsing.HTMLparser:
        /// text chunks have their escape sequences replaced and are added character by character,
        /// recognised tags update the open-tag list / hyperlink list (or add image and span atoms),
        /// and unrecognised tags are added verbatim as text. Parameters of recognised tags are then
        /// applied; malformed parameter values are reported through Logger.Warn and skipped.
        /// </remarks>
        /// <param name="inText">The text to decode.</param>
        /// <param name="parseHTML">True to interpret tags, false to treat the input as plain text.</param>
        /// <returns>The atom list that was filled while decoding.</returns>
        private List <AAtom> decodeText(string inText, bool parseHTML)
        {
            List <AAtom>           outAtoms     = new List <AAtom>();
            List <string>          openTags     = new List <string>();
            Color                  currentColor = Color.White;
            List <HREF_Attributes> openHREFs    = new List <HREF_Attributes>();

            // if this is not HTML, do not parse tags: every character is added as-is.
            if (!parseHTML)
            {
                for (int i = 0; i < inText.Length; i++)
                {
                    addCharacter(inText[i], outAtoms, openTags, currentColor, openHREFs);
                }
                return(outAtoms);
            }

            Parsing.HTMLparser parser = new Parsing.HTMLparser(inText);
            Parsing.HTMLchunk  chunk;
            while ((chunk = parser.ParseNext()) != null)
            {
                if (!string.IsNullOrEmpty(chunk.oHTML))
                {
                    // this is a span of text. make sure to replace escape characters!
                    string span = EscapeCharacters.ReplaceEscapeCharacters(chunk.oHTML);
                    for (int i = 0; i < span.Length; i++)
                    {
                        addCharacter(span[i], outAtoms, openTags, currentColor, openHREFs);
                    }
                    continue;
                }

                // this is a tag. interpret the tag and edit the openTags list.
                bool readParams = true;
                bool isClosing  = chunk.bClosure;
                switch (chunk.sTag)
                {
                case "font":
                    // nothing to do for the tag itself; its parameters are handled below.
                    break;

                case "br":
                    addCharacter('\n', outAtoms, openTags, currentColor, openHREFs);
                    break;

                case "b":
                case "i":
                case "u":
                case "outline":
                case "big":
                case "medium":
                case "small":
                case "center":
                case "left":
                case "right":
                    // these tags are tracked under their own name.
                    editOpenTags(openTags, isClosing, chunk.sTag);
                    break;

                case "basefont":
                    // basefont is tracked as "medium".
                    editOpenTags(openTags, isClosing, "medium");
                    break;

                case "gumpimg":
                    addGumpImage(outAtoms, openTags, openHREFs);
                    break;

                case "span":
                    addSpan(outAtoms, openTags, openHREFs);
                    break;

                case "a":
                    editOpenTags(openTags, isClosing, "a");
                    if (isClosing)
                    {
                        // closing a hyperlink - restore previous address, if any.
                        if (openHREFs.Count > 0)
                        {
                            openHREFs.RemoveAt(openHREFs.Count - 1);
                        }
                    }
                    else
                    {
                        // hyperlink with attributes
                        openHREFs.Add(new HREF_Attributes());
                    }
                    break;

                default:
                    // unknown tag: emit its raw source text instead of interpreting it.
                    readParams = false;
                    for (int i = 0; i < chunk.iChunkLength; i++)
                    {
                        addCharacter(inText[chunk.iChunkOffset + i], outAtoms, openTags, currentColor, openHREFs);
                    }
                    break;
                }

                if (!readParams)
                {
                    continue;
                }

                // Link parameters only apply to the link opened by this very tag. A closing </a>
                // has already popped its entry, so indexing the list there could hit an empty
                // list or silently modify an enclosing link.
                HREF_Attributes currentHREF = null;
                if (chunk.sTag == "a" && !isClosing)
                {
                    currentHREF = openHREFs[openHREFs.Count - 1];
                }

                foreach (DictionaryEntry param in chunk.oParams)
                {
                    string key   = param.Key.ToString();
                    string value = param.Value.ToString();
                    // strip the trailing slash of a self-closing tag that got glued to the value.
                    if (value.EndsWith("/"))
                    {
                        value = value.Substring(0, value.Length - 1);
                    }

                    switch (key)
                    {
                    case "href":
                        if (chunk.sTag != "a")
                        {
                            Logger.Warn("href paramater used outside of an 'a' tag link. href is ignored in this case.");
                        }
                        else if (currentHREF != null)
                        {
                            currentHREF.HREF = value;
                        }
                        break;

                    case "color":
                    case "hovercolor":
                    case "activecolor":
                        // get the color! an empty value must not be indexed.
                        string color = value;
                        if (color.Length > 0 && color[0] == '#')
                        {
                            color = color.Substring(1);
                        }
                        if (color.Length == 3 || color.Length == 6)
                        {
                            Color c = Utility.ColorFromHexString(color);
                            if (key == "color")
                            {
                                currentColor = c;
                            }
                            if (currentHREF != null)
                            {
                                switch (key)
                                {
                                case "color":
                                    currentHREF.UpHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                    break;

                                case "hovercolor":
                                    currentHREF.OverHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                    break;

                                case "activecolor":
                                    currentHREF.DownHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                    break;
                                }
                            }
                        }
                        else
                        {
                            Logger.Warn("Improperly formatted color:" + color);
                        }
                        break;

                    case "text-decoration":
                        switch (value)
                        {
                        case "none":
                            if (currentHREF != null)
                            {
                                currentHREF.Underline = false;
                            }
                            break;

                        default:
                            Logger.Warn(string.Format("Unknown text-decoration:{0}", value));
                            break;
                        }
                        break;

                    case "src":
                    case "hoversrc":
                    case "activesrc":
                        if (chunk.sTag != "gumpimg")
                        {
                            Logger.Warn("src param encountered within " + chunk.sTag + " which does not use this param.");
                            break;
                        }
                        int gumpID;
                        if (!int.TryParse(value, out gumpID))
                        {
                            Logger.Warn(string.Format("Invalid integer value '{0}' for parameter {1}", value, key));
                            break;
                        }
                        // the gumpimg case above called addGumpImage; the cast assumes it left
                        // the image atom as the last list entry.
                        ImageAtom image = (ImageAtom)outAtoms[outAtoms.Count - 1];
                        if (key == "src")
                        {
                            image.Value = gumpID;
                        }
                        else if (key == "hoversrc")
                        {
                            image.ValueOver = gumpID;
                        }
                        else
                        {
                            image.ValueDown = gumpID;
                        }
                        break;

                    case "width":
                    case "height":
                        if (chunk.sTag != "gumpimg" && chunk.sTag != "span")
                        {
                            Logger.Warn(key + " param encountered within " + chunk.sTag + " which does not use this param.");
                            break;
                        }
                        int dimension;
                        if (!int.TryParse(value, out dimension))
                        {
                            Logger.Warn(string.Format("Invalid integer value '{0}' for parameter {1}", value, key));
                            break;
                        }
                        if (key == "width")
                        {
                            outAtoms[outAtoms.Count - 1].Width = dimension;
                        }
                        else
                        {
                            // NOTE(review): the previous code assigned Width here as well, which looks
                            // like a copy-paste slip; this assumes the atom type exposes a settable
                            // Height - confirm against the atom class.
                            outAtoms[outAtoms.Count - 1].Height = dimension;
                        }
                        break;

                    default:
                        Logger.Warn(string.Format("Unknown parameter:{0}", key));
                        break;
                    }
                }
            }

            return(outAtoms);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Decodes a string into a list of atoms, optionally interpreting HTML-style markup.
        /// </summary>
        /// <remarks>
        /// When <paramref name="parseHTML"/> is false every character of <paramref name="inText"/> is
        /// handed to addCharacter unchanged. Otherwise the text is tokenized with Parsing.HTMLparser:
        /// text chunks are added character by character, recognised tags update the open-tag list /
        /// hyperlink list (or add image and span atoms), and unrecognised tags are added verbatim as
        /// text. Parameters of recognised tags are then applied; malformed parameter values are
        /// reported through Logger.Warn and skipped.
        /// </remarks>
        /// <param name="inText">The text to decode.</param>
        /// <param name="parseHTML">True to interpret tags, false to treat the input as plain text.</param>
        /// <returns>The atom list that was filled while decoding.</returns>
        private List<AHTMLAtom> decodeText(string inText, bool parseHTML)
        {
            List<AHTMLAtom> outAtoms = new List<AHTMLAtom>();
            List<string> openTags = new List<string>();
            Color currentColor = Color.White;
            List<HREF_Attributes> openHREFs = new List<HREF_Attributes>();

            // if this is not HTML, do not parse tags: every character is added as-is.
            if (!parseHTML)
            {
                for (int i = 0; i < inText.Length; i++)
                {
                    addCharacter(inText[i], outAtoms, openTags, currentColor, openHREFs);
                }
                return outAtoms;
            }

            Parsing.HTMLparser parser = new Parsing.HTMLparser(inText);
            Parsing.HTMLchunk chunk;
            while ((chunk = parser.ParseNext()) != null)
            {
                if (!string.IsNullOrEmpty(chunk.oHTML))
                {
                    // this is text. add the characters to the outText list.
                    for (int i = 0; i < chunk.oHTML.Length; i++)
                    {
                        addCharacter(chunk.oHTML[i], outAtoms, openTags, currentColor, openHREFs);
                    }
                    continue;
                }

                // this is a tag. interpret the tag and edit the openTags list.
                bool readParams = true;
                bool isClosing = chunk.bClosure;
                switch (chunk.sTag)
                {
                    case "font":
                        // nothing to do for the tag itself; its parameters are handled below.
                        break;
                    case "br":
                        addCharacter('\n', outAtoms, openTags, currentColor, openHREFs);
                        break;
                    case "b":
                    case "i":
                    case "u":
                    case "big":
                    case "medium":
                    case "small":
                    case "center":
                    case "left":
                    case "right":
                        // these tags are tracked under their own name.
                        editOpenTags(openTags, isClosing, chunk.sTag);
                        break;
                    case "basefont":
                        // basefont is tracked as "medium".
                        editOpenTags(openTags, isClosing, "medium");
                        break;
                    case "gumpimg":
                        addGumpImage(outAtoms, openTags, openHREFs);
                        break;
                    case "span":
                        addSpan(outAtoms, openTags, openHREFs);
                        break;
                    case "a":
                        editOpenTags(openTags, isClosing, "a");
                        if (isClosing)
                        {
                            // closing a hyperlink - restore previous address, if any.
                            if (openHREFs.Count > 0)
                            {
                                openHREFs.RemoveAt(openHREFs.Count - 1);
                            }
                        }
                        else
                        {
                            // hyperlink with attributes
                            openHREFs.Add(new HREF_Attributes());
                        }
                        break;
                    default:
                        // unknown tag: emit its raw source text instead of interpreting it.
                        readParams = false;
                        for (int i = 0; i < chunk.iChunkLength; i++)
                        {
                            addCharacter(inText[chunk.iChunkOffset + i], outAtoms, openTags, currentColor, openHREFs);
                        }
                        break;
                }

                if (!readParams)
                {
                    continue;
                }

                // Link parameters only apply to the link opened by this very tag. A closing </a>
                // has already popped its entry, so indexing the list there could hit an empty
                // list or silently modify an enclosing link.
                HREF_Attributes currentHREF = null;
                if (chunk.sTag == "a" && !isClosing)
                {
                    currentHREF = openHREFs[openHREFs.Count - 1];
                }

                foreach (DictionaryEntry param in chunk.oParams)
                {
                    string key = param.Key.ToString();
                    string value = param.Value.ToString();
                    // strip the trailing slash of a self-closing tag that got glued to the value.
                    if (value.EndsWith("/"))
                    {
                        value = value.Substring(0, value.Length - 1);
                    }

                    switch (key)
                    {
                        case "href":
                            if (chunk.sTag != "a")
                            {
                                Logger.Warn("href paramater used outside of an 'a' tag link. href is ignored in this case.");
                            }
                            else if (currentHREF != null)
                            {
                                currentHREF.HREF = value;
                            }
                            break;
                        case "color":
                        case "hovercolor":
                        case "activecolor":
                            // get the color! an empty value must not be indexed.
                            string color = value;
                            if (color.Length > 0 && color[0] == '#')
                            {
                                color = color.Substring(1);
                            }
                            if (color.Length == 3 || color.Length == 6)
                            {
                                Color c = Utility.ColorFromHexString(color);
                                if (key == "color")
                                {
                                    currentColor = c;
                                }
                                if (currentHREF != null)
                                {
                                    switch (key)
                                    {
                                        case "color":
                                            currentHREF.UpHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                            break;
                                        case "hovercolor":
                                            currentHREF.OverHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                            break;
                                        case "activecolor":
                                            currentHREF.DownHue = UltimaData.HuesXNA.GetWebSafeHue(c);
                                            break;
                                    }
                                }
                            }
                            else
                            {
                                Logger.Warn("Improperly formatted color:" + color);
                            }
                            break;
                        case "text-decoration":
                            switch (value)
                            {
                                case "none":
                                    if (currentHREF != null)
                                    {
                                        currentHREF.Underline = false;
                                    }
                                    break;
                                default:
                                    Logger.Warn(string.Format("Unknown text-decoration:{0}", value));
                                    break;
                            }
                            break;
                        case "src":
                        case "hoversrc":
                        case "activesrc":
                            if (chunk.sTag != "gumpimg")
                            {
                                Logger.Warn("src param encountered within " + chunk.sTag + " which does not use this param.");
                                break;
                            }
                            int gumpID;
                            if (!int.TryParse(value, out gumpID))
                            {
                                Logger.Warn(string.Format("Invalid integer value '{0}' for parameter {1}", value, key));
                                break;
                            }
                            // the gumpimg case above called addGumpImage; the cast assumes it left
                            // the image atom as the last list entry.
                            HTMLImageGump image = (HTMLImageGump)outAtoms[outAtoms.Count - 1];
                            if (key == "src")
                            {
                                image.Value = gumpID;
                            }
                            else if (key == "hoversrc")
                            {
                                image.ValueOver = gumpID;
                            }
                            else
                            {
                                image.ValueDown = gumpID;
                            }
                            break;
                        case "width":
                        case "height":
                            if (chunk.sTag != "gumpimg" && chunk.sTag != "span")
                            {
                                Logger.Warn(key + " param encountered within " + chunk.sTag + " which does not use this param.");
                                break;
                            }
                            int dimension;
                            if (!int.TryParse(value, out dimension))
                            {
                                Logger.Warn(string.Format("Invalid integer value '{0}' for parameter {1}", value, key));
                                break;
                            }
                            if (key == "width")
                            {
                                outAtoms[outAtoms.Count - 1].Width = dimension;
                            }
                            else
                            {
                                // NOTE(review): the previous code assigned Width here as well, which looks
                                // like a copy-paste slip; this assumes the atom type exposes a settable
                                // Height - confirm against the atom class.
                                outAtoms[outAtoms.Count - 1].Height = dimension;
                            }
                            break;
                        default:
                            Logger.Warn(string.Format("Unknown parameter:{0}", key));
                            break;
                    }
                }
            }

            return outAtoms;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Initialises the tag parser by storing the objects and the data buffer handed in by the caller.
        /// </summary>
        /// <param name="p_oP">Parser instance, stored in oP.</param>
        /// <param name="p_oChunk">Chunk object, stored in oChunk.</param>
        /// <param name="p_sText">Text buffer, stored in sText.</param>
        /// <param name="p_bHTML">Byte array, stored in bHTML.</param>
        /// <param name="p_iDataLength">Data length, stored in iDataLength.</param>
        /// <param name="p_oE">Entities object, stored in oE.</param>
        /// <param name="p_oHE">Heuristics object, stored in oHE.</param>
        internal void Init(HTMLparser p_oP,HTMLchunk p_oChunk,DynaString p_sText,byte[] p_bHTML,int p_iDataLength,HTMLentities p_oE,HTMLheuristics p_oHE)
        {
            // helper objects
            oP = p_oP;
            oE = p_oE;
            oHE = p_oHE;

            // working buffers
            oChunk = p_oChunk;
            sText = p_sText;
            bHTML = p_bHTML;

            // data size, plus the limit past which heuristics are not applied:
            // we don't want to be too close to end of data when dealing with heuristics
            iDataLength = p_iDataLength;
            iMaxHeuDataLength = iDataLength - MIN_DATA_SIZE_FOR_HEURISTICS;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Flags the object as disposed and clears the references it holds.
        /// Calls made after the first one return without doing anything.
        /// </summary>
        /// <param name="bDisposing">Not read by this implementation.</param>
        private void Dispose(bool bDisposing)
        {
            // already disposed - nothing left to release
            if (bDisposed)
            {
                return;
            }

            bDisposed = true;

            // drop every held reference
            oP = null;
            oE = null;
            sText = null;
            oChunk = null;
            bHTML = null;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Handles META tags that set page encoding
        /// </summary>
        /// <param name="oP">HTML parser object that is used for parsing</param>
        /// <param name="oChunk">Parsed chunk that should contain tag META</param>
        /// <param name="bEncodingSet">Your own flag that shows whether encoding was already set or not, if set
        /// once then it should not be changed - this is the logic applied by major browsers. It is switched to
        /// true when this call successfully sets the encoding.</param>
        /// <returns>True if the chunk is a META tag whose http-equiv is "content-type" or "content-category"
        /// (compared case-insensitively), whether or not the encoding was actually changed; false otherwise</returns>
        public static bool HandleMetaEncoding(HTMLparser oP, HTMLchunk oChunk, ref bool bEncodingSet)
        {
            // cheap length and first-character checks come before the full string comparison
            if (oChunk.sTag.Length != 4 || oChunk.sTag[0] != 'm' || oChunk.sTag != "meta")
            {
                return(false);
            }

            // if we do not use hashmode already then we call conversion explicitly
            // this is slow, but METAs are very rare so performance penalty is low
            if (!oChunk.bHashMode)
            {
                oChunk.ConvertParamsToHash();
            }

            string sKey = oChunk.oParams["http-equiv"] as string;
            if (sKey == null)
            {
                return(false);
            }

            // Ordinal case-insensitive comparison: no lower-cased copy of the key is allocated and the
            // result does not depend on the current culture.
            // "content-category" is a rare case (appears to work in IE) reported to exist in some pages
            // by Martin Bächtold
            bool bIsEncodingMeta =
                string.Equals(sKey, "content-type", System.StringComparison.OrdinalIgnoreCase) ||
                string.Equals(sKey, "content-category", System.StringComparison.OrdinalIgnoreCase);
            if (!bIsEncodingMeta)
            {
                return(false);
            }

            // we might have charset here that may hint at necessity to decode page.
            // once encoding is set it should not be changed, but you can be damn
            // sure there are web pages out there that do that!!!
            if (!bEncodingSet)
            {
                string sData = oChunk.oParams["content"] as string;

                // it is possible we have broken META tag without Content part
                if (sData != null)
                {
                    if (oP.SetEncoding(sData))
                    {
                        // here you need to reencode any text that you found so far
                        // most likely it will be just TITLE, the rest can be ignored anyway
                        bEncodingSet = true;
                    }
                    else
                    {
                        // failed to set encoding - most likely encoding string
                        // was incorrect or your machine lacks codepages or something
                        // else - might be good idea to put warning message here
                    }
                }
            }

            return(true);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Handles META tags that set page encoding
        /// </summary>
        /// <param name="oP">HTML parser object that is used for parsing</param>
        /// <param name="oChunk">Parsed chunk that should contain tag META</param>
        /// <param name="bEncodingSet">Your own flag that shows whether encoding was already set or not, if set
        /// once then it should not be changed - this is the logic applied by major browsers. It is switched to
        /// true when this call successfully sets the encoding.</param>
        /// <returns>True if the chunk is a META tag whose http-equiv is "content-type" or "content-category"
        /// (compared case-insensitively), whether or not the encoding was actually changed; false otherwise</returns>
        public static bool HandleMetaEncoding(HTMLparser oP,HTMLchunk oChunk,ref bool bEncodingSet)
        {
            // cheap length and first-character checks come before the full string comparison
            if(oChunk.sTag.Length!=4 || oChunk.sTag[0]!='m' || oChunk.sTag!="meta")
                return false;

            // if we do not use hashmode already then we call conversion explicitly
            // this is slow, but METAs are very rare so performance penalty is low
            if(!oChunk.bHashMode)
                oChunk.ConvertParamsToHash();

            string sKey=oChunk.oParams["http-equiv"] as string;

            if(sKey==null)
                return false;

            // Ordinal case-insensitive comparison: no lower-cased copy of the key is allocated and the
            // result does not depend on the current culture.
            // "content-category" is a rare case (appears to work in IE) reported to exist in some pages
            // by Martin Bächtold
            bool bIsEncodingMeta=
                string.Equals(sKey,"content-type",System.StringComparison.OrdinalIgnoreCase) ||
                string.Equals(sKey,"content-category",System.StringComparison.OrdinalIgnoreCase);

            if(!bIsEncodingMeta)
                return false;

            // we might have charset here that may hint at necessity to decode page.
            // once encoding is set it should not be changed, but you can be damn
            // sure there are web pages out there that do that!!!
            if(!bEncodingSet)
            {
                string sData=oChunk.oParams["content"] as string;

                // it is possible we have broken META tag without Content part
                if(sData!=null)
                {
                    if(oP.SetEncoding(sData))
                    {
                        // here you need to reencode any text that you found so far
                        // most likely it will be just TITLE, the rest can be ignored anyway
                        bEncodingSet=true;
                    }
                    else
                    {
                        // failed to set encoding - most likely encoding string
                        // was incorrect or your machine lacks codepages or something
                        // else - might be good idea to put warning message here
                    }
                }
            }

            return true;
        }