Esempio n. 1
0
        /// <summary>
        /// Saves <paramref name="doc"/> through its <c>IPersistStreamInit</c> interface into an
        /// in-memory stream and decodes the saved bytes into a string.
        /// </summary>
        /// <param name="doc">The document to serialize. When null, the method returns null.</param>
        /// <param name="enc">
        /// Encoding used to decode the saved bytes. When null, or when it is the instance
        /// returned by <c>Encoding.GetEncoding(0)</c>, the start of the data is inspected for a
        /// UTF-16LE, UTF-16BE or UTF-8 byte order mark (falling back to a zero-byte heuristic
        /// for BOM-less UTF-16LE) and the detected encoding replaces the default. Any other
        /// encoding is used as given and no byte order mark is stripped.
        /// </param>
        /// <returns>The decoded document source, or null when <paramref name="doc"/> is null.</returns>
        public static String GetDocumentSource(ref HTMLDocument doc, Encoding enc)
        {
            if (doc == null)
            {
                return(null);
            }

            //Windows default
            Encoding defaultEncoding = Encoding.GetEncoding(0);

            Encoding theEncoding = enc;

            if (theEncoding == null)
            {
                theEncoding = defaultEncoding;
            }

            //Don't try to detect unicode if we were
            //passed an encoding other than the default
            bool IsUnicodeDetermined = (theEncoding != defaultEncoding);

            byte[] bytedata;

            // use the routine from htmlwrapper
            // 'using' guarantees the stream is released even if Save throws.
            using (MemoryStream memstream = new MemoryStream())
            {
                ComStream cstream = new ComStream(memstream);

                IPersistStreamInit pStreamInit = (IPersistStreamInit)doc;

                pStreamInit.Save(cstream, false);

                // Take the whole payload at once. Decoding fixed-size chunks
                // independently would split multi-byte characters that straddle
                // a chunk boundary.
                bytedata = memstream.ToArray();
            }

            int iSize      = bytedata.Length;
            int iBOMLength = 0;

            if (!IsUnicodeDetermined)
            {
                //look for byte order mark
                bool IsBOMPresent = false;

                // Every probe is guarded by the actual data length so that short
                // documents are never classified from bytes that were not written.
                if ((iSize >= 2) && (bytedata[0] == 0xFF) && (bytedata[1] == 0xFE)) //UTF16LE
                {
                    theEncoding  = Encoding.Unicode;
                    IsBOMPresent = true;
                }
                else if ((iSize >= 2) && (bytedata[0] == 0xFE) && (bytedata[1] == 0xFF)) //UTF16BE
                {
                    theEncoding  = Encoding.BigEndianUnicode;
                    IsBOMPresent = true;
                }
                else if ((iSize >= 3) && (bytedata[0] == 0xEF) && (bytedata[1] == 0xBB) && (bytedata[2] == 0xBF)) //UTF8
                {
                    theEncoding  = Encoding.UTF8;
                    IsBOMPresent = true;
                }
                else if ((iSize >= 8) && (bytedata[1] == 0) && (bytedata[3] == 0) && (bytedata[5] == 0) && (bytedata[7] == 0))
                {
                    //look for alternate zeroes
                    theEncoding = Encoding.Unicode; //best guess
                }

                if (IsBOMPresent)
                {
                    //strip out the BOM
                    iBOMLength = theEncoding.GetPreamble().Length;
                }
            }

            // The count must exclude the skipped BOM bytes; otherwise index + count
            // runs past the end of the data.
            return(theEncoding.GetString(bytedata, iBOMLength, iSize - iBOMLength));
        }
Esempio n. 2
0
        /// <summary>
        /// Persists <paramref name="doc"/> via <c>IPersistStreamInit.Save</c> into a memory
        /// stream and returns the saved bytes decoded as text.
        /// </summary>
        /// <param name="doc">The document to serialize. A null document yields a null result.</param>
        /// <param name="enc">
        /// Encoding for decoding the saved bytes. If null, or if it is the instance returned by
        /// <c>Encoding.GetEncoding(0)</c>, the leading bytes are checked for a UTF-16LE,
        /// UTF-16BE or UTF-8 byte order mark (then for a zero-byte pattern suggesting BOM-less
        /// UTF-16LE) and the detected encoding is used. Any other encoding is applied as-is,
        /// without stripping a byte order mark.
        /// </param>
        /// <returns>The document source as a string, or null when <paramref name="doc"/> is null.</returns>
        public static String GetDocumentSource(ref HTMLDocument doc, Encoding enc)
        {
            if (doc == null) return null;

            //Windows default
            Encoding defaultEncoding = Encoding.GetEncoding(0);

            Encoding theEncoding = enc;
            if (theEncoding == null)
            {
                theEncoding = defaultEncoding;
            }

            //Only try to detect unicode when we were not
            //passed an encoding other than the default
            bool detectUnicode = (theEncoding == defaultEncoding);

            byte[] bytedata;

            // use the routine from htmlwrapper
            // The using block disposes the stream on every exit path, including exceptions.
            using (MemoryStream memstream = new MemoryStream())
            {
                ComStream cstream = new ComStream(memstream);

                IPersistStreamInit pStreamInit = (IPersistStreamInit)doc;
                pStreamInit.Save(cstream, false);

                // Grab everything in one piece: decoding separate fixed-size chunks
                // breaks characters whose bytes span two chunks.
                bytedata = memstream.ToArray();
            }

            int iSize = bytedata.Length;
            int iBOMLength = 0;

            if (detectUnicode)
            {
                //look for byte order mark
                // Length guards keep the probes inside the bytes actually saved, so a
                // very short document cannot be mistaken for UTF-16.
                bool IsUTF16LE = (iSize >= 2) && (bytedata[0] == 0xFF) && (bytedata[1] == 0xFE);
                bool IsUTF16BE = (iSize >= 2) && (bytedata[0] == 0xFE) && (bytedata[1] == 0xFF);
                bool IsUTF8 = (iSize >= 3) && (bytedata[0] == 0xEF) && (bytedata[1] == 0xBB) && (bytedata[2] == 0xBF);
                bool IsBOMPresent = IsUTF16LE || IsUTF16BE || IsUTF8;

                //look for alternate zeroes
                if (!IsBOMPresent && (iSize >= 8))
                {
                    if ((bytedata[1] == 0) && (bytedata[3] == 0) && (bytedata[5] == 0) && (bytedata[7] == 0))
                    {
                        IsUTF16LE = true; //best guess
                    }
                }

                if (IsUTF16LE)
                {
                    theEncoding = Encoding.Unicode;
                }
                else if (IsUTF16BE)
                {
                    theEncoding = Encoding.BigEndianUnicode;
                }
                else if (IsUTF8)
                {
                    theEncoding = Encoding.UTF8;
                }

                if (IsBOMPresent)
                {
                    //strip out the BOM
                    iBOMLength = theEncoding.GetPreamble().Length;
                }
            }

            // Subtract the BOM from the count so that offset + count never exceeds the data.
            return theEncoding.GetString(bytedata, iBOMLength, iSize - iBOMLength);
        }