Example #1
0
        /// <summary>
        /// Drives the tokenizer over <c>ActiveStreamReader</c>: reads the stream in
        /// <c>tokenizerBlockChars</c>-sized chunks, strips a leading BOM from the first
        /// chunk, and feeds each chunk to the tokenizer until EOF or suspension.
        /// Re-encoding is re-checked after every chunk via <c>CheckForReEncode()</c>.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <c>ActiveStreamReader</c> has not been assigned.
        /// </exception>
        private void Tokenize()
        {
            if (ActiveStreamReader == null)
            {
                // FIX: the single-string ArgumentNullException ctor treats its argument
                // as the *parameter name*, which yielded the misleading message
                // "Value cannot be null. (Parameter 'reader was null.')".
                // Use the (paramName, message) overload instead.
                throw new ArgumentNullException(nameof(ActiveStreamReader), "reader was null.");
            }

            ConfigureTreeBuilderForParsingMode();
            tokenizer.Start();

            bool swallowBom = true;


            try
            {
                char[] buffer = new char[tokenizerBlockChars];
                UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
                bool lastWasCR = false;
                int len = -1;
                if ((len = ActiveStreamReader.Read(buffer, 0, buffer.Length)) != 0)
                {
                    // First chunk: optionally swallow a UTF-16 BOM so it never reaches
                    // the tokenizer, and compensate the stream offset by -1 so that
                    // reported source positions stay aligned with the raw input.
                    int offset = 0;
                    int length = len;
                    if (swallowBom)
                    {
                        if (buffer[0] == '\uFEFF')
                        {
                            ActiveStreamOffset = -1;
                            offset = 1;
                            length--;
                        }
                    }
                    if (length > 0)
                    {
                        tokenizer.SetTransitionBaseOffset(ActiveStreamOffset);
                        bufr.Start = offset;
                        bufr.End = offset + length;
                        // lastWasCR carries CR/LF-pair state across TokenizeBuffer calls
                        // so a CR at a chunk boundary is normalized correctly.
                        while (bufr.HasMore && !tokenizer.IsSuspended)
                        {
                            bufr.Adjust(lastWasCR);
                            lastWasCR = false;
                            if (bufr.HasMore && !tokenizer.IsSuspended)
                            {
                                lastWasCR = tokenizer.TokenizeBuffer(bufr);
                            }
                        }
                    }

                    CheckForReEncode();

                    ActiveStreamOffset = length;
                    // Remaining chunks: same loop, but no BOM handling, and the loop
                    // exits early as soon as the tokenizer suspends itself.
                    while (!tokenizer.IsSuspended && (len = ActiveStreamReader.Read(buffer, 0, buffer.Length)) != 0)
                    {
                        tokenizer.SetTransitionBaseOffset(ActiveStreamOffset);
                        bufr.Start = 0;
                        bufr.End = len;
                        while (bufr.HasMore && !tokenizer.IsSuspended)
                        {
                            bufr.Adjust(lastWasCR);
                            lastWasCR = false;
                            if (bufr.HasMore && !tokenizer.IsSuspended)
                            {
                                lastWasCR = tokenizer.TokenizeBuffer(bufr);
                            }
                        }
                        ActiveStreamOffset += len;
                        CheckForReEncode();
                    }
                }
                // Only signal EOF if the tokenizer was not suspended mid-stream.
                if (!tokenizer.IsSuspended)
                {
                    tokenizer.Eof();
                }
            }
            finally
            {
                // Always release tokenizer resources, even on exception or suspension.
                tokenizer.End();
            }
        }
		/// <summary>
		/// Drives the tokenizer over <paramref name="reader"/>: reads the stream in
		/// 2048-char chunks, strips a leading BOM from the first chunk, and feeds each
		/// chunk to the tokenizer until EOF.
		/// </summary>
		/// <param name="reader">The character stream to tokenize. Must not be null.</param>
		/// <exception cref="ArgumentNullException">
		/// Thrown when <paramref name="reader"/> is null.
		/// </exception>
		private void Tokenize(TextReader reader)
		{
			if (reader == null)
			{
				// FIX: the single-string ArgumentNullException ctor treats its argument
				// as the *parameter name*, which yielded the misleading message
				// "Value cannot be null. (Parameter 'reader was null.')".
				// Use the (paramName, message) overload instead.
				throw new ArgumentNullException(nameof(reader), "reader was null.");
			}

			tokenizer.Start();
			bool swallowBom = true;

			try
			{
				char[] buffer = new char[2048];
				UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
				bool lastWasCR = false;
				int len = -1;
				if ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
				{
					// First chunk: optionally swallow a UTF-16 BOM so it never reaches
					// the tokenizer, and compensate the stream offset by -1 so reported
					// source positions stay aligned with the raw input.
					int streamOffset = 0;
					int offset = 0;
					int length = len;
					if (swallowBom)
					{
						if (buffer[0] == '\uFEFF')
						{
							streamOffset = -1;
							offset = 1;
							length--;
						}
					}
					if (length > 0)
					{
						tokenizer.SetTransitionBaseOffset(streamOffset);
						bufr.Start = offset;
						bufr.End = offset + length;
						// lastWasCR carries CR/LF-pair state across TokenizeBuffer calls
						// so a CR at a chunk boundary is normalized correctly.
						while (bufr.HasMore)
						{
							bufr.Adjust(lastWasCR);
							lastWasCR = false;
							if (bufr.HasMore)
							{
								lastWasCR = tokenizer.TokenizeBuffer(bufr);
							}
						}
					}
					streamOffset = length;
					// Remaining chunks: same loop, but no BOM handling.
					while ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
					{
						tokenizer.SetTransitionBaseOffset(streamOffset);
						bufr.Start = 0;
						bufr.End = len;
						while (bufr.HasMore)
						{
							bufr.Adjust(lastWasCR);
							lastWasCR = false;
							if (bufr.HasMore)
							{
								lastWasCR = tokenizer.TokenizeBuffer(bufr);
							}
						}
						streamOffset += len;
					}
				}
				tokenizer.Eof();
			}
			finally
			{
				// Always release tokenizer resources, even on exception.
				tokenizer.End();
			}
		}
Example #3
0
        /// <summary>
        /// Drives the tokenizer over <paramref name="reader"/>: reads the stream in
        /// 2048-char chunks, strips a leading BOM from the first chunk, and feeds each
        /// chunk to the tokenizer until EOF. When <c>HtmlParsingMode</c> is
        /// <c>Auto</c>, the parsing mode (Document/Content/Fragment) is determined
        /// from the first chunk before tokenization starts.
        /// </summary>
        /// <param name="reader">The character stream to tokenize. Must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="reader"/> is null.
        /// </exception>
        private void Tokenize(TextReader reader)
        {
            if (reader == null)
            {
                // FIX: the single-string ArgumentNullException ctor treats its argument
                // as the *parameter name*, which yielded the misleading message
                // "Value cannot be null. (Parameter 'reader was null.')".
                // Use the (paramName, message) overload instead.
                throw new ArgumentNullException(nameof(reader), "reader was null.");
            }

            // In Auto mode the tree builder cannot be configured yet — the mode is
            // only known after sniffing the first chunk below.
            if (HtmlParsingMode != HtmlParsingMode.Auto)
            {
                ConfigureTreeBuilderForParsingMode();
                tokenizer.Start();
            }

            bool swallowBom = true;


            try
            {
                char[] buffer = new char[2048];
                UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0);
                bool lastWasCR = false;
                int len = -1;
                if ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
                {
                    if (HtmlParsingMode == HtmlParser.HtmlParsingMode.Auto)
                    {
                        // GetContext inspects the first chunk; "*document" and
                        // "*content" are sentinel values, anything else is presumably
                        // an element name used as the fragment context — confirm
                        // against GetContext's contract.
                        string ctx = GetContext(buffer);
                        switch (ctx)
                        {
                            case "*document":
                                HtmlParsingMode = HtmlParsingMode.Document;
                                break;
                            case "*content":
                                HtmlParsingMode = HtmlParsingMode.Content;
                                break;
                            default:
                                HtmlParsingMode = HtmlParsingMode.Fragment;
                                treeBuilder.SetFragmentContext(ctx);
                                break;
                        }
                        // Mode is now known; perform the deferred configuration/start.
                        ConfigureTreeBuilderForParsingMode();
                        tokenizer.Start();
                    }

                    // First chunk: optionally swallow a UTF-16 BOM so it never reaches
                    // the tokenizer, and compensate the stream offset by -1 so reported
                    // source positions stay aligned with the raw input.
                    int streamOffset = 0;
                    int offset = 0;
                    int length = len;
                    if (swallowBom)
                    {
                        if (buffer[0] == '\uFEFF')
                        {
                            streamOffset = -1;
                            offset = 1;
                            length--;
                        }
                    }
                    if (length > 0)
                    {
                        tokenizer.SetTransitionBaseOffset(streamOffset);
                        bufr.Start = offset;
                        bufr.End = offset + length;
                        // lastWasCR carries CR/LF-pair state across TokenizeBuffer
                        // calls so a CR at a chunk boundary is normalized correctly.
                        while (bufr.HasMore)
                        {
                            bufr.Adjust(lastWasCR);
                            lastWasCR = false;
                            if (bufr.HasMore)
                            {
                                lastWasCR = tokenizer.TokenizeBuffer(bufr);
                            }
                        }
                    }
                    streamOffset = length;
                    // Remaining chunks: same loop, but no BOM or mode handling.
                    while ((len = reader.Read(buffer, 0, buffer.Length)) != 0)
                    {
                        tokenizer.SetTransitionBaseOffset(streamOffset);
                        bufr.Start = 0;
                        bufr.End = len;
                        while (bufr.HasMore)
                        {
                            bufr.Adjust(lastWasCR);
                            lastWasCR = false;
                            if (bufr.HasMore)
                            {
                                lastWasCR = tokenizer.TokenizeBuffer(bufr);
                            }
                        }
                        streamOffset += len;
                    }
                }
                tokenizer.Eof();
            }
            finally
            {
                // Always release tokenizer resources, even on exception.
                tokenizer.End();
            }
        }