An HTML entity decoder.
コード例 #1
0
		// Walks the HtmlEntities.json test data entry by entry, pushing each entity name
		// through an HtmlEntityDecoder and comparing the decoded value with the string
		// stored in the entry. The walk stops quietly at the first token that does not
		// fit the expected layout.
		public void TestDecodeNamedEntities ()
		{
			var path = Path.Combine ("..", "..", "TestData", "html", "HtmlEntities.json");
			var decoder = new HtmlEntityDecoder ();

			using (var json = new JsonTextReader (new StreamReader (path))) {
				while (json.Read ()) {
					// the opening brace of the outer object carries nothing to test
					if (json.TokenType == JsonToken.StartObject)
						continue;

					// anything other than an entity name ends the walk
					if (json.TokenType != JsonToken.PropertyName)
						break;

					var entity = (string) json.Value;

					// entry object -> first property (expected to be "codepoints") -> its array;
					// short-circuiting keeps the reads in exactly this order
					bool atArray = ReadExpectedToken (json, JsonToken.StartObject)
						&& ReadExpectedToken (json, JsonToken.PropertyName)
						&& ReadExpectedToken (json, JsonToken.StartArray);

					if (!atArray)
						break;

					// the array contents are not needed; consume tokens through its end
					while (json.Read () && json.TokenType != JsonToken.EndArray) {
					}

					// the next property should be "characters"; its value is the expected text
					if (!ReadExpectedToken (json, JsonToken.PropertyName))
						break;

					var expected = json.ReadAsString ();

					if (!ReadExpectedToken (json, JsonToken.EndObject))
						break;

					// feed the decoder everything up to (not including) the first ';'
					int stop = entity.IndexOf (';');
					if (stop < 0)
						stop = entity.Length;

					for (int pos = 0; pos < stop; pos++)
						Assert.IsTrue (decoder.Push (entity[pos]), "Failed to push char #{0} of \"{1}\".", pos, entity);

					Assert.AreEqual (expected, decoder.GetValue (), "Decoded entity did not match for \"{0}\".", entity);

					decoder.Reset ();
				}
			}
		}

		// Reads the next token and reports whether it has the expected type.
		static bool ReadExpectedToken (JsonTextReader json, JsonToken expected)
		{
			return json.Read () && json.TokenType == expected;
		}
コード例 #2
0
ファイル: HtmlUtils.cs プロジェクト: ywscr/HtmlRenderer
        /// <summary>
        /// Decode HTML character data.
        /// </summary>
        /// <remarks>
        /// Copies the specified range of <paramref name="data"/> to <paramref name="output"/>.
        /// Each run that begins with <c>'&amp;'</c> is fed to an <c>HtmlEntityDecoder</c> and the
        /// decoder's value is written in its place; a <c>';'</c> immediately following the
        /// run is consumed. An empty range (<paramref name="count"/> of zero), including one
        /// positioned at the very end of the data, is valid and writes nothing.
        /// </remarks>
        /// <param name="output">The <see cref="System.IO.TextWriter"/> to output the result.</param>
        /// <param name="data">The character data to decode.</param>
        /// <param name="startIndex">The starting index of the character data.</param>
        /// <param name="count">The number of characters in the data.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <para><paramref name="output"/> is <c>null</c>.</para>
        /// <para>-or-</para>
        /// <para><paramref name="data"/> is <c>null</c>.</para>
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <para><paramref name="startIndex"/> and <paramref name="count"/> do not specify
        /// a valid range in the data.</para>
        /// </exception>
        public static void HtmlDecode(TextWriter output, string data, int startIndex, int count)
        {
            if (output == null)
            {
                throw new ArgumentNullException("output");
            }

            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            // startIndex == data.Length is allowed so that an empty range (for example
            // decoding an empty string) is accepted; the count check below still
            // guarantees the range stays inside the data.
            if (startIndex < 0 || startIndex > data.Length)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (count < 0 || count > (data.Length - startIndex))
            {
                throw new ArgumentOutOfRangeException("count");
            }

            var entity   = new HtmlEntityDecoder();
            int endIndex = startIndex + count;
            int index    = startIndex;

            while (index < endIndex)
            {
                if (data[index] == '&')
                {
                    // Push characters until the decoder rejects one or the range ends.
                    while (index < endIndex && entity.Push(data[index]))
                    {
                        index++;
                    }

                    output.Write(entity.GetValue());
                    entity.Reset();

                    // The terminating ';' is not written to the output.
                    if (index < endIndex && data[index] == ';')
                    {
                        index++;
                    }
                }
                else
                {
                    output.Write(data[index++]);
                }
            }
        }
コード例 #3
0
ファイル: HtmlUtils.cs プロジェクト: prepare/WebParser
        /// <summary>
        /// Decode HTML character data.
        /// </summary>
        /// <remarks>
        /// Copies the specified range of <paramref name="data"/> to <paramref name="output"/>.
        /// Each run that begins with <c>'&amp;'</c> is fed to an <c>HtmlEntityDecoder</c> and the
        /// decoder's value is written in its place; a <c>';'</c> immediately following the
        /// run is consumed. An empty range (<paramref name="count"/> of zero), including one
        /// positioned at the very end of the data, is valid and writes nothing.
        /// </remarks>
        /// <param name="output">The <see cref="System.IO.TextWriter"/> to output the result.</param>
        /// <param name="data">The character data to decode.</param>
        /// <param name="startIndex">The starting index of the character data.</param>
        /// <param name="count">The number of characters in the data.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <para><paramref name="output"/> is <c>null</c>.</para>
        /// <para>-or-</para>
        /// <para><paramref name="data"/> is <c>null</c>.</para>
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <para><paramref name="startIndex"/> and <paramref name="count"/> do not specify
        /// a valid range in the data.</para>
        /// </exception>
        public static void HtmlDecode(TextWriter output, string data, int startIndex, int count)
        {
            if (output == null)
                throw new ArgumentNullException("output");

            if (data == null)
                throw new ArgumentNullException("data");

            // startIndex == data.Length is allowed so that an empty range (for example
            // decoding an empty string) is accepted; the count check below still
            // guarantees the range stays inside the data.
            if (startIndex < 0 || startIndex > data.Length)
                throw new ArgumentOutOfRangeException("startIndex");

            if (count < 0 || count > (data.Length - startIndex))
                throw new ArgumentOutOfRangeException("count");

            var entity = new HtmlEntityDecoder();
            int endIndex = startIndex + count;
            int index = startIndex;

            while (index < endIndex)
            {
                if (data[index] == '&')
                {
                    // Push characters until the decoder rejects one or the range ends.
                    while (index < endIndex && entity.Push(data[index]))
                        index++;

                    output.Write(entity.GetValue());
                    entity.Reset();

                    // The terminating ';' is not written to the output.
                    if (index < endIndex && data[index] == ';')
                        index++;
                }
                else
                {
                    output.Write(data[index++]);
                }
            }
        }
コード例 #4
0
		// Pushes every character of text through a fresh HtmlEntityDecoder, requiring each
		// push to succeed, then checks that the decoder's value equals expected.
		static void TestDecodeNumericEntity (string text, string expected)
		{
			var decoder = new HtmlEntityDecoder ();
			int position = 0;

			foreach (char c in text) {
				Assert.IsTrue (decoder.Push (c), "Failed to push char #{0} of \"{1}\".", position, text);
				position++;
			}

			var decoded = decoder.GetValue ();

			Assert.AreEqual (expected, decoded, "Decoded entity did not match for \"{0}\".", text);
		}