Пример #1
0
        /// <summary>
        /// Finds the character reference whose kernel is the slice of
        /// <paramref name="kernel"/> delimited by <paramref name="start"/> and
        /// <paramref name="end"/>.
        /// A temporary <c>CharacterReferenceEx</c> probe is populated with the three
        /// arguments and handed to the <c>LookUp</c> overload that accepts a reference.
        /// <em>A new probe is allocated on every call, so this is not very efficient;
        /// use Translate.Lookup(CharacterReference) instead.</em>
        /// </summary>
        /// <param name="kernel">The string containing the name to look up, i.e. "amp".
        /// </param>
        /// <param name="start">Index in <paramref name="kernel"/> where the name begins.
        /// </param>
        /// <param name="end">Index in <paramref name="kernel"/> where the name ends.
        /// This should be the index of the semicolon if there is one, or otherwise
        /// at least an index past the last character of the name.
        /// </param>
        /// <returns>The reference matching the given name, or <code>null</code>
        /// when no match is found.
        /// </returns>
        public static CharacterReference LookUp(System.String kernel, int start, int end)
        {
            // Kernel, Start and End are assigned in the same order as before.
            CharacterReferenceEx lookupKey = new CharacterReferenceEx
            {
                Kernel = kernel,
                Start  = start,
                End    = end
            };

            return LookUp(lookupKey);
        }
		/// <summary>
		/// Decodes the HTML character references contained in a string.
		/// Numeric references (<c>&amp;#65;</c>, <c>&amp;#x41;</c>) are converted directly,
		/// named references (<c>&amp;amp;</c>) are resolved through <c>LookUp</c>.
		/// The terminating semicolon is optional. Anything that cannot be decoded
		/// (unknown names, a numeric value of zero, or a value beyond U+10FFFF)
		/// is copied to the result unchanged.
		/// </summary>
		/// <remarks>
		/// Numeric references above U+FFFF are emitted as a UTF-16 surrogate pair
		/// rather than being truncated to a single <c>char</c>.
		/// </remarks>
		/// <param name="strEncoded">The text to decode. Must not be <c>null</c>.</param>
		/// <returns>
		/// The decoded text, or <paramref name="strEncoded"/> itself when it contains
		/// no ampersand.
		/// </returns>
		/// <exception cref="System.NullReferenceException">
		/// <paramref name="strEncoded"/> is <c>null</c>.
		/// </exception>
		public static String Decode (String strEncoded)
		{
			// Largest valid Unicode code point; anything bigger is not a character.
			const int MaxCodePoint = 0x10FFFF;

			CharacterReferenceEx key;
			CharacterReference item;
			System.Text.StringBuilder buffer;
			char character;
			int amp;
			int index;
			int length;
			int number;
			int radix;
			int i;
			int semi;
			bool done;

			amp = strEncoded.IndexOf ('&');
			if (-1 == amp)
			{
				// nothing to decode, so no buffer needs to be allocated at all
				return (strEncoded);
			}

			key = null;
			index = 0;
			length = strEncoded.Length;
			buffer = new System.Text.StringBuilder (length);
			do
			{
				// copy the literal text in front of the ampersand
				// without allocating an intermediate substring
				buffer.Append (strEncoded, index, amp - index);
				index = amp;

				// step over the ampersand itself
				index++;
				if (index < length)
				{
					character = strEncoded[index];
					if ('#' == character)
					{
						// numeric character reference
						index++;
						number = 0;
						radix = 0; // unknown until a digit (10) or an 'x' (16) is seen
						i = index;
						done = false;
						while ((i < length) && !done)
						{
							character = strEncoded[i];
							switch (character)
							{
								case '0':
								case '1':
								case '2':
								case '3':
								case '4':
								case '5':
								case '6':
								case '7':
								case '8':
								case '9':
									if (0 == radix)
									{
										radix = 10;
									}
									// once the value is out of range stop accumulating,
									// this keeps it out of range and prevents int overflow
									if (number <= MaxCodePoint)
									{
										number = number * radix + (character - '0');
									}
									break;
								case 'A':
								case 'B':
								case 'C':
								case 'D':
								case 'E':
								case 'F':
									if (16 != radix)
									{
										done = true;
									}
									else if (number <= MaxCodePoint)
									{
										number = number * radix + (character - 'A' + 10);
									}
									break;
								case 'a':
								case 'b':
								case 'c':
								case 'd':
								case 'e':
								case 'f':
									if (16 != radix)
									{
										done = true;
									}
									else if (number <= MaxCodePoint)
									{
										number = number * radix + (character - 'a' + 10);
									}
									break;
								case 'x':
								case 'X':
									// only valid as the very first character after '#'
									if (0 == radix)
									{
										radix = 16;
									}
									else
									{
										done = true;
									}
									break;
								case ';':
									done = true;
									i++; // the semicolon belongs to the reference
									break;
								default:
									done = true;
									break;
							}
							if (!done)
							{
								i++;
							}
						}
						if ((0 != number) && (number <= MaxCodePoint))
						{
							if (number <= char.MaxValue)
							{
								buffer.Append ((char)number);
							}
							else
							{
								// supplementary plane: needs a surrogate pair
								buffer.Append (char.ConvertFromUtf32 (number));
							}
							index = i;
							amp = index;
						}
					}
					else if (char.IsLetter (character)) // Can't start with a digit
					{
						// character entity reference: the name runs up to the
						// semicolon or the first character that is not a letter or digit
						semi = index + 1;
						while ((semi < length) && char.IsLetterOrDigit (strEncoded[semi]))
						{
							semi++;
						}
						// the probe object is reused for every lookup in this call
						if (null == key)
						{
							key = new CharacterReferenceEx ();
						}
						key.Kernel = strEncoded;
						key.Start = index;
						key.End = semi;
						item = LookUp (key);
						if (null != item)
						{
							buffer.Append ((char)item.Character);
							// advance by the length of the matched name
							index += item.Kernel.Length;
							if ((index < length) && (';' == strEncoded[index]))
							{
								index++;
							}
							amp = index;
						}
					}
					else
					{
						// need do nothing here, the ampersand will be consumed below
					}
				}
				// gather up unconsumed characters ("&" or "&#" of a failed reference)
				if (amp < index)
				{
					buffer.Append (strEncoded, amp, index - amp);
				}
			}
			while ((index < length) && (-1 != (amp = strEncoded.IndexOf ('&', index))));

			// append whatever follows the last ampersand
			if (index < length)
			{
				buffer.Append (strEncoded, index, length - index);
			}

			return (buffer.ToString ());
		}
		/// <summary>
		/// Resolves a character reference from a name embedded in a larger string.
		/// The arguments are copied into a freshly created <c>CharacterReferenceEx</c>,
		/// which is then passed to the <c>LookUp</c> overload taking a reference.
		/// <em>This is not very efficient, use Translate.Lookup(CharacterReference)
		/// instead.</em>
		/// </summary>
		/// <param name="kernel">The string holding the name to look up, i.e. "amp".
		/// </param>
		/// <param name="start">Position in <paramref name="kernel"/> at which the name starts.
		/// </param>
		/// <param name="end">Position in <paramref name="kernel"/> at which the name ends.
		/// This should be the index of the semicolon if it exists, or failing that,
		/// at least an index past the last character of the name.
		/// </param>
		/// <returns>The reference that matches the given name, or <code>null</code>
		/// if there is no such reference.
		/// </returns>
		public static CharacterReference LookUp(System.String kernel, int start, int end)
		{
			CharacterReferenceEx searchKey = new CharacterReferenceEx();
			CharacterReference found;

			// describe the slice [start, end) of the supplied string
			searchKey.Kernel = kernel;
			searchKey.Start = start;
			searchKey.End = end;

			found = LookUp(searchKey);
			return found;
		}
Пример #4
0
        /// <summary>
        /// Replaces HTML character references in a string with the characters they denote.
        /// Numeric references (<c>&amp;#65;</c>, <c>&amp;#x41;</c>) are computed in place,
        /// named references (<c>&amp;amp;</c>) are resolved through <c>LookUp</c>.
        /// A trailing semicolon is accepted but not required. Text that does not form
        /// a decodable reference (unknown name, numeric value of zero or above U+10FFFF)
        /// is passed through unchanged.
        /// </summary>
        /// <remarks>
        /// Numeric references above U+FFFF produce a UTF-16 surrogate pair instead of
        /// being truncated to one <c>char</c>.
        /// </remarks>
        /// <param name="strEncoded">The text to decode. Must not be <c>null</c>.</param>
        /// <returns>
        /// The decoded text, or <paramref name="strEncoded"/> itself when it contains
        /// no ampersand.
        /// </returns>
        /// <exception cref="System.NullReferenceException">
        /// <paramref name="strEncoded"/> is <c>null</c>.
        /// </exception>
        public static String Decode(String strEncoded)
        {
            // Largest valid Unicode code point; anything bigger is not a character.
            const int MaxCodePoint = 0x10FFFF;

            CharacterReferenceEx key;
            CharacterReference   item;

            System.Text.StringBuilder buffer;
            char character;
            int  amp;
            int  index;
            int  length;
            int  number;
            int  radix;
            int  i;
            int  semi;
            bool done;

            amp = strEncoded.IndexOf('&');
            if (-1 == amp)
            {
                // no references present: return the input without allocating a buffer
                return(strEncoded);
            }

            key    = null;
            index  = 0;
            length = strEncoded.Length;
            buffer = new System.Text.StringBuilder(length);
            do
            {
                // copy the literal run preceding the ampersand
                // without allocating an intermediate substring
                buffer.Append(strEncoded, index, amp - index);
                index = amp;

                // skip the ampersand
                index++;
                if (index < length)
                {
                    character = strEncoded[index];
                    if ('#' == character)
                    {
                        // numeric character reference
                        index++;
                        number = 0;
                        radix  = 0;     // decided by the first digit (10) or an 'x' (16)
                        i      = index;
                        done   = false;
                        while ((i < length) && !done)
                        {
                            character = strEncoded[i];
                            switch (character)
                            {
                            case '0':
                            case '1':
                            case '2':
                            case '3':
                            case '4':
                            case '5':
                            case '6':
                            case '7':
                            case '8':
                            case '9':
                                if (0 == radix)
                                {
                                    radix = 10;
                                }
                                // stop accumulating once out of range: the value then
                                // stays out of range and int overflow cannot occur
                                if (number <= MaxCodePoint)
                                {
                                    number = number * radix + (character - '0');
                                }
                                break;

                            case 'A':
                            case 'B':
                            case 'C':
                            case 'D':
                            case 'E':
                            case 'F':
                                if (16 != radix)
                                {
                                    done = true;
                                }
                                else if (number <= MaxCodePoint)
                                {
                                    number = number * radix + (character - 'A' + 10);
                                }
                                break;

                            case 'a':
                            case 'b':
                            case 'c':
                            case 'd':
                            case 'e':
                            case 'f':
                                if (16 != radix)
                                {
                                    done = true;
                                }
                                else if (number <= MaxCodePoint)
                                {
                                    number = number * radix + (character - 'a' + 10);
                                }
                                break;

                            case 'x':
                            case 'X':
                                // a hex marker is only legal directly after '#'
                                if (0 == radix)
                                {
                                    radix = 16;
                                }
                                else
                                {
                                    done = true;
                                }
                                break;

                            case ';':
                                done = true;
                                i++;    // consume the terminating semicolon
                                break;

                            default:
                                done = true;
                                break;
                            }
                            if (!done)
                            {
                                i++;
                            }
                        }
                        if ((0 != number) && (number <= MaxCodePoint))
                        {
                            if (number <= char.MaxValue)
                            {
                                buffer.Append((char)number);
                            }
                            else
                            {
                                // supplementary plane: emit a surrogate pair
                                buffer.Append(char.ConvertFromUtf32(number));
                            }
                            index = i;
                            amp   = index;
                        }
                    }
                    else if (char.IsLetter(character))                          // Can't start with a digit
                    {
                        // character entity reference: the name extends to the semicolon
                        // or to the first character that is not a letter or digit
                        semi = index + 1;
                        while ((semi < length) && char.IsLetterOrDigit(strEncoded[semi]))
                        {
                            semi++;
                        }
                        // one probe object is shared by all lookups of this call
                        if (null == key)
                        {
                            key = new CharacterReferenceEx();
                        }
                        key.Kernel = strEncoded;
                        key.Start  = index;
                        key.End    = semi;
                        item       = LookUp(key);
                        if (null != item)
                        {
                            buffer.Append((char)item.Character);
                            // move past the matched name
                            index += item.Kernel.Length;
                            if ((index < length) && (';' == strEncoded[index]))
                            {
                                index++;
                            }
                            amp = index;
                        }
                    }
                    else
                    {
                        // need do nothing here, the ampersand will be consumed below
                    }
                }
                // gather up unconsumed characters ("&" or "&#" of a failed reference)
                if (amp < index)
                {
                    buffer.Append(strEncoded, amp, index - amp);
                }
            }
            while ((index < length) && (-1 != (amp = strEncoded.IndexOf('&', index))));

            // append the tail that follows the last ampersand
            if (index < length)
            {
                buffer.Append(strEncoded, index, length - index);
            }

            return(buffer.ToString());
        }