/// <summary>
/// Find the character reference whose kernel is a region of the given string.
/// The region is wrapped in a <c>CharacterReferenceEx</c> probe and handed to the
/// <c>LookUp</c> overload that accepts a probe; the search itself happens there
/// (per the original note, a binary search on the ordered list of known references).
/// <em>This is not very efficient, use Translate.Lookup(CharacterReference)
/// instead.</em>
/// </summary>
/// <param name="kernel">The string containing the name to look up, i.e. "amp".</param>
/// <param name="start">The index in <paramref name="kernel"/> where the name begins.</param>
/// <param name="end">The index in <paramref name="kernel"/> where the name ends.
/// This should be the index of the semicolon if it exists, or failing that,
/// at least an index past the last character of the name.</param>
/// <returns>The reference that matches the given region, or <c>null</c>
/// if it wasn't found.</returns>
public static CharacterReference LookUp(System.String kernel, int start, int end)
{
    CharacterReferenceEx searchKey = new CharacterReferenceEx();

    // Describe the region [start, end) of the string as the key to search for.
    searchKey.Kernel = kernel;
    searchKey.Start = start;
    searchKey.End = end;

    CharacterReference match = LookUp(searchKey);
    return match;
}
/// <summary>
/// Decode the character references contained in a string.
/// Numeric references (<c>&amp;#nnn;</c> decimal, <c>&amp;#xhhh;</c> hexadecimal) are
/// converted directly; named references (<c>&amp;name;</c>) are resolved with
/// <c>LookUp</c>. The terminating semicolon is optional in both forms.
/// Text that is not a recognized reference is copied through unchanged.
/// </summary>
/// <remarks>
/// A numeric reference whose value is zero or greater than U+10FFFF is not
/// decoded; its text is left in the output as written. Values above U+FFFF
/// are emitted as a UTF-16 surrogate pair.
/// </remarks>
/// <param name="strEncoded">The string to decode.</param>
/// <returns>The decoded string, or <paramref name="strEncoded"/> itself when it
/// contains no ampersand.</returns>
public static String Decode(String strEncoded)
{
    // Largest valid Unicode code point. Accumulation of a numeric reference
    // stops once the value exceeds this, which also keeps the arithmetic
    // far away from Int32 overflow.
    const int MaxCodePoint = 0x10FFFF;

    CharacterReferenceEx key;
    int amp;
    int index;
    int length;
    System.Text.StringBuilder buffer;
    char character;
    int number;
    int radix;
    int i;
    int semi;
    bool done;
    CharacterReference item;
    String ret;

    if (-1 == (amp = strEncoded.IndexOf('&')))
    {
        // Nothing to decode.
        ret = strEncoded;
    }
    else
    {
        key = null;
        index = 0;
        length = strEncoded.Length;
        buffer = new System.Text.StringBuilder(length);
        do
        {
            // Copy the literal text in front of the ampersand without
            // allocating an intermediate substring.
            buffer.Append(strEncoded, index, amp - index);
            index = amp;

            // Step over the ampersand; 'amp' keeps pointing at it so that it
            // can be emitted below if no reference is recognized.
            index++;
            if (index < length)
            {
                character = strEncoded[index];
                if ('#' == character)
                {
                    // numeric character reference
                    index++;
                    number = 0;
                    radix = 0; // unknown until the first digit or an 'x' is seen
                    i = index;
                    done = false;
                    while ((i < length) && !done)
                    {
                        character = strEncoded[i];
                        switch (character)
                        {
                            case '0':
                            case '1':
                            case '2':
                            case '3':
                            case '4':
                            case '5':
                            case '6':
                            case '7':
                            case '8':
                            case '9':
                                if (0 == radix)
                                {
                                    radix = 10;
                                }
                                if (number <= MaxCodePoint)
                                {
                                    number = number * radix + (character - '0');
                                }
                                break;
                            case 'A':
                            case 'B':
                            case 'C':
                            case 'D':
                            case 'E':
                            case 'F':
                                if (16 == radix)
                                {
                                    if (number <= MaxCodePoint)
                                    {
                                        number = number * radix + (character - 'A' + 10);
                                    }
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case 'a':
                            case 'b':
                            case 'c':
                            case 'd':
                            case 'e':
                            case 'f':
                                if (16 == radix)
                                {
                                    if (number <= MaxCodePoint)
                                    {
                                        number = number * radix + (character - 'a' + 10);
                                    }
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case 'x':
                            case 'X':
                                // Only valid as the radix marker before any digit.
                                if (0 == radix)
                                {
                                    radix = 16;
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case ';':
                                // The semicolon belongs to the reference: consume it.
                                done = true;
                                i++;
                                break;
                            default:
                                done = true;
                                break;
                        }
                        if (!done)
                        {
                            i++;
                        }
                    }
                    if ((0 != number) && (number <= MaxCodePoint))
                    {
                        if (number <= char.MaxValue)
                        {
                            buffer.Append((char)number);
                        }
                        else
                        {
                            // Supplementary-plane code point: a single char
                            // cannot hold it, so emit the surrogate pair.
                            buffer.Append(char.ConvertFromUtf32(number));
                        }
                        index = i;
                        amp = index; // everything up to here has been consumed
                    }
                }
                else if (char.IsLetter(character)) // Can't start with a digit
                {
                    // character entity reference
                    i = index + 1;
                    done = false;
                    semi = length;
                    while ((i < length) && !done)
                    {
                        character = strEncoded[i];
                        if (';' == character)
                        {
                            done = true;
                            semi = i;
                            i++;
                        }
                        else if (char.IsLetterOrDigit(character))
                        {
                            i++;
                        }
                        else
                        {
                            done = true;
                            semi = i;
                        }
                    }

                    // Reuse a single probe object for every lookup instead of
                    // creating a substring for each candidate name.
                    if (null == key)
                    {
                        key = new CharacterReferenceEx();
                    }
                    key.Kernel = strEncoded;
                    key.Start = index;
                    key.End = semi;
                    item = LookUp(key);
                    if (null != item)
                    {
                        buffer.Append((char)item.Character);
                        // Advance by the length of the matched name, which is
                        // what LookUp reports through item.Kernel.
                        index += item.Kernel.Length;
                        if ((index < length) && (';' == strEncoded[index]))
                        {
                            index++;
                        }
                        amp = index;
                    }
                }
                else
                {
                    // need do nothing here, the ampersand will be consumed below
                }
            }

            // Gather up unconsumed characters: when no reference was decoded,
            // 'amp' still points at the ampersand and the skipped text is
            // emitted verbatim.
            buffer.Append(strEncoded, amp, index - amp);
            amp = index;
        }
        while ((index < length) && (-1 != (amp = strEncoded.IndexOf('&', index))));

        // Copy whatever follows the last ampersand that was processed.
        buffer.Append(strEncoded, index, length - index);
        ret = buffer.ToString();
    }

    return (ret);
}
/// <summary>
/// Find the character reference whose kernel is a region of the given string.
/// The region is wrapped in a <c>CharacterReferenceEx</c> probe and handed to the
/// <c>LookUp</c> overload that accepts a probe; the search itself happens there
/// (per the original note, a binary search on the ordered list of known references).
/// <em>This is not very efficient, use Translate.Lookup(CharacterReference)
/// instead.</em>
/// </summary>
/// <param name="kernel">The string containing the name to look up, i.e. "amp".</param>
/// <param name="start">The index in <paramref name="kernel"/> where the name begins.</param>
/// <param name="end">The index in <paramref name="kernel"/> where the name ends.
/// This should be the index of the semicolon if it exists, or failing that,
/// at least an index past the last character of the name.</param>
/// <returns>The reference that matches the given region, or <c>null</c>
/// if it wasn't found.</returns>
public static CharacterReference LookUp(System.String kernel, int start, int end)
{
    CharacterReferenceEx searchKey = new CharacterReferenceEx();

    // Describe the region [start, end) of the string as the key to search for.
    searchKey.Kernel = kernel;
    searchKey.Start = start;
    searchKey.End = end;

    CharacterReference match = LookUp(searchKey);
    return match;
}
/// <summary>
/// Decode the character references contained in a string.
/// Numeric references (<c>&amp;#nnn;</c> decimal, <c>&amp;#xhhh;</c> hexadecimal) are
/// converted directly; named references (<c>&amp;name;</c>) are resolved with
/// <c>LookUp</c>. The terminating semicolon is optional in both forms.
/// Text that is not a recognized reference is copied through unchanged.
/// </summary>
/// <remarks>
/// A numeric reference whose value is zero or greater than U+10FFFF is not
/// decoded; its text is left in the output as written. Values above U+FFFF
/// are emitted as a UTF-16 surrogate pair.
/// </remarks>
/// <param name="strEncoded">The string to decode.</param>
/// <returns>The decoded string, or <paramref name="strEncoded"/> itself when it
/// contains no ampersand.</returns>
public static String Decode(String strEncoded)
{
    // Largest valid Unicode code point. Accumulation of a numeric reference
    // stops once the value exceeds this, which also keeps the arithmetic
    // far away from Int32 overflow.
    const int MaxCodePoint = 0x10FFFF;

    CharacterReferenceEx key;
    int amp;
    int index;
    int length;
    System.Text.StringBuilder buffer;
    char character;
    int number;
    int radix;
    int i;
    int semi;
    bool done;
    CharacterReference item;
    String ret;

    if (-1 == (amp = strEncoded.IndexOf('&')))
    {
        // Nothing to decode.
        ret = strEncoded;
    }
    else
    {
        key = null;
        index = 0;
        length = strEncoded.Length;
        buffer = new System.Text.StringBuilder(length);
        do
        {
            // Copy the literal text in front of the ampersand without
            // allocating an intermediate substring.
            buffer.Append(strEncoded, index, amp - index);
            index = amp;

            // Step over the ampersand; 'amp' keeps pointing at it so that it
            // can be emitted below if no reference is recognized.
            index++;
            if (index < length)
            {
                character = strEncoded[index];
                if ('#' == character)
                {
                    // numeric character reference
                    index++;
                    number = 0;
                    radix = 0; // unknown until the first digit or an 'x' is seen
                    i = index;
                    done = false;
                    while ((i < length) && !done)
                    {
                        character = strEncoded[i];
                        switch (character)
                        {
                            case '0':
                            case '1':
                            case '2':
                            case '3':
                            case '4':
                            case '5':
                            case '6':
                            case '7':
                            case '8':
                            case '9':
                                if (0 == radix)
                                {
                                    radix = 10;
                                }
                                if (number <= MaxCodePoint)
                                {
                                    number = number * radix + (character - '0');
                                }
                                break;
                            case 'A':
                            case 'B':
                            case 'C':
                            case 'D':
                            case 'E':
                            case 'F':
                                if (16 == radix)
                                {
                                    if (number <= MaxCodePoint)
                                    {
                                        number = number * radix + (character - 'A' + 10);
                                    }
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case 'a':
                            case 'b':
                            case 'c':
                            case 'd':
                            case 'e':
                            case 'f':
                                if (16 == radix)
                                {
                                    if (number <= MaxCodePoint)
                                    {
                                        number = number * radix + (character - 'a' + 10);
                                    }
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case 'x':
                            case 'X':
                                // Only valid as the radix marker before any digit.
                                if (0 == radix)
                                {
                                    radix = 16;
                                }
                                else
                                {
                                    done = true;
                                }
                                break;
                            case ';':
                                // The semicolon belongs to the reference: consume it.
                                done = true;
                                i++;
                                break;
                            default:
                                done = true;
                                break;
                        }
                        if (!done)
                        {
                            i++;
                        }
                    }
                    if ((0 != number) && (number <= MaxCodePoint))
                    {
                        if (number <= char.MaxValue)
                        {
                            buffer.Append((char)number);
                        }
                        else
                        {
                            // Supplementary-plane code point: a single char
                            // cannot hold it, so emit the surrogate pair.
                            buffer.Append(char.ConvertFromUtf32(number));
                        }
                        index = i;
                        amp = index; // everything up to here has been consumed
                    }
                }
                else if (char.IsLetter(character)) // Can't start with a digit
                {
                    // character entity reference
                    i = index + 1;
                    done = false;
                    semi = length;
                    while ((i < length) && !done)
                    {
                        character = strEncoded[i];
                        if (';' == character)
                        {
                            done = true;
                            semi = i;
                            i++;
                        }
                        else if (char.IsLetterOrDigit(character))
                        {
                            i++;
                        }
                        else
                        {
                            done = true;
                            semi = i;
                        }
                    }

                    // Reuse a single probe object for every lookup instead of
                    // creating a substring for each candidate name.
                    if (null == key)
                    {
                        key = new CharacterReferenceEx();
                    }
                    key.Kernel = strEncoded;
                    key.Start = index;
                    key.End = semi;
                    item = LookUp(key);
                    if (null != item)
                    {
                        buffer.Append((char)item.Character);
                        // Advance by the length of the matched name, which is
                        // what LookUp reports through item.Kernel.
                        index += item.Kernel.Length;
                        if ((index < length) && (';' == strEncoded[index]))
                        {
                            index++;
                        }
                        amp = index;
                    }
                }
                else
                {
                    // need do nothing here, the ampersand will be consumed below
                }
            }

            // Gather up unconsumed characters: when no reference was decoded,
            // 'amp' still points at the ampersand and the skipped text is
            // emitted verbatim.
            buffer.Append(strEncoded, amp, index - amp);
            amp = index;
        }
        while ((index < length) && (-1 != (amp = strEncoded.IndexOf('&', index))));

        // Copy whatever follows the last ampersand that was processed.
        buffer.Append(strEncoded, index, length - index);
        ret = buffer.ToString();
    }

    return (ret);
}