예제 #1
0
 /// <summary>
 /// Verifies that <c>MatchesAny</c> is true only while the reader's current
 /// character is one of the supplied scan characters.
 /// </summary>
 public void MatchesAny()
 {
     var delimiters = new[] { ' ', '\n', '\t' };
     var input = new CharacterReader("One\nTwo\tThree");

     // The input starts with 'O', which is not one of the delimiters.
     var matchesAtStart = input.MatchesAny(delimiters);
     Assert.IsFalse(matchesAtStart);

     // Consuming up to the first delimiter yields the first word.
     var firstWord = input.ConsumeToAny(delimiters);
     Assert.AreEqual("One", firstWord);

     // The reader is now expected to sit on the '\n' delimiter.
     var matchesAtDelimiter = input.MatchesAny(delimiters);
     Assert.IsTrue(matchesAtDelimiter);

     var consumed = input.Consume();
     Assert.AreEqual('\n', consumed);

     // After the delimiter is consumed the next character is 'T' again a non-delimiter.
     var matchesAfterDelimiter = input.MatchesAny(delimiters);
     Assert.IsFalse(matchesAfterDelimiter);
 }
예제 #2
0
파일: Tokeniser.cs 프로젝트: wushian/dcsoup
 /// <summary>
 /// Tries to consume a character reference at the reader's current position:
 /// either a numeric reference (<c>#123;</c> / <c>#x7B;</c>) or a named entity.
 /// NOTE(review): presumably the caller has already consumed the leading ampersand - confirm against callers.
 /// </summary>
 /// <param name="additionalAllowedCharacter">
 /// When non-null and equal to the reader's current character, nothing is consumed
 /// and <c>null</c> is returned.
 /// </param>
 /// <param name="inAttribute">
 /// When <c>true</c>, a named reference that is directly followed by a letter, a digit,
 /// '=', '-' or '_' is not treated as a reference (the reader is rewound and <c>null</c> is returned).
 /// </param>
 /// <returns>
 /// The UTF-16 code units of the referenced code point; a single-element array holding
 /// <c>replacementChar</c> for a numeric reference outside the valid range; or <c>null</c>
 /// when no character reference was recognised.
 /// </returns>
 internal char[] ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
 {
     if (reader.IsEmpty())
     {
         return null;
     }
     if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.Current())
     {
         return null;
     }
     if (reader.MatchesAny('\t', '\n', '\r', '\f', ' ', '<', '&'))
     {
         return null;
     }

     // Remember where we started so unrecognised input can be handed back untouched.
     reader.Mark();
     if (reader.MatchConsume("#"))
     {
         // numbered
         bool   isHexMode = reader.MatchConsumeIgnoreCase("X");
         string numRef    = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();
         if (numRef.Length == 0)
         {
             // didn't match anything
             CharacterReferenceError("numeric reference with no numerals");
             reader.RewindToMark();
             return null;
         }
         if (!reader.MatchConsume(";"))
         {
             // missing semi
             CharacterReferenceError("missing semicolon");
         }

         int charval = -1;
         try
         {
             int @base = isHexMode ? 16 : 10;
             charval = Convert.ToInt32(numRef, @base);
         }
         catch (FormatException)
         {
             // skip: charval stays -1 and is reported as out of range below
         }
         catch (OverflowException)
         {
             // Digit run does not fit in an Int32 (e.g. "99999999999"); it cannot be a
             // valid code point, so leave charval at -1 and report it below instead of crashing.
         }

         // Any negative value is invalid: besides the -1 sentinel, Convert.ToInt32 in base 16
         // reads 8-digit values such as "80000000" as two's complement negatives, which
         // char.ConvertFromUtf32 would reject with ArgumentOutOfRangeException.
         if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
         {
             CharacterReferenceError("character outside of valid range");
             return new char[] { replacementChar };
         }

         // todo: implement number replacement table
         // todo: check for extra illegal unicode points as parse errors
         return char.ConvertFromUtf32(charval).ToCharArray();
     }
     else
     {
         // named
         // get as many letters as possible, and look for matching entities.
         string nameRef    = reader.ConsumeLetterThenDigitSequence();
         bool   looksLegit = reader.Matches(';');
         // found if a base named entity without a ;, or an extended entity with the ;.
         bool found = (Entities.IsBaseNamedEntity(nameRef) || (Entities.IsNamedEntity(nameRef) && looksLegit));
         if (!found)
         {
             reader.RewindToMark();
             if (looksLegit)
             {
                 // named with semicolon
                 CharacterReferenceError(string.Format("invalid named referenece '{0}'", nameRef));
             }
             return null;
         }
         if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
         {
             // don't want that to match
             reader.RewindToMark();
             return null;
         }
         if (!reader.MatchConsume(";"))
         {
             // missing semi
             CharacterReferenceError("missing semicolon");
         }
         int charval = Entities.GetCharacterByName(nameRef);
         return char.ConvertFromUtf32(charval).ToCharArray();
     }
 }