// Splits a semicolon-delimited record that contains runs of empty fields,
// echoes every resulting field to the console, and checks that the fields
// (including the empty ones) come back in their original order.
public void Vexing()
{
    var record = new utf8("0000;<control>;Cc;0;BN;;;;;N;NULL;;;;");
    var fields = new Splitter(';', record).ToList();

    foreach (var field in fields)
    {
        System.Console.WriteLine(field);
    }

    var expected = new List<utf8>
    {
        new utf8("0000"),
        new utf8("<control>"),
        new utf8("Cc"),
        new utf8("0"),
        new utf8("BN"),
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        new utf8("N"),
        new utf8("NULL"),
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
    };
    Assert.That(fields, Is.EqualTo(expected));
}
/// <summary>
/// Scans an unquoted value starting at the parser's current position and
/// records it as a <c>JType.Primitive</c> token.
/// </summary>
/// <remarks>
/// The scan stops at TAB, CR, LF, SPACE, Comma, ObjectClose or ArrayClose, or
/// at the end of <paramref name="json"/>. If a byte below 32 or at/above 127
/// is met first, the position is rewound to where the scan began and no token
/// is produced. On success the position is left on the last byte of the
/// primitive, one before the terminator.
/// </remarks>
static void ParsePrimitive(ref JParser parser, utf8 json, JToken *tokens, int numberOfTokens)
{
    int begin = parser.Position;

    while (parser.Position < json.Length)
    {
        var current = json[parser.Position];

        // Any of these bytes ends the primitive.
        bool endsPrimitive =
            current == TAB || current == CR || current == LF || current == SPACE ||
            current == Comma || current == ObjectClose || current == ArrayClose;
        if (endsPrimitive)
        {
            break;
        }

        // Bytes outside 32..126 are not accepted inside a primitive.
        if (current < 32 || current >= 127)
        {
            parser.Position = begin;
            return;
        }

        parser.Position++;
    }

    var primitive = Allocate(ref parser, tokens, numberOfTokens);
    Fill(primitive, JType.Primitive, begin, parser.Position);
    primitive->Parent = parser.SuperToken;

    // Step back one byte so the position rests on the primitive's last byte.
    parser.Position -= 1;
}
/// <summary>
/// Scans a double-quoted string starting at the parser's current position
/// (which must be on the opening quote) and records it as a
/// <c>JType.String</c> token spanning the text between the quotes.
/// </summary>
/// <remarks>
/// Recognised escapes are <c>\" \/ \\ \b \f \r \n \t</c> and <c>\uXXXX</c>
/// with hexadecimal digits. On any malformed escape, or when the closing
/// quote is missing, the position is rewound to the opening quote and no
/// token is produced. On success the position is left on the closing quote.
/// </remarks>
static void ParseString(ref JParser parser, utf8 json, JToken *tokens, int numberOfTokens)
{
    int start = parser.Position;

    // Skip the opening quote.
    parser.Position += 1;

    for (; parser.Position < json.Length; parser.Position++)
    {
        byte c = json[parser.Position];

        // Closing quote: emit the token for the text between the quotes.
        if (c == DoubleQuote)
        {
            var token = Allocate(ref parser, tokens, numberOfTokens);
            Fill(token, JType.String, start + 1, parser.Position);
            token->Parent = parser.SuperToken;
            return;
        }

        if (c == BackSlash && parser.Position + 1 < json.Length)
        {
            parser.Position += 1;
            switch (json[parser.Position])
            {
                // Single-character escapes.
                case (byte)'\"':
                case (byte)'/':
                case (byte)'\\':
                case (byte)'b':
                case (byte)'f':
                case (byte)'r':
                case (byte)'n':
                case (byte)'t':
                    break;

                // \uXXXX: up to four hexadecimal digits follow.
                case (byte)'u':
                    parser.Position += 1;
                    for (int i = 0; i < 4 && parser.Position < json.Length; i++)
                    {
                        byte digit = json[parser.Position];
                        bool isHex =
                            (digit >= (byte)'0' && digit <= (byte)'9') ||
                            (digit >= (byte)'A' && digit <= (byte)'F') ||
                            (digit >= (byte)'a' && digit <= (byte)'f');

                        // Anything that is not a hex digit makes the escape invalid.
                        if (!isHex)
                        {
                            parser.Position = start;
                            return;
                        }

                        parser.Position += 1;
                    }

                    // The digit loop leaves the position one past the escape;
                    // step back so the outer loop's increment does not skip
                    // the byte that follows it (often the closing quote).
                    parser.Position -= 1;
                    break;

                // Unknown escape character.
                default:
                    parser.Position = start;
                    return;
            }
        }
    }

    // Ran off the end of the input without finding the closing quote.
    parser.Position = start;
}
/// <summary>
/// Writes a codepoint as a 32-bit unsigned value. The text is parsed as a
/// base-16 number; when <paramref name="codepoint"/> has no value, zero is
/// written instead.
/// </summary>
public void WriteCodepoint(utf8 codepoint)
{
    if (codepoint.HasValue)
    {
        WriteUint32((uint)utf8.ParseInt(codepoint, 16));
    }
    else
    {
        WriteUint32(0);
    }
}
/// <summary>
/// Writes <paramref name="codepoint"/> when it has a value, otherwise writes
/// <paramref name="backup"/> in its place.
/// </summary>
public void WriteCodepoint(utf8 codepoint, utf8 backup)
{
    WriteCodepoint(codepoint.HasValue ? codepoint : backup);
}
/// <summary>
/// Wraps the bytes written to <c>stream</c> so far (from offset 0 up to the
/// current position) in a <c>utf8</c> value, prints its length and its
/// bracketed contents to the console, and returns it.
/// </summary>
utf8 Bake()
{
    var buffer = stream.GetBuffer();
    var written = (int)stream.Position;
    var segment = new ArraySegment<byte>(buffer, 0, written);
    var baked = new utf8(segment);

    Console.WriteLine(baked.Length);
    Console.WriteLine("[{0}]", baked);

    return baked;
}
// Enumerates "hello" and checks that each step succeeds and reports the
// expected index and value.
public void IterateAsciiRange()
{
    uint[] expected = { 0x68, 0x65, 0x6c, 0x6c, 0x6f };
    var cursor = new utf8("hello").GetEnumerator();

    int position = 0;
    while (position < 5)
    {
        Assert.IsTrue(cursor.MoveNext());
        Assert.That(cursor.Current.Index, Is.EqualTo(position));
        Assert.That(cursor.Current.Value, Is.EqualTo(expected[position]));
        position++;
    }
}
// An input made of five separators must split into six empty fields.
public void OnlySeparator()
{
    var input = new utf8(";;;;;");
    var actual = new Splitter(';', input).ToList();

    var expected = new List<utf8>
    {
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
    };
    Assert.That(actual, Is.EqualTo(expected));
}
// The Bytes view of a string wrapped in curly quotes must expose the
// three-byte sequence for each quote around the ASCII bytes of "hello".
public void MultibyteCharsBytes()
{
    var actual = new utf8("“hello”").Bytes;

    var expected = new List<byte>
    {
        0xe2, 0x80, 0x9c,
        0x68, 0x65, 0x6c, 0x6c, 0x6f,
        0xe2, 0x80, 0x9d
    };
    Assert.That(actual, Is.EqualTo(expected));
}
// Walks "Hi!\n" with the reverse enumerator and checks that the values come
// out last-to-first.
public void Simple()
{
    var text = new utf8("Hi!\n");
    var cursor = new ReverseUtf8Enumerator(text);

    char[] expectedInReverse = { '\n', '!', 'i', 'H' };
    foreach (char expected in expectedInReverse)
    {
        cursor.MoveNext();
        Assert.That(cursor.Current.Value, Is.EqualTo((int)expected));
    }
}
// Leading, trailing and doubled separators must each yield an empty field.
public void Split()
{
    var input = new utf8(";a;b;cdef;;k;");
    var splitter = new Splitter(';', input);
    var actual = splitter.ToList();

    var expected = new List<utf8>
    {
        utf8.Empty,
        new utf8("a"),
        new utf8("b"),
        new utf8("cdef"),
        utf8.Empty,
        new utf8("k"),
        utf8.Empty
    };
    Assert.That(actual, Is.EqualTo(expected));
}
// Enumerates a string wrapped in curly quotes and checks that each step
// yields one whole code point (0x201c, 'h', 'a', 't', 0x201d) and that the
// enumerator is exhausted afterwards.
public void IterateMultibyteUtf8ButSingleByteUtf16()
{
    var text = new utf8("“hat”");
    var cursor = text.GetEnumerator();

    int[] expectedValues = { 0x201c, 0x68, 0x61, 0x74, 0x201d };
    foreach (int expected in expectedValues)
    {
        Assert.IsTrue(cursor.MoveNext());
        Assert.That(cursor.Current.Value, Is.EqualTo(expected));
    }

    Assert.IsFalse(cursor.MoveNext());
}
// utf8.Split on ';' must return every field of the record as an array,
// keeping the empty fields in place.
public void Split()
{
    var record = new utf8("0000;<control>;Cc;0;BN;;;;;N;NULL;;;;");
    var separators = new char[] { ';' };
    var actual = record.Split(separators);

    var expected = new utf8[]
    {
        new utf8("0000"),
        new utf8("<control>"),
        new utf8("Cc"),
        new utf8("0"),
        new utf8("BN"),
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        new utf8("N"),
        new utf8("NULL"),
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
        utf8.Empty,
    };
    Assert.That(actual, Is.EqualTo(expected));
}
/// <summary>
/// Reads the character database named by <c>args[0]</c> and writes four
/// output files into the current directory: <c>chardata</c> (one record per
/// codepoint), <c>chartoupper</c> and <c>chartolower</c> (codepoint/mapping
/// pairs) and <c>charnames</c> (the codepoint names).
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
/// <exception cref="Exception">
/// Thrown when a record written to <c>chardata</c> has a different byte
/// length than the first record written.
/// </exception>
public void DoThings(string[] args)
{
    // Data file available at http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
    var data = new utf8(File.ReadAllBytes(args[0]));
    var lines = new Splitter('\n', data);

    var main = new FancyFile(new FileStream("chardata", FileMode.Create, FileAccess.Write));
    var toUpper = new FancyFile(new FileStream("chartoupper", FileMode.Create, FileAccess.Write));
    var toLower = new FancyFile(new FileStream("chartolower", FileMode.Create, FileAccess.Write));
    var namesRaw = new FileStream("charnames", FileMode.Create, FileAccess.Write);
    var names = new Utf8Writer(namesRaw);

    // TODO normalization map
    int i = 0;

    // Byte length of one chardata record; fixed by the first record written.
    long per = 0;

    foreach (var line in lines)
    {
        i++;
        if (line.IsEmpty)
        {
            continue;
        }

        var start = main.Position;
        var parts = line.Split(separator);

        // Schema given in ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
        // The current codepoint (eg U+00A2).
        main.WriteCodepoint(parts[0]);

        // The name of this codepoint (eg LATIN SMALL LETTER E WITH MACRON),
        // stored as a start/end offset pair into the names file.
        main.WriteOffset(namesRaw.Position);
        names.Append(parts[1]);
        main.WriteOffset(namesRaw.Position);

        // The major category, like Ll or Sm.
        main.WriteByte(ParseCategory(parts[2]));

        // Numeric value.
        main.WriteFloat(GetNumericValue(parts));

        // Whether this thing is mirrored.
        main.WriteByte((byte)(parts[9] == IsMirrored ? 1 : 0));

        // Upper, lower and title mappings; each falls back to the codepoint itself.
        main.WriteCodepoint(parts[12], parts[0]);
        main.WriteCodepoint(parts[13], parts[0]);
        main.WriteCodepoint(parts[14], parts[0]);

        if (parts[12].HasValue)
        {
            toUpper.WriteCodepoint(parts[0]);
            toUpper.WriteCodepoint(parts[12]);
            Console.WriteLine("({0}).ToUpper => {1}", parts[0], parts[12]);
        }

        // The lower-case mapping is field 13 (field 14 is the title-case mapping).
        if (parts[13].HasValue)
        {
            toLower.WriteCodepoint(parts[0]);
            toLower.WriteCodepoint(parts[13]);
        }

        // Every chardata record must have the same size as the first one.
        var end = main.Position;
        if (per == 0)
        {
            per = end - start;
        }
        else if (per != end - start)
        {
            throw new Exception($"at entry {i}, expected {per} bytes written; actual was {end - start}");
        }
    }

    main.Flush();
    main.Close();

    // The mapping tables must be flushed and closed too, or their output may be lost.
    toUpper.Flush();
    toUpper.Close();
    toLower.Flush();
    toLower.Close();

    namesRaw.Flush();
    namesRaw.Close();
}
// Externally implemented binding for mbSetCookie.
// Both url and cookie are marshaled with Tnelab.MiniBlinkV.Utf8Marshaler.
// The cookie must be formatted like:
//   Set-cookie: PRODUCTINFO=webxpress; domain=.fidelity.com; path=/; secure
public static extern void mbSetCookie(
    mbWebView webView,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 url,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 cookie);
/// <summary>
/// Maps a two-letter general-category code (compared against the
/// <c>Lu</c>, <c>Ll</c>, ... values declared elsewhere in this type) to the
/// matching <see cref="UnicodeCategory"/> member.
/// </summary>
/// <param name="str">The category code to translate.</param>
/// <returns>The <see cref="UnicodeCategory"/> for <paramref name="str"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="str"/> matches none of the known category codes.
/// </exception>
static UnicodeCategory Category(utf8 str)
{
    // Letters.
    if (str == Lu) { return UnicodeCategory.UppercaseLetter; }
    if (str == Ll) { return UnicodeCategory.LowercaseLetter; }
    if (str == Lt) { return UnicodeCategory.TitlecaseLetter; }

    // Marks.
    if (str == Mn) { return UnicodeCategory.NonSpacingMark; }
    if (str == Mc) { return UnicodeCategory.SpacingCombiningMark; }
    if (str == Me) { return UnicodeCategory.EnclosingMark; }

    // Numbers.
    if (str == Nd) { return UnicodeCategory.DecimalDigitNumber; }
    if (str == Nl) { return UnicodeCategory.LetterNumber; }
    if (str == No) { return UnicodeCategory.OtherNumber; }

    // Separators.
    if (str == Zs) { return UnicodeCategory.SpaceSeparator; }
    if (str == Zl) { return UnicodeCategory.LineSeparator; }
    if (str == Zp) { return UnicodeCategory.ParagraphSeparator; }

    // Other.
    if (str == Cc) { return UnicodeCategory.Control; }
    if (str == Cf) { return UnicodeCategory.Format; }
    if (str == Cs) { return UnicodeCategory.Surrogate; }
    if (str == Co) { return UnicodeCategory.PrivateUse; }
    if (str == Cn) { return UnicodeCategory.OtherNotAssigned; }

    // Remaining letters.
    if (str == Lm) { return UnicodeCategory.ModifierLetter; }
    if (str == Lo) { return UnicodeCategory.OtherLetter; }

    // Punctuation.
    if (str == Pc) { return UnicodeCategory.ConnectorPunctuation; }
    if (str == Pd) { return UnicodeCategory.DashPunctuation; }
    if (str == Ps) { return UnicodeCategory.OpenPunctuation; }
    if (str == Pe) { return UnicodeCategory.ClosePunctuation; }
    if (str == Pi) { return UnicodeCategory.InitialQuotePunctuation; }
    if (str == Pf) { return UnicodeCategory.FinalQuotePunctuation; }
    if (str == Po) { return UnicodeCategory.OtherPunctuation; }

    // Symbols.
    if (str == Sm) { return UnicodeCategory.MathSymbol; }
    if (str == Sc) { return UnicodeCategory.CurrencySymbol; }
    if (str == Sk) { return UnicodeCategory.ModifierSymbol; }
    if (str == So) { return UnicodeCategory.OtherSymbol; }

    // The one-string constructor treats its argument as the parameter name,
    // so pass the parameter name and the message separately.
    throw new ArgumentOutOfRangeException(nameof(str), "invalid unicode category " + str.ToString());
}
// ---
/// <summary>
/// Returns the slice of <paramref name="json"/> covered by the token, i.e.
/// the bytes from <c>self.Start</c> up to (not including) <c>self.End</c>.
/// </summary>
public static utf8 GetString(this JToken self, utf8 json)
{
    // @TODO ensure self is string
    //Ensure(self.Type == JType.String);
    var offset = self.Start;
    var length = self.End - self.Start;
    return json.Substring(offset, length);
}
// Externally implemented binding for mbNetCreateWebUrlRequest.
// url, method and mime are each marshaled with Tnelab.MiniBlinkV.Utf8Marshaler;
// the result is returned as an mbWebUrlRequestPtr.
public static extern mbWebUrlRequestPtr mbNetCreateWebUrlRequest(
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 url,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 method,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 mime);
// Externally implemented binding for mbLoadHtmlWithBaseUrl.
// html and baseUrl are each marshaled with Tnelab.MiniBlinkV.Utf8Marshaler.
public static extern void mbLoadHtmlWithBaseUrl(
    mbWebView webView,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 html,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 baseUrl);
// Externally implemented binding for mbLoadURL.
// url is marshaled with Tnelab.MiniBlinkV.Utf8Marshaler.
public static extern void mbLoadURL(
    mbWebView webView,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 url);
// Externally implemented binding for mbSetUserAgent.
// userAgent is marshaled with Tnelab.MiniBlinkV.Utf8Marshaler, the same way
// every other utf8 argument of the sibling mb* bindings is, so that the text
// is not passed through the default string marshaling.
public static extern void mbSetUserAgent(
    mbWebView webView,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 userAgent);
// Externally implemented binding for mbResponseQuery.
// response is marshaled with Tnelab.MiniBlinkV.Utf8Marshaler; the remaining
// arguments are passed with their declared types.
public static extern void mbResponseQuery(
    mbWebView webView,
    int64_t queryId,
    int customMsg,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 response);
// Externally implemented binding for mbNetAddHTTPHeaderFieldToUrlRequest.
// name and value are each marshaled with Tnelab.MiniBlinkV.Utf8Marshaler.
public static extern void mbNetAddHTTPHeaderFieldToUrlRequest(
    mbWebUrlRequestPtr request,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 name,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 value);
// Externally implemented binding for mbRunJs.
// script is marshaled with Tnelab.MiniBlinkV.Utf8Marshaler; callback is passed
// as an mbRunJsCallback and param / unuse as raw IntPtr values.
public static extern void mbRunJs(
    mbWebView webView,
    mbWebFrameHandle frameId,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 script,
    bool isInClosure,
    mbRunJsCallback callback,
    IntPtr param,
    IntPtr unuse);
// Externally implemented binding for mbRunJsSync.
// script is marshaled with Tnelab.MiniBlinkV.Utf8Marshaler; the result is
// returned as an mbJsValue.
public static extern mbJsValue mbRunJsSync(
    mbWebView webView,
    mbWebFrameHandle frameId,
    [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Tnelab.MiniBlinkV.Utf8Marshaler))] utf8 script,
    bool isInClosure);
/// <summary>
/// Tokenizes <paramref name="json"/> into the caller-supplied
/// <paramref name="tokens"/> buffer, linking each token to its parent.
/// </summary>
/// <param name="json">The bytes to tokenize.</param>
/// <param name="tokens">Destination buffer for the tokens.</param>
/// <param name="numberOfTokens">Capacity of <paramref name="tokens"/>, forwarded to the allocator.</param>
/// <returns>
/// The number of tokens counted, or 0 when a closing bracket does not match
/// its opener or an object/array is left unclosed.
/// </returns>
public static int ParseJson(utf8 json, JToken *tokens, int numberOfTokens)
{
    var parser = new JParser { Position = 0, SuperToken = -1, NextToken = 0, Count = 0 };
    int count = parser.NextToken;

    for (; parser.Position < json.Length; parser.Position++)
    {
        byte c = json[parser.Position];
        switch (c)
        {
            case ObjectOpen:
            case ArrayOpen:
            {
                count += 1;
                var tok = Allocate(ref parser, tokens, numberOfTokens);
                if (parser.SuperToken != -1)
                {
                    tokens[parser.SuperToken].Size += 1;
                    tok->Parent = parser.SuperToken;
                }
                tok->Type = c == ObjectOpen ? JType.Object : JType.Array;
                tok->Start = parser.Position;

                // The container just opened becomes the parent of what follows.
                parser.SuperToken = parser.NextToken - 1;
            }
            break;

            case ObjectClose:
            case ArrayClose:
            {
                if (parser.NextToken < 1)
                {
                    // A closer with no token before it.
                    return(0);
                }

                var type = c == ObjectClose ? JType.Object : JType.Array;
                var tok = &tokens[parser.NextToken - 1];

                // Walk up the parent chain to the innermost still-open token.
                for (;;)
                {
                    if (tok->Start != -1 && tok->End == -1)
                    {
                        if (tok->Type != type)
                        {
                            // Closer does not match the opener.
                            return(0);
                        }
                        tok->End = parser.Position + 1;
                        parser.SuperToken = tok->Parent;
                        break;
                    }

                    if (tok->Parent == -1)
                    {
                        if (tok->Type != type || parser.SuperToken == -1)
                        {
                            return(0);
                        }

                        // Reached a root token: stop here, otherwise the next
                        // step would index tokens[-1].
                        break;
                    }

                    tok = &tokens[tok->Parent];
                }
            }
            break;

            case DoubleQuote:
                ParseString(ref parser, json, tokens, numberOfTokens);
                count += 1;
                if (parser.SuperToken != -1)
                {
                    tokens[parser.SuperToken].Size += 1;
                }
                break;

            case Comma:
                // After a value, pop back from a non-container parent to its own parent.
                if (parser.SuperToken != -1 &&
                    tokens[parser.SuperToken].Type != JType.Array &&
                    tokens[parser.SuperToken].Type != JType.Object)
                {
                    parser.SuperToken = tokens[parser.SuperToken].Parent;
                }
                break;

            case Colon:
                // The token just produced becomes the parent of the value that follows.
                parser.SuperToken = parser.NextToken - 1;
                break;

            case LF:
            case CR:
            case TAB:
            case SPACE:
                break;

            default:
                ParsePrimitive(ref parser, json, tokens, numberOfTokens);
                count += 1;
                if (parser.SuperToken != -1)
                {
                    tokens[parser.SuperToken].Size += 1;
                }
                break;
        }
    }

    // Any token that was opened but never closed makes the input invalid.
    for (int i = parser.NextToken - 1; i >= 0; i--)
    {
        if (tokens[i].Start != -1 && tokens[i].End == -1)
        {
            return(0);
        }
    }

    return(count);
}
/// <summary>
/// Looks up the category for <paramref name="str"/> via <c>Category</c> and
/// returns it narrowed to a single byte.
/// </summary>
byte ParseCategory(utf8 str)
{
    var category = Category(str);
    return (byte)category;
}