/// <summary>
/// Asserts that two predictors produce identical JSON text.
/// </summary>
/// <param name="tokens">Token table passed to <c>ToJson</c> for both predictors.</param>
/// <param name="l">Predictor whose JSON is supplied as the expected value.</param>
/// <param name="r">Predictor whose JSON is supplied as the actual value.</param>
private static void CheckJsonEquivalent(StringTokens tokens, TokenPredictor l, TokenPredictor r)
{
    // Arguments are evaluated left to right, so l is serialized before r.
    Assert.AreEqual(l.ToJson(tokens), r.ToJson(tokens));
}
/// <summary>
/// Convert predictor to a JSON string.
/// </summary>
/// <param name="predictor">Predictor to serialize.</param>
/// <param name="tokens">Token table forwarded to <c>ToJsonUtf8</c>.</param>
/// <returns>The UTF-8 JSON bytes from <c>ToJsonUtf8</c>, decoded into a string.</returns>
public static string ToJson(this TokenPredictor predictor, StringTokens tokens)
    => Encoding.UTF8.GetString(predictor.ToJsonUtf8(tokens));
/// <summary>
/// Convert predictor to UTF-8 encoded JSON.
/// </summary>
/// <param name="predictor">Predictor to serialize.</param>
/// <param name="tokens">Token table forwarded to <c>ToJsonDictionary</c>.</param>
/// <returns>The result of <c>ToJsonDictionary</c> serialized as UTF-8 bytes.</returns>
public static byte[] ToJsonUtf8(this TokenPredictor predictor, StringTokens tokens)
{
    var jsonModel = predictor.ToJsonDictionary(tokens);
    return JsonSerializer.SerializeToUtf8Bytes(jsonModel);
}
/// <summary>
/// Adds the whole alphabet sequence to a new predictor in a single call and
/// checks that its JSON matches <c>AlphabetJson</c>.
/// </summary>
public void CreateAlphabetTest()
{
    var tokens = new StringTokens();
    var predictor = new TokenPredictor(3);

    predictor.AddSequence(GetAlphabet(tokens), 1);

    Assert.AreEqual(AlphabetJson, predictor.ToJson(tokens));
}
/// <summary>
/// Builds the test sequence: token 0 followed by the tokens for the
/// single-letter strings "A" through "H".
/// </summary>
/// <param name="tokens">Token table used to look up each letter via <c>GetToken</c>.</param>
/// <returns>A new list holding 0 and then one token per letter, in alphabetical order.</returns>
private static List<int> GetAlphabet(StringTokens tokens)
{
    var sequence = new List<int> { 0 };

    for (var ch = 'A'; ch <= 'H'; ch++)
    {
        sequence.Add(tokens.GetToken(ch.ToString()));
    }

    return sequence;
}
/// <summary>
/// Feeds the alphabet sequence to a new predictor one token at a time via
/// <c>AddSequenceTail</c> and checks that the resulting JSON matches
/// <c>AlphabetJson</c>.
/// </summary>
public void CreateAlphabetLetterByLetterTest()
{
    var tokens = new StringTokens();
    var predictor = new TokenPredictor(3);
    var fullAlphabet = GetAlphabet(tokens);
    var prefix = new List<int>();

    for (var i = 0; i < fullAlphabet.Count; i++)
    {
        // Grow the prefix by one token, then register the grown prefix's tail.
        prefix.Add(fullAlphabet[i]);
        predictor.AddSequenceTail(prefix, 1);
    }

    Assert.AreEqual(AlphabetJson, predictor.ToJson(tokens));
}
/// <summary>
/// Exercises <c>CreateCopy</c>, <c>Add</c>, <c>Subtract</c> and <c>CreateEmpty</c>
/// on newly constructed predictors, comparing results through their JSON form.
/// </summary>
public void BuildSeedPredictor()
{
    var tokens = new StringTokens();

    // All three predictors are freshly constructed; no sequences are added to them here.
    var first = new TokenPredictor(3);
    var second = new TokenPredictor(3);
    var third = new TokenPredictor(3);

    // A copy starts out equal to its source; adding the second is expected to give the third.
    var sum = first.CreateCopy();
    CheckJsonEquivalent(tokens, sum, first);
    sum.Add(second);
    CheckJsonEquivalent(tokens, sum, third);

    // Subtracting the second from a copy of the third is expected to give back the first.
    var difference = third.CreateCopy();
    CheckJsonEquivalent(tokens, difference, third);
    difference.Subtract(second);
    CheckJsonEquivalent(tokens, difference, first);

    // Subtracting the first as well should leave the same thing CreateEmpty produces.
    difference.Subtract(first);
    var empty = first.CreateEmpty();
    CheckJsonEquivalent(tokens, difference, empty);
}
/// <summary>
/// Creates a token table from the ordered seed words, checks that every token
/// round-trips through both indexers, and checks that a previously unseen
/// string is assigned the next token value.
/// </summary>
public void ConstructionTest()
{
    var seedWords = DefaultWriterEnvironment.Instance.GetOrderedSeedWords();
    var tokens = StringTokens.Create(seedWords);
    Assert.IsNotNull(tokens);

    // Walk the tokens from the highest (TokenLimit - 1) down to TokenStart.
    var index = tokens.TokenLimit - 1;
    while (tokens.TokenStart <= index)
    {
        var text = tokens[index];
        var token = tokens[text];
        Assert.AreEqual(index, token);

        var textAgain = tokens[token];
        Assert.AreEqual(text, textAgain);

        index--;
    }

    // A new string is expected to take the old limit as its token and raise the limit by one.
    var limitBefore = tokens.TokenLimit;
    var addedToken = tokens.GetToken("new lowercase string");
    Assert.AreEqual(limitBefore, addedToken);
    Assert.AreEqual(limitBefore + 1, tokens.TokenLimit);
}
/// <summary>
/// Returns the next token from the unread input held in <c>Begin</c>, removing the
/// text that makes up the token from <c>Begin</c>.
/// </summary>
/// <remarks>
/// Checks are made in this order: a pending <c>TokenCache</c> entry whose kind is not
/// UNKNOWN is returned first (the cache is replaced by a fresh <c>LexerToken</c>);
/// leading whitespace is trimmed and END_REACHED is returned when nothing is left;
/// the single-character tokens '/', '-' and '.'; for input starting with a digit,
/// an attempt to parse the whole argument as a date; finally a run of
/// letters/digits (or a run of other non-whitespace characters) is classified as
/// an integer, a month name, a weekday name or a keyword from <c>StringTokens</c>.
/// </remarks>
/// <returns>
/// The recognized token, or an UNKNOWN token carrying the term text when nothing matched.
/// </returns>
public LexerToken NextToken()
{
    if (TokenCache != null && TokenCache.Kind != LexerTokenKindEnum.UNKNOWN)
    {
        LexerToken tok = TokenCache;
        TokenCache = new LexerToken();
        return tok;
    }

    Begin = Begin.TrimStart();
    if (String.IsNullOrEmpty(Begin))
    {
        return new LexerToken(LexerTokenKindEnum.END_REACHED);
    }

    // Single-character tokens. The character must be consumed here; otherwise
    // every subsequent call would see the same character and return the same
    // token again without ever advancing.
    switch (Begin[0])
    {
        case '/':
            Begin = Begin.Substring(1);
            return new LexerToken(LexerTokenKindEnum.TOK_SLASH);
        case '-':
            Begin = Begin.Substring(1);
            return new LexerToken(LexerTokenKindEnum.TOK_DASH);
        case '.':
            Begin = Begin.Substring(1);
            return new LexerToken(LexerTokenKindEnum.TOK_DOT);
        default:
            break;
    }

    // If the first character is a digit, try parsing the whole argument as a
    // date using the typical date formats. This allows not only dates like
    // "2009/08/01", but also dates that fit the user's --input-date-format,
    // assuming their format fits in one argument and begins with a digit.
    if (Char.IsDigit(Begin[0]))
    {
        // The argument runs up to the first space, or to the end of the input.
        int pos = Begin.IndexOf(' ');
        string possibleDate = Begin.Substring(0, pos > 0 ? pos : Begin.Length);

        try
        {
            DateTraits dateTraits;
            Date when = TimesCommon.Current.ParseDateMask(possibleDate, out dateTraits);
            if (!when.IsNotADate())
            {
                Begin = Begin.Substring(possibleDate.Length);
                return new LexerToken(LexerTokenKindEnum.TOK_DATE, new BoostVariant(new DateSpecifier(when, dateTraits)));
            }
        }
        catch (DateError)
        {
            // Propagate the error only when the text contains one of the
            // PossibleDateContains characters; otherwise fall through and lex
            // the text as an ordinary term below.
            if (possibleDate.IndexOfAny(PossibleDateContains) >= 0)
            {
                throw;
            }
        }
    }

    // Collect a run of characters of the same class as the first one: either
    // all letters/digits or all non-letter/digit, stopping at whitespace.
    string start = Begin;
    bool alNul = Char.IsLetterOrDigit(Begin[0]);
    int pos1 = 0;
    while (pos1 < Begin.Length
           && !Char.IsWhiteSpace(Begin[pos1])
           && ((alNul && Char.IsLetterOrDigit(Begin[pos1])) || (!alNul && !Char.IsLetterOrDigit(Begin[pos1]))))
    {
        pos1++;
    }

    string term = Begin.Substring(0, pos1);
    Begin = Begin.Substring(pos1);

    if (!String.IsNullOrEmpty(term))
    {
        if (Char.IsDigit(term[0]))
        {
            return new LexerToken(LexerTokenKindEnum.TOK_INT, new BoostVariant(Int32.Parse(term)));
        }
        else if (Char.IsLetter(term[0]))
        {
            // Culture-invariant lowering so keyword lookup does not depend on
            // the current culture (e.g. the Turkish dotless i).
            term = term.ToLowerInvariant();

            MonthEnum? month = StringToMonthsOfYear(term);
            if (month != null)
            {
                return new LexerToken(LexerTokenKindEnum.TOK_A_MONTH, new BoostVariant(month.Value));
            }

            DayOfWeek? wday = StringToDayOfWeek(term);
            if (wday != null)
            {
                return new LexerToken(LexerTokenKindEnum.TOK_A_WDAY, new BoostVariant(wday.Value));
            }

            LexerTokenKindEnum stringTokenEnum;
            if (StringTokens.TryGetValue(term, out stringTokenEnum))
            {
                return new LexerToken(stringTokenEnum);
            }
        }
        else
        {
            // Unexpected punctuation: report it, then resume one character
            // past where this term started.
            LexerToken.Expected(default(char), term[0]);
            Begin = start.Substring(1);
        }
    }
    else
    {
        // Defensive: the run above always takes at least one character, so the
        // term should never be empty. Report NUL rather than indexing into an
        // empty string, which would throw IndexOutOfRangeException.
        LexerToken.Expected(default(char), default(char));
    }

    return new LexerToken(LexerTokenKindEnum.UNKNOWN, new BoostVariant(term));
}