// in data state, gather characters until a character reference or tag is found
internal override void Read(Tokeniser t, CharacterReader r)
{
    switch (r.Current())
    {
        case '&':
            // consume the '&' and switch to character-reference handling
            t.AdvanceTransition(TokeniserState.CharacterReferenceInData);
            break;
        case '<':
            // consume the '<' and start reading a tag
            t.AdvanceTransition(TokeniserState.TagOpen);
            break;
        case TokeniserState.nullChar:
            // a parse error is recorded, but the raw NUL itself is emitted
            t.Error(this); // NOT replacement character (oddly?)
            t.Emit(r.Consume());
            break;
        case TokeniserState.eof:
            t.Emit(new Token.EOF());
            break;
        default:
            // fast path: consume a whole run of plain data up to the next special char
            string data = r.ConsumeToAny('&', '<', TokeniserState.nullChar);
            t.Emit(data);
            break;
    }
}
// Reads and validates the literal keyword "null" from the byte stream.
// When isErrorNull is true, validation is skipped entirely (the caller is
// constructing an error placeholder) and no bytes are consumed.
public NullToken(Tokeniser tokeniser, ObjectId?objectId, bool isErrorNull = false) : base(tokeniser, objectId)
{
    //null
    if (!isErrorNull)
    {
        var b = tokeniser.SkipWhiteSpace();
        if (b == 'n')
        {
            b = tokeniser.GetNextByte();
            if (b == 'u')
            {
                b = tokeniser.GetNextByte();
                if (b == 'l')
                {
                    b = tokeniser.GetNextByte();
                    if (b == 'l')
                    {
                        // move past the final 'l', then check the delimiter that follows
                        tokeniser.GetNextByte();
                        // NOTE(review): ErrorEnum.Bool looks copy-pasted from the bool
                        // token — confirm whether a Null member exists and is intended
                        tokeniser.ValidateDelimiter(Tokeniser.ErrorEnum.Bool);
                        return;
                    }
                }
            }
        }
        throw tokeniser.Exception($"Null token not valid, should be 'null'. Invalid character: {(char)b}");
    }
}
public void TestTokeniseFirstMatchWins()
{
    // Extractor registration order decides matching: with "hello" and "world"
    // registered before "hello world", the input splits into two tokens.
    var ctx = new Context(ExpressiveOptions.None);
    var sut = new Tokeniser(ctx, new[]
    {
        CreateTokenExtractor("hello", ctx),
        CreateTokenExtractor("world", ctx),
        CreateTokenExtractor("hello world", ctx)
    });
    var result = sut.Tokenise("hello world");
    Assert.IsTrue(result.Count == 2);
    Assert.AreEqual("hello", result.First().CurrentToken);
    Assert.AreEqual("world", result.Last().CurrentToken);

    // With the longer extractor registered first, the whole input is one token.
    sut = new Tokeniser(ctx, new[]
    {
        CreateTokenExtractor("hello world", ctx),
        CreateTokenExtractor("hello", ctx),
        CreateTokenExtractor("world", ctx)
    });
    result = sut.Tokenise("hello world");
    Assert.IsTrue(result.Count == 1);
    Assert.AreEqual("hello world", result.First().CurrentToken);
}
public void TestAnalyse()
{
    // Run the full analysis pipeline over a sample text and verify each
    // metric's aggregate result.
    var analyser = new TextAnalyser(new Tokeniser(
        new CharacterTokenBuilder(),
        new WordTokenBuilder(),
        new SentenceTokenBuilder(),
        new NumberTokenBuilder()));

    var charMetric = new MostFrequentCharacterMetric();
    var wordCount = new WordsCountMetric();
    var exclamations = new ExclamationSentenceCountMetric();
    var numberSum = new NumbersSumMetric();

    analyser.Analyse(
        "Hello World! 1 Lorem ipsum 49... Lorem ipsum!! Dolor 23 sit amet? Consectetur adipiscing elit.",
        charMetric, wordCount, exclamations, numberSum);

    Assert.Equal('e', charMetric.Character);
    Assert.Equal(7, charMetric.OccurenciesCount);
    Assert.Equal(15, wordCount.Count);
    Assert.Equal(2, exclamations.Count);
    Assert.Equal(73, numberSum.Sum);
}
// Parses the remainder of a hexadecimal literal.
// firstValue is the token the caller already consumed; its first character
// is dropped (presumably the 'x'/'h' marker — TODO confirm) and the rest
// seeds the digit string. Subsequent word/number tokens are appended.
// Returns the consumed tokens and the parsed unsigned value.
private (IEnumerable <Token> tokens, ulong value) ParseHex(string firstValue)
{
    var tokens = new List <Token>();
    var valueBuilder = new StringBuilder();
    if (firstValue.Length > 1)
    {
        // keep everything after the leading marker character
        valueBuilder.Append(firstValue.Substring(1, firstValue.Length - 1));
    }
    // NOTE(review): only lowercase a-f are accepted here, so uppercase hex
    // letters throw — confirm the tokeniser lowercases its input
    var allowedLetters = "abcdef".ToCharArray();
    foreach (var token in Tokeniser.TakeSequentialOfType(TokenType.Word, TokenType.Number))
    {
        if (token.TokenType == TokenType.Word)
        {
            if (token.Value.Any(c => !allowedLetters.Contains(c)))
            {
                throw new InvalidLiteralException(Tokeniser.CurrentPosition);
            }
        }
        tokens.Add(token);
        valueBuilder.Append(token.Value);
    }
    var valueStr = valueBuilder.ToString();
    // culture-invariant hex parse; failure (e.g. overflow) is an invalid literal
    if (!ulong.TryParse(valueStr, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out var value))
    {
        throw new InvalidLiteralException(Tokeniser.CurrentPosition);
    }
    return(tokens.AsEnumerable(), value);
}
public Unit Parse(Token token)
{
    // Section unit: parse the name that follows the incoming token
    // (presumably the opening '[' — TODO confirm) and require ']'.
    var sectionName = Parser.ParseUnitOfType <NameUnit>(allowSingleUnitTransform: false);
    var closingBracket = Tokeniser.TakeOfType(TokenType.RightSquareBracket);
    // source tokens: opener + name tokens + closer, in order
    var sourceTokens = new [] { token }.Concat(sectionName.SourceTokens).Append(closingBracket);
    return new SectionUnit(sectionName, sourceTokens);
}
public static void Run()
{
    // Build an AST from the typed-in expression, print the three classic
    // traversals, then evaluate the postfix form via reverse polish notation.
    var tokens = new Tokeniser(Console.ReadLine()).Tokenise().ToList();
    // e.g "5 * 2 + 1" -> "5 2 * 1 +"
    // Using TreeNode type, not BinOp (Binary Operator) as we cannot guarantee the root node of the abstract syntax tree will be an operator.
    TreeNode root = TreeBuilder.BuildAST(tokens);

    Console.WriteLine("To postfix:");
    foreach (TreeNode n in Traversal.postOrder(root))
    {
        Console.Write(n.value + " ");
    }

    Console.WriteLine("\nTo infix:");
    foreach (TreeNode n in Traversal.inOrder(root))
    {
        Console.Write(n.value + " ");
    }

    Console.WriteLine("\nTo prefix:");
    foreach (TreeNode n in Traversal.preOrder(root))
    {
        Console.Write(n.value + " ");
    }

    Console.WriteLine();
    // Now using reverse polish notation, calculate what the result is. This takes in a postfix-ordered list of TreeNodes.
    Console.WriteLine("Answer: " + RPN.Evaluate(Traversal.postOrder(root)));
}
public PartListImpl ParseBody(Tokeniser tk, string mt)
{
    // Body -> Term {, Term...}
    // Parses comma-separated terms; returns null when no term was parsed.
    var body = new PartListImpl();
    var termCount = 0;
    for (Term term = (Term)ParseTerm(tk, mt); term != null; term = (Term)ParseTerm(tk, mt))
    {
        body.AddPart(term);
        termCount++;
        // a term not followed by a comma ends the body
        if (tk.current != ",")
        {
            break;
        }
        tk.consume();
    }
    return termCount == 0 ? null : body;
}
private (IEnumerable <Token> tokens, ulong value) ParseBinary()
{
    // Binary literal body: one number token that may contain only 0s and 1s.
    var digits = Tokeniser.TakeOfType(TokenType.Number);
    if (digits.Value.Any(d => d != '0' && d != '1'))
    {
        throw new InvalidLiteralException(Tokeniser.CurrentPosition);
    }

    var tokens = new List <Token> { digits };
    var text = digits.Value;

    // Convert base-2; overflow or malformed input is an invalid literal.
    ulong value;
    try
    {
        value = Convert.ToUInt64(text, 2);
    }
    catch (OverflowException e)
    {
        throw new InvalidLiteralException(Tokeniser.CurrentPosition, e);
    }
    catch (FormatException e)
    {
        throw new InvalidLiteralException(Tokeniser.CurrentPosition, e);
    }

    return (tokens.AsEnumerable(), value);
}
public void RejectsIntegersContainingStrings()
{
    // "123four5" mixes letters into a digit run, so the first token must
    // not be classified as an integer constant.
    using var sut = new Tokeniser("123four5");
    sut.GetNextToken().Type.Should().NotBe(NodeType.IntegerConstant);
}
// TF-IDF pipeline: tokenise each document, mark punctuation, stem, strip
// stopwords and numbers, compute per-document term frequencies, then IDF
// over the corpus, TF-IDF per document, and finally initialise the graph.
public void Process()
{
    foreach (TFIDF_Document art in documentList.Values)
    {
        Tokeniser tokeniser = new Tokeniser();
        Token[] tokens = tokeniser.Partition(art.Content);
        Punctuation_Remover pRemover = new Punctuation_Remover();
        tokens = pRemover.MarkPunc(tokens);
        tokens = Stemmer.Stem(tokens);
        Stopword_Remover swRemover = new Stopword_Remover();
        tokens = swRemover.RemoveStopwords(tokens);
        Number_Remover nRemover = new Number_Remover();
        tokens = nRemover.RemoveNumber(tokens);
        // anything still unclassified after filtering counts as a regular word
        foreach (Token t in tokens)
        {
            if (t.WordType == Token.WORDTYPE.DEFAULT)
            {
                t.WordType = Token.WORDTYPE.REGULAR;
            }
        }
        art.SetToken(tokens);
        art.CalTF(); // term frequency for this document
    }
    CalIDF(); // inverse document frequency over the whole corpus
    foreach (TFIDF_Document art in documentList.Values)
    {
        art.CalTFIDF(inverseDocumentFrequency);
    }
    graph.InitializeGraph(documentList.Values.ToArray());
}
// Wires the tokeniser with the extractors for the expression grammar.
// NOTE(review): extraction appears to be first-match-wins (see the
// first-match test elsewhere), so registration order matters — confirm
// before reordering this list.
internal ExpressionParser(Context context)
{
    this.context = context;
    this.tokeniser = new Tokeniser(
        this.context,
        new List <ITokenExtractor>
        {
            new KeywordTokenExtractor(this.context.FunctionNames),
            new KeywordTokenExtractor(this.context.OperatorNames),
            // Variables
            new ParenthesisedTokenExtractor('[', ']'),
            new NumericTokenExtractor(),
            // Dates
            new ParenthesisedTokenExtractor('#'),
            new ValueTokenExtractor(","),
            new ParenthesisedTokenExtractor('"'),
            new ParenthesisedTokenExtractor('\''),
            // TODO: Probably a better way to achieve this.
            new ValueTokenExtractor("true"),
            new ValueTokenExtractor("TRUE"),
            new ValueTokenExtractor("false"),
            new ValueTokenExtractor("FALSE"),
            new ValueTokenExtractor("null"),
            new ValueTokenExtractor("NULL")
        });
}
public static void Main(string[] args)
{
    // Load the grammar rules from disk, then evaluate a sample expression.
    var tokeniser = new Tokeniser();
    tokeniser.ParseRules(File.ReadAllText("CalcDemo.saneparse"));
    Console.Write(tokeniser.Run("(1+1)+1"));
}
private void MyInit()
{
    // Token-level similarity setup: partition both strings into tokens,
    // always keeping the shorter token sequence on the left.
    ISimilarity editdistance = new Leven();
    getSimilarity = new Similarity(editdistance.GetSimilarity);
    // alternative metric, kept for reference:
    //ISimilarity lexical=new LexicalSimilarity() ;
    //getSimilarity=new Similarity(lexical.GetSimilarity) ;

    var tokeniser = new Tokeniser();
    _leftTokens = tokeniser.Partition(_lString);
    _rightTokens = tokeniser.Partition(_rString);

    if (_leftTokens.Length > _rightTokens.Length)
    {
        // swap both the token arrays and the originating strings
        (_leftTokens, _rightTokens) = (_rightTokens, _leftTokens);
        (_lString, _rString) = (_rString, _lString);
    }

    leftLen = _leftTokens.Length - 1;
    rightLen = _rightTokens.Length - 1;
    Initialize();
}
// Examine: describe an item (in room or inventory), an npc in the room,
// or a room feature named by the command; the first examinable match wins
// and its Examine occurrences are fired.
protected override Response ProcessInternal(Engine engine, Tokeniser tokens)
{
    var ego = engine.GameState.Ego;
    // get list of potential things to look at from the tokens
    var possibles = tokens.Unrecognised;
    if (possibles.Count < 1)
    {
        // nothing provided to examine
        return(new Response("Examine what?"));
    }
    var response = new Response();
    foreach (var token in possibles)
    {
        if (engine.GameState.IsValidItem(token.Word))
        {
            var item = engine.GameState.GetItem(token.Word);
            if (item.IsExaminable && (ego.CurrentRoom.HasItem(item) || ego.IsCarrying(item)))
            {
                // an item in the current room or inventory
                response.AddMessage(item.Examine(true));
                response.Merge(engine.RunOccurrences(new Examine.Trigger(item)));
                return(response);
            }
        }
        if (engine.GameState.IsValidNpc(token.Word))
        {
            var npc = engine.GameState.GetNpc(token.Word);
            if (npc.IsExaminable && ego.CurrentRoom.HasNpc(npc))
            {
                // an npc in the current room
                response.AddMessage(npc.Examine(true));
                response.Merge(engine.RunOccurrences(new Examine.Trigger(npc)));
                return(response);
            }
        }
        if (ego.CurrentRoom.HasFeature(token.Word))
        {
            // a feature of the current room
            var feature = ego.CurrentRoom.GetFeature(token.Word);
            if (feature.IsExaminable)
            {
                response.AddMessage(feature.Examine(true));
                response.Merge(engine.RunOccurrences(new Examine.Trigger(feature)));
                return(response);
            }
        }
    }
    // item/npc not in inventory or current room
    return(new Response("I can't examine that."));
}
public void TestTokeniseWithNull()
{
    // Null and empty inputs both tokenise to null rather than an empty list.
    var sut = new Tokeniser(new Context(ExpressiveOptions.None), Enumerable.Empty <ITokenExtractor>());

    Assert.IsNull(sut.Tokenise(null));
    Assert.IsNull(sut.Tokenise(string.Empty));
}
protected override Response ProcessInternal(Engine engine, Tokeniser tokens)
{
    // Inventory: list carried item titles (six per line) followed by the
    // total weight, then fire any Inventory occurrences.
    var ego = engine.GameState.Ego;
    if (ego.Inventory.Count == 0)
    {
        return new Response("You are not carrying any items.");
    }

    var text = new StringBuilder("You are carrying:" + Environment.NewLine + " ");
    int shown = 0;
    foreach (var item in ego.Inventory)
    {
        // show at most six items per line
        text.Append(item.Title + " ");
        if ((++shown % 6) == 0)
        {
            text.Append(Environment.NewLine + " ");
        }
    }

    // append total weight carried
    text.Append(Environment.NewLine + "Weighing: " + ego.Inventory.TotalWeight);

    var response = new Response(text.ToString());
    response.Merge(engine.RunOccurrences(new Inventory.Trigger()));
    return response;
}
/// handles data in title, textarea etc
internal override void Read(Tokeniser t, CharacterReader r)
{
    switch (r.Current())
    {
        case '&':
            // character references are still decoded inside RCDATA
            t.AdvanceTransition(TokeniserState.CharacterReferenceInRcdata);
            break;
        case '<':
            t.AdvanceTransition(TokeniserState.RcdataLessthanSign);
            break;
        case TokeniserState.nullChar:
            // parse error: the NUL is replaced with U+FFFD here
            // (unlike the data state, which emits the raw NUL)
            t.Error(this);
            r.Advance();
            t.Emit(TokeniserState.replacementChar);
            break;
        case TokeniserState.eof:
            t.Emit(new Token.EOF());
            break;
        default:
            // fast path: consume a run of plain text up to the next special char
            string data = r.ConsumeToAny('&', '<', TokeniserState.nullChar);
            t.Emit(data);
            break;
    }
}
/// <summary>
/// Merge program from ascii or utf8 (if utf8_files is True) stream.
/// Numbered lines are tokenised and stored into program memory; a direct
/// (unnumbered) statement in the file raises DirectStatementInFile.
/// </summary>
/// <param name="stream">ASCII program stream to be merged</param>
private void Merge(Stream stream)
{
    while (true)
    {
        var line = stream.ReadLine();
        if (string.IsNullOrEmpty(line))
        {
            // end of stream (or a blank line) terminates the merge
            break;
        }
        var tokenizedLine = Tokeniser.Tokenise(line);
        if (tokenizedLine.Length > 0 && tokenizedLine[0] == '\0')
        {
            // line starts with a number, add to program memory; store_line seeks to 1 first
            StoreLine(tokenizedLine.AsStream());
        }
        else
        {
            // we have read the :
            var next = stream.SkipWhitespace();
            if (next != -1 && next != '\0')
            {
                // anything else on the line is a direct statement — not allowed here
                throw new ReplRuntimeException(ReplExceptionCode.DirectStatementInFile);
            }
        }
    }
}
public void TestTokinise(string text, string[] expectedWordTokens, long[] expectedWordTokenPositions, string[] expectedSentenceTokens, long[] expectedSenteceTokenPositions)
{
    // Tokenise the text from a UTF-8 stream and compare word and sentence
    // tokens (values and positions) against the expectations.
    var sut = new Tokeniser(
        new CharacterTokenBuilder(),
        new WordTokenBuilder(),
        new SentenceTokenBuilder()
    );

    IList <Token> actual;
    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(text)))
    using (var reader = new StreamReader(stream))
    {
        // clone each token — presumably instances are reused while streaming; verify
        actual = sut.Tokenise(reader).Select(t => (Token)t.Clone()).ToList();
    }

    Assert.Equal(expectedWordTokens, actual.OfType <WordToken>().Select(t => t.Value));
    Assert.Equal(expectedWordTokenPositions, actual.OfType <WordToken>().Select(t => t.Position));
    Assert.Equal(expectedSentenceTokens, actual.OfType <SentenceToken>().Select(t => t.Value));
    Assert.Equal(expectedSenteceTokenPositions, actual.OfType <SentenceToken>().Select(t => t.Position));
}
// Collects a (possibly multi-line) comment and attaches it to the unit
// parsed immediately afterwards. Continuation lines are detected by a ';'
// or '#' token directly after the newline.
// NOTE(review): the incoming token (the comment marker) is not itself
// included in the collected text — confirm intended.
public Unit Parse(Token token)
{
    var commentBuilder = new StringBuilder();
    while (true)
    {
        // gather everything up to the end of the current line
        while (Tokeniser.Peek().TokenType != TokenType.Newline)
        {
            commentBuilder.Append(Tokeniser.Take().Value);
        }
        // take the trailing newline
        Tokeniser.Take();
        var temp = Tokeniser.Peek();
        if (temp.TokenType == TokenType.Semicolon || temp.TokenType == TokenType.Hash)
        {
            // take the comment mark
            Tokeniser.Take();
            commentBuilder.Append("\n");
            continue;
        }
        break;
    }
    // the collected comment documents whatever unit follows it
    var unit = Parser.ParseUnit();
    unit.Comment = commentBuilder.ToString();
    return(unit);
}
// Returns the detokenised source text of all stored program lines whose
// line numbers fall in [startLine, endLine]. Throws when the program is
// protected. The bytecode read position is preserved across the call.
public string[] GetLines(int startLine, int endLine)
{
    if (_protected)
    {
        throw new ReplRuntimeException(ReplExceptionCode.IllegalFunctionCall);
    }
    // 65529 is max insertable line number for GW-BASIC 3.23.
    // however, 65530-65535 are executed if present in tokenised form.
    // in GW-BASIC, 65530 appears in LIST, 65531 and above are hidden
    // sort by positions, not line numbers!
    var linesByPostion = _lineNumberMap.Where(x => x.Key >= startLine && x.Key <= endLine)
                         .Select(x => x.Value).OrderBy(x => x);
    var lines = new List <string>();
    // remember the current read position so listing does not disturb it
    var current = Bytecode.Position;
    foreach (var position in linesByPostion)
    {
        // +1 skips the byte preceding the line record — TODO confirm which marker byte
        Bytecode.Seek(position + 1, SeekOrigin.Begin);
        lines.Add(Tokeniser.DetokeniseLine(Bytecode).Text);
    }
    Bytecode.Seek(current, SeekOrigin.Begin);
    return(lines.ToArray());
}
protected override Response ProcessInternal(Engine engine, Tokeniser tokens)
{
    // Drop: move the first recognised, carried item from the player's
    // inventory into the current room.
    var ego = engine.GameState.Ego;
    foreach (var token in tokens.Unrecognised)
    {
        if (!engine.GameState.IsValidItem(token.Word))
        {
            continue;
        }
        var item = engine.GameState.GetItem(token.Word);
        if (!ego.IsCarrying(item))
        {
            continue;
        }
        // remove the item from player inv and put it in the current room
        ego.Inventory.RemoveItem(item);
        ego.CurrentRoom.AddItem(item);
        var response = engine.RunOccurrences(new Drop.Trigger(item));
        if (!response.HasMessage)
        {
            response.AddMessage("You carefully drop the " + item.Title + ".");
        }
        return response;
    }
    return new Response("Put down what?");
}
// Parses an array token: '[' token* ']'.
//[/someName false -0 (string)]
public ArrayToken(Tokeniser tokeniser, ObjectId?objectId) : base(tokeniser, objectId)
{
    this.tokeniser = tokeniser;
    tokens = new List <Token>();
    var b = tokeniser.SkipWhiteSpace();
    if (b != '[')
    {
        throw tokeniser.Exception($"illegal array format, leading character '[' expected but was {(char)b}.");
    }
    b = tokeniser.GetNextByte();
    while (b != ']')
    {
        // isThrowExceptionWhenError false: a null result signals "no token
        // here", which is legal for whitespace just before the closing ']'
        var token = tokeniser.GetNextToken(isThrowExceptionWhenError: false);
        if (token != null)
        {
            tokens.Add(token);
            b = tokeniser.SkipWhiteSpace();
        }
        else
        {
            b = tokeniser.GetByte();
            if (b != ']')
            {
                throw tokeniser.Exception($"NextToken(): unexpected character '{(char)b}'.");
            }
            //we come here when array is empty but has some whitespace
        }
    }
    // advance past the closing ']'
    tokeniser.GetNextByte();
}
// Parses a dictionary token: '<<' (/Key value)* '>>'.
// - A /Type name value is also captured into the Type property.
// - Duplicate keys are merged into an ArrayToken holding all values.
// - After the terminator, an attached stream's start index and length
//   (if any) are resolved.
// <<
// /Name1 123
// /Name2 [(string) (array) 123]
// /Name3 <</subDictionaryName1 123 /subDictionaryName2 true>>
// /Name4 (another string)
// /Name5 <112233EE>
// >>
public DictionaryToken(Tokeniser tokeniser, ObjectId?objectId) : base(tokeniser, objectId)
{
    this.tokeniser = tokeniser;
    var b = tokeniser.SkipWhiteSpace();
    if (b != '<' || tokeniser.GetNextByte() != '<')
    {
        throw tokeniser.Exception($"illegal dictionary format, leading characters '<<' expected, but was'{(char)b}{(char)tokeniser.LookaheadByte()}'.");
    }
    //parse key
    tokens = new Dictionary <string, Token>();
    tokeniser.GetNextByte();
    b = tokeniser.SkipWhiteSpace();
    // NOTE(review): loop exits when the current OR the lookahead byte is
    // '>' — confirm this cannot terminate early on a '>' inside a value
    while (b != '>' && tokeniser.LookaheadByte() != '>')
    {
        if (b != '/')
        {
            throw tokeniser.Exception($"Invalid dictionary format, '/' expected as leading character for dictionary key name, but was {(char)b}.");
        }
        var key = new NameToken(tokeniser, null);
        var value = tokeniser.GetNextToken();
        if (key.Value == "Type" && value is NameToken typeNameToken)
        {
            Type = typeNameToken.Value;
        }
        if (tokens.TryGetValue(key.Value, out var existingToken))
        {
            // duplicate key: collect all values under this key in an array
            if (existingToken is ArrayToken existingArrayToken)
            {
                existingArrayToken.Add(value);
            }
            else
            {
                tokens[key.Value] = new ArrayToken(tokeniser, existingToken)
                {
                    value
                };
            }
        }
        else
        {
            tokens.Add(key.Value, value);
        }
        b = tokeniser.SkipWhiteSpace();
    }
    // consume the two '>' bytes of the terminator
    tokeniser.GetNextByte();
    tokeniser.GetNextByte();
    StreamStartIndex = tokeniser.GetStreamStartIndex(this, out var length);
    Length = length;
    keys = tokens.Keys.ToArray();
}
public void TestReplace_StringWithNoTokens()
{
    // Text without any token markers must pass through unchanged.
    var replacements = new Dictionary<string, string>();
    var sut = new Tokeniser();
    TestReplace(sut, replacements, "some text.", "some text.");
}
public void WalksThroughTheInput()
{
    // Successive calls return successive tokens from the source text.
    using var sut = new Tokeniser("static field let");
    sut.GetNextToken().Value.Should().Be("static");
    sut.GetNextToken().Value.Should().Be("field");
    sut.GetNextToken().Value.Should().Be("let");
}
// Parses a decimal numeric literal.
// firstValue is what the caller already consumed: a digit run, or a lone
// "-" (in which case the digit token is taken here). An optional ".digits"
// fraction promotes the result to double; otherwise it parses as long.
// Returns the consumed tokens and the boxed value (long or double).
// NOTE(review): TryParse uses the current culture here, while the hex
// parser uses InvariantCulture — confirm the inconsistency is intended.
private (IEnumerable <Token> tokens, object value) ParseNumeric(string firstValue)
{
    var tokens = new List <Token>();
    var valueBuilder = new StringBuilder(firstValue);
    var isDouble = false;
    object value;
    if (firstValue == "-")
    {
        // sign only so far: the digits must follow immediately
        var numberToken = Tokeniser.TakeOfType(TokenType.Number);
        tokens.Add(numberToken);
        valueBuilder.Append(numberToken.Value);
    }
    if (Tokeniser.Peek().TokenType == TokenType.Period)
    {
        var periodToken = Tokeniser.Take();
        tokens.Add(periodToken);
        valueBuilder.Append(periodToken.Value);
        Token decimalToken;
        try
        {
            // "123." with no fraction digits is not a valid literal
            decimalToken = Tokeniser.TakeOfType(TokenType.Number);
        }
        catch (InvalidTokenException e)
        {
            throw new InvalidLiteralException(Tokeniser.CurrentPosition, e);
        }
        tokens.Add(decimalToken);
        valueBuilder.Append(decimalToken.Value);
        isDouble = true;
    }
    var valueStr = valueBuilder.ToString();
    // the ternary expression messes up the typing and causes invalid casting
    // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
    if (isDouble)
    {
        if (!double.TryParse(valueStr, out var temp))
        {
            throw new InvalidLiteralException(Tokeniser.CurrentPosition);
        }
        value = temp;
    }
    else
    {
        if (!long.TryParse(valueStr, out var temp))
        {
            throw new InvalidLiteralException(Tokeniser.CurrentPosition);
        }
        value = temp;
    }
    return(tokens.AsEnumerable(), value);
}
// Give: hand a carried item to an NPC named in the same command.
// Scans the unrecognised words for an item and an npc candidate, then
// transfers the item and fires any Give occurrences. Prompts are tailored
// to whichever half of the command was missing.
protected override Response ProcessInternal(Engine engine, Tokeniser tokens)
{
    var ego = engine.GameState.Ego;
    var possibles = tokens.Unrecognised;
    if (possibles.Count > 0)
    {
        Npc npc = null;
        Item item = null;
        foreach (var token in possibles)
        {
            if (engine.GameState.IsValidItem(token.Word))
            {
                item = engine.GameState.GetItem(token.Word);
            }
            if (engine.GameState.IsValidNpc(token.Word))
            {
                npc = engine.GameState.GetNpc(token.Word);
            }
            // if we've found candidates for both, we can stop looking further
            if (item != null && npc != null)
            {
                break;
            }
        }
        if (item == null && npc == null)
        {
            return(new Response("Give who what?"));
        }
        if (item == null)
        {
            return(new Response("Give " + npc.Name + " what?"));
        }
        if (npc == null)
        {
            return(new Response("Give the " + item.Name + " to whom?"));
        }
        // NOTE(review): the item is removed without checking
        // ego.IsCarrying(item) first — confirm this is intended
        engine.GameState.Ego.Inventory.RemoveItem(item);
        npc.Inventory.AddItem(item);
        var response = engine.RunOccurrences(new Give.Trigger(npc, item));
        if (!response.HasMessage)
        {
            response.AddMessage("You give the " + item.Name + " to " + npc.Name);
        }
        return(response);
    }
    return(new Response("Give what to whom?"));
}
public void DoesNotGetFooledByDoubleSlashesInStringLiterals()
{
    // "//" inside a string literal is content, not a comment marker.
    using var sut = new Tokeniser("return \"// this is not a comment\";\n");
    sut.GetNextToken().Value.Should().Be("return");
    sut.GetNextToken().Value.Should().Be("// this is not a comment");
    sut.GetNextToken().Value.Should().Be(";");
    sut.GetNextToken().Should().BeNull();
}
public void RecognisesStraightforwardIdentifiers()
{
    // A plain word lexes as a single identifier token.
    using var sut = new Tokeniser("counter");
    var result = sut.GetNextToken();
    result.Type.Should().Be(NodeType.Identifier);
    result.Value.Should().Be("counter");
}
public void IgnoresLineCommentsAtTheEndOfLines()
{
    // Everything after "//" up to the newline is skipped.
    using var sut = new Tokeniser("return; // this is a comment\n}");
    sut.GetNextToken().Value.Should().Be("return");
    sut.GetNextToken().Value.Should().Be(";");
    sut.GetNextToken().Value.Should().Be("}");
    sut.GetNextToken().Should().BeNull();
}
public void RecognisesMoreComplexIdentifiers()
{
    // Underscores and digits are legal inside an identifier.
    using var sut = new Tokeniser("first_3_entries");
    var result = sut.GetNextToken();
    result.Type.Should().Be(NodeType.Identifier);
    result.Value.Should().Be("first_3_entries");
}
public void SkipsOverWhitespace()
{
    // Spaces, newlines, carriage returns and tabs all separate tokens.
    using var sut = new Tokeniser(" static field\nlet \r\n\tclass");
    sut.GetNextToken().Value.Should().Be("static");
    sut.GetNextToken().Value.Should().Be("field");
    sut.GetNextToken().Value.Should().Be("let");
    sut.GetNextToken().Value.Should().Be("class");
}
public void ConstructorSpecification()
{
    // A fresh tokeniser starts with no current position.
    Tokeniser sut = null;
    "Given a new Tokeniser".Context(() => sut = new Tokeniser());
    "CurrentPosition is SourceLocation.None".Assert(() => sut.CurrentPosition.ShouldBe(SourceLocation.None));
}
public void TestReplace_StringWithOneToken_FoundInDictionary()
{
    // A single [%token%] marker is replaced by its dictionary value,
    // whether embedded in text or standing alone.
    var replacements = new Dictionary<string, string> { { "tokenName", "tokenised" } };
    var sut = new Tokeniser();
    TestReplace(sut, replacements, "some [%tokenName%] text.", "some tokenised text.");
    TestReplace(sut, replacements, "[%tokenName%]", "tokenised");
}
public void ReadTokenSpecification_ForVariableDeclaration()
{
    // "var" lexes to a Var token spanning columns 1-3 of line 1.
    Tokeniser sut = null;
    "Given new Tokeniser initialised with \"var\"".Context(() => sut = new Tokeniser());
    "with Initialise() called".Do(() => sut.Initialize(new StringReader("var")));
    "ReadToken returns Token where Type is TokenTypes.Var".Assert(() => sut.ReadToken().Type.ShouldBe(TokenTypes.Var));
    "ReadToken returns Token where SourceSpan is 1,1-1,3".Assert(() => sut.ReadToken().SourceSpan.ShouldBe(new SourceSpan(new SourceLocation(0, 1, 1), new SourceLocation(0, 1, 3))));
}
public void ReadTokenSpecification_ForEmptyString()
{
    // Empty input yields an EndOfFile token with no source span.
    Tokeniser sut = null;
    "Given new Tokeniser initialised with empty string".Context(() => sut = new Tokeniser());
    "with Initialise() called".Do(() => sut.Initialize(new StringReader(string.Empty)));
    "ReadToken returns Token where Type is TokenTypes.EndOfFile".Assert(() => sut.ReadToken().Type.ShouldBe(TokenTypes.EndOfFile));
    "ReadToken returns Token where SourceSpan is SourceSpan.None".Assert(() => sut.ReadToken().SourceSpan.ShouldBe(SourceSpan.None));
}
public void InitializeSpecification()
{
    // Initialize accepts a reader, and rejects a null reader.
    var sourceReader = new StringReader("");
    Tokeniser sut = null;
    "Given new Tokeniser".Context(() => sut = new Tokeniser());
    "with Initialise() called".Do(() => sut.Initialize(sourceReader));
    "Initialise throws ArgumentNullException when sourceReader passed is null".Assert(() => Assert.Throws<ArgumentNullException>(() => sut.Initialize(null)));
}
public void TestReplace_StringWithTwoTokens_FoundInDictionary()
{
    // Two distinct markers are each replaced by their own value.
    var replacements = new Dictionary<string, string>
    {
        { "tokenName1", "tokenised1" },
        { "tokenName2", "tokenised2" }
    };
    var sut = new Tokeniser();
    TestReplace(sut, replacements, "some [%tokenName1%] text [%tokenName2%].", "some tokenised1 text tokenised2.");
    TestReplace(sut, replacements, "[%tokenName1%]", "tokenised1");
}
public void TestReplace_StringWithTwoIdenticalTokens_FoundInDictionary()
{
    // The same marker may appear repeatedly, separated or adjacent.
    var replacements = new Dictionary<string, string> { { "tokenName", "tokenised" } };
    var sut = new Tokeniser();
    TestReplace(sut, replacements, "some [%tokenName%] text [%tokenName%].", "some tokenised text tokenised.");
    TestReplace(sut, replacements, "some [%tokenName%] [%tokenName%].", "some tokenised tokenised.");
    TestReplace(sut, replacements, "some [%tokenName%][%tokenName%].", "some tokenisedtokenised.");
    TestReplace(sut, replacements, "[%tokenName%][%tokenName%]", "tokenisedtokenised");
}
public void Tokeniser_Tokenise_33090()
{
    // A bare digit run lexes as a single Integer token.
    var sut = new Tokeniser();
    const string expr = "33090";
    var expected = new List<Token> { new Token("Integer", "33090") };

    var actual = sut.Tokenise(expr);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(expr, actual, expected));
}
public void Tokeniser_Tokenise_345dp000dp34565()
{
    // A second decimal point starts a new Number token.
    var sut = new Tokeniser();
    const string expr = "345.000.34565";
    var expected = new List<Token>
    {
        new Token("Number", "345.000"),
        new Token("Number", ".34565")
    };

    var actual = sut.Tokenise(expr);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(expr, actual, expected));
}
// from & in data
internal override void Read(Tokeniser t, CharacterReader r)
{
    // Try to resolve the character reference; on failure emit the literal '&'.
    char[] reference = t.ConsumeCharacterReference(null, false);
    if (reference == null)
    {
        t.Emit('&');
    }
    else
    {
        t.Emit(reference);
    }
    // resume normal data tokenisation either way
    t.Transition(TokeniserState.Data);
}
// Parses ParseSample1 end-to-end and verifies the template directive, the
// six content segments, their types, and their source locations.
public void ParseTest()
{
    string tf = "test.input";
    ParsedTemplate pt = new ParsedTemplate ("test.input");
    Tokeniser tk = new Tokeniser (tf, ParseSample1);
    DummyHost host = new DummyHost ();
    pt.Parse (host, tk);
    Assert.AreEqual (0, pt.Errors.Count);
    var content = new List<TemplateSegment> (pt.Content);
    var dirs = new List<Directive> (pt.Directives);

    // the single template directive and its language attribute
    Assert.AreEqual (1, dirs.Count);
    Assert.AreEqual (6, content.Count);
    Assert.AreEqual ("template", dirs[0].Name);
    Assert.AreEqual (1, dirs[0].Attributes.Count);
    Assert.AreEqual ("C#v3.5", dirs[0].Attributes["language"]);
    Assert.AreEqual (new Location (tf, 1, 1), dirs[0].TagStartLocation);
    Assert.AreEqual (new Location (tf, 1, 34), dirs[0].EndLocation);

    // segment texts
    Assert.AreEqual ("Line One\r\nLine Two\r\n", content[0].Text);
    Assert.AreEqual ("\r\nfoo\r\n", content[1].Text);
    Assert.AreEqual ("Line Three ", content[2].Text);
    Assert.AreEqual (" bar ", content[3].Text);
    Assert.AreEqual ("\r\nLine Four\r\n", content[4].Text);
    Assert.AreEqual (" \r\nbaz \\#>\r\n", content[5].Text);

    // segment kinds alternate between plain content and tag segments
    Assert.AreEqual (SegmentType.Content, content[0].Type);
    Assert.AreEqual (SegmentType.Block, content[1].Type);
    Assert.AreEqual (SegmentType.Content, content[2].Type);
    Assert.AreEqual (SegmentType.Expression, content[3].Type);
    Assert.AreEqual (SegmentType.Content, content[4].Type);
    Assert.AreEqual (SegmentType.Helper, content[5].Type);

    // tag-start locations of the tag segments
    Assert.AreEqual (new Location (tf, 4, 1), content[1].TagStartLocation);
    Assert.AreEqual (new Location (tf, 7, 12), content[3].TagStartLocation);
    Assert.AreEqual (new Location (tf, 9, 1), content[5].TagStartLocation);

    // start locations of all segments
    Assert.AreEqual (new Location (tf, 2, 1), content[0].StartLocation);
    Assert.AreEqual (new Location (tf, 4, 3), content[1].StartLocation);
    Assert.AreEqual (new Location (tf, 7, 1), content[2].StartLocation);
    Assert.AreEqual (new Location (tf, 7, 15), content[3].StartLocation);
    Assert.AreEqual (new Location (tf, 7, 22), content[4].StartLocation);
    Assert.AreEqual (new Location (tf, 9, 4), content[5].StartLocation);

    // end locations of the tag segments
    Assert.AreEqual (new Location (tf, 6, 3), content[1].EndLocation);
    Assert.AreEqual (new Location (tf, 7, 22), content[3].EndLocation);
    Assert.AreEqual (new Location (tf, 11, 3), content[5].EndLocation);
}
public void Tokeniser_Tokenise_3dp004_4()
{
    // Whitespace runs are preserved as tokens around the numbers.
    var sut = new Tokeniser();
    const string expr = " 3.004 4 ";
    var expected = new List<Token>
    {
        new Token("Whitespace", " "),
        new Token("Number", "3.004"),
        new Token("Whitespace", " "),
        new Token("Integer", "4"),
        new Token("Whitespace", " ")
    };

    var actual = sut.Tokenise(expr);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(expr, actual, expected));
}
public void Tokeniser_Tokenise_equation_001()
{
    // A bracketed arithmetic expression splits into brackets, operators
    // and numeric tokens; "5.5" is classified as Decimal.
    var sut = new Tokeniser();
    const string expr = "(3^4)/(2*5.5)";
    var expected = new List<Token>
    {
        new Token("Bracket", "("),
        new Token("Integer", "3"),
        new Token("Operator", "^"),
        new Token("Integer", "4"),
        new Token("Bracket", ")"),
        new Token("Operator", "/"),
        new Token("Bracket", "("),
        new Token("Integer", "2"),
        new Token("Operator", "*"),
        new Token("Decimal", "5.5"),
        new Token("Bracket", ")")
    };

    var actual = sut.Tokenise(expr);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(expr, actual, expected));
}
public void Tokeniser_constructor()
{
    // The default constructor must produce a usable instance.
    var sut = new Tokeniser();
    Assert.IsNotNull(sut);
}
public void Tokeniser_Tokenise_hello_World()
{
    // String literals are captured without their surrounding quotes.
    var sut = new Tokeniser();
    const string expr = " \"hello\" + \" World!\" ";
    var expected = new List<Token>
    {
        new Token("Whitespace", " "),
        new Token("String", "hello"),
        new Token("Whitespace", " "),
        new Token("Operator", "+"),
        new Token("Whitespace", " "),
        new Token("String", " World!"),
        new Token("Whitespace", " ")
    };

    var actual = sut.Tokenise(expr);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(expr, actual, expected));
}
public void Tokeniser_Tokenise_identifiers_001()
{
    // Identifiers may contain digits and leading underscores.
    Tokeniser tokeniser = new Tokeniser();
    string expr = "ase sd345 _this0ne";
    // Fixed: the expected token for the second identifier previously read
    // "sd345a", which does not appear in the input and made the
    // token-by-token comparison fail.
    List<Token> expected = new List<Token>
    {
        new Token("Identifier", "ase"),
        new Token("Whitespace", " "),
        new Token("Identifier", "sd345"),
        new Token("Whitespace", " "),
        new Token("Identifier", "_this0ne")
    };

    IEnumerable<Token> tokens = tokeniser.Tokenise(expr);

    Assert.IsNotNull(tokens);
    Assert.IsTrue(AreTheSame(expr, tokens, expected));
}
public void Tokeniser_Tokenise_identifiers_002()
{
    // Identifiers may embed or end with digit runs.
    Tokeniser tokeniser = new Tokeniser();
    string expr = "as123 this345 true0986";
    // Fixed: the expected third identifier previously read "true0987",
    // which does not match the "true0986" in the input and made the
    // comparison fail.
    List<Token> expected = new List<Token>
    {
        new Token("Identifier", "as123"),
        new Token("Whitespace", " "),
        new Token("Identifier", "this345"),
        new Token("Whitespace", " "),
        new Token("Identifier", "true0986")
    };

    IEnumerable<Token> tokens = tokeniser.Tokenise(expr);

    Assert.IsNotNull(tokens);
    Assert.IsTrue(AreTheSame(expr, tokens, expected));
}
// Drives the tokeniser through the whole of ParseSample1, asserting the
// reported State, Value and Location after every Advance() call (plus
// TagStartLocation/TagEndLocation for tag-delimited segments).
// NOTE(review): the expected values assume CRLF line endings in the
// sample ("\r\n" appears in every multi-line Value) — confirm the
// fixture is not checked out with LF-only endings.
public void TokenTest() {
	string tf = "test.input";
	Tokeniser tk = new Tokeniser (tf, ParseSample1);
	//line 1: empty leading content, then a directive named "template"
	//with attribute "language" whose value is "C#v3.5"
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 1, 1), tk.Location);
	Assert.AreEqual (State.Content, tk.State);
	Assert.AreEqual ("", tk.Value);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (State.Directive, tk.State);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 1, 5), tk.Location);
	Assert.AreEqual (State.DirectiveName, tk.State);
	Assert.AreEqual ("template", tk.Value);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (State.Directive, tk.State);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 1, 14), tk.Location);
	Assert.AreEqual (State.DirectiveName, tk.State);
	Assert.AreEqual ("language", tk.Value);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (State.Directive, tk.State);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (State.DirectiveValue, tk.State);
	Assert.AreEqual (new Location (tf, 1, 23), tk.Location);
	Assert.AreEqual ("C#v3.5", tk.Value);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (State.Directive, tk.State);
	//line 2, 3: plain content spanning two lines, returned as one token
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 2, 1), tk.Location);
	Assert.AreEqual (State.Content, tk.State);
	Assert.AreEqual ("Line One\r\nLine Two\r\n", tk.Value);
	//line 4, 5, 6: a multi-line Block; tag start/end locations bracket it
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 4, 1), tk.TagStartLocation);
	Assert.AreEqual (new Location (tf, 4, 3), tk.Location);
	Assert.AreEqual (new Location (tf, 6, 3), tk.TagEndLocation);
	Assert.AreEqual (State.Block, tk.State);
	Assert.AreEqual ("\r\nfoo\r\n", tk.Value);
	//line 7: content followed by an inline Expression on the same line
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 7, 1), tk.Location);
	Assert.AreEqual (State.Content, tk.State);
	Assert.AreEqual ("Line Three ", tk.Value);
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 7, 12), tk.TagStartLocation);
	Assert.AreEqual (new Location (tf, 7, 15), tk.Location);
	Assert.AreEqual (new Location (tf, 7, 22), tk.TagEndLocation);
	Assert.AreEqual (State.Expression, tk.State);
	Assert.AreEqual (" bar ", tk.Value);
	//line 8: content resumes at the column where the expression tag ended
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 7, 22), tk.Location);
	Assert.AreEqual (State.Content, tk.State);
	Assert.AreEqual ("\r\nLine Four\r\n", tk.Value);
	//line 9, 10, 11: a Helper block; the escaped "\#>" stays literal in Value
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 9, 1), tk.TagStartLocation);
	Assert.AreEqual (new Location (tf, 9, 4), tk.Location);
	Assert.AreEqual (new Location (tf, 11, 3), tk.TagEndLocation);
	Assert.AreEqual (State.Helper, tk.State);
	Assert.AreEqual (" \r\nbaz \\#>\r\n", tk.Value);
	//line 12: trailing empty content token
	Assert.IsTrue (tk.Advance ());
	Assert.AreEqual (new Location (tf, 12, 1), tk.Location);
	Assert.AreEqual (State.Content, tk.State);
	Assert.AreEqual ("", tk.Value);
	//EOF: Advance() reports false and the state settles on EOF
	Assert.IsFalse (tk.Advance ());
	Assert.AreEqual (new Location (tf, 12, 1), tk.Location);
	Assert.AreEqual (State.EOF, tk.State);
}
// A single space tokenises to exactly one Whitespace token.
public void Tokeniser_Tokenise_one_space() {
    var sut = new Tokeniser();
    const string input = " ";
    var expectedTokens = new List<Token> { new Token("Whitespace", " ") };

    IEnumerable<Token> actual = sut.Tokenise(input);

    Assert.IsNotNull(actual);
    Assert.AreEqual<int>(1, actual.Count());
    Assert.IsTrue(AreTheSame(input, actual, expectedTokens));
}
// Sets up the similarity delegate and partitions both strings into token
// arrays, then normalises so that _leftTokens is never the longer side.
private void MyInit() {
    ISimilarity editdistance = new Leven();
    getSimilarity = new Similarity(editdistance.GetSimilarity);
    // Alternative metric, kept for reference:
    //ISimilarity lexical=new LexicalSimilarity() ;
    //getSimilarity=new Similarity(lexical.GetSimilarity) ;

    var tokeniser = new Tokeniser();
    _leftTokens = tokeniser.Partition(_lString);
    _rightTokens = tokeniser.Partition(_rString);

    // Keep the shorter token list on the left; swap the token arrays and
    // the source strings together so the pairs stay consistent.
    if (_leftTokens.Length > _rightTokens.Length) {
        string[] swapTokens = _leftTokens;
        _leftTokens = _rightTokens;
        _rightTokens = swapTokens;

        string swapText = _lString;
        _lString = _rString;
        _rString = swapText;
    }

    leftLen = _leftTokens.Length - 1;
    rightLen = _rightTokens.Length - 1;
    Initialize();
}
// " 3 + 4 " tokenises to alternating Whitespace, Integer and Operator
// tokens with every space preserved.
public void Tokeniser_Tokenise_simple_addition_with_whitespace() {
    var sut = new Tokeniser();
    const string input = " 3 + 4 ";
    var expectedTokens = new List<Token> {
        new Token("Whitespace", " "),
        new Token("Integer", "3"),
        new Token("Whitespace", " "),
        new Token("Operator", "+"),
        new Token("Whitespace", " "),
        new Token("Integer", "4"),
        new Token("Whitespace", " ")
    };

    IEnumerable<Token> actual = sut.Tokenise(input);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(input, actual, expectedTokens));
}
// Two consecutive spaces are expected to come back as two separate
// single-character Whitespace tokens, not one combined run.
public void Tokeniser_Tokenise_simple_two_spaces() {
    var sut = new Tokeniser();
    const string input = "  ";
    var expectedTokens = new List<Token> {
        new Token("Whitespace", " "),
        new Token("Whitespace", " ")
    };

    IEnumerable<Token> actual = sut.Tokenise(input);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(input, actual, expectedTokens));
}
// A comma-separated integer list with surrounding spaces produces
// Integer and Comma tokens, whitespace only at the edges.
public void Tokeniser_Tokenise_space_330_comma_90_comma_12() {
    var sut = new Tokeniser();
    const string input = " 330,90,12 ";
    var expectedTokens = new List<Token> {
        new Token("Whitespace", " "),
        new Token("Integer", "330"),
        new Token("Comma", ","),
        new Token("Integer", "90"),
        new Token("Comma", ","),
        new Token("Integer", "12"),
        new Token("Whitespace", " ")
    };

    IEnumerable<Token> actual = sut.Tokenise(input);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(input, actual, expectedTokens));
}
// Two space-separated integers; the leading zero in "04023" is kept
// verbatim in the Integer token.
public void Tokeniser_Tokenise_two_integers() {
    var sut = new Tokeniser();
    const string input = " 33090 04023 ";
    var expectedTokens = new List<Token> {
        new Token("Whitespace", " "),
        new Token("Integer", "33090"),
        new Token("Whitespace", " "),
        new Token("Integer", "04023"),
        new Token("Whitespace", " ")
    };

    IEnumerable<Token> actual = sut.Tokenise(input);

    Assert.IsNotNull(actual);
    Assert.IsTrue(AreTheSame(input, actual, expectedTokens));
}
// An empty input yields an empty — but non-null — token sequence.
public void Tokeniser_Tokenise_empty_string() {
    var sut = new Tokeniser();

    IEnumerable<Token> actual = sut.Tokenise("");

    Assert.IsNotNull(actual);
    Assert.AreEqual<int>(0, actual.Count());
}
/// <summary>
/// Parses the specified template text, dispatching each tokenised segment
/// to the matching Add* builder and collecting directives (recursing into
/// "include" directives via the OnInclude event).
/// </summary>
/// <param name="templateText">The template text.</param>
private void Parse(string templateText) {
	//var filePath = Path.GetFullPath(Path.Combine(templateCodePath, TemplateFileName));
	var tokeniser = new Tokeniser(TemplateFileName, templateText);
	AddCode(tokeniser.Location, "");
	// 'skip' is set when the inner directive loop has already consumed the
	// next token; it suppresses the outer Advance() for one iteration so
	// that token is not lost.
	bool skip = false;
	while ((skip || tokeniser.Advance()) && tokeniser.State != State.EOF) {
		skip = false;
		switch (tokeniser.State) {
			case State.Block:
				if (!String.IsNullOrEmpty(tokeniser.Value))
					AddDoTemplateCode(tokeniser.Location, tokeniser.Value);
				break;
			case State.Content:
				if (!String.IsNullOrEmpty(tokeniser.Value))
					AddContent(tokeniser.Location, tokeniser.Value);
				break;
			case State.Expression:
				if (!String.IsNullOrEmpty(tokeniser.Value))
					AddExpression(tokeniser.Location, tokeniser.Value);
				break;
			case State.Helper:
				// Helper blocks become members of the template class itself.
				_isTemplateClassCode = true;
				if (!String.IsNullOrEmpty(tokeniser.Value))
					AddDoTemplateClassCode(tokeniser.Location, tokeniser.Value);
				break;
			case State.Directive:
				Directive directive = null;
				string attName = null;
				// Consume name/value pairs until a state outside the
				// directive appears; that token is handed to the outer loop.
				while (!skip && tokeniser.Advance()) {
					switch (tokeniser.State) {
						case State.DirectiveName:
							// The first name is the directive itself; any
							// later DirectiveName is an attribute key.
							if (directive == null)
								directive = new Directive {Name = tokeniser.Value.ToLower()};
							else
								attName = tokeniser.Value;
							break;
						case State.DirectiveValue:
							// Pair the value with the pending attribute
							// name, if any, then clear it either way.
							if (attName != null && directive != null)
								directive.Attributes.Add(attName.ToLower(), tokeniser.Value);
							attName = null;
							break;
						case State.Directive:
							//if (directive != null)
							//    directive.EndLocation = tokeniser.TagEndLocation;
							break;
						default:
							// Not part of the directive: flag it so the
							// outer loop processes this token without
							// advancing past it.
							skip = true;
							break;
					}
				}
				if (directive != null) {
					if (directive.Name == "include") {
						// Includes are resolved by the host through
						// OnInclude, then parsed recursively in place.
						string includeFile = directive.Attributes["file"];
						if (OnInclude == null)
							throw new InvalidOperationException("Include file found. OnInclude event must be implemented");
						var includeArgs = new TemplateIncludeArgs() {IncludeName = includeFile};
						OnInclude(this, includeArgs);
						Parse(includeArgs.Text ?? "");
					}
					_directives.Add(directive);
				}
				break;
			default:
				throw new InvalidOperationException();
		}
	}
}