/// <summary>
/// Highlights the tokens in the given lines in the given document.
/// </summary>
/// <param name="document">The document being highlighted.</param>
/// <param name="lines">The lines to highlight.</param>
public void MarkTokens(IDocument document, List<LineSegment> lines)
{
    // Bucket every token the lexer produces by zero-based line index.
    Dictionary<int, List<IToken>> dicTokens = new Dictionary<int, List<IToken>>();
    AntlrLexerBase lex = LexerFactory.CreateLexer(document.TextContent, new ErrorTracker());

    // The lexer signals end-of-input with a token type of -1 (or lower).
    for (IToken tknToken = lex.NextToken(); tknToken.Type > -1; tknToken = lex.NextToken())
    {
        // IToken.Line is 1-based, the document's segments are 0-based.
        // Single TryGetValue instead of ContainsKey + indexer (avoids double lookup).
        List<IToken> lineTokens;
        if (!dicTokens.TryGetValue(tknToken.Line - 1, out lineTokens))
        {
            lineTokens = new List<IToken>();
            dicTokens[tknToken.Line - 1] = lineTokens;
        }
        lineTokens.Add(tknToken);
    }

    foreach (LineSegment lsgLine in lines)
    {
        // Reset the line's word list before re-highlighting.
        lsgLine.Words = new List<TextWord>();
        List<IToken> lineTokens;
        if (dicTokens.TryGetValue(lsgLine.LineNumber, out lineTokens))
        {
            foreach (IToken tknToken in lineTokens)
            {
                HandleToken(document, tknToken);
            }
        }
        // Repaint the line even when it received no tokens, so stale
        // highlighting is cleared.
        document.RequestUpdate(new TextAreaUpdate(TextAreaUpdateType.SingleLine, lsgLine.LineNumber));
    }
}
public static void Run()
{
    // Build a lexer whose token values are object-typed so it can mix
    // ints and strings in the same token stream.
    var lexer = LexerFactory<object>.Configure(configurator =>
    {
        // Digit runs become integers.
        configurator.Token(@"\d+", f => int.Parse(f));
        // Letter runs pass through as strings.
        configurator.Token(@"[a-zA-Z]+", f => f);
        // Whitespace carries no meaning here.
        configurator.Ignore(@"\s+");
    });

    const string input = "10 piglets 5 boars 1 big sow";
    foreach (var token in lexer.Tokenize(input))
    {
        // Pick the message by the runtime type of the token value.
        string format = token.Item2 is int
            ? "Lexer found an integer {0}"
            : "Lexer found a string {0}";
        Console.WriteLine(format, token.Item2);
    }
}
/// <summary>
/// Removes all the unnecessary characters in files with the correct extensions
/// and records a FileEntry with size/line/comment metrics for each processed file.
/// </summary>
/// <param name="correctFiles">A collection containing file names with the correct extensions.</param>
/// <param name="clearedFilesPath">The path where the files with the correct extensions are located.</param>
/// <param name="taskFiles">A collection containing files of a given task.</param>
/// <param name="task">The task for which the files were loaded.</param>
/// <param name="author">The author of the correct files that are being processed.</param>
public void ProcessCorrectFiles(List<string> correctFiles, string clearedFilesPath, List<FileEntry> taskFiles, Model.Models.Task task, string author)
{
    foreach (var cf in correctFiles)
    {
        // NOTE(review): assumes clearedFilesPath ends with a directory separator;
        // Path.Combine would be safer, but callers may rely on plain concatenation.
        string path = clearedFilesPath + Path.GetFileName(cf);
        try
        {
            File.Copy(cf, path);
        }
        catch (Exception)
        {
            // Copy failed (most likely the target already exists): retry under a
            // "(1)" suffix. NOTE(review): the metrics below still read from `path`,
            // not the suffixed copy — preserved from the original; verify intent.
            File.Copy(cf, path + "(1)");
        }

        // Snapshot of the file before any preprocessing.
        string fileContent = File.ReadAllText(path);
        string hashBeforePreprocessing = CalculateHash(fileContent);
        long fileSize = new FileInfo(path).Length;
        int noOfLines = File.ReadLines(cf).Count();
        string fileExtension = DetermineFileExtension(Path.GetExtension(Path.GetFileName(cf)));
        // Count empty lines for each newline convention (LF, CR, CRLF).
        int noOfEmptyLines = Regex.Matches(fileContent, "\n\n").Count +
                             Regex.Matches(fileContent, "\r\r").Count +
                             Regex.Matches(fileContent, "\r\n\r\n").Count;

        // RemoveComments updates these instance counters as a side effect.
        noOfLineComments = 0;
        noOfBlockComments = 0;
        fileContent = RemoveComments(path, fileContent);
        fileContent = RemoveUnnecessaryCharsFromFile(fileContent);

        // Tokenize with the analyzer matching the file's language/extension.
        ILexicalAnalyzer lexAnalyzer = LexerFactory.GetLexicalAnalyzer(fileExtension);
        string fileContentAfterLexer = lexAnalyzer.Tokenize(fileContent);
        fileContent = PrepareInputForAnalysis(fileContent);
        fileContentAfterLexer = PrepareInputForAnalysis(fileContentAfterLexer);
        string hashAfterPreprocessing = CalculateHash(fileContent);

        taskFiles.Add(new FileEntry()
        {
            FileName = Path.GetFileName(cf),
            FileAuthor = author,
            FileCheckSumBeforePreprocessing = hashBeforePreprocessing,
            FileCheckSumAfterPreprocessing = hashAfterPreprocessing,
            FileSize = fileSize,
            FileNoOfLines = noOfLines,
            FileNoOfEmptyLines = noOfEmptyLines,
            FileNoOfLineComments = noOfLineComments,
            FileNoOfBlockComments = noOfBlockComments,
            FileContent = fileContent,
            FileContentAfterLexAnalysis = fileContentAfterLexer,
            FileExtension = fileExtension,
            TaskId = task.Id
        });
    }
}
/// <summary>
/// Builds one lexer per available configuration option, each recognizing
/// <paramref name="regEx"/> with an identity token action.
/// </summary>
private IEnumerable<ILexer<string>> CreateLexers(string regEx)
{
    return GetAllConfigurationOptions()
        .Select(option => LexerFactory<string>.Configure(configurator =>
        {
            // Apply the configuration variant first, then the shared token.
            option(configurator);
            configurator.Token(regEx, match => match);
        }));
}
public void TestUnicodeDfa()
{
    // Two keywords in different scripts, lexed by the DFA runtime, to
    // verify the DFA handles non-ASCII input.
    var lexer = LexerFactory<string>.Configure(cfg =>
    {
        cfg.Token("خنزير صغير", t => "arabic");
        cfg.Token("nasse", t => "swedish");
        cfg.Ignore(" ");
        cfg.Runtime = LexerRuntime.Dfa;
    });

    var instance = lexer.Begin("خنزير صغير" + " nasse");

    Assert.AreEqual("arabic", instance.Next().Item2);
    Assert.AreEqual("swedish", instance.Next().Item2);
}
public void TestLexerConstruction()
{
    ILexer<string> lexer = LexerFactory<string>.Configure(cfg =>
    {
        cfg.Token("a+", t => "A+");
        cfg.Token("abb", t => "ABB");
        cfg.Token("a*b+", t => "A*B+");
    });

    var instance = lexer.Begin(new StringReader("abb"));
    Tuple<int, string> result = instance.Next();

    // "abb" must resolve to the second token (number 1), not the
    // overlapping "a+" / "a*b+" patterns.
    Assert.AreEqual(1, result.Item1);
    Assert.AreEqual("ABB", result.Item2);
}
public void TestCommentRegex()
{
    var lexer = LexerFactory<string>.Configure(cfg =>
    {
        // A comment runs from ';' up to and including the newline.
        cfg.Token(@";[^\n]*\n", text => text);
        cfg.Token("nextLine", text => text + "%");
    });

    // A comment line followed by "nextLine" (CRLF line break, as the
    // first assertion's expected value shows).
    var instance = lexer.Begin("; this is a comment\r\nnextLine");

    Assert.AreEqual("; this is a comment\r\n", instance.Next().Item2);
    Assert.AreEqual("nextLine%", instance.Next().Item2);
}
public void TestLexDigits()
{
    ILexer<int> lexer = LexerFactory<int>.Configure(cfg =>
    {
        // Digit runs are parsed straight to int; spaces are skipped.
        cfg.Token("\\d+", int.Parse);
        cfg.Ignore(" *");
    });

    var instance = lexer.Begin(new StringReader(" 123 42"));

    Assert.AreEqual(123, instance.Next().Item2);
    Assert.AreEqual(42, instance.Next().Item2);
}
public void TestMinimizationWontMessUpLexing()
{
    var lexer = LexerFactory<string>.Configure(cfg =>
    {
        cfg.MinimizeDfa = true;
        cfg.Token("aa", t => "aa");
        cfg.Token("a+", t => "a+");
        cfg.Ignore(" ");
    });

    var instance = lexer.Begin("aa aaaaaaa aa aaaa aa");

    // Runs of exactly two 'a's match "aa"; longer runs fall through to "a+".
    foreach (var expected in new[] { "aa", "a+", "aa", "a+", "aa" })
    {
        Assert.AreEqual(expected, instance.Next().Item2);
    }
}
/// <summary>
/// Regression guard: configuring a tabular-runtime lexer from patterns with
/// very large character classes must complete without hanging. No timing is
/// asserted — the dead benchmark scaffolding that used to surround this has
/// been removed.
/// </summary>
public void TestPerformanceWhenHandlingVeryLargeCharRanges()
{
    var lexer = LexerFactory<int>.Configure(configurator =>
    {
        configurator.Runtime = LexerRuntime.Tabular;
        // Null token actions: only DFA/table construction is exercised here.
        configurator.Token("\\w[0-9]", null);
        configurator.Token("\\d\\D\\W", null);
        configurator.Token("abcdefghijklmnopqrstuvxyz", null);
        configurator.Token("01234567890&%#", null);
    });
}
public void TestGetsEndOfInputTokenIfIgnoredStuffAtEnd()
{
    ILexer<string> lexer = LexerFactory<string>.Configure(cfg =>
    {
        cfg.Token("a+", t => t);
        cfg.Ignore("b+");
        cfg.EndOfInputTokenNumber = -1;
    });

    var instance = lexer.Begin("bbbbbbaaabbbaaaaabbbb");

    var token = instance.Next();
    Assert.AreEqual(0, token.Item1);
    Assert.AreEqual("aaa", token.Item2);

    token = instance.Next();
    Assert.AreEqual(0, token.Item1);
    Assert.AreEqual("aaaaa", token.Item2);

    // Even though the input ends in ignored 'b's, the lexer must still
    // emit the configured end-of-input token with a null value.
    token = instance.Next();
    Assert.AreEqual(-1, token.Item1);
    Assert.AreEqual(null, token.Item2);
}
public static void Run()
{
    // Token actions mutate this position as movement words are lexed.
    int positionX = 0;
    int positionY = 0;
    var ticks = System.DateTime.Now.Ticks;

    var lexer = LexerFactory<string>.Configure(configurator =>
    {
        configurator.Token(@"(up|north)", s => { positionY--; return "Moved north"; });
        configurator.Token(@"(down|south)", s => { positionY++; return "Moved south"; });
        configurator.Token(@"(right|east)", s => { positionX++; return "Moved east"; });
        configurator.Token(@"(left|west)", s => { positionX--; return "Moved west"; });
        configurator.Ignore(@"\s+");
    });

    foreach (var token in lexer.Tokenize("up down left right right north west left north up"))
    {
        Console.WriteLine("{0} Current position is {1},{2}", token.Item2, positionX, positionY);
    }

    // Crude elapsed-ticks printout for the whole run.
    Console.WriteLine(System.DateTime.Now.Ticks - ticks);
}
/// <summary>
/// Lexes a large Lorem-Ipsum text and verifies the expected word, punctuation
/// and total token counts. (Removed a leftover debug Console.WriteLine.)
/// </summary>
public void TestLexLargeText()
{
    const string text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy " +
        "nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad " +
        "minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip " +
        "ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit " +
        "esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et " +
        "accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue " +
        "duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend " +
        "option congue nihil imperdiet doming id quod mazim placerat facer possim assum. " +
        "Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum " +
        "claritatem. Investigationes demonstraverunt lectores legere me lius quod ii " +
        "legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem " +
        "consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, " +
        "anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. " +
        "Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum.";

    int numWords = 0;
    int numPunctuation = 0;

    // Token actions count matches as a side effect of lexing.
    var lexer = LexerFactory<int>.Configure(c =>
    {
        c.Token("\\w+", s => ++numWords);
        c.Token("[.,]", s => ++numPunctuation);
        c.Ignore("\\s+");
    });

    int numTokens = 0;
    foreach (var token in lexer.Tokenize(text))
    {
        numTokens++;
    }

    Assert.AreEqual(172, numWords);
    Assert.AreEqual(18, numPunctuation);
    Assert.AreEqual(190, numTokens);
}
public void TestLexErrorOnThirdLine()
{
    ILexer<string> lexer = LexerFactory<string>.Configure(cfg =>
    {
        cfg.Token("a+", t => t);
        cfg.Ignore("( |\\n)+");
    });

    var instance = lexer.Begin("aaa aa \n aaa aa\n error \n aaaa");

    try
    {
        // Pull more tokens than the input can supply; the unlexable
        // "error" word must throw before we get through the loop.
        for (int i = 0; i < 10; ++i)
        {
            instance.Next();
        }
        Assert.Fail();
    }
    catch (LexerException e)
    {
        // The bad text sits on line 3 (1-based).
        Assert.AreEqual(3, e.LineNumber);
        Assert.AreEqual(" ", e.LineContents);
    }
}
// Shared test setup: builds the factory under test with mocked loggers.
// NOTE(review): projectLogger/mapLogger appear to be mock wrappers
// (e.g. Moq) — .Object unwraps the mocked instance; confirm against the
// field declarations elsewhere in this class.
public LexerFactoryTests()
{
    factory = new LexerFactory(projectLogger.Object, mapLogger.Object);
}
/// <summary>
/// Creates the default lexer for this grammar. Used when the caller has not
/// supplied a custom lexer: the grammar itself drives the configuration.
/// </summary>
public ILexer<T> CreateLexer()
{
    return LexerFactory<T>.ConfigureFromGrammar(this, LexerSettings);
}
/// <summary>
/// Composes a new document parser from a freshly created lexer and the
/// shared document manager.
/// </summary>
public IDocumentParser Create()
{
    return new DocumentParser(LexerFactory.Create(), DocumentManager);
}