// Checks that the view model's SplitIntoGroups() chunks the selected document's
// text into groups of N words for N = 1..5. The sample text contains a line
// break ("\r\n") between "swear" and "I"; the expected groups show that words
// are grouped across it, and that the last group holds whatever words remain.
public async Task SplitIntoGroups()
{
    _viewModel.SelectedDocument = new OcrDocument("test")
    {
        OcrText = "I solemnly swear\r\nI am up to no good."
    };

    // One entry per group size: the NumberOfGroups value and the chunks expected for it.
    var scenarios = new[]
    {
        // 1 Word at a time
        new
        {
            WordsPerGroup = 1,
            Expected = new List<string> { "I", "solemnly", "swear", "I", "am", "up", "to", "no", "good." }
        },
        // 2 Words at a time
        new
        {
            WordsPerGroup = 2,
            Expected = new List<string> { "I solemnly", "swear I", "am up", "to no", "good." }
        },
        // 3 Words at a time
        new
        {
            WordsPerGroup = 3,
            Expected = new List<string> { "I solemnly swear", "I am up", "to no good." }
        },
        // 4 Words at a time
        new
        {
            WordsPerGroup = 4,
            Expected = new List<string> { "I solemnly swear I", "am up to no", "good." }
        },
        // 5 Words at a time
        new
        {
            WordsPerGroup = 5,
            Expected = new List<string> { "I solemnly swear I am", "up to no good." }
        }
    };

    foreach (var scenario in scenarios)
    {
        _viewModel.NumberOfGroups = scenario.WordsPerGroup;

        List<string> actual = await _viewModel.SplitIntoGroups();

        CollectionAssert.AreEqual(scenario.Expected, actual);
    }
}
/// <summary>
/// Reads text from a list of images and collects it into a single <see cref="OcrDocument"/>.
/// </summary>
/// <remarks>
/// Every image is processed on its own thread with its own <c>TesseractEngine</c>
/// instance. The method blocks until all threads have finished, then adds the
/// resulting pages to the document in the same order as <paramref name="images"/>.
/// </remarks>
/// <param name="name">Name of the OCR document.</param>
/// <param name="images">List of bitmaps to be read; one page is produced per bitmap.</param>
/// <param name="lang">Language of the text within the pictures, passed to the Tesseract engine.</param>
/// <returns>
/// The document containing one page per image, with page numbers starting at 1.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="images"/> is <c>null</c>.</exception>
public static OcrDocument ImageToText(string name, List<Bitmap> images, string lang = "deu")
{
    if (images == null)
    {
        throw new System.ArgumentNullException(nameof(images));
    }

    var ocrDocument = new OcrDocument(name);

    // Each worker writes only to its own slot, so no locking is needed and the
    // document's page collection is never touched from more than one thread.
    var pages = new OcrDocument.Page[images.Count];
    var threads = new List<Thread>(images.Count);

    // Iterate over ALL images (the previous "Count - 1" bound dropped the last one).
    for (int i = 0; i < images.Count; i++)
    {
        // Copy the loop variable: a lambda capturing "i" directly would observe
        // later values of "i" once the thread actually runs, reading the wrong
        // image and producing the wrong page number.
        int pageIndex = i;

        var thread = new Thread(() =>
        {
            using (var engine = new TesseractEngine(@"./tessdata", lang, EngineMode.Default))
            using (var img = PixConverter.ToPix(images[pageIndex]))
            using (var page = engine.Process(img))
            {
                pages[pageIndex] = new OcrDocument.Page()
                {
                    Number = pageIndex + 1,
                    Content = page.GetText()
                };
            }
        });

        thread.Start();
        threads.Add(thread);
    }

    // Block until every worker is done. Join waits without spinning and, unlike
    // polling ThreadState.Running, cannot return early while a worker is in
    // another state such as WaitSleepJoin.
    foreach (var thread in threads)
    {
        thread.Join();
    }

    // Add the pages on the calling thread, in image order.
    foreach (var page in pages)
    {
        ocrDocument.Pages.Add(page);
    }

    return ocrDocument;
}
// Checks that the view model's SplitIntoSentences() chunks the selected document's
// text into groups of N sentences for N = 1..3. The input spreads five sentences
// over three lines and ends in three dots ("..."); the expected output has the
// sentences joined by single spaces and ends in a single ellipsis character.
public async Task SplitIntoSentences()
{
    _viewModel.SelectedDocument = new OcrDocument("test")
    {
        OcrText = "Did you ever hear the tragedy of Darth Plagueis the Wise? " +
                  "\r\nI thought not. It's not a story the Jedi would tell you. It's a Sith legend. " +
                  "\r\nDarth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could use the Force to influence the midichlorians to create life..."
    };

    // One entry per chunk size: the NumberOfSentences value and the chunks expected for it.
    var scenarios = new[]
    {
        // 1 Sentence at a time
        new
        {
            SentencesPerChunk = 1,
            Expected = new[]
            {
                "Did you ever hear the tragedy of Darth Plagueis the Wise?",
                "I thought not.",
                "It's not a story the Jedi would tell you.",
                "It's a Sith legend.",
                "Darth Plagueis was a Dark " +
                "Lord of the Sith, so powerful and so wise he could use the Force to influence the midichlorians to create life…"
            }
        },
        // 2 Sentences at a time
        new
        {
            SentencesPerChunk = 2,
            Expected = new[]
            {
                "Did you ever hear the tragedy of Darth Plagueis the Wise? I thought not.",
                "It's not a story the Jedi would tell you. It's a Sith legend.",
                "Darth Plagueis was a Dark " +
                "Lord of the Sith, so powerful and so wise he could use the Force to influence the midichlorians to create life…"
            }
        },
        // 3 Sentences at a time
        new
        {
            SentencesPerChunk = 3,
            Expected = new[]
            {
                "Did you ever hear the tragedy of Darth Plagueis the Wise? I thought not. " +
                "It's not a story the Jedi would tell you.",
                "It's a Sith legend. Darth Plagueis was a Dark " +
                "Lord of the Sith, so powerful and so wise he could use the Force to influence the midichlorians to create life…"
            }
        }
    };

    foreach (var scenario in scenarios)
    {
        _viewModel.NumberOfSentences = scenario.SentencesPerChunk;

        List<string> actual = await _viewModel.SplitIntoSentences();

        CollectionAssert.AreEqual(scenario.Expected, actual);
    }
}
// Round-trip test for the JSON form of a document library.
//
// Builds three OcrDocument instances (doc1..doc3), serializes the list with
// JsonConvert.SerializeObject using Formatting.Indented, and asserts that the
// result equals the text of the fixture file TestFiles\library.json. It then
// deserializes that same file back into a List<OcrDocument> and asserts the
// result is not null and has the same number of entries as the in-memory list.
//
// NOTE(review): the long OcrText literals look like sample OCR output of GDB
// screenshots; they must stay character-for-character in sync with
// library.json, because the first assertion is an exact string comparison.
// NOTE(review): the deserialization half only checks the element count, not the
// contents of the individual documents.
public void LibrarySaveAndLoadTest() { OcrDocument doc1 = new OcrDocument("g") { FileName = "Hello!", IsBusy = false, IsEditingFileName = false, OcrText = "g" }; OcrDocument doc2 = new OcrDocument("C:\\Users\\dan\\Pictures\\GDB.PNG") { FileName = "GDB.PNG", OcrText = "00081 Oxbfffea68 --> 0x342\r\n\r\n00121 0xbfffea6c --> 0xbfffed24 --> 0xbfffef2b (\"/h0me/seed/Desktop/exploit\")\r\n00161 0xbfffea70 --> Oxb7fe3d39 (<check_match+9>: add ebx,0xlb2c7)\r\n00201 Oxbfffea74 --> Oxb7bf73d0 --> 0X94b90ca0\r\n\r\n00241 0xbfffea78 --> 0x53d\r\n\r\n00281 0xbfffea7c --> 0xb7ffd5b0 --> Oxb7bf3000 --> 0x464c457f\r\n\r\n[ ------------------------------------------------------------------------------ ]\r\n\r\nLegend: code, data, rodata, value\r\n\r\nBreakpoint 1, main (argc=0x1, argv=0xbfffed24) at exploit.c:25\r\n25 memset(&buffer, 0x90, 500);\r\n\r\ngdb-peda$ p &buffer\r\n\r\n$1 = (char (*)[500]) Oxbfffea78\r\n\r\ngdb-peda$ p $ebp\r\n\r\n$2 = (void *) 0xbfffec78\r\n\r\ngdb-peda$ p 0xbfffec78 .. 0xbfffea78\r\n\r\n$3 = OXZOO", IsBusy = false, IsEditingFileName = false }; OcrDocument doc3 = new OcrDocument("C:\\Users\\dan\\Pictures\\GDB2.PNG") { FileName = "GDB2.PNG", OcrText = "(gdb) info frame\r\nStack level. 
0, frame at 0xbfffeae0:\r\n\r\neip = 0x80484c1 in bof (stack.c:11); saved eip = 0x804852e\r\ncalled by frame at OxbfffedIO\r\n\r\nsource language c.\r\nArglist at Oxbfffead8, args:\r\n\r\nstr=0xbfffeaf8 \"1N300Ph//shh/bin\\211N343PS\\211.'Nj\"\r\n\r\nLocals at Oxbfffead8, Previous frame's sp is Oxbfffeae0\r\nSaved registers:\r\n\r\nebp at Oxbfffead8, eip at Oxbfffeadc", IsBusy = false, IsEditingFileName = false }; List <OcrDocument> testLibrary = new List <OcrDocument> { doc1, doc2, doc3 }; string serializedTestLibrary = JsonConvert.SerializeObject(testLibrary, Formatting.Indented); Assert.AreEqual(File.ReadAllText(@"TestFiles\library.json"), serializedTestLibrary); List <OcrDocument> deserializedLibrary = JsonConvert.DeserializeObject <List <OcrDocument> >(File.ReadAllText(@"TestFiles\library.json")); Assert.IsNotNull(deserializedLibrary); Assert.AreEqual(testLibrary.Count, deserializedLibrary.Count); }