/// <summary>
/// Imports an uploaded ZIP: creates a Project row from <paramref name="config"/>, then for every
/// file (except config.txt) calls the remote TAS REST service to break the text into sentences and
/// tokenize each sentence, builds an HTML span-per-token representation carrying character offsets,
/// and persists Example / ExampleCategory / Marked rows. Increments the UploadSuccessful /
/// UploadExists / UploadError counters as it goes.
/// </summary>
/// <param name="config">Parsed upload configuration (project name, paging, labeling scheme).</param>
/// <param name="fp">Files extracted from the uploaded ZIP.</param>
private async Task TokenizationAsync(FileHelper.Config config, List<FileHelper.FileProcess> fp)
{
    var responseString = "";
    int? ProjectID = null;
    int? CategoryForWords = null;
    int? TypeForWords = null;
    // Per-example (right sidebar) type/category pairs collected from the labeling scheme.
    var ExampleCategory = new List<ExampleCategory>();

    // Phase 1: create the Project row and resolve labeling-scheme types/categories.
    using (var db = new MLEEntities())
    {
        var project = new Project();
        project.Name = config.Project;
        project.PerPage = config.PerPage;
        project.Description = "Automatic upload (ZIP)";
        project.DateCreated = DateTime.Now;
        project.Start_Date = DateTime.Now;
        project.StatusId = 3; // NOTE(review): magic status id — presumably "in progress"; confirm against the Status table.
        project.IsActive = true;
        db.Project.Add(project);
        db.SaveChanges(); // save now so the generated project.Id is available below
        ProjectID = project.Id;
        // Word-level (left sidebar) type/category.
        ProcessCategory(db, config.LabelingScheme.ForWords, ref TypeForWords, ref CategoryForWords);
        foreach (var item in config.LabelingScheme.ForExample)
        {
            int? TypeForExample = 0, CategoryForExample = 0;
            ProcessCategory(db, item, ref TypeForExample, ref CategoryForExample);
            ExampleCategory.Add(new DB.ExampleCategory { TypeId = TypeForExample, CategoryId = CategoryForExample });
        }
    }

    // Phase 2: process every uploaded file except the configuration file,
    // ordered by shortest name first, then alphabetically.
    var counter = 0;
    var list = fp.Where(x => x.Name != "config.txt").OrderBy(x => x.Name.Length).ThenBy(c => c.Name).ToList();
    foreach (var e in list)
    {
        counter++; // 1-based ordinal of the example within the project
        var fname = e.Name;
        var file = fp.FirstOrDefault(x => x.Name == fname);
        //var path = Server.MapPath("/DataImport/export/" + e.FileName);
        if (file != null)
        {
            // Break the raw file text into sentences via the remote TAS REST service.
            // NOTE(review): hard-coded service endpoint — consider moving to configuration.
            var content = POSTOptions(file.Text, false);
            var response = await client.PostAsync("http://tasservice.s11.novenaweb.info:7777/rest/break-to-sentences", content);
            //var response = await client.PostAsync("http://tasservice.s11.novenaweb.info:7777/rest/lemmatizes-sentence", content);
            responseString = await response.Content.ReadAsStringAsync();
            if (responseString != "")
            {
                using (var db = new MLEEntities())
                {
                    // Skip files already imported into this project (same file name + project id).
                    var fileIsAlreadyInDB = db.Example.Where(x => x.FileName == fname && x.ProjectId == ProjectID).FirstOrDefault() != null ?
                        true : false;
                    if (!fileIsAlreadyInDB)
                    {
                        var er = JsonConvert.DeserializeObject<ExampleResponse>(responseString);
                        var example = new Example
                        {
                            FileName = fname,
                            //Content = t,
                            Description = "Automatic upload (ZIP)",
                            DateCreated = DateTime.Now,
                            StatusId = 3, // In progress
                            ProjectId = ProjectID,
                            OrdinalNumber = counter,
                            // Left sidebar
                            TypeId = TypeForWords,
                            CategoryId = CategoryForWords,
                        };
                        // NOTE(review): Attach immediately followed by Add is redundant — Add alone marks a new entity.
                        db.Example.Attach(example);
                        db.Example.Add(example);
                        db.SaveChanges(); // generates example.Id, used in the span ids below

                        // Right sidebar - Fill Example Categories
                        foreach (var item in ExampleCategory)
                        {
                            var ec = new ExampleCategory();
                            ec.ExampleId = example.Id;
                            ec.CategoryId = item.CategoryId;
                            ec.TypeId = item.TypeId;
                            db.ExampleCategory.Add(ec);
                            db.SaveChanges(); // NOTE(review): SaveChanges per row — could be batched once after the loop.
                        }

                        // Build the HTML content: one outer span per sentence, one inner span per
                        // token, each token span carrying start/end character offsets.
                        string t = "";
                        int startIndex = 0;
                        int endIndex = 0;
                        for (int i = 0; i < er.BreakToSentencesRESTResult.Count(); i++)
                        {
                            t += "<span id='Content_" + example.Id + "_" + (i + 1) + "'>";
                            var doc = new HtmlAgilityPack.HtmlDocument();
                            doc.LoadHtml(er.BreakToSentencesRESTResult[i]);
                            // NOTE(review): SelectNodes returns null when nothing matches — nodes.Count would then
                            // throw a NullReferenceException; confirm the service always emits <span> elements.
                            var nodes = doc.DocumentNode.SelectNodes("//span");
                            var c = POSTOptions(er.BreakToSentencesRESTResult[i], nodes.Count > 0 ?
                                true : false);
                            var r = await client.PostAsync("http://tasservice.s11.novenaweb.info:7777/rest/tokenize-sentence", c);
                            var rs = await r.Content.ReadAsStringAsync();
                            var sr = JsonConvert.DeserializeObject<SentenceResponse>(rs);
                            for (int j = 0; j < sr.TokenizeSentenceRESTResult.Count(); j++)
                            {
                                var exampleID = example.Id;
                                var sentenceID = i + 1; // 1-based sentence number
                                var entityID = j + 1;   // 1-based token number within the sentence
                                var spaceBefore = "<span> </span>";
                                var spaceAfter = "";
                                // Matches any character that is neither a word character nor whitespace (punctuation).
                                // NOTE(review): the Regex is re-created on every token — could be hoisted to a static readonly field.
                                var regex = new Regex(@"[^\w\s]");
                                int addon = 0;
                                if (j == 0) { spaceBefore = ""; addon = 1; }
                                // Token consisting purely of punctuation gets no separating space before it.
                                if (regex.Matches(sr.TokenizeSentenceRESTResult[j]).Count == sr.TokenizeSentenceRESTResult[j].Length) { spaceBefore = ""; }
                                if (j == sr.TokenizeSentenceRESTResult.Count() - 1) { spaceAfter = "<span> </span>"; }
                                endIndex = startIndex + sr.TokenizeSentenceRESTResult[j].Length;
                                var current_text = sr.TokenizeSentenceRESTResult[j];
                                var htmldoc = new HtmlAgilityPack.HtmlDocument();
                                htmldoc.LoadHtml(sr.TokenizeSentenceRESTResult[j]);
                                var node = htmldoc.DocumentNode.SelectSingleNode("//span");
                                if (node != null)
                                {
                                    // Token came back pre-labeled (wrapped in a classed <span>): unwrap it and,
                                    // if the class matches a known subcategory, persist the label as a Marked row.
                                    current_text = node.InnerHtml;
                                    var _class = node.GetClasses().First();
                                    //CategoryForWords
                                    var sc = db.Subcategory.FirstOrDefault(x => x.CategoryId == CategoryForWords && x.Name == _class);
                                    if (sc != null)
                                    {
                                        var m = new Marked() { ExampleId = exampleID, SentenceId = sentenceID, EntityId = entityID, SubcategoryId = sc.Id, UserId = null, Text = "" };
                                        db.Marked.Add(m);
                                        db.SaveChanges();
                                    }
                                }
                                t += spaceBefore + "<span id='Content_" + exampleID + "_" + sentenceID + "_" + entityID + "' data-startIndex='" + startIndex + "' data-endIndex='" + endIndex + "'>" + current_text + "</span>" + spaceAfter;
                                // Account for the space spans (and the first-token addon) when advancing the offset.
                                var sb = spaceBefore == "<span> </span>" ? 1 : 0;
                                var sa = spaceAfter == "<span> </span>" ? 1 : 0;
                                // next start index
                                startIndex = endIndex + sb + sa + addon;
                            }
                            t += "</span>";
                        }
                        // NOTE(review): example is already tracked by this context, so this Attach is a no-op.
                        db.Example.Attach(example);
                        example.Content = t;
                        db.SaveChanges();
                        UploadSuccessful++;
                    }
                    else
                    {
                        UploadExists++;
                    }
                }
            }
            else
            {
                UploadError++;
            }
        }
        else
        {
            UploadError++;
        }
    }
}
/// <summary>
/// Entry point for the V2 sentence-breaking upload flow; delegates the whole
/// import to <see cref="TokenizationAsync"/>.
/// </summary>
/// <param name="conf">Upload configuration passed through to the import pipeline.</param>
/// <param name="fp">Files extracted from the uploaded ZIP, passed through unchanged.</param>
private async Task BreakToSentenceV2Async(FileHelper.Config conf, List<FileHelper.FileProcess> fp)
{
    // Was `async void`: any exception thrown by TokenizationAsync would be
    // unobservable (and can crash the process), and callers could not await
    // completion. Returning Task fixes both; existing fire-and-forget call
    // sites still compile unchanged.
    await TokenizationAsync(conf, fp);
}