/// <summary>
/// Builds the HTML fragment for a poem from its verses.
/// </summary>
/// <remarks>
/// Every verse is rendered according to its <see cref="VersePosition"/>:
/// <list type="bullet">
/// <item><description>CenteredVerse1 opens a <c>div.b2</c>; the div is left open only when the very next verse is CenteredVerse2, which then closes it.</description></item>
/// <item><description>Right opens a <c>div.b</c> holding a <c>div.m1</c>; Left appends the <c>div.m2</c> and closes the outer div.</description></item>
/// <item><description>Comment is rendered as a standalone <c>div.c</c>.</description></item>
/// <item><description>Paragraph (<c>div.n</c>) and Single (<c>div.l</c>) are rendered as one div, or as one div per line when the lines are long on average.</description></item>
/// </list>
/// CenteredVerse1, Right, Paragraph and Single verses each advance the couplet counter that is written into the <c>id="bnN"</c> attribute.
/// Verse text is inserted as-is, without HTML encoding.
/// </remarks>
/// <param name="verses">verses of a single poem, in display order</param>
/// <returns>the generated HTML with leading/trailing white space trimmed</returns>
private static string PrepareHtmlText(List<GanjoorVerse> verses)
{
    // A StringBuilder avoids re-copying the whole fragment for every verse.
    // Fully qualified so this method does not depend on a using directive for System.Text.
    var html = new System.Text.StringBuilder();
    int coupletIndex = 0;

    for (int vIndex = 0; vIndex < verses.Count; vIndex++)
    {
        GanjoorVerse v = verses[vIndex];

        // Interpolating a null string yields "", so normalising here keeps the output of
        // every branch unchanged while making the Split call below safe for null text.
        string text = v.Text ?? "";

        switch (v.VersePosition)
        {
            case VersePosition.CenteredVerse1:
                {
                    coupletIndex++;
                    bool followedBySecondHalf =
                        (vIndex + 1) < verses.Count
                        && verses[vIndex + 1].VersePosition == VersePosition.CenteredVerse2;

                    // When a CenteredVerse2 follows, it is the one that emits the closing </div>.
                    string closing = followedBySecondHalf ? "" : "</div>";
                    html.AppendLine($"<div class=\"b2\" id=\"bn{coupletIndex}\"><p>{text}</p>{closing}");
                    break;
                }

            case VersePosition.CenteredVerse2:
                html.AppendLine($"<p>{text}</p></div>");
                break;

            case VersePosition.Right:
                coupletIndex++;
                html.AppendLine($"<div class=\"b\" id=\"bn{coupletIndex}\"><div class=\"m1\"><p>{text}</p></div>");
                break;

            case VersePosition.Left:
                html.AppendLine($"<div class=\"m2\"><p>{text}</p></div></div>");
                break;

            case VersePosition.Comment:
                html.AppendLine($"<div class=\"c\"><p>{text}</p></div>");
                break;

            case VersePosition.Paragraph:
            case VersePosition.Single:
                {
                    coupletIndex++;
                    string cssClass = v.VersePosition == VersePosition.Paragraph ? "n" : "l";
                    string[] lines = text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                    if (lines.Length == 0)
                    {
                        // Either no text at all (emit a placeholder paragraph) or text made up
                        // solely of line breaks, which is passed through untouched.
                        // NOTE(review): the placeholder is a single white-space character; if a
                        // visible blank line is intended it may need to be a non-breaking space - confirm.
                        string body = text.Length == 0 ? " " : text;
                        html.AppendLine($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{body}</p></div>");
                    }
                    else if (text.Length / lines.Length < 150)
                    {
                        // Short lines on average: keep everything in one paragraph.
                        // Only CR+LF pairs are turned into spaces; lone CR or LF characters stay in the text.
                        string joined = text.Replace("\r\n", " ");
                        html.AppendLine($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{joined}</p></div>");
                    }
                    else
                    {
                        // Long lines: one div per non-empty line, all sharing the same couplet id.
                        foreach (string line in lines)
                        {
                            html.AppendLine($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{line}</p></div>");
                        }
                    }
                    break;
                }

            // Any other position value produces no output.
        }
    }

    return html.ToString().Trim();
}
/// <summary>
/// Parses a poem's HTML text back into an ordered list of verses.
/// </summary>
/// <remarks>
/// The method works in three stages:
/// <list type="number">
/// <item><description>Inline markup is unwrapped or removed (links, acronyms, superscripts, <c>id="bn..."</c> attributes, width/colour styles, emphasis tags, spacers, line breaks).</description></item>
/// <item><description>Every structural opening tag is replaced by a one-letter marker (<c>a</c>, <c>b</c>, <c>m</c>, <c>n</c>, <c>s</c>, <c>l</c>, <c>c</c>, <c>p</c>) and closing tags are dropped.</description></item>
/// <item><description>The remaining text is cut at the markers; each piece becomes one verse.</description></item>
/// </list>
/// Because the markers are ordinary Latin letters, any of those letters that occurs in the verse text itself is
/// also treated as a verse boundary. Two Latin phrases are shielded from this by swapping them with placeholder
/// strings before parsing and restoring them afterwards.
/// </remarks>
/// <param name="poemId">value stored in <c>PoemId</c> of every produced verse</param>
/// <param name="poemtext">poem HTML</param>
/// <returns>the verses in document order, with <c>VOrder</c> starting at 1</returns>
/// <exception cref="Exception">
/// thrown when an inline element is not closed, when any '&lt;' or '&gt;' survives the clean-up,
/// or when the marker sequence "mp" remains after the markers have been combined
/// </exception>
private List<GanjoorVerse> _extractVersesFromPoemHtmlText(int poemId, string poemtext)
{
    // Placeholders that protect Latin phrases from being split at marker letters.
    const string adaptationPlaceholder = "یییییییییییییییییییی";
    const string empiriquePlaceholder = "ببببببببب";

    List<GanjoorVerse> verses = new List<GanjoorVerse>();

    // ---- stage 1: inline markup ----
    poemtext = _unwrapHtmlElements(poemtext, "<a href", "</a>", null);
    // Some input closes an acronym with "<acronym>" instead of "</acronym>"; accept that as a fallback.
    poemtext = _unwrapHtmlElements(poemtext, "<acronym", "</acronym>", "<acronym>");
    poemtext = _removeSupElements(poemtext);
    poemtext = _removeBnIdAttributes(poemtext);

    poemtext = poemtext.Replace("Adaptation du milieu", adaptationPlaceholder);
    poemtext = poemtext.Replace("Empirique", empiriquePlaceholder);
    poemtext = poemtext.Replace(" >", ">");

    // NOTE(review): in the reviewed text the literal removed between the two "spacer" replacements appeared
    // as a plain space. Removing every space would also destroy "<div class=...", so none of the structural
    // replacements below could match. It is written here as the two non-breaking-space forms - confirm.
    poemtext = poemtext
        .Replace("<div class=\"b\" style=\"width:750px\">", "<div class=\"b\">")
        .Replace("<div class=\"b\" style=\"width:660px\">", "<div class=\"b\">")
        .Replace("<div class=\"b\" style=\"width:680px\">", "<div class=\"b\">")
        .Replace("<div class=\"b\" style=\"width:650px\">", "<div class=\"b\">")
        .Replace("<div class=\"b\" style=\"width:690px\">", "<div class=\"b\">")
        .Replace("<p style=\"color:#911\">", "<p>")
        .Replace("<p style=\"color:#191\">", "<p>")
        .Replace("<div class=\"spacer\">", "")
        .Replace("&nbsp;", "")
        .Replace("\u00A0", "")
        .Replace("<div class=\"spacer\" />", "")
        .Replace("<div class=\"b\" style=\"width:700px\">", "<div class=\"b\">");

    poemtext = poemtext.Replace("<em>", "").Replace("</em>", "").Replace("<small>", "").Replace("</small>", "");
    poemtext = poemtext.Replace("<b>", "").Replace("</b>", "").Replace("<strong>", "").Replace("</strong>", "");
    poemtext = poemtext.Replace("<p><br style=\"clear:both;\"/></p>", "").Replace("<br style=\"clear:both;\"/>", "");

    // All line breaks go, which also covers a leading CR+LF.
    poemtext = poemtext.Replace("\r", "").Replace("\n", "");

    // ---- stage 2: structural tags -> one-letter markers ----
    poemtext = poemtext.Replace("</div>", "").Replace("</p>", "");
    poemtext = poemtext.Replace("<div class=\"b2\">", "a");
    poemtext = poemtext.Replace("<div class=\"b\">", "b");
    poemtext = poemtext.Replace("<div class=\"m1\">", "m");
    poemtext = poemtext.Replace("<div class=\"m2\">", "n");
    poemtext = poemtext.Replace("<div class=\"n\"><p>", "s");
    poemtext = poemtext.Replace("<div class=\"n\">", "s");
    poemtext = poemtext.Replace("<div class=\"l\"><p>", "l");
    poemtext = poemtext.Replace("<div class=\"l\">", "l");
    poemtext = poemtext.Replace("<div class=\"c\"><p>", "c");
    poemtext = poemtext.Replace("<div class=\"c\">", "c");
    poemtext = poemtext.Replace("<p>", "p");
    // Collapse container + paragraph markers into the single marker of the verse they introduce.
    poemtext = poemtext.Replace("bmp", "b");
    poemtext = poemtext.Replace("np", "n");
    poemtext = poemtext.Replace("ap", "a");
    poemtext = poemtext.Replace("\"", "").Replace("'", "");

    if (poemtext.IndexOfAny(new char[] { '<', '>' }) != -1)
    {
        throw new Exception($"Invalid Characteres: {poemtext}");
    }

    // An "mp" left over after the "bmp" collapse is reported as an error.
    if (poemtext.IndexOf("mp", StringComparison.Ordinal) != -1)
    {
        throw new Exception($"مصرع اول بدون مصرع دوم: {poemtext}");
    }

    // ---- stage 3: cut the text at the markers ----
    if (poemtext.Length > 0)
    {
        char[] markers = { 'a', 'b', 'm', 'n', 'p', 's', 'l', 'c' };
        int idx = poemtext.IndexOfAny(markers);
        bool preWasBand = false; // true right after an 'a' marker (CenteredVerse1)

        while (idx != -1)
        {
            GanjoorVerse verse = new GanjoorVerse();
            verse.PoemId = poemId;
            verse.VOrder = verses.Count + 1;

            switch (poemtext[idx])
            {
                case 'p':
                    // A bare paragraph directly after a centered verse is its second half.
                    verse.VersePosition = preWasBand ? VersePosition.CenteredVerse2 : VersePosition.Paragraph;
                    preWasBand = false;
                    break;
                case 'b':
                    verse.VersePosition = VersePosition.Right;
                    preWasBand = false;
                    break;
                case 'n':
                    verse.VersePosition = VersePosition.Left;
                    preWasBand = false;
                    break;
                case 'a':
                    verse.VersePosition = VersePosition.CenteredVerse1;
                    preWasBand = true;
                    break;
                case 's':
                    verse.VersePosition = VersePosition.Paragraph;
                    preWasBand = false;
                    break;
                case 'l':
                    verse.VersePosition = VersePosition.Single;
                    preWasBand = false;
                    break;
                case 'c':
                    verse.VersePosition = VersePosition.Comment;
                    preWasBand = false;
                    break;
                // 'm' has no case: such a verse keeps the default VersePosition and preWasBand is left as it was.
            }

            int nextIdx = poemtext.IndexOfAny(markers, idx + 1);
            string rawText = nextIdx == -1
                ? poemtext.Substring(idx + 1)
                : poemtext.Substring(idx + 1, nextIdx - idx - 1);

            verse.Text = rawText
                .Replace(adaptationPlaceholder, "Adaptation du milieu")
                .Replace(empiriquePlaceholder, "Empirique")
                .Trim();

            verses.Add(verse);
            idx = nextIdx;
        }
    }

    return verses;
}

/// <summary>
/// Replaces every element whose opening tag starts with <paramref name="openingPrefix"/> by its inner content.
/// </summary>
/// <param name="html">text to process</param>
/// <param name="openingPrefix">start of the opening tag, e.g. "&lt;a href"</param>
/// <param name="closingTag">regular closing tag</param>
/// <param name="fallbackClosingTag">alternative closing tag tried when the regular one is absent, or null</param>
/// <returns>the text with the matching elements unwrapped</returns>
/// <exception cref="Exception">thrown when an opening tag has no '&gt;' or no closing tag after it</exception>
private static string _unwrapHtmlElements(string html, string openingPrefix, string closingTag, string fallbackClosingTag)
{
    int start = html.IndexOf(openingPrefix, StringComparison.Ordinal);
    while (start != -1)
    {
        int tagEnd = html.IndexOf('>', start);
        if (tagEnd == -1)
        {
            throw new Exception($"Unterminated {openingPrefix} tag: {html}");
        }

        // The closing tag is searched after the opening tag, so an unrelated earlier closing tag
        // cannot produce a negative length.
        int contentStart = tagEnd + 1;
        string closer = closingTag;
        int closeIdx = html.IndexOf(closer, contentStart, StringComparison.Ordinal);
        if (closeIdx == -1 && fallbackClosingTag != null)
        {
            closer = fallbackClosingTag;
            closeIdx = html.IndexOf(closer, contentStart, StringComparison.Ordinal);
        }
        if (closeIdx == -1)
        {
            throw new Exception($"Unclosed {openingPrefix} element: {html}");
        }

        // Skip exactly the length of whichever closing tag matched.
        html = html.Substring(0, start)
             + html.Substring(contentStart, closeIdx - contentStart)
             + html.Substring(closeIdx + closer.Length);

        start = html.IndexOf(openingPrefix, StringComparison.Ordinal);
    }
    return html;
}

/// <summary>
/// Removes every <c>&lt;sup&gt;...&lt;/sup&gt;</c> element together with its content.
/// </summary>
/// <param name="html">text to process</param>
/// <returns>the text without superscript elements</returns>
/// <exception cref="Exception">thrown when a <c>&lt;sup&gt;</c> has no closing tag after it</exception>
private static string _removeSupElements(string html)
{
    const string closingTag = "</sup>";
    int start = html.IndexOf("<sup>", StringComparison.Ordinal);
    while (start != -1)
    {
        int closeIdx = html.IndexOf(closingTag, start, StringComparison.Ordinal);
        if (closeIdx == -1)
        {
            // Fail instead of re-appending the tail of the text on every pass.
            throw new Exception($"poemtext.IndexOf(\"<sup>\": {html}");
        }

        html = html.Substring(0, start) + html.Substring(closeIdx + closingTag.Length);

        // NOTE(review): in the reviewed text this replacement appeared as space-to-space, i.e. a no-op;
        // it is written as non-breaking space -> space, which is presumably what was meant - confirm.
        html = html.Replace("&nbsp;", " ").Replace("\u00A0", " ");

        start = html.IndexOf("<sup>", StringComparison.Ordinal);
    }
    return html;
}

/// <summary>
/// Removes every <c>id="bn..."</c> attribute, including one directly preceding space.
/// </summary>
/// <param name="html">text to process</param>
/// <returns>the text without those attributes</returns>
/// <exception cref="Exception">thrown when the attribute value has no closing quote</exception>
private static string _removeBnIdAttributes(string html)
{
    const string attributeStart = "id=\"bn";
    int idx = html.IndexOf(attributeStart, StringComparison.Ordinal);
    while (idx != -1)
    {
        int closingQuote = html.IndexOf('"', idx + attributeStart.Length);
        if (closingQuote == -1)
        {
            // Without this check the loop would never terminate.
            throw new Exception($"Unterminated id attribute: {html}");
        }

        // The same needle drives both the loop condition and the cut position; the separating
        // space is removed when present, other preceding characters are left alone.
        int cutFrom = (idx > 0 && html[idx - 1] == ' ') ? idx - 1 : idx;
        html = html.Substring(0, cutFrom) + html.Substring(closingQuote + 1);

        idx = html.IndexOf(attributeStart, StringComparison.Ordinal);
    }
    return html;
}
/// <summary>
/// URL slugs used for category titles that have a conventional slug; titles not listed here keep the
/// slug produced by <c>GPersianTextSync.Farglisize</c>.
/// </summary>
/// <remarks>
/// Index initialisers are used so that a repeated key simply overwrites the earlier entry.
/// </remarks>
private static readonly Dictionary<string, string> _wellKnownCatUrlSlugs = new Dictionary<string, string>()
{
    ["دیوان اشعار"] = "divan",
    ["قصاید"] = "ghaside",
    ["قصائد"] = "ghaside",
    ["قصیده"] = "ghaside",
    ["قصیده ها"] = "ghaside",
    ["غزلیات"] = "ghazal",
    ["غزل"] = "ghazal",
    ["غزل ها"] = "ghazal",
    ["قطعات"] = "ghete",
    ["مقطعات"] = "ghete",
    ["قطعه"] = "ghete",
    ["مثنویات"] = "masnavi",
    ["مثنوی"] = "masnavi",
    ["مثنوی ها"] = "masnavi",
    ["ترکیبات"] = "tarkib",
    ["ترکیب بند"] = "tarkib",
    ["ترجیعات"] = "tarjee",
    ["ترجیع بند"] = "tarjee",
    ["مسمطات"] = "mosammat",
    ["مسمط"] = "mosammat",
    ["مخمسات"] = "mokhammas",
    ["مخمس"] = "mokhammas",
    ["رباعیات"] = "robaee",
    ["رباعی"] = "robaee",
    ["رباعی ها"] = "robaee",
    ["ملمعات"] = "molamma",
    ["ملمع"] = "molamma",
    ["هجویات"] = "hajv",
    ["هجو"] = "hajv",
    ["هزلیات"] = "hazl",
    ["هزل"] = "hazl",
    ["مراثی"] = "marsie",
    ["مرثیه"] = "marsie",
    ["رثا"] = "marsie",
    ["مرثیه ها"] = "marsie",
    ["مفردات"] = "mofradat",
    ["ملحقات"] = "molhaghat",
    ["اشعار عربی"] = "arabi",
    // NOTE(review): this title was listed twice in the reviewed switch; the two labels may differ only by an
    // invisible character (e.g. a zero-width non-joiner) - confirm and keep both spellings if so.
    ["ماده تاریخها"] = "tarikh",
    ["ماده تاریخها"] = "tarikh",
    ["ماده تاریخ"] = "tarikh",
    ["معمیات"] = "moammiyat",
    ["چیستان"] = "chistan",
    ["لغز"] = "loghaz",
    ["لغزها"] = "loghaz",
};

/// <summary>
/// Imports the child categories and the poems of one SQLite category into the main database and
/// recurses into every child category.
/// </summary>
/// <remarks>
/// For every child category a <c>GanjoorCat</c> and an unpublished category page are created and saved
/// before recursing. For every poem of the category a <c>GanjoorPoem</c>, its verses and an unpublished
/// poem page are created. Finally a link line for every created category and poem is appended to the
/// HTML of the page whose <c>FullUrl</c> equals <c>parentCat.FullUrl</c>.
/// Changes are saved step by step; nothing in this method rolls back work done before a failure.
/// </remarks>
/// <param name="context">main database context</param>
/// <param name="sqlite">connection to the SQLite database being imported</param>
/// <param name="poetId">poet id stored on the created categories and pages</param>
/// <param name="sqliteParentCatId">id of the SQLite category whose children and poems are imported</param>
/// <param name="parentCat">already stored category that the imported items are attached to</param>
/// <param name="parentFullTitle">full title used as prefix for the full titles of created items</param>
/// <param name="jobProgressServiceEF">service used to report progress</param>
/// <param name="job">job whose progress is updated</param>
/// <param name="parentPagId">value stored in <c>ParentId</c> of the created pages</param>
/// <returns>an empty string on success, otherwise the text of the exception that stopped the import</returns>
private async Task<string> _ImportSQLiteCatChildren(RMuseumDbContext context, IDbConnection sqlite, int poetId, int sqliteParentCatId, GanjoorCat parentCat, string parentFullTitle, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job, int parentPagId)
{
    try
    {
        // Link lines for everything created below; appended to the parent category page at the end.
        string catHtmlText = "";

        // ---- child categories ----
        foreach (var cat in await sqlite.QueryAsync($"SELECT * FROM cat WHERE parent_id = {sqliteParentCatId} ORDER BY id"))
        {
            await jobProgressServiceEF.UpdateJob(job.Id, 0, $"Importing - {cat.text}");

            var poetCatId = 1 + await context.GanjoorCategories.MaxAsync(c => c.Id);

            string catTitle = cat.text;
            string url = GPersianTextSync.Farglisize(catTitle);
            if (catTitle.IndexOf('|') != -1)
            {
                // "title|slug" carries an explicit slug; any other number of parts leaves title and slug untouched.
                string[] catParts = catTitle.Split('|', StringSplitOptions.RemoveEmptyEntries);
                if (catParts.Length == 2)
                {
                    catTitle = catParts[0].Trim();
                    url = catParts[1].Trim();
                }
            }
            else if (_wellKnownCatUrlSlugs.TryGetValue(catTitle, out string wellKnownSlug))
            {
                url = wellKnownSlug;
            }

            GanjoorCat dbCat = new GanjoorCat()
            {
                Id = poetCatId,
                PoetId = poetId,
                Title = catTitle,
                UrlSlug = url,
                FullUrl = $"{parentCat.FullUrl}/{url}",
                ParentId = parentCat.Id,
                TableOfContentsStyle = GanjoorTOC.Analyse,
                Published = true,
            };
            context.GanjoorCategories.Add(dbCat);

            // The new page id has to be above every existing page id and every existing poem id.
            // Each maximum is queried once.
            var maxPageId = await context.GanjoorPages.MaxAsync(p => p.Id);
            var maxExistingPoemId = await context.GanjoorPoems.MaxAsync(p => p.Id);
            var catPageId = 1 + Math.Max(maxPageId, maxExistingPoemId);
            while (await context.GanjoorPoems.AnyAsync(p => p.Id == catPageId))
            {
                catPageId++;
            }

            GanjoorPage dbPageCat = new GanjoorPage()
            {
                Id = catPageId,
                GanjoorPageType = GanjoorPageType.CatPage,
                Published = false,
                PageOrder = -1,
                Title = dbCat.Title,
                FullTitle = $"{parentFullTitle} » {dbCat.Title}",
                UrlSlug = dbCat.UrlSlug,
                FullUrl = dbCat.FullUrl,
                HtmlText = "",
                PoetId = poetId,
                CatId = poetCatId,
                PostDate = DateTime.Now,
                ParentId = parentPagId
            };
            context.GanjoorPages.Add(dbPageCat);
            await context.SaveChangesAsync();

            catHtmlText += $"<p><a href=\"{dbCat.FullUrl}\">{dbCat.Title}</a></p>{Environment.NewLine}";

            var resChild = await _ImportSQLiteCatChildren(context, sqlite, poetId, (int)cat.id, dbCat, $"{parentFullTitle} » {dbCat.Title}", jobProgressServiceEF, job, dbPageCat.Id);
            if (!string.IsNullOrEmpty(resChild))
            {
                // A failure in a sub-tree stops the whole import; the error text is passed up unchanged.
                return resChild;
            }
        }

        // ---- poems of this category ----
        // Poems and their pages share one id, so it must be free in both tables.
        var maxPoemId = await context.GanjoorPoems.MaxAsync(p => p.Id);
        var maxPageIdForPoems = await context.GanjoorPages.MaxAsync(p => p.Id);
        var poemId = 1 + Math.Max(maxPoemId, maxPageIdForPoems);

        int poemNumber = 0;
        foreach (var poem in await sqlite.QueryAsync($"SELECT * FROM poem WHERE cat_id = {sqliteParentCatId} ORDER BY id"))
        {
            poemNumber++;
            await jobProgressServiceEF.UpdateJob(job.Id, poemNumber, "", false);

            string title = poem.title;
            string urlSlug = $"sh{poemNumber}";
            if (title.IndexOf('|') != -1)
            {
                // Same "title|slug" convention as for categories.
                string[] titleParts = title.Split('|', StringSplitOptions.RemoveEmptyEntries);
                if (titleParts.Length == 2)
                {
                    title = titleParts[0].Trim();
                    urlSlug = titleParts[1].Trim();
                }
            }

            GanjoorPoem dbPoem = new GanjoorPoem()
            {
                Id = poemId,
                CatId = parentCat.Id,
                Title = title,
                UrlSlug = urlSlug,
                FullTitle = $"{parentFullTitle} » {title}",
                FullUrl = $"{parentCat.FullUrl}/{urlSlug}",
                Published = true,
            };

            List<GanjoorVerse> poemVerses = new List<GanjoorVerse>();
            foreach (var verse in await sqlite.QueryAsync($"SELECT * FROM verse WHERE poem_id = {poem.id} ORDER BY vorder"))
            {
                int vOrder = int.Parse(verse.vorder.ToString());
                int position = int.Parse(verse.position.ToString());
                string text = verse.text;

                GanjoorVerse dbVerse = new GanjoorVerse()
                {
                    PoemId = poemId,
                    VOrder = vOrder,
                    VersePosition = (VersePosition)position,
                    // The first Replace drops the tatweel (kashida) character.
                    // NOTE(review): the second Replace appeared as space-to-space in the reviewed text, which
                    // is a no-op; it may have been meant to collapse double or non-breaking spaces - confirm.
                    Text = text.Replace("ـ", "").Replace(" ", " ").ApplyCorrectYeKe().Trim()
                };
                poemVerses.Add(dbVerse);
            }

            if (poemVerses.Count == 0)
            {
                // A poem without verses is skipped and does not consume a poem number.
                poemNumber--;
                continue;
            }

            dbPoem.PlainText = PreparePlainText(poemVerses);
            dbPoem.HtmlText = PrepareHtmlText(poemVerses);
            context.GanjoorPoems.Add(dbPoem);
            await context.SaveChangesAsync();

            foreach (var dbVerse in poemVerses)
            {
                context.GanjoorVerses.Add(dbVerse);
                await context.SaveChangesAsync(); // saved one by one so that ids are assigned in verse order
            }

            await _FillPoemCoupletIndices(context, poemId);

            try
            {
                var poemRhymeLettersRes = LanguageUtils.FindRhyme(poemVerses);
                if (!string.IsNullOrEmpty(poemRhymeLettersRes.Rhyme))
                {
                    dbPoem.RhymeLetters = poemRhymeLettersRes.Rhyme;
                    // Persisted by the SaveChangesAsync call that follows the page creation below.
                    context.GanjoorPoems.Update(dbPoem);
                }
            }
            catch
            {
                // Best effort: a failure while detecting the rhyme is ignored so that it does not abort
                // the import; the poem is simply stored without rhyme letters.
            }

            GanjoorPage dbPoemPage = new GanjoorPage()
            {
                Id = poemId,
                GanjoorPageType = GanjoorPageType.PoemPage,
                Published = false,
                PageOrder = -1,
                Title = dbPoem.Title,
                FullTitle = dbPoem.FullTitle,
                UrlSlug = dbPoem.UrlSlug,
                FullUrl = dbPoem.FullUrl,
                HtmlText = dbPoem.HtmlText,
                PoetId = poetId,
                CatId = parentCat.Id,
                PoemId = poemId,
                PostDate = DateTime.Now,
                ParentId = parentPagId
            };
            context.GanjoorPages.Add(dbPoemPage);
            await context.SaveChangesAsync();

            catHtmlText += $"<p><a href=\"{dbPoemPage.FullUrl}\">{dbPoemPage.Title}</a></p>{Environment.NewLine}";
            poemId++;
        }

        // ---- table of contents of the parent page ----
        if (!string.IsNullOrEmpty(catHtmlText))
        {
            // SingleAsync throws when there is no page or more than one page with this url; the outer
            // catch turns that into the returned error text.
            var parentCatPage = await context.GanjoorPages.SingleAsync(p => p.FullUrl == parentCat.FullUrl);
            parentCatPage.HtmlText += catHtmlText;
            context.GanjoorPages.Update(parentCatPage);
        }
        await context.SaveChangesAsync();
    }
    catch (Exception exp)
    {
        return exp.ToString();
    }

    return "";
}