Ejemplo n.º 1
0
        /// <summary>
        /// Renders a list of verses as an HTML fragment, one wrapper <c>div</c> per couplet/paragraph.
        /// </summary>
        /// <remarks>
        /// A running couplet counter is written into the <c>id="bn{n}"</c> attribute. It is advanced by
        /// <c>CenteredVerse1</c>, <c>Right</c>, <c>Paragraph</c> and <c>Single</c> verses; <c>CenteredVerse2</c>,
        /// <c>Left</c> and <c>Comment</c> verses do not advance it. Verse text is inserted as-is (it is not
        /// HTML-encoded). Verses with any other position produce no output.
        /// </remarks>
        /// <param name="verses">verses of one poem, in display order</param>
        /// <returns>the generated HTML with leading and trailing whitespace trimmed</returns>
        private static string PrepareHtmlText(List <GanjoorVerse> verses)
        {
            // Fully qualified so the method does not depend on a "using System.Text;" directive.
            System.Text.StringBuilder html = new System.Text.StringBuilder();
            int coupletIndex = 0;

            for (int vIndex = 0; vIndex < verses.Count; vIndex++)
            {
                GanjoorVerse v = verses[vIndex];

                if (v.VersePosition == VersePosition.CenteredVerse1)
                {
                    coupletIndex++;

                    // The wrapper stays open only when the very next verse is the matching second half,
                    // which then closes it; otherwise it is closed right away.
                    bool secondHalfFollows = (vIndex + 1) < verses.Count &&
                                             verses[vIndex + 1].VersePosition == VersePosition.CenteredVerse2;

                    html.Append($"<div class=\"b2\" id=\"bn{coupletIndex}\"><p>{v.Text}</p>");
                    if (!secondHalfFollows)
                    {
                        html.Append("</div>");
                    }
                    html.Append(Environment.NewLine);
                }
                else if (v.VersePosition == VersePosition.CenteredVerse2)
                {
                    html.Append($"<p>{v.Text}</p></div>{Environment.NewLine}");
                }
                else if (v.VersePosition == VersePosition.Right)
                {
                    // Opens the couplet wrapper; the following Left verse closes it.
                    coupletIndex++;
                    html.Append($"<div class=\"b\" id=\"bn{coupletIndex}\"><div class=\"m1\"><p>{v.Text}</p></div>{Environment.NewLine}");
                }
                else if (v.VersePosition == VersePosition.Left)
                {
                    html.Append($"<div class=\"m2\"><p>{v.Text}</p></div></div>{Environment.NewLine}");
                }
                else if (v.VersePosition == VersePosition.Comment)
                {
                    html.Append($"<div class=\"c\"><p>{v.Text}</p></div>{Environment.NewLine}");
                }
                else if (v.VersePosition == VersePosition.Paragraph || v.VersePosition == VersePosition.Single)
                {
                    coupletIndex++;

                    // A null text is treated like an empty one so that it reaches the "empty line" branch
                    // below instead of failing on Split.
                    string   text     = v.Text ?? "";
                    string[] lines    = text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                    string   cssClass = v.VersePosition == VersePosition.Paragraph ? "n" : "l";

                    if (lines.Length == 0)
                    {
                        // Either no text at all, or text made only of line-break characters.
                        string content = string.IsNullOrEmpty(text) ? "&nbsp;" : text;
                        html.Append($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{content}</p></div>{Environment.NewLine}");
                    }
                    else if (text.Length / lines.Length < 150)
                    {
                        // Short lines on average: keep everything in one block, joining CRLF breaks with a space.
                        html.Append($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{text.Replace("\r\n", " ")}</p></div>{Environment.NewLine}");
                    }
                    else
                    {
                        // Long lines on average: one block per line, all sharing the same id.
                        foreach (string line in lines)
                        {
                            html.Append($"<div class=\"{cssClass}\" id=\"bn{coupletIndex}\"><p>{line}</p></div>{Environment.NewLine}");
                        }
                    }
                }
            }

            return html.ToString().Trim();
        }
        /// <summary>
        /// Rebuilds the verse list of a poem from its HTML text.
        /// </summary>
        /// <remarks>
        /// The text is processed in three stages:
        /// 1. inline markup (links, acronyms, superscripts, emphasis, known inline styles) and
        ///    <c>id="bn..."</c> attributes are removed;
        /// 2. the remaining block tags are collapsed into single Latin marker letters
        ///    (<c>a b m n p s l c</c>);
        /// 3. the text following each marker, up to the next marker, becomes one verse.
        /// Because markers are plain Latin letters, any such letter inside the verse text is taken for a
        /// marker; two known Latin phrases are masked with non-Latin placeholders during parsing and
        /// restored afterwards. Text before the first marker is discarded.
        /// </remarks>
        /// <param name="poemId">value assigned to <c>PoemId</c> of every returned verse</param>
        /// <param name="poemtext">HTML text of the poem</param>
        /// <returns>the verses in document order, with <c>VOrder</c> counting up from 1</returns>
        /// <exception cref="Exception">
        /// an inline tag or id attribute is not terminated, an unexpected tag is left after cleanup,
        /// or a first hemistich has no matching second one
        /// </exception>
        private List <GanjoorVerse> _extractVersesFromPoemHtmlText(int poemId, string poemtext)
        {
            List <GanjoorVerse> verses = new List <GanjoorVerse>();

            // All markup searches are ordinal: the tokens are ASCII markup, and culture-sensitive
            // searching can report unexpected positions around ignorable characters.

            // Links: keep the link text, drop the opening and closing tags.
            int linkStart;
            while ((linkStart = poemtext.IndexOf("<a href", StringComparison.Ordinal)) != -1)
            {
                int openTagEnd    = poemtext.IndexOf('>', linkStart);
                int closeTagStart = openTagEnd == -1 ? -1 : poemtext.IndexOf("</a>", openTagEnd + 1, StringComparison.Ordinal);
                if (closeTagStart == -1)
                {
                    throw new Exception($"Unterminated <a href> tag: {poemtext}");
                }

                poemtext = poemtext.Substring(0, linkStart)
                           + poemtext.Substring(openTagEnd + 1, closeTagStart - openTagEnd - 1)
                           + poemtext.Substring(closeTagStart + "</a>".Length);
            }

            // Acronyms: same treatment as links.
            int acronymStart;
            while ((acronymStart = poemtext.IndexOf("<acronym", StringComparison.Ordinal)) != -1)
            {
                int openTagEnd = poemtext.IndexOf('>', acronymStart);
                if (openTagEnd == -1)
                {
                    throw new Exception($"Unterminated <acronym> tag: {poemtext}");
                }

                string closingToken  = "</acronym>";
                int    closeTagStart = poemtext.IndexOf(closingToken, openTagEnd + 1, StringComparison.Ordinal);
                if (closeTagStart == -1)
                {
                    // Fallback for text where the element is "closed" by a second bare <acronym>.
                    closingToken  = "<acronym>";
                    closeTagStart = poemtext.IndexOf(closingToken, openTagEnd + 1, StringComparison.Ordinal);
                }
                if (closeTagStart == -1)
                {
                    throw new Exception($"Unterminated <acronym> tag: {poemtext}");
                }

                poemtext = poemtext.Substring(0, acronymStart)
                           + poemtext.Substring(openTagEnd + 1, closeTagStart - openTagEnd - 1)
                           + poemtext.Substring(closeTagStart + closingToken.Length);
            }

            // Superscripts: dropped together with their content.
            int supStart;
            while ((supStart = poemtext.IndexOf("<sup>", StringComparison.Ordinal)) != -1)
            {
                // The closing tag is looked up after the opening one so that every pass removes text;
                // a missing closing tag is reported instead of looping.
                int supEnd = poemtext.IndexOf("</sup>", supStart, StringComparison.Ordinal);
                if (supEnd == -1)
                {
                    throw new Exception($"poemtext.IndexOf(\"<sup>\": {poemtext}");
                }

                poemtext = poemtext.Substring(0, supStart) + poemtext.Substring(supEnd + "</sup>".Length);
                poemtext = poemtext.Replace("  ", " ");
            }

            // Couplet anchors: remove every id="bn..." attribute, plus the space in front of it if any.
            const string idPrefix = "id=\"bn";
            int          idStart;
            while ((idStart = poemtext.IndexOf(idPrefix, StringComparison.Ordinal)) != -1)
            {
                int valueEnd = poemtext.IndexOf('"', idStart + idPrefix.Length);
                if (valueEnd == -1)
                {
                    throw new Exception($"Unterminated id attribute: {poemtext}");
                }

                int cutStart = (idStart > 0 && poemtext[idStart - 1] == ' ') ? idStart - 1 : idStart;
                poemtext = poemtext.Substring(0, cutStart) + poemtext.Substring(valueEnd + 1);
            }

            // Mask the Latin phrases that may legitimately occur in verse text: their letters would
            // otherwise be read as markers in the last stage.
            const string latinPhrase1 = "Adaptation du milieu";
            const string phraseMask1  = "یییییییییییییییییییی";
            const string latinPhrase2 = "Empirique";
            const string phraseMask2  = "ببببببببب";
            poemtext = poemtext.Replace(latinPhrase1, phraseMask1);
            poemtext = poemtext.Replace(latinPhrase2, phraseMask2);

            // Normalise known tag variants and drop purely presentational markup.
            poemtext = poemtext.Replace(" >", ">");
            poemtext = poemtext
                       .Replace("<div class=\"b\" style=\"width:750px\">", "<div class=\"b\">")
                       .Replace("<div class=\"b\" style=\"width:660px\">", "<div class=\"b\">")
                       .Replace("<div class=\"b\" style=\"width:680px\">", "<div class=\"b\">")
                       .Replace("<div class=\"b\" style=\"width:650px\">", "<div class=\"b\">")
                       .Replace("<div class=\"b\" style=\"width:690px\">", "<div class=\"b\">")
                       .Replace("<p style=\"color:#911\">", "<p>")
                       .Replace("<p style=\"color:#191\">", "<p>")
                       .Replace("<div class=\"spacer\">", "")
                       .Replace("&nbsp;", "")
                       .Replace("<div class=\"spacer\" />", "")
                       .Replace("<div class=\"b\" style=\"width:700px\">", "<div class=\"b\">");
            poemtext = poemtext.Replace("<em>", "").Replace("</em>", "").Replace("<small>", "").Replace("</small>", "");
            poemtext = poemtext.Replace("<b>", "").Replace("</b>", "").Replace("<strong>", "").Replace("</strong>", "");
            poemtext = poemtext.Replace("<p><br style=\"clear:both;\"/></p>", "").Replace("<br style=\"clear:both;\"/>", "");
            poemtext = poemtext.Replace("\r", "").Replace("\n", "");

            // Collapse block tags into marker letters. The order of these replacements matters:
            // closing tags go first, and the combined "<div ...><p>" forms go before the bare ones.
            poemtext = poemtext.Replace("</div>", "").Replace("</p>", "");
            poemtext = poemtext.Replace("<div class=\"b2\">", "a");
            poemtext = poemtext.Replace("<div class=\"b\">", "b");
            poemtext = poemtext.Replace("<div class=\"m1\">", "m");
            poemtext = poemtext.Replace("<div class=\"m2\">", "n");
            poemtext = poemtext.Replace("<div class=\"n\"><p>", "s");
            poemtext = poemtext.Replace("<div class=\"n\">", "s");
            poemtext = poemtext.Replace("<div class=\"l\"><p>", "l");
            poemtext = poemtext.Replace("<div class=\"l\">", "l");
            poemtext = poemtext.Replace("<div class=\"c\"><p>", "c");
            poemtext = poemtext.Replace("<div class=\"c\">", "c");
            poemtext = poemtext.Replace("<p>", "p");
            poemtext = poemtext.Replace("bmp", "b");   // couplet wrapper + first hemistich + paragraph
            poemtext = poemtext.Replace("np", "n");    // second hemistich + paragraph
            poemtext = poemtext.Replace("ap", "a");    // centered wrapper + paragraph
            poemtext = poemtext.Replace("\"", "").Replace("'", "");

            if (poemtext.IndexOfAny(new char[] { '<', '>' }) != -1)
            {
                throw new Exception($"Invalid Characteres: {poemtext}");
            }
            if (poemtext.IndexOf("mp", StringComparison.Ordinal) != -1)
            {
                // A first hemistich that was not preceded by a couplet wrapper.
                throw new Exception($"مصرع اول بدون مصرع دوم: {poemtext}");
            }

            // Split at the markers: each marker starts a verse that runs up to the next marker.
            char[] markers    = new char[] { 'a', 'b', 'm', 'n', 'p', 's', 'l', 'c' };
            bool   preWasBand = false;
            int    idx        = poemtext.IndexOfAny(markers);
            while (idx != -1)
            {
                GanjoorVerse verse = new GanjoorVerse();
                verse.PoemId = poemId;
                verse.VOrder = verses.Count + 1;

                // 'm' deliberately has no case: such a verse keeps the position a new GanjoorVerse
                // starts with, and preWasBand is left unchanged.
                switch (poemtext[idx])
                {
                case 'p':
                    // A bare paragraph directly after a centered first verse is its second half.
                    verse.VersePosition = preWasBand ? VersePosition.CenteredVerse2 : VersePosition.Paragraph;
                    preWasBand          = false;
                    break;

                case 'b':
                    verse.VersePosition = VersePosition.Right;
                    preWasBand          = false;
                    break;

                case 'n':
                    verse.VersePosition = VersePosition.Left;
                    preWasBand          = false;
                    break;

                case 'a':
                    verse.VersePosition = VersePosition.CenteredVerse1;
                    preWasBand          = true;
                    break;

                case 's':
                    verse.VersePosition = VersePosition.Paragraph;
                    preWasBand          = false;
                    break;

                case 'l':
                    verse.VersePosition = VersePosition.Single;
                    preWasBand          = false;
                    break;

                case 'c':
                    verse.VersePosition = VersePosition.Comment;
                    preWasBand          = false;
                    break;
                }

                int    nextIdx = poemtext.IndexOfAny(markers, idx + 1);
                string rawText = nextIdx == -1
                                 ? poemtext.Substring(idx + 1)
                                 : poemtext.Substring(idx + 1, nextIdx - idx - 1);

                // Undo the masking applied before the marker stage.
                verse.Text = rawText.Replace(phraseMask1, latinPhrase1).Replace(phraseMask2, latinPhrase2).Trim();

                verses.Add(verse);

                idx = nextIdx;
            }

            return verses;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Imports the sub-categories and poems of one SQLite category into the database, recursing
        /// into every sub-category.
        /// </summary>
        /// <remarks>
        /// For each child category a <c>GanjoorCat</c> and its category page are created and saved, then the
        /// method calls itself for that child. Afterwards the poems stored directly under
        /// <paramref name="sqliteParentCatId"/> are imported with their verses and poem pages; poems
        /// without verses are skipped. Links to all created children and poems are appended to the HTML of
        /// the page whose <c>FullUrl</c> equals that of <paramref name="parentCat"/>.
        /// </remarks>
        /// <param name="context">target database context</param>
        /// <param name="sqlite">open connection to the source SQLite database</param>
        /// <param name="poetId">id of the poet the imported items belong to</param>
        /// <param name="sqliteParentCatId">id of the category in the SQLite database whose content is imported</param>
        /// <param name="parentCat">already stored category that corresponds to <paramref name="sqliteParentCatId"/></param>
        /// <param name="parentFullTitle">full title of the parent, used as prefix for the full titles of children</param>
        /// <param name="jobProgressServiceEF">service used to report progress</param>
        /// <param name="job">the job whose progress is updated</param>
        /// <param name="parentPagId">id of the parent page assigned to the created pages</param>
        /// <returns>an empty string on success, otherwise the text of the exception that stopped the import</returns>
        private async Task <string> _ImportSQLiteCatChildren(RMuseumDbContext context, IDbConnection sqlite, int poetId, int sqliteParentCatId, GanjoorCat parentCat, string parentFullTitle, LongRunningJobProgressServiceEF jobProgressServiceEF, RLongRunningJobStatus job, int parentPagId)
        {
            try
            {
                string catHtmlText = "";

                // ---- sub-categories ----
                foreach (var cat in await sqlite.QueryAsync($"SELECT * FROM cat WHERE parent_id = {sqliteParentCatId} ORDER BY id"))
                {
                    await jobProgressServiceEF.UpdateJob(job.Id, 0, $"Importing - {cat.text}");

                    var poetCatId = 1 + await context.GanjoorCategories.MaxAsync(c => c.Id);

                    string catTitle = cat.text;

                    // Default slug is the transliterated title; it is overridden either by an explicit
                    // "title|slug" pair or by the table of conventional slugs.
                    string url = GPersianTextSync.Farglisize(catTitle);
                    if (catTitle.IndexOf('|') != -1)
                    {
                        string[] catParts = catTitle.Split('|', StringSplitOptions.RemoveEmptyEntries);
                        if (catParts.Length == 2)
                        {
                            catTitle = catParts[0].Trim();
                            url      = catParts[1].Trim();
                        }
                    }
                    else
                    {
                        url = _GetWellKnownCatUrlSlug(catTitle, url);
                    }

                    GanjoorCat dbCat = new GanjoorCat()
                    {
                        Id                   = poetCatId,
                        PoetId               = poetId,
                        Title                = catTitle,
                        UrlSlug              = url,
                        FullUrl              = $"{parentCat.FullUrl}/{url}",
                        ParentId             = parentCat.Id,
                        TableOfContentsStyle = GanjoorTOC.Analyse,
                        Published            = true,
                    };
                    context.GanjoorCategories.Add(dbCat);

                    // Pages and poems share one id space: take the next id above both tables.
                    var catPageId = 1 + await _GetMaxPageOrPoemIdAsync(context);
                    while (await context.GanjoorPoems.Where(p => p.Id == catPageId).AnyAsync())
                    {
                        catPageId++;
                    }

                    GanjoorPage dbPageCat = new GanjoorPage()
                    {
                        Id = catPageId,
                        GanjoorPageType = GanjoorPageType.CatPage,
                        Published       = false,
                        PageOrder       = -1,
                        Title           = dbCat.Title,
                        FullTitle       = $"{parentFullTitle} » {dbCat.Title}",
                        UrlSlug         = dbCat.UrlSlug,
                        FullUrl         = dbCat.FullUrl,
                        HtmlText        = "",
                        PoetId          = poetId,
                        CatId           = poetCatId,
                        PostDate        = DateTime.Now,
                        ParentId        = parentPagId
                    };

                    context.GanjoorPages.Add(dbPageCat);

                    await context.SaveChangesAsync();

                    catHtmlText += $"<p><a href=\"{dbCat.FullUrl}\">{dbCat.Title}</a></p>{Environment.NewLine}";

                    var resChild = await _ImportSQLiteCatChildren(context, sqlite, poetId, (int)cat.id, dbCat, $"{parentFullTitle} » {dbCat.Title}", jobProgressServiceEF, job, dbPageCat.Id);

                    // A non-empty result is an error text: stop and hand it up unchanged.
                    if (!string.IsNullOrEmpty(resChild))
                    {
                        return resChild;
                    }
                }

                // ---- poems directly under this category ----
                var poemId = 1 + await _GetMaxPageOrPoemIdAsync(context);

                int poemNumber = 0;
                foreach (var poem in await sqlite.QueryAsync($"SELECT * FROM poem WHERE cat_id = {sqliteParentCatId} ORDER BY id"))
                {
                    poemNumber++;
                    await jobProgressServiceEF.UpdateJob(job.Id, poemNumber, "", false);

                    // Default slug is "sh" + running number unless the title carries "title|slug".
                    string title   = poem.title;
                    string urlSlug = $"sh{poemNumber}";
                    if (title.IndexOf('|') != -1)
                    {
                        string[] titleParts = title.Split('|', StringSplitOptions.RemoveEmptyEntries);
                        if (titleParts.Length == 2)
                        {
                            title   = titleParts[0].Trim();
                            urlSlug = titleParts[1].Trim();
                        }
                    }

                    GanjoorPoem dbPoem = new GanjoorPoem()
                    {
                        Id        = poemId,
                        CatId     = parentCat.Id,
                        Title     = title,
                        UrlSlug   = urlSlug,
                        FullTitle = $"{parentFullTitle} » {title}",
                        FullUrl   = $"{parentCat.FullUrl}/{urlSlug}",
                        Published = true,
                    };

                    List <GanjoorVerse> poemVerses = new List <GanjoorVerse>();
                    foreach (var verse in await sqlite.QueryAsync($"SELECT * FROM verse WHERE poem_id = {poem.id} ORDER BY vorder"))
                    {
                        int          vOrder   = int.Parse(verse.vorder.ToString());
                        int          position = int.Parse(verse.position.ToString());
                        string       text     = verse.text;
                        GanjoorVerse dbVerse  = new GanjoorVerse()
                        {
                            PoemId        = poemId,
                            VOrder        = vOrder,
                            VersePosition = (VersePosition)position,
                            Text          = text.Replace("ـ", "").Replace("  ", " ").ApplyCorrectYeKe().Trim()
                        };
                        poemVerses.Add(dbVerse);
                    }

                    if (poemVerses.Count == 0)
                    {
                        // Skipped poems must not consume a running number (it feeds the default slug).
                        poemNumber--;
                        continue;
                    }

                    dbPoem.PlainText = PreparePlainText(poemVerses);
                    dbPoem.HtmlText  = PrepareHtmlText(poemVerses);

                    context.GanjoorPoems.Add(dbPoem);
                    await context.SaveChangesAsync();

                    foreach (var dbVerse in poemVerses)
                    {
                        context.GanjoorVerses.Add(dbVerse);
                        await context.SaveChangesAsync();//id set should be in order
                    }

                    await _FillPoemCoupletIndices(context, poemId);

                    try
                    {
                        var poemRhymeLettersRes = LanguageUtils.FindRhyme(poemVerses);
                        if (!string.IsNullOrEmpty(poemRhymeLettersRes.Rhyme))
                        {
                            dbPoem.RhymeLetters = poemRhymeLettersRes.Rhyme;
                            context.GanjoorPoems.Update(dbPoem);
                        }
                    }
                    catch
                    {
                        // Exceptions from rhyme detection are ignored: the poem is kept without rhyme letters.
                    }

                    GanjoorPage dbPoemPage = new GanjoorPage()
                    {
                        Id = poemId,
                        GanjoorPageType = GanjoorPageType.PoemPage,
                        Published       = false,
                        PageOrder       = -1,
                        Title           = dbPoem.Title,
                        FullTitle       = dbPoem.FullTitle,
                        UrlSlug         = dbPoem.UrlSlug,
                        FullUrl         = dbPoem.FullUrl,
                        HtmlText        = dbPoem.HtmlText,
                        PoetId          = poetId,
                        CatId           = parentCat.Id,
                        PoemId          = poemId,
                        PostDate        = DateTime.Now,
                        ParentId        = parentPagId
                    };

                    context.GanjoorPages.Add(dbPoemPage);
                    await context.SaveChangesAsync();

                    catHtmlText += $"<p><a href=\"{dbPoemPage.FullUrl}\">{dbPoemPage.Title}</a></p>{Environment.NewLine}";

                    poemId++;
                }

                // ---- table of contents on the parent category page ----
                if (!string.IsNullOrEmpty(catHtmlText))
                {
                    var parentCatPage = await context.GanjoorPages.Where(p => p.FullUrl == parentCat.FullUrl).SingleAsync();

                    parentCatPage.HtmlText += catHtmlText;
                    context.GanjoorPages.Update(parentCatPage);
                }

                await context.SaveChangesAsync();
            }
            catch (Exception exp)
            {
                return exp.ToString();
            }
            return "";
        }

        /// <summary>
        /// Returns the highest id currently used by either a page or a poem, or 0 when both tables are empty.
        /// </summary>
        /// <param name="context">database context to query</param>
        /// <returns>the larger of the two maximum ids</returns>
        private static async Task <int> _GetMaxPageOrPoemIdAsync(RMuseumDbContext context)
        {
            // The nullable projection makes MAX over an empty table yield null instead of throwing.
            int maxPageId = (await context.GanjoorPages.MaxAsync(p => (int?)p.Id)) ?? 0;
            int maxPoemId = (await context.GanjoorPoems.MaxAsync(p => (int?)p.Id)) ?? 0;
            return Math.Max(maxPageId, maxPoemId);
        }

        /// <summary>
        /// Maps a conventional category title to its fixed URL slug.
        /// </summary>
        /// <param name="catTitle">category title exactly as stored in the source</param>
        /// <param name="fallbackSlug">slug returned when the title is not one of the known ones</param>
        /// <returns>the fixed slug for a known title, otherwise <paramref name="fallbackSlug"/></returns>
        private static string _GetWellKnownCatUrlSlug(string catTitle, string fallbackSlug)
        {
            switch (catTitle)
            {
            case "دیوان اشعار":
                return "divan";

            case "قصاید":
            case "قصائد":
            case "قصیده":
            case "قصیده ها":
                return "ghaside";

            case "غزلیات":
            case "غزل":
            case "غزل ها":
                return "ghazal";

            case "قطعات":
            case "مقطعات":
            case "قطعه":
                return "ghete";

            case "مثنویات":
            case "مثنوی":
            case "مثنوی ها":
                return "masnavi";

            case "ترکیبات":
            case "ترکیب بند":
                return "tarkib";

            case "ترجیعات":
            case "ترجیع بند":
                return "tarjee";

            case "مسمطات":
            case "مسمط":
                return "mosammat";

            case "مخمسات":
            case "مخمس":
                return "mokhammas";

            case "رباعیات":
            case "رباعی":
            case "رباعی ها":
                return "robaee";

            case "ملمعات":
            case "ملمع":
                return "molamma";

            case "هجویات":
            case "هجو":
                return "hajv";

            case "هزلیات":
            case "هزل":
                return "hazl";

            case "مراثی":
            case "مرثیه":
            case "رثا":
            case "مرثیه ها":
                return "marsie";

            case "مفردات":
                return "mofradat";

            case "ملحقات":
                return "molhaghat";

            case "اشعار عربی":
                return "arabi";

            case "ماده تاریخ‌ها":
            case "ماده تاریخها":
            case "ماده تاریخ":
                return "tarikh";

            case "معمیات":
                return "moammiyat";

            case "چیستان":
                return "chistan";

            case "لغز":
            case "لغزها":
                return "loghaz";

            default:
                return fallbackSlug;
            }
        }