C# (CSharp) ChaptersService.HandleChaptersの例

プログラミング言語: C# (CSharp)

クラス/型: ChaptersService

メソッド/関数: HandleChapters

hotexamples.comのコード掲載数: 2

C# (CSharp) ChaptersService.HandleChapters - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC# (CSharp)のChaptersService.HandleChaptersの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

HandleChapters(2)

コード例 #1

ファイルを表示

ファイル: XRayService.cs プロジェクト: 3jimboslice/xray-builder.gui

        // TODO split this up, possible return a result instead of modifying xray
        public void ExpandFromRawMl(
            XRay xray,
            IMetadata metadata,
            Stream rawMlStream,
            bool enableEdit,
            bool useNewVersion,
            bool skipNoLikes,
            int minClipLen,
            bool overwriteChapters,
            SafeShowDelegate safeShow,
            IProgressBar progress,
            CancellationToken token,
            bool ignoreSoftHypen = false,
            bool shortEx         = true)
        {
            var locOffset = metadata.IsAzw3 ? -16 : 0;

            // If there is an apostrophe, attempt to match 's at the end of the term
            // Match end of word, then search for any lingering punctuation
            var apostrophes      = _encoding.GetString(Encoding.UTF8.GetBytes("('|\u2019|\u0060|\u00B4)"));                                                                     // '\u2019\u0060\u00B4
            var quotes           = _encoding.GetString(Encoding.UTF8.GetBytes("(\"|\u2018|\u2019|\u201A|\u201B|\u201C|\u201D|\u201E|\u201F)"));
            var dashesEllipsis   = _encoding.GetString(Encoding.UTF8.GetBytes("(-|\u2010|\u2011|\u2012|\u2013|\u2014|\u2015|\u2026|&#8211;|&#8212;|&#8217;|&#8218;|&#8230;)")); //U+2010 to U+2015 and U+2026
            var punctuationMarks = string.Format(@"({0}s|{0})?{1}?[!\.?,""\);:]*{0}*{1}*{2}*", apostrophes, quotes, dashesEllipsis);

            var excerptId = 0;
            var web       = new HtmlDocument();

            web.Load(rawMlStream, _encoding);

            // Only load chapters when building the old format
            if (!useNewVersion)
            {
                rawMlStream.Seek(0, SeekOrigin.Begin);
                // TODO: passing stream, doc, and contents probably not necessary)
                using var streamReader = new StreamReader(rawMlStream, Encoding.UTF8);
                var readContents = streamReader.ReadToEnd();
                var utf8Doc      = new HtmlDocument();
                utf8Doc.LoadHtml(readContents);
                _chaptersService.HandleChapters(xray, xray.Asin, rawMlStream.Length, utf8Doc, readContents, overwriteChapters, safeShow, xray.Unattended, enableEdit);
            }
            else
            {
                // set default ERL to prevent filtering
                xray.Srl = 1;
                xray.Erl = rawMlStream.Length;
            }

            _logger.Log("Scanning book content...");
            var timer = new System.Diagnostics.Stopwatch();

            timer.Start();
            //Iterate over all paragraphs in book
            var nodes = web.DocumentNode.SelectNodes("//p")
                        ?? web.DocumentNode.SelectNodes("//div[@class='paragraph']")
                        ?? web.DocumentNode.SelectNodes("//div[@class='p-indent']");

            if (nodes == null)
            {
                nodes = web.DocumentNode.SelectNodes("//div");
                _logger.Log("Warning: Could not locate paragraphs normally (p elements or divs of class 'paragraph').\r\n" +
                            "Searching all book contents (all divs), which may produce odd results.");
            }
            if (nodes == null)
            {
                throw new Exception("Could not locate any paragraphs in this book.\r\n" +
                                    "Report this error along with a copy of the book to improve parsing.");
            }
            progress?.Set(0, nodes.Count);
            for (var i = 0; i < nodes.Count; i++)
            {
                token.ThrowIfCancellationRequested();
                var node = nodes[i];
                if (node.FirstChild == null)
                {
                    continue;                          //If the inner HTML is just empty, skip the paragraph!
                }
                var lenQuote = node.InnerHtml.Length;
                var location = node.FirstChild.StreamPosition;
                if (location < 0)
                {
                    throw new Exception($"Unable to locate paragraph {i} within the book content.");
                }

                //Skip paragraph if outside chapter range
                if (location < xray.Srl || location > xray.Erl)
                {
                    continue;
                }
                var noSoftHypen = "";
                if (ignoreSoftHypen)
                {
                    noSoftHypen = node.InnerText;
                    noSoftHypen = noSoftHypen.Replace("\u00C2\u00AD", "");
                    noSoftHypen = noSoftHypen.Replace("&shy;", "");
                    noSoftHypen = noSoftHypen.Replace("&#xad;", "");
                    noSoftHypen = noSoftHypen.Replace("&#173;", "");
                    noSoftHypen = noSoftHypen.Replace("&#0173;", "");
                }
                foreach (var character in xray.Terms)
                {
                    //Search for character name and aliases in the html-less text. If failed, try in the HTML for rare situations.
                    //TODO: Improve location searching as IndexOf will not work if book length exceeds 2,147,483,647...
                    //If soft hyphen ignoring is turned on, also search hyphen-less text.
                    if (!character.Match)
                    {
                        continue;
                    }
                    var termFound = false;
                    // Convert from UTF8 string to default-encoded representation
                    var search = character.Aliases.Select(alias => _encoding.GetString(Encoding.UTF8.GetBytes(alias)))
                                 .ToList();
                    if (character.RegexAliases)
                    {
                        if (search.Any(r => Regex.Match(node.InnerText, r).Success) ||
                            search.Any(r => Regex.Match(node.InnerHtml, r).Success) ||
                            (ignoreSoftHypen && search.Any(r => Regex.Match(noSoftHypen, r).Success)))
                        {
                            termFound = true;
                        }
                    }
                    else
                    {
                        // Search for character name and aliases
                        // If there is an apostrophe, attempt to match 's at the end of the term
                        // Match end of word, then search for any lingering punctuation
                        search.Add(character.TermName);
                        // Search list should be in descending order by length, even the term name itself
                        search = search.OrderByDescending(s => s.Length).ToList();
                        if ((character.MatchCase && (search.Any(node.InnerText.Contains) || search.Any(node.InnerHtml.Contains))) ||
                            (!character.MatchCase && (search.Any(node.InnerText.ContainsIgnorecase) || search.Any(node.InnerHtml.ContainsIgnorecase))) ||
                            (ignoreSoftHypen && (character.MatchCase && search.Any(noSoftHypen.Contains)) ||
                             (!character.MatchCase && search.Any(noSoftHypen.ContainsIgnorecase))))
                        {
                            termFound = true;
                        }
                    }

                    if (!termFound)
                    {
                        continue;
                    }

                    var locHighlight = new List <int>();
                    var lenHighlight = new List <int>();
                    //Search html for character name and aliases
                    foreach (var s in search)
                    {
                        var matches = Regex.Matches(node.InnerHtml, $@"{quotes}?\b{s}{punctuationMarks}", character.MatchCase || character.RegexAliases ? RegexOptions.None : RegexOptions.IgnoreCase);
                        foreach (Match match in matches)
                        {
                            if (locHighlight.Contains(match.Index) && lenHighlight.Contains(match.Length))
                            {
                                continue;
                            }
                            locHighlight.Add(match.Index);
                            lenHighlight.Add(match.Length);
                        }
                    }
                    //If normal search fails, use regexp to search in case there is some wacky html nested in term
                    //Regexp may be less than ideal for parsing HTML but seems to work ok so far in these small paragraphs
                    //Also search in soft hyphen-less text if option is set to do so
                    if (locHighlight.Count == 0)
                    {
                        foreach (var s in search)
                        {
                            var          patterns    = new List <string>();
                            const string patternHtml = "(?:<[^>]*>)*";
                            //Match HTML tags -- provided there's nothing malformed
                            const string patternSoftHypen = "(\u00C2\u00AD|&shy;|&#173;|&#xad;|&#0173;|&#x00AD;)*";
                            var          pattern          = string.Format("{0}{1}{0}{2}",
                                                                          patternHtml,
                                                                          string.Join(patternHtml + patternSoftHypen, character.RegexAliases ? s.ToCharArray() : Regex.Unescape(s).ToCharArray()),
                                                                          punctuationMarks);
                            patterns.Add(pattern);
                            foreach (var pat in patterns)
                            {
                                MatchCollection matches;
                                if (character.MatchCase || character.RegexAliases)
                                {
                                    matches = Regex.Matches(node.InnerHtml, pat);
                                }
                                else
                                {
                                    matches = Regex.Matches(node.InnerHtml, pat, RegexOptions.IgnoreCase);
                                }
                                foreach (Match match in matches)
                                {
                                    if (locHighlight.Contains(match.Index) && lenHighlight.Contains(match.Length))
                                    {
                                        continue;
                                    }
                                    locHighlight.Add(match.Index);
                                    lenHighlight.Add(match.Length);
                                }
                            }
                        }
                    }
                    if (locHighlight.Count == 0 || locHighlight.Count != lenHighlight.Count) //something went wrong
                    {
                        // _logger.Log($"An error occurred while searching for start of highlight.\r\nWas looking for (or one of the aliases of): {character.TermName}\r\nSearching in: {node.InnerHtml}");
                        continue;
                    }

                    //If an excerpt is too long, the X-Ray reader cuts it off.
                    //If the location of the highlighted word (character name) within the excerpt is far enough in to get cut off,
                    //this section attempts to shorted the excerpt by locating the start of a sentence that is just far enough away from the highlight.
                    //The length is determined by the space the excerpt takes up rather than its actual length... so 135 is just a guess based on what I've seen.
                    const int lengthLimit = 135;
                    for (var j = 0; j < locHighlight.Count; j++)
                    {
                        if (!shortEx || locHighlight[j] + lenHighlight[j] <= lengthLimit)
                        {
                            continue;
                        }
                        var  start           = locHighlight[j];
                        long newLoc          = -1;
                        var  newLenQuote     = 0;
                        var  newLocHighlight = 0;

                        while (start > -1)
                        {
                            var at = node.InnerHtml.LastIndexOfAny(new[] { '.', '?', '!' }, start);
                            if (at > -1)
                            {
                                start = at - 1;
                                if (locHighlight[j] + lenHighlight[j] + 1 - at - 2 <= lengthLimit)
                                {
                                    newLoc          = location + at + 2;
                                    newLenQuote     = lenQuote - at - 2;
                                    newLocHighlight = locHighlight[j] - at - 2;
                                }
                                else
                                {
                                    break;
                                }
                            }
                            else
                            {
                                break;
                            }
                        }
                        //Only add new locs if shorter excerpt was found
                        if (newLoc >= 0)
                        {
                            character.Locs.Add(new []
                            {
                                newLoc + locOffset,
                                newLenQuote,
                                newLocHighlight,
                                lenHighlight[j]
                            });
                            locHighlight.RemoveAt(j);
                            lenHighlight.RemoveAt(j--);
                        }
                    }

                    for (var j = 0; j < locHighlight.Count; j++)
                    {
                        // For old format
                        character.Locs.Add(new long[]
                        {
                            location + locOffset,
                            lenQuote,
                            locHighlight[j],
                            lenHighlight[j]
                        });
                        // For new format
                        character.Occurrences.Add(new[] { location + locOffset + locHighlight[j], lenHighlight[j] });
                    }
                    var exCheck = xray.Excerpts.Where(t => t.Start.Equals(location + locOffset)).ToArray();
                    if (exCheck.Length > 0)
                    {
                        if (!exCheck[0].RelatedEntities.Contains(character.Id))
                        {
                            exCheck[0].RelatedEntities.Add(character.Id);
                        }
                    }
                    else
                    {
                        var newExcerpt = new Excerpt
                        {
                            Id     = excerptId++,
                            Start  = location + locOffset,
                            Length = lenQuote
                        };
                        newExcerpt.RelatedEntities.Add(character.Id);
                        xray.Excerpts.Add(newExcerpt);
                    }
                }

                // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                if (useNewVersion && xray.NotableClips != null)
                {
                    foreach (var quote in xray.NotableClips)
                    {
                        var index = node.InnerText.IndexOf(quote.Text, StringComparison.Ordinal);
                        if (index > -1)
                        {
                            // See if an excerpt already exists at this location
                            var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == index);
                            if (excerpt == null)
                            {
                                if (skipNoLikes && quote.Likes == 0 ||
                                    quote.Text.Length < minClipLen)
                                {
                                    continue;
                                }
                                excerpt = new Excerpt
                                {
                                    Id         = excerptId++,
                                    Start      = location,
                                    Length     = node.InnerHtml.Length,
                                    Notable    = true,
                                    Highlights = quote.Likes
                                };
                                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                                // TODO: also add other related entities
                                xray.Excerpts.Add(excerpt);
                            }
                            else
                            {
                                excerpt.RelatedEntities.Add(0);
                            }

                            xray.FoundNotables++;
                        }
                    }
                }
                progress?.Add(1);
            }

            timer.Stop();
            _logger.Log($"Scan time: {timer.Elapsed}");
            //output list of terms with no locs
            foreach (var t in xray.Terms.Where(t => t.Match && t.Locs.Count == 0))
            {
                _logger.Log($"No locations were found for the term \"{t.TermName}\".\r\nYou should add aliases for this term using the book or rawml as a reference.");
            }
        }

コード例 #2

ファイルを表示

        // TODO split this up, possible return a result instead of modifying xray
        public void ExpandFromRawMl(
            XRay xray,
            IMetadata metadata,
            Stream rawMlStream,
            bool useNewVersion,
            bool skipNoLikes,
            int minClipLen,
            bool overwriteChapters,
            Func <bool> editChaptersCallback,
            IProgressBar progress,
            CancellationToken token,
            bool ignoreSoftHypen = false,
            bool shortEx         = true)
        {
            // Only load chapters when building the old format
            if (!useNewVersion)
            {
                rawMlStream.Seek(0, SeekOrigin.Begin);
                // TODO: passing stream, doc, and contents probably not necessary)
                using var streamReader = new StreamReader(rawMlStream, Encoding.UTF8);
                var readContents = streamReader.ReadToEnd();
                var utf8Doc      = new HtmlDocument();
                utf8Doc.LoadHtml(readContents);

                _chaptersService.HandleChapters(xray, xray.Asin, rawMlStream.Length, utf8Doc, readContents, overwriteChapters, editChaptersCallback);
            }
            else
            {
                // set default ERL to prevent filtering
                xray.Srl = 1;
                xray.Erl = rawMlStream.Length;
            }

            _logger.Log(CoreStrings.ScanningEbookContent);
            var timer = new System.Diagnostics.Stopwatch();

            timer.Start();

            var paragraphs = _paragraphsService.GetParagraphs(metadata).ToArray();

            if (!paragraphs.Any())
            {
                throw new Exception(CoreStrings.CouldNotLocateAnyParagraphs);
            }

            progress?.Set(0, paragraphs.Length);
            foreach (var paragraph in paragraphs)
            {
                token.ThrowIfCancellationRequested();

                //Skip paragraph if outside known chapter range or if html is missing (shouldn't be, just a safety check)
                if (paragraph.Location < xray.Srl || paragraph.Location > xray.Erl || paragraph.ContentHtml == null)
                {
                    continue;
                }

                var noSoftHypen = "";
                if (ignoreSoftHypen)
                {
                    noSoftHypen = paragraph.ContentText;
                    noSoftHypen = noSoftHypen.Replace("\u00C2\u00AD", "");
                    noSoftHypen = noSoftHypen.Replace("&shy;", "");
                    noSoftHypen = noSoftHypen.Replace("&#xad;", "");
                    noSoftHypen = noSoftHypen.Replace("&#173;", "");
                    noSoftHypen = noSoftHypen.Replace("&#0173;", "");
                }

                foreach (var character in xray.Terms)
                {
                    //Search for character name and aliases in the html-less text. If failed, try in the HTML for rare situations.
                    //TODO: Improve location searching as IndexOf will not work if book length exceeds 2,147,483,647...
                    //If soft hyphen ignoring is turned on, also search hyphen-less text.
                    if (!character.Match)
                    {
                        continue;
                    }

                    var termFound = false;
                    // Convert from UTF8 string to default-encoded representation
                    var search = character.Aliases.Select(alias => _encoding.GetString(Encoding.UTF8.GetBytes(alias)))
                                 .ToList();
                    if (character.RegexAliases)
                    {
                        if (search.Any(r => Regex.Match(paragraph.ContentText, r).Success) ||
                            search.Any(r => Regex.Match(paragraph.ContentHtml !, r).Success) ||
                            (ignoreSoftHypen && search.Any(r => Regex.Match(noSoftHypen, r).Success)))
                        {
                            termFound = true;
                        }
                    }
                    else
                    {
                        // Search for character name and aliases
                        // If there is an apostrophe, attempt to match 's at the end of the term
                        // Match end of word, then search for any lingering punctuation
                        search.Add(character.TermName);
                        // Search list should be in descending order by length, even the term name itself
                        search = search.OrderByDescending(s => s.Length).ToList();

                        // TODO consider removing this "termfound" section 'cause it might be redundant and pointless now
                        if ((character.MatchCase && (search.Any(paragraph.ContentText.Contains) || search.Any(paragraph.ContentHtml.Contains))) ||
                            (!character.MatchCase && (search.Any(paragraph.ContentText.ContainsIgnorecase) || search.Any(paragraph.ContentHtml.ContainsIgnorecase))) ||
                            (ignoreSoftHypen && (character.MatchCase && search.Any(noSoftHypen.Contains)) ||
                             (!character.MatchCase && search.Any(noSoftHypen.ContainsIgnorecase))))
                        {
                            termFound = true;
                        }
                    }

                    if (!termFound)
                    {
                        continue;
                    }

                    var occurrences = _termsService.FindOccurrences(metadata, character, paragraph);
                    if (!occurrences.Any())
                    {
                        // _logger.Log($"An error occurred while searching for start of highlight.\r\nWas looking for (or one of the aliases of): {character.TermName}\r\nSearching in: {node.InnerHtml}");
                        continue;
                    }

                    character.Occurrences.UnionWith(occurrences);

                    ExcerptHelper.EnhanceOrAddExcerpts(xray.Excerpts, character.Id, new IndexLength(paragraph.Location, paragraph.Length));
                }

                // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                if (useNewVersion && xray.NotableClips != null)
                {
                    ExcerptHelper.ProcessNotablesForParagraph(paragraph.ContentText, paragraph.Location, xray.NotableClips, xray.Excerpts, skipNoLikes, minClipLen);
                }

                progress?.Add(1);
            }

            timer.Stop();
            _logger.Log(string.Format(CoreStrings.ScanTime, timer.Elapsed));
            //output list of terms with no occurrences
            foreach (var t in xray.Terms.Where(t => t.Match && t.Occurrences.Count == 0))
            {
                _logger.Log(string.Format(CoreStrings.NoLocationsFoundForTerm, t.TermName));
            }
        }