示例#1
0
文件: Blog.cs 项目: michlG/Caloom
 /// <summary>
 /// Normalizes this object before it is stored: ensures a reference object exists and
 /// mirrors the title and default view URL into it, flags the location, category and
 /// image group collections as filtered, caps the excerpt at 200 characters, applies the
 /// profile image as featured image, and defaults an unset publish date to today (UTC).
 /// </summary>
 public void PerformBeforeStoreUpdate()
 {
     // Create the reference object on first use only.
     if (ReferenceToInformation == null)
     {
         ReferenceToInformation = OIP.ReferenceToInformation.CreateDefault();
     }

     var reference = ReferenceToInformation;
     reference.Title = Title;
     reference.URL   = DefaultViewSupport.GetDefaultViewURL(this);

     LocationCollection.IsCollectionFiltered   = true;
     CategoryCollection.IsCollectionFiltered   = true;
     ImageGroupCollection.IsCollectionFiltered = true;

     // A missing excerpt becomes empty; an overlong one is cut to 200 characters.
     if (Excerpt == null)
     {
         Excerpt = "";
     }
     else if (Excerpt.Length > 200)
     {
         Excerpt = Excerpt.Substring(0, 200);
     }

     SetProfileImageAsFeaturedImage();

     // default(DateTime) is DateTime.MinValue, i.e. "never set".
     if (Published == DateTime.MinValue)
     {
         Published = DateTime.UtcNow.Date;
     }
 }
示例#2
0
        /// <summary>
        /// Verifies that an <c>Excerpt</c> built with the "div" query selector clones the
        /// document exactly once, passing the div's outer HTML under the "Excerpt" key.
        /// </summary>
        public void ExcerptAlternateQuerySelector()
        {
            // Given
            string input = @"<html>
                    <head>
                        <title>Foobar</title>
                    </head>
                    <body>
                        <h1>Title</h1>
                        <p>This is some Foobar text</p>
                        <div>This is some other text</div>
                    </body>
                </html>";
            IDocument document = Substitute.For<IDocument>();

            // The using block disposes the stream even when an assertion below throws.
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
            {
                document.GetStream().Returns(stream);
                Excerpt excerpt = new Excerpt("div");

                // When
                excerpt.Execute(new[] { document }, null).ToList();  // Make sure to materialize the result list

                // Then
                document.Received(1).Clone(Arg.Any<IEnumerable<KeyValuePair<string, object>>>());
                document.Received().Clone(Arg.Is<IEnumerable<KeyValuePair<string, object>>>(x => x.SequenceEqual(new[]
                {
                    new KeyValuePair<string, object>("Excerpt", "<div>This is some other text</div>")
                })));
            }
        }
示例#3
0
            /// <summary>
            /// Verifies that <c>SetMetadataKey("Baz")</c> makes the module request exactly one
            /// new document from the context, with the first paragraph stored under "Baz".
            /// </summary>
            public void ExcerptAlternateMetadataKey()
            {
                // Given
                string input = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <p>This is some other text</p>
                        </body>
                    </html>";
                IDocument document = Substitute.For<IDocument>();
                IExecutionContext context = Substitute.For<IExecutionContext>();

                // The using block disposes the stream even when an assertion below throws.
                using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
                {
                    document.GetStream().Returns(stream);
                    Excerpt excerpt = new Excerpt().SetMetadataKey("Baz");

                    // When
                    excerpt.Execute(new[] { document }, context).ToList();  // Make sure to materialize the result list

                    // Then
                    context.Received(1).GetDocument(Arg.Any<IDocument>(), Arg.Any<IEnumerable<KeyValuePair<string, object>>>());
                    context.Received().GetDocument(document, Arg.Is<IEnumerable<KeyValuePair<string, object>>>(x => x.SequenceEqual(new[]
                    {
                        new KeyValuePair<string, object>("Baz", "<p>This is some Foobar text</p>")
                    })));
                }
            }
示例#4
0
            /// <summary>
            /// Verifies that an excerpt separator comment placed inside a paragraph, between
            /// sibling inline elements, yields the preceding paragraph plus the second
            /// paragraph's content up to the separator.
            /// </summary>
            public void SeparatorInsideParagraphWithSiblings()
            {
                // Given
                string html = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <p>This <b>is</b> some <!-- excerpt --><i>other</i> text</p>
                        </body>
                    </html>";
                var source = new TestDocument(html);
                var executionContext = new TestExecutionContext();
                var module = new Excerpt();

                // When
                IEnumerable<IDocument> outputs = module.Execute(new[] { source }, executionContext).ToList();  // Make sure to materialize the result list

                // Then
                string actual = outputs.Single()["Excerpt"].ToString();
                actual.ShouldBe(
                    @"<p>This is some Foobar text</p>
                            <p>This <b>is</b> some </p>",
                    StringCompareShould.IgnoreLineEndings);
            }
示例#5
0
        /// <summary>
        /// Verifies that <c>SetMetadataKey("Baz")</c> makes the module clone the document
        /// with the first paragraph stored under "Baz". The metadata passed to
        /// <c>Clone</c> is captured through a substitute callback and compared afterwards.
        /// </summary>
        public void ExcerptAlternateMetadataKey()
        {
            // Given
            string input = @"<html>
                    <head>
                        <title>Foobar</title>
                    </head>
                    <body>
                        <h1>Title</h1>
                        <p>This is some Foobar text</p>
                        <p>This is some other text</p>
                    </body>
                </html>";
            IDocument document = Substitute.For<IDocument>();

            // Capture whatever metadata the module hands to Clone.
            IEnumerable<KeyValuePair<string, object>> metadata = null;
            document
                .When(x => x.Clone(Arg.Any<IEnumerable<KeyValuePair<string, object>>>()))
                .Do(x => metadata = x.Arg<IEnumerable<KeyValuePair<string, object>>>());

            // The using block disposes the stream even when an assertion below throws.
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
            {
                document.GetStream().Returns(stream);
                Excerpt excerpt = new Excerpt().SetMetadataKey("Baz");

                // When
                excerpt.Execute(new[] { document }, null).ToList();  // Make sure to materialize the result list

                // Then
                document.Received().Clone(Arg.Any<IEnumerable<KeyValuePair<string, object>>>());
                CollectionAssert.AreEqual(new[] { new KeyValuePair<string, object>("Baz", "<p>This is some Foobar text</p>") }, metadata);
            }
        }
示例#6
0
            /// <summary>
            /// Verifies that when the query selector ("p") matches nothing in the input,
            /// the module never asks the execution context for a new document.
            /// </summary>
            public void NoExcerptReturnsSameDocument()
            {
                // Given
                string input = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <div>This is some Foobar text</div>
                        </body>
                    </html>";
                IDocument document = Substitute.For<IDocument>();
                IExecutionContext context = Substitute.For<IExecutionContext>();

                // The using block disposes the stream even when an assertion below throws.
                using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
                {
                    document.GetStream().Returns(stream);
                    Excerpt excerpt = new Excerpt("p");

                    // When
                    // The substitute context must be handed to the module; with null the
                    // DidNotReceive assertion below could never fail.
                    excerpt.Execute(new[] { document }, context).ToList();  // Make sure to materialize the result list

                    // Then
                    context.DidNotReceiveWithAnyArgs().GetDocument((IDocument)null, (string)null);
                }
            }
示例#7
0
            /// <summary>
            /// Verifies that <c>SetMetadataKey("Baz")</c> makes the module request exactly one
            /// new document from the context, with the first paragraph stored under "Baz".
            /// </summary>
            public void ExcerptAlternateMetadataKey()
            {
                // Given
                string input = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <p>This is some other text</p>
                        </body>
                    </html>";
                IDocument document = Substitute.For<IDocument>();
                IExecutionContext context = Substitute.For<IExecutionContext>();

                // The using block disposes the stream even when an assertion below throws.
                using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
                {
                    document.GetStream().Returns(stream);
                    Excerpt excerpt = new Excerpt().SetMetadataKey("Baz");

                    // When
                    excerpt.Execute(new[] { document }, context).ToList();  // Make sure to materialize the result list

                    // Then
                    context.Received(1).GetDocument(Arg.Any<IDocument>(), Arg.Any<IEnumerable<KeyValuePair<string, object>>>());
                    context.Received().GetDocument(document, Arg.Is<IEnumerable<KeyValuePair<string, object>>>(x => x.SequenceEqual(new[]
                    {
                        new KeyValuePair<string, object>("Baz", "<p>This is some Foobar text</p>")
                    })));
                }
            }
示例#8
0
        // todo just take paragraph instead of indexlength
        /// <summary>
        /// Associates <paramref name="characterId"/> with the excerpt that starts at
        /// <paramref name="excerptLocation"/>'s index. If such an excerpt already exists the
        /// id is added to its related entities (once); otherwise a new excerpt is appended
        /// with the next free id (highest existing id + 1, or 1 for an empty list).
        /// </summary>
        /// <param name="excerpts">List that is searched and, when needed, appended to.</param>
        /// <param name="characterId">Entity id to relate to the excerpt.</param>
        /// <param name="excerptLocation">Supplies the start index and length of the excerpt.</param>
        public static void EnhanceOrAddExcerpts(List <Excerpt> excerpts, int characterId, IndexLength excerptLocation)
        {
            var sameStart = excerpts.Where(t => t.Start.Equals(excerptLocation.Index)).ToArray();

            // Existing excerpt at this location: just make sure the entity is linked.
            if (sameStart.Length > 0)
            {
                var existing = sameStart[0];
                if (!existing.RelatedEntities.Contains(characterId))
                {
                    existing.RelatedEntities.Add(characterId);
                }
                return;
            }

            var nextId = excerpts.Any()
                ? excerpts.Max(excerpt => excerpt.Id) + 1
                : 1;
            var created = new Excerpt
            {
                Id     = nextId,
                Start  = excerptLocation.Index,
                Length = excerptLocation.Length
            };
            created.RelatedEntities.Add(characterId);
            excerpts.Add(created);
        }
示例#9
0
        /// <summary>
        /// Verifies that an <c>Excerpt</c> built with the "div" query selector clones the
        /// document exactly once, passing the div's outer HTML under the "Excerpt" key.
        /// </summary>
        public void ExcerptAlternateQuerySelector()
        {
            // Given
            string input = @"<html>
                    <head>
                        <title>Foobar</title>
                    </head>
                    <body>
                        <h1>Title</h1>
                        <p>This is some Foobar text</p>
                        <div>This is some other text</div>
                    </body>
                </html>";
            IDocument document = Substitute.For<IDocument>();

            // The using block disposes the stream even when an assertion below throws.
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
            {
                document.GetStream().Returns(stream);
                Excerpt excerpt = new Excerpt("div");

                // When
                excerpt.Execute(new[] { document }, null).ToList();  // Make sure to materialize the result list

                // Then
                document.Received(1).Clone(Arg.Any<IEnumerable<KeyValuePair<string, object>>>());
                document.Received().Clone(Arg.Is<IEnumerable<KeyValuePair<string, object>>>(x => x.SequenceEqual(new[]
                {
                    new KeyValuePair<string, object>("Excerpt", "<div>This is some other text</div>")
                })));
            }
        }
示例#10
0
 /// <summary>
 /// Computes a hash code by folding the object's members into a running value:
 /// starting from 47, each step multiplies by 53 and XORs in the member's hash.
 /// Members that are null-checked contribute no step at all when they are null.
 /// </summary>
 /// <returns>The combined hash code.</returns>
 public override int GetHashCode()
 {
     const int multiplier = 53;

     // Integer overflow is expected here and must wrap instead of throwing.
     unchecked
     {
         var hash = 47;

         hash = (hash * multiplier) ^ Id.GetHashCode();
         hash = (hash * multiplier) ^ ResolvedId.GetHashCode();
         if (GivenUrl != null)
         {
             hash = (hash * multiplier) ^ GivenUrl.GetHashCode();
         }
         if (GivenTitle != null)
         {
             hash = (hash * multiplier) ^ GivenTitle.GetHashCode();
         }

         hash = (hash * multiplier) ^ IsFavorite.GetHashCode();
         hash = (hash * multiplier) ^ (int)Status;

         // Timestamps
         hash = (hash * multiplier) ^ TimeAdded.GetHashCode();
         hash = (hash * multiplier) ^ TimeUpdated.GetHashCode();
         hash = (hash * multiplier) ^ TimeRead.GetHashCode();
         hash = (hash * multiplier) ^ TimeFavorited.GetHashCode();
         hash = (hash * multiplier) ^ TimeSyncDatabaseAdded.GetHashCode();
         hash = (hash * multiplier) ^ TimeSyncDatabaseUpdated.GetHashCode();

         if (ResolvedTitle != null)
         {
             hash = (hash * multiplier) ^ ResolvedTitle.GetHashCode();
         }
         if (ResolvedUrl != null)
         {
             hash = (hash * multiplier) ^ ResolvedUrl.GetHashCode();
         }
         if (Excerpt != null)
         {
             hash = (hash * multiplier) ^ Excerpt.GetHashCode();
         }

         hash = (hash * multiplier) ^ IsArticle.GetHashCode();
         hash = (hash * multiplier) ^ IsIndex.GetHashCode();
         hash = (hash * multiplier) ^ ImageContent.GetHashCode();
         hash = (hash * multiplier) ^ VideoContent.GetHashCode();
         hash = (hash * multiplier) ^ WordCount.GetHashCode();

         if (AmpUrl != null)
         {
             hash = (hash * multiplier) ^ AmpUrl.GetHashCode();
         }
         if (Encoding != null)
         {
             hash = (hash * multiplier) ^ Encoding.GetHashCode();
         }
         if (MimeType != null)
         {
             hash = (hash * multiplier) ^ MimeType.GetHashCode();
         }
         if (LeadImage != null)
         {
             hash = (hash * multiplier) ^ LeadImage.GetHashCode();
         }

         return hash;
     }
 }
示例#11
0
 /// <summary>
 /// Disposes <c>Excerpt</c> and then <c>_list</c> while holding <c>_lock</c>.
 /// </summary>
 public void Dispose()
 {
     // Both disposals run under the same lock, Excerpt first.
     lock (_lock)
     {
         Excerpt.Dispose();
         _list.Dispose();
     }
 }
示例#12
0
 /// <summary>
 /// Runs the "Insert" statement of the <c>Excerpt</c> scope through the repository's
 /// SQL mapper, passing <paramref name="entity"/> as the request object.
 /// </summary>
 /// <param name="entity">The excerpt handed to the mapper as request data.</param>
 /// <returns>The scalar value returned by the statement, as a <c>long</c>.</returns>
 public long Insert(Excerpt entity)
 {
     var request = new RequestContext
     {
         Scope   = nameof(Excerpt),
         SqlId   = "Insert",
         Request = entity,
     };

     return _repository.SqlMapper.ExecuteScalar<long>(request);
 }
示例#13
0
        /// <summary>
        /// Returns at most <paramref name="Length"/> leading characters of <c>Excerpt</c>,
        /// followed by <paramref name="Ellipses"/> only when text was actually cut off.
        /// </summary>
        /// <param name="Length">Maximum number of excerpt characters to keep; must not be negative.</param>
        /// <param name="Ellipses">Suffix appended to a truncated excerpt.</param>
        /// <returns>The (possibly truncated) excerpt; an empty string when <c>Excerpt</c> is null.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Length"/> is negative.</exception>
        public string GetExcerpt(int Length, string Ellipses = "...")
        {
            if (Length < 0)
            {
                // Same exception type Substring would raise, but with a meaningful parameter name.
                throw new ArgumentOutOfRangeException(nameof(Length));
            }

            // Treat a missing excerpt as empty instead of failing with a NullReferenceException.
            var source = Excerpt ?? string.Empty;

            // Text that fits exactly (source.Length == Length) is complete and gets no ellipses.
            if (source.Length <= Length)
            {
                return source;
            }

            return source.Substring(0, Length) + Ellipses;
        }
示例#14
0
            /// <summary>
            /// Verifies that an excerpt separator comment placed between paragraphs makes
            /// the excerpt contain every paragraph before the separator. The "Excerpt"
            /// metadata value is captured from the arguments passed to <c>GetDocument</c>.
            /// </summary>
            public void SeparatorBetweenParagraphs()
            {
                // Given
                string input = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <p>This is some other text</p>
                            <!-- excerpt -->
                            <p>This is some more text</p>
                        </body>
                    </html>";
                IDocument document = Substitute.For<IDocument>();
                IExecutionContext context = Substitute.For<IExecutionContext>();
                string result = null;

                // Record the "Excerpt" value the module passes as metadata (second argument).
                context.GetDocument(Arg.Any<IDocument>(), Arg.Any<IEnumerable<KeyValuePair<string, object>>>())
                    .ReturnsForAnyArgs(x =>
                    {
                        result = (string)x.ArgAt<IEnumerable<KeyValuePair<string, object>>>(1)
                            .First(y => y.Key == "Excerpt")
                            .Value;
                        return null;
                    });

                // The using block disposes the stream even when the assertion below throws.
                using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
                {
                    document.GetStream().Returns(stream);
                    Excerpt excerpt = new Excerpt();

                    // When
                    excerpt.Execute(new[] { document }, context).ToList();  // Make sure to materialize the result list

                    // Then
                    Assert.AreEqual("<p>This is some Foobar text</p>\n                            <p>This is some other text</p>", result);
                }
            }
示例#15
0
            /// <summary>
            /// Verifies that when the query selector ("p") matches nothing in the input,
            /// the module returns the input document itself as its single result.
            /// </summary>
            public void NoExcerptReturnsSameDocument()
            {
                // Given
                string html = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <div>This is some Foobar text</div>
                        </body>
                    </html>";
                var source = new TestDocument(html);
                var executionContext = new TestExecutionContext();
                var module = new Excerpt("p");

                // When
                // Note: the module is executed with a null context, not the one created above.
                IEnumerable<IDocument> outputs = module.Execute(new[] { source }, null).ToList();  // Make sure to materialize the result list

                // Then
                IDocument only = outputs.Single();
                only.ShouldBe(source);
            }
示例#16
0
            /// <summary>
            /// Verifies that an <c>Excerpt</c> built with the "div" query selector stores the
            /// div's outer HTML under the "Excerpt" key of the single result document.
            /// </summary>
            public void ExcerptAlternateQuerySelector()
            {
                // Given
                const string html = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <div>This is some other text</div>
                        </body>
                    </html>";
                var source = new TestDocument(html);
                var executionContext = new TestExecutionContext();
                var module = new Excerpt("div");

                // When
                IEnumerable<IDocument> outputs = module.Execute(new[] { source }, executionContext).ToList();  // Make sure to materialize the result list

                // Then
                var actual = outputs.Single()["Excerpt"];
                actual.ShouldBe("<div>This is some other text</div>");
            }
        /// <summary>
        /// Reads the sample markdown file that sits next to the executing assembly with an
        /// excerpt separator configured for "&lt;!--more--&gt;" (and "&lt;!--more1--&gt;" as second
        /// argument), and verifies that the text that was read no longer contains
        /// "&lt;!--more--&gt;".
        /// </summary>
        public void Read_Replace_String()
        {
            var fileInfo =
                new FileInfo(Path.Combine(new FileInfo(Assembly.GetExecutingAssembly().Location).Directory.FullName,
                                          "如何使用本模板搭建博客.md"));

            var mdmetaFile = new MdmetaFile(fileInfo.OpenText());

            var excerpt = new Excerpt(new List <string>()
            {
                "<!--more-->"
            }, "<!--more1-->");

            mdmetaFile.MdmetaXsawJnfzmrs.Add(new SeparatorMdmetaXsawJnfzmr(excerpt));
            mdmetaFile.MdmetaXsawJnfzmrs.Add(new HzvhPaurvmoz());

            var str = mdmetaFile.Read().Text;

            // Expected value goes first so a failure message reads correctly; the marker is
            // searched ordinally because it is a literal token, not linguistic text.
            Assert.AreEqual(-1, str.IndexOf("<!--more-->", StringComparison.Ordinal));
            //Assert.AreEqual(str.IndexOf("<!--more1-->") >= 0, true);

            Console.WriteLine(str);
        }
示例#18
0
            /// <summary>
            /// Verifies that when the input contains several excerpt separator comments,
            /// the excerpt ends at the first one.
            /// </summary>
            public void MultipleSeparatorComments()
            {
                // Given
                string html = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some <!-- excerpt --> Foobar text</p>
                            <p>This is <!-- excerpt --> other text</p>
                        </body>
                    </html>";
                var source = new TestDocument(html);
                var executionContext = new TestExecutionContext();
                var module = new Excerpt();

                // When
                IEnumerable<IDocument> outputs = module.Execute(new[] { source }, executionContext).ToList();  // Make sure to materialize the result list

                // Then
                var actual = outputs.Single()["Excerpt"];
                actual.ShouldBe("<p>This is some </p>");
            }
示例#19
0
            /// <summary>
            /// Verifies that <c>WithOuterHtml(false)</c> makes the excerpt contain only the
            /// content of the first paragraph, without the surrounding tag.
            /// </summary>
            public void ExcerptInnerHtml()
            {
                // Given
                string html = @"<html>
                        <head>
                            <title>Foobar</title>
                        </head>
                        <body>
                            <h1>Title</h1>
                            <p>This is some Foobar text</p>
                            <p>This is some other text</p>
                        </body>
                    </html>";
                var source = new TestDocument(html);
                var executionContext = new TestExecutionContext();
                Excerpt module = new Excerpt().WithOuterHtml(false);

                // When
                IEnumerable<IDocument> outputs = module.Execute(new[] { source }, executionContext).ToList();  // Make sure to materialize the result list

                // Then
                var actual = outputs.Single()["Excerpt"];
                actual.ShouldBe("This is some Foobar text");
            }
示例#20
0
        /// <summary>
        /// For every notable clip whose text occurs in <paramref name="paragraph"/>, marks the
        /// matching excerpt as notable, or creates a new notable excerpt covering the paragraph.
        /// </summary>
        /// <param name="paragraph">Paragraph text that is searched for each clip's text.</param>
        /// <param name="offset">Value added to the match index to form a new excerpt's start.</param>
        /// <param name="notableClips">Clips to look for.</param>
        /// <param name="excerpts">Existing excerpts; updated in place and appended to.</param>
        /// <param name="skipNoLikes">When true, clips with zero likes do not create new excerpts.</param>
        /// <param name="minClipLength">Clips shorter than this do not create new excerpts.</param>
        public static void ProcessNotablesForParagraph(string paragraph, int offset, IEnumerable <NotableClip> notableClips, List <Excerpt> excerpts, bool skipNoLikes, int minClipLength)
        {
            foreach (var quote in notableClips)
            {
                // A null text would make IndexOf throw, and an empty one "matches" at index 0
                // of every paragraph, so neither can identify a real location.
                if (string.IsNullOrEmpty(quote.Text))
                {
                    continue;
                }

                var index = paragraph.IndexOf(quote.Text, StringComparison.Ordinal);
                if (index < 0)
                {
                    continue;
                }

                // See if an excerpt already exists at this location
                // NOTE(review): the lookup compares Start with the paragraph-relative index, while
                // new excerpts below are stored with Start = offset + index — confirm which is intended.
                var excerpt = excerpts.FirstOrDefault(e => e.Start == index);
                if (excerpt != null)
                {
                    excerpt.Notable = true;
                    // 0 is the "notable" marker; add it only once per excerpt.
                    if (!excerpt.RelatedEntities.Contains(0))
                    {
                        excerpt.RelatedEntities.Add(0);
                    }
                    continue;
                }

                if ((skipNoLikes && quote.Likes == 0) || quote.Text.Length < minClipLength)
                {
                    continue;
                }

                excerpt = new Excerpt
                {
                    // Max() throws on an empty sequence, so the first excerpt gets id 1.
                    Id         = excerpts.Any() ? excerpts.Max(e => e.Id) + 1 : 1,
                    Start      = offset + index,
                    Length     = paragraph.Length,
                    Notable    = true,
                    Highlights = quote.Likes
                };
                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                // TODO: also add other related entities
                excerpts.Add(excerpt);
            }
        }
示例#21
0
 /// <summary>
 /// Normalizes this object before it is stored: ensures a reference object exists and
 /// mirrors the activity name and default view URL into it, flags the location, category
 /// and image group collections as filtered, caps the excerpt at 200 characters, and
 /// defaults an unset starting time to today (UTC).
 /// </summary>
 public void PerformBeforeStoreUpdate()
 {
     // Create the reference object on first use only.
     if (ReferenceToInformation == null)
     {
         ReferenceToInformation = OIP.ReferenceToInformation.CreateDefault();
     }

     var reference = ReferenceToInformation;
     reference.Title = ActivityName;
     reference.URL   = DefaultViewSupport.GetDefaultViewURL(this);

     LocationCollection.IsCollectionFiltered   = true;
     CategoryCollection.IsCollectionFiltered   = true;
     ImageGroupCollection.IsCollectionFiltered = true;

     // A missing excerpt becomes empty; an overlong one is cut to 200 characters.
     if (Excerpt == null)
     {
         Excerpt = "";
     }
     else if (Excerpt.Length > 200)
     {
         Excerpt = Excerpt.Substring(0, 200);
     }

     // default(DateTime) is DateTime.MinValue, i.e. "never set".
     if (StartingTime == DateTime.MinValue)
     {
         StartingTime = DateTime.UtcNow.Date;
     }
 }
示例#22
0
        /// <summary>
        /// Verifies that when the query selector ("p") matches nothing in the input,
        /// the module never clones the document.
        /// </summary>
        public void NoExcerptReturnsSameDocument()
        {
            // Given
            string input = @"<html>
                    <head>
                        <title>Foobar</title>
                    </head>
                    <body>
                        <h1>Title</h1>
                        <div>This is some Foobar text</div>
                    </body>
                </html>";
            IDocument document = Substitute.For<IDocument>();

            // The using block disposes the stream even when the assertion below throws.
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(input)))
            {
                document.GetStream().Returns(stream);
                Excerpt excerpt = new Excerpt("p");

                // When
                excerpt.Execute(new[] { document }, null).ToList();  // Make sure to materialize the result list

                // Then
                document.DidNotReceiveWithAnyArgs().Clone((string)null);
            }
        }
 /// <summary>
 /// Creates a markdown file with the given options (a fresh <c>MarkdownOption</c> when
 /// <paramref name="options"/> is null) and the given parent excerpt.
 /// </summary>
 /// <param name="options">Markdown options; may be null.</param>
 /// <param name="db">Not used by this constructor at present.</param>
 /// <param name="parent">Excerpt stored as <c>Parent</c>.</param>
 public MarkdownFile(MarkdownOption options, LiteraryAnalyzerContext db, Excerpt parent)
 {
     //	this.db = db ?? new LiteraryAnalyzerContext();
     if (options == null)
     {
         options = new MarkdownOption();
     }

     MarkdownOptions = options;
     Parent          = parent;
 }
示例#24
0
        // TODO split this up, possible return a result instead of modifying xray
        public void ExpandFromRawMl(
            XRay xray,
            IMetadata metadata,
            Stream rawMlStream,
            bool enableEdit,
            bool useNewVersion,
            bool skipNoLikes,
            int minClipLen,
            bool overwriteChapters,
            SafeShowDelegate safeShow,
            IProgressBar progress,
            CancellationToken token,
            bool ignoreSoftHypen = false,
            bool shortEx         = true)
        {
            var locOffset = metadata.IsAzw3 ? -16 : 0;

            // If there is an apostrophe, attempt to match 's at the end of the term
            // Match end of word, then search for any lingering punctuation
            var apostrophes      = _encoding.GetString(Encoding.UTF8.GetBytes("('|\u2019|\u0060|\u00B4)"));                                                                     // '\u2019\u0060\u00B4
            var quotes           = _encoding.GetString(Encoding.UTF8.GetBytes("(\"|\u2018|\u2019|\u201A|\u201B|\u201C|\u201D|\u201E|\u201F)"));
            var dashesEllipsis   = _encoding.GetString(Encoding.UTF8.GetBytes("(-|\u2010|\u2011|\u2012|\u2013|\u2014|\u2015|\u2026|&#8211;|&#8212;|&#8217;|&#8218;|&#8230;)")); //U+2010 to U+2015 and U+2026
            var punctuationMarks = string.Format(@"({0}s|{0})?{1}?[!\.?,""\);:]*{0}*{1}*{2}*", apostrophes, quotes, dashesEllipsis);

            var excerptId = 0;
            var web       = new HtmlDocument();

            web.Load(rawMlStream, _encoding);

            // Only load chapters when building the old format
            if (!useNewVersion)
            {
                rawMlStream.Seek(0, SeekOrigin.Begin);
                // TODO: passing stream, doc, and contents probably not necessary)
                using var streamReader = new StreamReader(rawMlStream, Encoding.UTF8);
                var readContents = streamReader.ReadToEnd();
                var utf8Doc      = new HtmlDocument();
                utf8Doc.LoadHtml(readContents);
                _chaptersService.HandleChapters(xray, xray.Asin, rawMlStream.Length, utf8Doc, readContents, overwriteChapters, safeShow, xray.Unattended, enableEdit);
            }
            else
            {
                // set default ERL to prevent filtering
                xray.Srl = 1;
                xray.Erl = rawMlStream.Length;
            }

            _logger.Log("Scanning book content...");
            var timer = new System.Diagnostics.Stopwatch();

            timer.Start();
            //Iterate over all paragraphs in book
            var nodes = web.DocumentNode.SelectNodes("//p")
                        ?? web.DocumentNode.SelectNodes("//div[@class='paragraph']")
                        ?? web.DocumentNode.SelectNodes("//div[@class='p-indent']");

            if (nodes == null)
            {
                nodes = web.DocumentNode.SelectNodes("//div");
                _logger.Log("Warning: Could not locate paragraphs normally (p elements or divs of class 'paragraph').\r\n" +
                            "Searching all book contents (all divs), which may produce odd results.");
            }
            if (nodes == null)
            {
                throw new Exception("Could not locate any paragraphs in this book.\r\n" +
                                    "Report this error along with a copy of the book to improve parsing.");
            }
            progress?.Set(0, nodes.Count);
            for (var i = 0; i < nodes.Count; i++)
            {
                token.ThrowIfCancellationRequested();
                var node = nodes[i];
                if (node.FirstChild == null)
                {
                    continue;                          //If the inner HTML is just empty, skip the paragraph!
                }
                var lenQuote = node.InnerHtml.Length;
                var location = node.FirstChild.StreamPosition;
                if (location < 0)
                {
                    throw new Exception($"Unable to locate paragraph {i} within the book content.");
                }

                //Skip paragraph if outside chapter range
                if (location < xray.Srl || location > xray.Erl)
                {
                    continue;
                }
                var noSoftHypen = "";
                if (ignoreSoftHypen)
                {
                    noSoftHypen = node.InnerText;
                    noSoftHypen = noSoftHypen.Replace("\u00C2\u00AD", "");
                    noSoftHypen = noSoftHypen.Replace("&shy;", "");
                    noSoftHypen = noSoftHypen.Replace("&#xad;", "");
                    noSoftHypen = noSoftHypen.Replace("&#173;", "");
                    noSoftHypen = noSoftHypen.Replace("&#0173;", "");
                }
                foreach (var character in xray.Terms)
                {
                    //Search for character name and aliases in the html-less text. If failed, try in the HTML for rare situations.
                    //TODO: Improve location searching as IndexOf will not work if book length exceeds 2,147,483,647...
                    //If soft hyphen ignoring is turned on, also search hyphen-less text.
                    if (!character.Match)
                    {
                        continue;
                    }
                    var termFound = false;
                    // Convert from UTF8 string to default-encoded representation
                    var search = character.Aliases.Select(alias => _encoding.GetString(Encoding.UTF8.GetBytes(alias)))
                                 .ToList();
                    if (character.RegexAliases)
                    {
                        if (search.Any(r => Regex.Match(node.InnerText, r).Success) ||
                            search.Any(r => Regex.Match(node.InnerHtml, r).Success) ||
                            (ignoreSoftHypen && search.Any(r => Regex.Match(noSoftHypen, r).Success)))
                        {
                            termFound = true;
                        }
                    }
                    else
                    {
                        // Search for character name and aliases
                        // If there is an apostrophe, attempt to match 's at the end of the term
                        // Match end of word, then search for any lingering punctuation
                        search.Add(character.TermName);
                        // Search list should be in descending order by length, even the term name itself
                        search = search.OrderByDescending(s => s.Length).ToList();
                        if ((character.MatchCase && (search.Any(node.InnerText.Contains) || search.Any(node.InnerHtml.Contains))) ||
                            (!character.MatchCase && (search.Any(node.InnerText.ContainsIgnorecase) || search.Any(node.InnerHtml.ContainsIgnorecase))) ||
                            (ignoreSoftHypen && (character.MatchCase && search.Any(noSoftHypen.Contains)) ||
                             (!character.MatchCase && search.Any(noSoftHypen.ContainsIgnorecase))))
                        {
                            termFound = true;
                        }
                    }

                    if (!termFound)
                    {
                        continue;
                    }

                    var locHighlight = new List <int>();
                    var lenHighlight = new List <int>();
                    //Search html for character name and aliases
                    foreach (var s in search)
                    {
                        var matches = Regex.Matches(node.InnerHtml, $@"{quotes}?\b{s}{punctuationMarks}", character.MatchCase || character.RegexAliases ? RegexOptions.None : RegexOptions.IgnoreCase);
                        foreach (Match match in matches)
                        {
                            if (locHighlight.Contains(match.Index) && lenHighlight.Contains(match.Length))
                            {
                                continue;
                            }
                            locHighlight.Add(match.Index);
                            lenHighlight.Add(match.Length);
                        }
                    }
                    //If normal search fails, use regexp to search in case there is some wacky html nested in term
                    //Regexp may be less than ideal for parsing HTML but seems to work ok so far in these small paragraphs
                    //Also search in soft hyphen-less text if option is set to do so
                    if (locHighlight.Count == 0)
                    {
                        foreach (var s in search)
                        {
                            var          patterns    = new List <string>();
                            const string patternHtml = "(?:<[^>]*>)*";
                            //Match HTML tags -- provided there's nothing malformed
                            const string patternSoftHypen = "(\u00C2\u00AD|&shy;|&#173;|&#xad;|&#0173;|&#x00AD;)*";
                            var          pattern          = string.Format("{0}{1}{0}{2}",
                                                                          patternHtml,
                                                                          string.Join(patternHtml + patternSoftHypen, character.RegexAliases ? s.ToCharArray() : Regex.Unescape(s).ToCharArray()),
                                                                          punctuationMarks);
                            patterns.Add(pattern);
                            foreach (var pat in patterns)
                            {
                                MatchCollection matches;
                                if (character.MatchCase || character.RegexAliases)
                                {
                                    matches = Regex.Matches(node.InnerHtml, pat);
                                }
                                else
                                {
                                    matches = Regex.Matches(node.InnerHtml, pat, RegexOptions.IgnoreCase);
                                }
                                foreach (Match match in matches)
                                {
                                    if (locHighlight.Contains(match.Index) && lenHighlight.Contains(match.Length))
                                    {
                                        continue;
                                    }
                                    locHighlight.Add(match.Index);
                                    lenHighlight.Add(match.Length);
                                }
                            }
                        }
                    }
                    if (locHighlight.Count == 0 || locHighlight.Count != lenHighlight.Count) //something went wrong
                    {
                        // _logger.Log($"An error occurred while searching for start of highlight.\r\nWas looking for (or one of the aliases of): {character.TermName}\r\nSearching in: {node.InnerHtml}");
                        continue;
                    }

                    //If an excerpt is too long, the X-Ray reader cuts it off.
                    //If the location of the highlighted word (character name) within the excerpt is far enough in to get cut off,
                    //this section attempts to shorted the excerpt by locating the start of a sentence that is just far enough away from the highlight.
                    //The length is determined by the space the excerpt takes up rather than its actual length... so 135 is just a guess based on what I've seen.
                    const int lengthLimit = 135;
                    for (var j = 0; j < locHighlight.Count; j++)
                    {
                        if (!shortEx || locHighlight[j] + lenHighlight[j] <= lengthLimit)
                        {
                            continue;
                        }
                        var  start           = locHighlight[j];
                        long newLoc          = -1;
                        var  newLenQuote     = 0;
                        var  newLocHighlight = 0;

                        while (start > -1)
                        {
                            var at = node.InnerHtml.LastIndexOfAny(new[] { '.', '?', '!' }, start);
                            if (at > -1)
                            {
                                start = at - 1;
                                if (locHighlight[j] + lenHighlight[j] + 1 - at - 2 <= lengthLimit)
                                {
                                    newLoc          = location + at + 2;
                                    newLenQuote     = lenQuote - at - 2;
                                    newLocHighlight = locHighlight[j] - at - 2;
                                }
                                else
                                {
                                    break;
                                }
                            }
                            else
                            {
                                break;
                            }
                        }
                        //Only add new locs if shorter excerpt was found
                        if (newLoc >= 0)
                        {
                            character.Locs.Add(new []
                            {
                                newLoc + locOffset,
                                newLenQuote,
                                newLocHighlight,
                                lenHighlight[j]
                            });
                            locHighlight.RemoveAt(j);
                            lenHighlight.RemoveAt(j--);
                        }
                    }

                    for (var j = 0; j < locHighlight.Count; j++)
                    {
                        // For old format
                        character.Locs.Add(new long[]
                        {
                            location + locOffset,
                            lenQuote,
                            locHighlight[j],
                            lenHighlight[j]
                        });
                        // For new format
                        character.Occurrences.Add(new[] { location + locOffset + locHighlight[j], lenHighlight[j] });
                    }
                    var exCheck = xray.Excerpts.Where(t => t.Start.Equals(location + locOffset)).ToArray();
                    if (exCheck.Length > 0)
                    {
                        if (!exCheck[0].RelatedEntities.Contains(character.Id))
                        {
                            exCheck[0].RelatedEntities.Add(character.Id);
                        }
                    }
                    else
                    {
                        var newExcerpt = new Excerpt
                        {
                            Id     = excerptId++,
                            Start  = location + locOffset,
                            Length = lenQuote
                        };
                        newExcerpt.RelatedEntities.Add(character.Id);
                        xray.Excerpts.Add(newExcerpt);
                    }
                }

                // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                if (useNewVersion && xray.NotableClips != null)
                {
                    foreach (var quote in xray.NotableClips)
                    {
                        var index = node.InnerText.IndexOf(quote.Text, StringComparison.Ordinal);
                        if (index > -1)
                        {
                            // See if an excerpt already exists at this location
                            var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == index);
                            if (excerpt == null)
                            {
                                if (skipNoLikes && quote.Likes == 0 ||
                                    quote.Text.Length < minClipLen)
                                {
                                    continue;
                                }
                                excerpt = new Excerpt
                                {
                                    Id         = excerptId++,
                                    Start      = location,
                                    Length     = node.InnerHtml.Length,
                                    Notable    = true,
                                    Highlights = quote.Likes
                                };
                                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                                // TODO: also add other related entities
                                xray.Excerpts.Add(excerpt);
                            }
                            else
                            {
                                excerpt.RelatedEntities.Add(0);
                            }

                            xray.FoundNotables++;
                        }
                    }
                }
                progress?.Add(1);
            }

            timer.Stop();
            _logger.Log($"Scan time: {timer.Elapsed}");
            //output list of terms with no locs
            foreach (var t in xray.Terms.Where(t => t.Match && t.Locs.Count == 0))
            {
                _logger.Log($"No locations were found for the term \"{t.TermName}\".\r\nYou should add aliases for this term using the book or rawml as a reference.");
            }
        }
示例#25
0
        /// <summary>
        /// Reads the rawML file, determines the chapter list (loaded from the .chapters file, detected
        /// from the book, or a single default chapter), then scans every paragraph for each matching
        /// term and records the locations, occurrences and excerpts that were found.
        /// Finally tries to locate the notable quotes inside the book content.
        /// </summary>
        /// <param name="rawMl">Path of the rawML file to read.</param>
        /// <param name="ignoreSoftHypen">When true, terms are also searched in the paragraph text with soft hyphens stripped.</param>
        /// <param name="shortEx">When true, excerpts whose first highlight lies beyond the length limit are shortened to start at a later sentence.</param>
        /// <returns>0 when the scan completed; 1 when it was aborted (application exiting or a paragraph could not be located).</returns>
        /// <exception cref="Exception">Thrown when no paragraph nodes can be located in the book.</exception>
        public int ExpandFromRawMl(string rawMl, bool ignoreSoftHypen = false, bool shortEx = true)
        {
            int excerptId = 0;
            this._shortEx = shortEx;
            HtmlAgilityPack.HtmlDocument web = new HtmlAgilityPack.HtmlDocument();
            string readContents;
            using (StreamReader streamReader = new StreamReader(rawMl, Encoding.Default))
            {
                readContents = streamReader.ReadToEnd();
            }
            web.LoadHtml(readContents);
            //Similar to aliases, if chapters definition exists, load it. Otherwise, attempt to build it from the book
            string chapterFile = Environment.CurrentDirectory + @"\ext\" + asin + ".chapters";
            if (File.Exists(chapterFile) && !Properties.Settings.Default.overwriteChapters)
            {
                if (LoadChapters())
                    main.Log(String.Format("Chapters read from {0}.\r\nDelete this file if you want chapters built automatically.", chapterFile));
                else
                    main.Log(String.Format("Failed to read chapters from {0}.\r\nFile is missing or not formatted correctly.", chapterFile));
            }
            else
            {
                try
                {
                    SearchChapters(web, readContents);
                }
                catch (Exception ex)
                {
                    // Chapter detection is best-effort: a failure only means no chapters are built automatically
                    Console.WriteLine(ex.Message); //Just ignore errors
                }
                //Built chapters list is saved for manual editing
                if (_chapters.Count > 0)
                {
                    SaveChapters();
                    main.Log(String.Format("Chapters exported to {0} for manual editing.", chapterFile));
                }
                else
                    main.Log(
                        String.Format(
                            "No chapters detected.\r\nYou can create a file at {0} if you want to define chapters manually.",
                            chapterFile));
            }

            if (enableEdit)
                if (DialogResult.Yes ==
                    MessageBox.Show("Would you like to open the chapters file in notepad for editing?", "Chapters",
                        MessageBoxButtons.YesNo, MessageBoxIcon.Question, MessageBoxDefaultButton.Button2))
                {
                    Functions.RunNotepad(chapterFile);
                    _chapters.Clear();
                    if (LoadChapters())
                        main.Log("Reloaded chapters from edited file.");
                    else
                        main.Log(
                            String.Format(
                                "Failed to reload chapters from {0}.\r\nFile is missing or not formatted correctly.",
                                chapterFile));
                }

            //If no chapters were found, add a default chapter that spans the entire book
            //Define srl and erl so "progress bar" shows up correctly
            if (_chapters.Count == 0)
            {
                long len = (new FileInfo(rawMl)).Length;
                _chapters.Add(new Chapter("", 1, len));
                _srl = 1;
                _erl = len;
            }
            else
            {
                //Run through all chapters and take the highest value, in case some chapters can be defined in individual chapters and parts.
                //EG. Part 1 includes chapters 1-6, Part 2 includes chapters 7-12.
                _srl = _chapters[0].start;
                main.Log("Found chapters:");
                foreach (Chapter c in _chapters)
                {
                    if (c.End > _erl) _erl = c.End;
                    main.Log(String.Format("{0} | start: {1} | end: {2}", c.name, c.start, c.End));
                }
            }

            main.Log("Scanning book content...");
            System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();
            timer.Start();
            //Iterate over all paragraphs in book
            //Fall back through progressively less specific selectors until something is found
            HtmlNodeCollection nodes = web.DocumentNode.SelectNodes("//p");
            if (nodes == null)
                nodes = web.DocumentNode.SelectNodes("//div[@class='paragraph']");
            if (nodes == null)
                nodes = web.DocumentNode.SelectNodes("//div[@class='p-indent']");
            if (nodes == null)
            {
                nodes = web.DocumentNode.SelectNodes("//div");
                main.Log("Warning: Could not locate paragraphs normally (p elements or divs of class 'paragraph').\r\n" +
                    "Searching all book contents (all divs), which may produce odd results.");
            }
            if (nodes == null)
                throw new Exception("Could not locate any paragraphs in this book.\r\n" +
                    "Report this error along with a copy of the book to improve parsing.");
            main.prgBar.Maximum = nodes.Count;
            for (int i = 0; i < nodes.Count; i++)
            {
                if (main.Exiting) return 1;
                main.prgBar.Value = (i + 1);
                //Pump the message loop every 5 paragraphs
                if (((i + 1)%5) == 0) Application.DoEvents();

                HtmlNode node = nodes[i];
                if (node.FirstChild == null) continue; //If the inner HTML is just empty, skip the paragraph!
                int lenQuote = node.InnerHtml.Length;
                int location = node.FirstChild.StreamPosition;
                if (location < 0)
                {
                    main.Log("There was an error locating the paragraph within the book content.");
                    return 1;
                }
                if (location < _srl || location > _erl) continue; //Skip paragraph if outside chapter range
                string noSoftHypen = "";
                if (ignoreSoftHypen)
                {
                    //Strip every soft hyphen representation that the fallback regex below also recognises
                    noSoftHypen = node.InnerText;
                    noSoftHypen = noSoftHypen.Replace("\u00C2\u00AD", "");
                    noSoftHypen = noSoftHypen.Replace("&shy;", "");
                    noSoftHypen = noSoftHypen.Replace("&#xad;", "");
                    noSoftHypen = noSoftHypen.Replace("&#173;", "");
                    noSoftHypen = noSoftHypen.Replace("&#0173;", "");
                    noSoftHypen = noSoftHypen.Replace("&#x00AD;", "");
                }
                foreach (Term character in Terms)
                {
                    //Search for character name and aliases in the html-less text. If failed, try in the HTML for rare situations.
                    //TODO: Improve location searching as IndexOf will not work if book length exceeds 2,147,483,647...
                    //If soft hyphen ignoring is turned on, also search hyphen-less text.
                    if (!character.Match) continue;
                    bool termFound = false;
                    List<string> search = character.Aliases.ToList<string>();
                    if (character.RegEx)
                    {
                        //Aliases are treated as regular expressions; the term name itself is not searched
                        if (search.Any(r => Regex.Match(node.InnerText, r).Success)
                            || search.Any(r => Regex.Match(node.InnerHtml, r).Success)
                            || (ignoreSoftHypen && search.Any(r => Regex.Match(noSoftHypen, r).Success)))
                            termFound = true;
                    }
                    else
                    {
                        search.Insert(0, character.TermName);
                        //The soft-hyphen-less text is only consulted when ignoreSoftHypen is set, for both case modes
                        if ((character.MatchCase && (search.Any(node.InnerText.Contains) || search.Any(node.InnerHtml.Contains)))
                            || (!character.MatchCase && (search.Any(node.InnerText.ContainsIgnorecase) || search.Any(node.InnerHtml.ContainsIgnorecase)))
                            || (ignoreSoftHypen && ((character.MatchCase && search.Any(noSoftHypen.Contains))
                                || (!character.MatchCase && search.Any(noSoftHypen.ContainsIgnorecase)))))
                            termFound = true;
                    }
                    if (termFound)
                    {
                        //Parallel lists: locHighlight[k] is the index of a match inside InnerHtml, lenHighlight[k] its length
                        List<int> locHighlight = new List<int>();
                        List<int> lenHighlight = new List<int>();
                        string punctuationMarks = @"\S*[!\.?,""'\);]*";
                        //Search html for the matching term out of all aliases
                        foreach (string s in search)
                        {
                            MatchCollection matches = Regex.Matches(node.InnerHtml, s + punctuationMarks, character.MatchCase || character.RegEx ? RegexOptions.None : RegexOptions.IgnoreCase);
                            foreach (Match match in matches)
                            {
                                //If the pattern defines a capture group, highlight the first group instead of the whole match
                                if (match.Groups.Count > 1)
                                {
                                    locHighlight.Add(match.Groups[1].Index);
                                    lenHighlight.Add(match.Groups[1].Length);
                                }
                                else
                                {
                                    locHighlight.Add(match.Index);
                                    lenHighlight.Add(match.Length);
                                }
                            }
                        }
                        //If normal search fails, use regexp to search in case there is some wacky html nested in term
                        //Regexp may be less than ideal for parsing HTML but seems to work ok so far in these small paragraphs
                        //Also search in soft hyphen-less text if option is set to do so
                        if (locHighlight.Count == 0)
                        {
                            foreach (string s in search)
                            {
                                List<string> patterns = new List<string>();
                                string pattern;
                                string patternHTML = "(?:<[^>]*>)*";
                                //Match HTML tags -- provided there's nothing malformed
                                string patternSoftHypen = "(\u00C2\u00AD|&shy;|&#173;|&#xad;|&#0173;|&#x00AD;)*";
                                //Allow optional tags and soft hyphens between every character of the term
                                pattern = String.Format("{0}{1}{0}{2}", patternHTML,
                                    string.Join(patternHTML + patternSoftHypen, character.RegEx ? s.ToCharArray() : Regex.Unescape(s).ToCharArray()), punctuationMarks);
                                if (character.MatchCase)
                                    pattern += "(?=[^a-zA-Z])";
                                patterns.Add(pattern);
                                foreach (string pat in patterns)
                                {
                                    MatchCollection matches;
                                    if (character.MatchCase || character.RegEx)
                                        matches = Regex.Matches(node.InnerHtml, pat);
                                    else
                                        matches = Regex.Matches(node.InnerHtml, pat, RegexOptions.IgnoreCase);
                                    foreach (Match match in matches)
                                    {
                                        locHighlight.Add(match.Index);
                                        lenHighlight.Add(match.Length);
                                    }
                                }
                            }
                        }
                        if (locHighlight.Count == 0 || locHighlight.Count != lenHighlight.Count) //something went wrong
                        {
                            main.Log(
                                String.Format(
                                    "Something went wrong while searching for start of highlight.\nWas looking for (or one of the aliases of): {0}\nSearching in: {1}",
                                    character.TermName, node.InnerHtml));
                            continue;
                        }

                        //If an excerpt is too long, the X-Ray reader cuts it off.
                        //If the location of the highlighted word (character name) within the excerpt is far enough in to get cut off,
                        //this section attempts to shorten the excerpt by locating the start of a sentence that is just far enough away from the highlight.
                        //The length is determined by the space the excerpt takes up rather than its actual length... so 135 is just a guess based on what I've seen.
                        int lengthLimit = 135;
                        if (shortEx && locHighlight[0] + lenHighlight[0] > lengthLimit)
                        {
                            int start = locHighlight[0];
                            int at = 0;
                            long newLoc = -1;
                            int newLenQuote = 0;
                            int newLocHighlight = 0;

                            //Walk backwards over sentence terminators, keeping the earliest one that still fits within the limit
                            while ((start > -1) && (at > -1))
                            {
                                at = node.InnerHtml.LastIndexOfAny(new char[] { '.', '?', '!' }, start);
                                if (at > -1)
                                {
                                    start = at - 1;

                                    if ((locHighlight[0] + lenHighlight[0] + 1 - at - 2) <= lengthLimit)
                                    {
                                        //Skip the terminator and the character following it (at + 2)
                                        newLoc = location + at + 2;
                                        newLenQuote = lenQuote - at - 2;
                                        newLocHighlight = locHighlight[0] - at - 2;
                                    }
                                    else break;
                                }
                                else break;
                            }
                            //Only add new locs if shorter excerpt was found
                            if (newLoc >= 0)
                            {
                                //Use the length of the first highlight; formatting the list itself would emit its type name
                                character.Locs.Add(String.Format("[{0},{1},{2},{3}]", newLoc + locOffset, newLenQuote,
                                    newLocHighlight, lenHighlight[0]));
                                //NOTE(review): this skips the occurrence and excerpt bookkeeping below for this term/paragraph -- confirm that is intended
                                continue;
                            }
                        }

                        for (int j = 0; j < locHighlight.Count; j++)
                        {
                            character.Locs.Add(String.Format("[{0},{1},{2},{3}]", location + locOffset, lenQuote,
                                locHighlight[j], lenHighlight[j])); // For old format
                            character.Occurrences.Add(new int[] { location + locOffset + locHighlight[j], lenHighlight[j] }); // For new format
                        }
                        //Reuse the excerpt already registered for this paragraph, otherwise create one
                        List<Excerpt> exCheck = excerpts.Where(t => t.start.Equals(location + locOffset)).ToList();
                        if (exCheck.Count > 0)
                        {
                            if (!exCheck[0].related_entities.Contains(character.Id))
                                exCheck[0].related_entities.Add(character.Id);
                        }
                        else
                        {
                            Excerpt newExcerpt = new Excerpt(excerptId++, location + locOffset, lenQuote);
                            newExcerpt.related_entities.Add(character.Id);
                            excerpts.Add(newExcerpt);
                        }
                    }
                }
            }

            // Attempt to match any quotes from Shelfari for Notable Clips, not worried if no matches occur as they will be added later anyway
            if (Properties.Settings.Default.useNewVersion)
            {
                // quote[0] is used as the quote text and quote[1] as a term name to look up
                foreach (string[] quote in notableShelfariQuotes)
                {
                    int index = readContents.IndexOf(quote[0]);
                    if (index > -1)
                    {
                        Excerpt excerpt = excerpts.FirstOrDefault(e => e.start == index);
                        if (excerpt == null)
                        {
                            excerpt = new Excerpt(excerptId++, index, quote[0].Length);
                            if (quote[1] != "")
                            {
                                Term foundterm = Terms.FirstOrDefault(t => t.TermName == quote[1]);
                                if (foundterm != null)
                                    excerpt.related_entities.Add(foundterm.Id);
                            }
                            excerpts.Add(excerpt);
                        }
                        foundNotables++;
                        excerpt.related_entities.Add(0);
                    }
                }
            }

            timer.Stop();
            main.Log("Scan time: " + timer.Elapsed);
            //output list of terms with no locs
            foreach (Term t in Terms)
            {
                if (t.Match && t.Locs.Count == 0)
                    main.Log(
                        String.Format(
                            "No locations were found for the term \"{0}\".\r\nYou should add aliases for this term using the book or rawml as a reference.",
                            t.TermName));
            }
            return 0;
        }
示例#26
0
        /// <summary>
        /// Scans every content chunk of the KFX container for the matching terms of <paramref name="xray"/>,
        /// recording term occurrences and one excerpt per chunk that contains a term, then tries to match
        /// the downloaded notable clips against the chunk text. Terms without any occurrence are logged at the end.
        /// </summary>
        /// <param name="xray">X-Ray whose terms are searched and whose excerpts, Srl/Erl and notable counter are updated.</param>
        /// <param name="kfx">Container that supplies the content chunks.</param>
        /// <param name="skipNoLikes">When true, a notable clip with zero likes does not create a new excerpt.</param>
        /// <param name="minClipLen">Notable clips whose text is shorter than this do not create a new excerpt.</param>
        /// <param name="progress">Optional progress reporter; advanced once per content chunk.</param>
        /// <param name="token">Checked at the start of every chunk; cancellation surfaces via <c>ThrowIfCancellationRequested</c>.</param>
        public void AddLocations(XRay xray,
                                 KfxContainer kfx,
                                 bool skipNoLikes,
                                 int minClipLen,
                                 IProgressBar progress,
                                 CancellationToken token)
        {
            _logger.Log("Scanning book content...");
            var contentChunks = kfx.GetContentChunks();

            // Set start and end of content
            // TODO Figure out how to identify the first *actual* bit of content after the TOC
            // NOTE(review): Last() presumably throws if the container has no chunks -- confirm callers guarantee at least one
            var last = contentChunks.Last();

            xray.Srl = 1;
            xray.Erl = last.Pid + last.Length - 1;

            var offset    = 0;
            var excerptId = 0;

            progress?.Set(0, contentChunks.Count);
            foreach (var contentChunk in contentChunks)
            {
                token.ThrowIfCancellationRequested();

                if (contentChunk.ContentText != null)
                {
                    // Position of this chunk within the whole content; every excerpt of the chunk starts here
                    var chunkStart = offset;

                    foreach (var character in xray.Terms.Where(term => term.Match))
                    {
                        // If the aliases are not supposed to be in regex format, escape them
                        var aliases = character.RegexAliases
                            ? character.Aliases
                            : character.Aliases.Select(Regex.Escape);

                        var searchList = new[] { character.TermName }.Concat(aliases).ToArray();

                        //Search content for character name and aliases, respecting the case setting
                        var regexOptions = character.MatchCase || character.RegexAliases
                            ? RegexOptions.None
                            : RegexOptions.IgnoreCase;

                        // Group match lengths by their absolute position (chunk start + index inside the chunk)
                        var highlights = searchList
                                         .Select(search => Regex.Matches(contentChunk.ContentText, $@"{Quotes}?\b{search}{_punctuationMarks}", regexOptions))
                                         .SelectMany(matches => matches.Cast<Match>())
                                         .ToLookup(match => chunkStart + match.Index, match => match.Length);

                        if (highlights.Count == 0)
                        {
                            continue;
                        }

                        var highlightOccurrences = highlights.SelectMany(highlightGroup => highlightGroup.Select(highlight => new[] { highlightGroup.Key, highlight }));
                        character.Occurrences.AddRange(highlightOccurrences);

                        // Check excerpts
                        var exCheck = xray.Excerpts.Where(t => t.Start.Equals(chunkStart)).ToArray();
                        if (exCheck.Length > 0)
                        {
                            if (!exCheck[0].RelatedEntities.Contains(character.Id))
                            {
                                exCheck[0].RelatedEntities.Add(character.Id);
                            }
                        }
                        else
                        {
                            var newExcerpt = new Excerpt
                            {
                                Id     = excerptId++,
                                Start  = chunkStart,
                                Length = contentChunk.Length
                            };
                            newExcerpt.RelatedEntities.Add(character.Id);
                            xray.Excerpts.Add(newExcerpt);
                        }
                    }

                    // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                    if (xray.NotableClips != null)
                    {
                        foreach (var quote in xray.NotableClips)
                        {
                            var index = contentChunk.ContentText.IndexOf(quote.Text, StringComparison.Ordinal);
                            if (index <= -1)
                            {
                                continue;
                            }

                            // See if an excerpt already exists for this chunk.
                            // Excerpts are stored with the chunk's start as their Start, so the lookup has to use
                            // that value; the chunk-local index of the quote never identifies an excerpt.
                            var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == chunkStart);
                            if (excerpt == null)
                            {
                                if (skipNoLikes && quote.Likes == 0 ||
                                    quote.Text.Length < minClipLen)
                                {
                                    continue;
                                }
                                excerpt = new Excerpt
                                {
                                    Id         = excerptId++,
                                    Start      = chunkStart,
                                    Length     = contentChunk.Length,
                                    Notable    = true,
                                    Highlights = quote.Likes
                                };
                                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                                // TODO: also add other related entities
                                xray.Excerpts.Add(excerpt);
                            }
                            else if (!excerpt.RelatedEntities.Contains(0))
                            {
                                // Mark the existing excerpt as notable only once, even if several clips hit this chunk
                                excerpt.RelatedEntities.Add(0);
                            }

                            xray.FoundNotables++;
                        }
                    }
                }

                // The maximum was set to the total chunk count, so advance for chunks without text as well
                progress?.Add(1);

                offset += contentChunk.Length;
            }

            var missingOccurrences = xray.Terms
                                     .Where(term => term.Match && term.Occurrences.Count == 0)
                                     .Select(term => term.TermName)
                                     .ToArray();

            if (!missingOccurrences.Any())
            {
                return;
            }

            var termList = string.Join(", ", missingOccurrences);

            _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
        }
示例#27
0
 /// <summary>
 /// Writes the display string for an excerpt into <paramref name="e"/>: the excerpt's text
 /// (an empty string when the text is null) passed through <c>MaxLength(100)</c>.
 /// </summary>
 /// <param name="obj">Excerpt whose text is rendered.</param>
 /// <param name="e">Event args that receive the resulting string in <c>Result</c>.</param>
 public static void ToString(Excerpt obj, MethodReturnEventArgs<string> e)
 {
     // Read the text once and substitute an empty string for null
     var text = obj.Text;
     if (text == null)
     {
         text = string.Empty;
     }

     // MaxLength is a helper defined elsewhere; presumably it caps the string at 100 characters
     e.Result = text.MaxLength(100);
 }