Exemple #1
0
        /// <summary>
        /// Walks every paragraph of the KFX book, recording where each matchable term occurs and
        /// which excerpts relate to it, then logs the names of any matchable terms that were never found.
        /// </summary>
        /// <param name="xray">X-Ray whose start/end locations, term occurrences and excerpts are updated.</param>
        /// <param name="kfx">Book container the paragraphs are read from.</param>
        /// <param name="skipNoLikes">Forwarded to <c>ExcerptHelper.ProcessNotablesForParagraph</c>.</param>
        /// <param name="minClipLen">Forwarded to <c>ExcerptHelper.ProcessNotablesForParagraph</c>.</param>
        /// <param name="progress">Optional progress reporter; advanced once per paragraph.</param>
        /// <param name="token">Checked before each paragraph is processed.</param>
        public void AddLocations(XRay xray,
                                 KfxContainer kfx,
                                 bool skipNoLikes,
                                 int minClipLen,
                                 IProgressBar progress,
                                 CancellationToken token)
        {
            _logger.Log("Scanning book content...");

            var allParagraphs = _paragraphsService.GetParagraphs(kfx).ToArray();

            // Set start and end of content
            // TODO Figure out how to identify the first *actual* bit of content after the TOC
            var finalParagraph = allParagraphs.Last();
            xray.Srl = 1;
            xray.Erl = finalParagraph.Location + finalParagraph.Length - 1;

            progress?.Set(0, allParagraphs.Length);

            for (var i = 0; i < allParagraphs.Length; i++)
            {
                token.ThrowIfCancellationRequested();

                var current = allParagraphs[i];

                foreach (var candidate in xray.Terms)
                {
                    // Only terms flagged for matching are searched for
                    if (!candidate.Match)
                    {
                        continue;
                    }

                    var found = _termsService.FindOccurrences(kfx, candidate, current);
                    if (found.Any())
                    {
                        candidate.Occurrences.UnionWith(found);

                        // The whole paragraph becomes (or extends) an excerpt related to this term
                        var paragraphSpan = new IndexLength(current.Location, current.Length);
                        ExcerptHelper.EnhanceOrAddExcerpts(xray.Excerpts, candidate.Id, paragraphSpan);
                    }
                }

                // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                if (xray.NotableClips != null)
                {
                    ExcerptHelper.ProcessNotablesForParagraph(current.ContentText, current.Location, xray.NotableClips, xray.Excerpts, skipNoLikes, minClipLen);
                }

                progress?.Add(1);
            }

            // Collect the names of matchable terms that were not located anywhere in the book
            var missingOccurrences = xray.Terms
                                     .Where(term => term.Match && term.Occurrences.Count == 0)
                                     .Select(term => term.TermName)
                                     .ToArray();

            if (missingOccurrences.Length > 0)
            {
                var termList = string.Join(", ", missingOccurrences);

                _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
            }
        }
Exemple #2
0
        /// <summary>
        /// Verifies that the default table of contents read from <paramref name="kfxFile"/> exists
        /// and contains <paramref name="tocLength"/> entries.
        /// </summary>
        /// <param name="kfxFile">Path of the KFX file to open.</param>
        /// <param name="tocLength">Expected number of table-of-contents entries.</param>
        public void DefaultTocTest(string kfxFile, int tocLength)
        {
            // Dispose the stream when the test finishes so the file handle is not leaked
            using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx = new KfxContainer(fs);
            var toc = kfx.GetDefaultToc();

            Assert.NotNull(toc);
            Assert.AreEqual(tocLength, toc.Count);
        }
Exemple #3
0
        /// <summary>
        /// Verifies that the page count read from <paramref name="kfxFile"/> equals <paramref name="pages"/>.
        /// </summary>
        /// <param name="kfxFile">Path of the KFX file to open.</param>
        /// <param name="pages">Expected page count.</param>
        public void GetPageCountTest(string kfxFile, int pages)
        {
            // Dispose the stream when the test finishes so the file handle is not leaked
            using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx = new KfxContainer(fs);
            var pageCount = kfx.GetPageCount();

            Assert.NotNull(pageCount);
            Assert.AreEqual(pages, pageCount);
        }
Exemple #4
0
        /// <summary>
        /// Verifies that the cover image of <paramref name="kfxFile"/> is present and has the expected dimensions.
        /// </summary>
        /// <param name="kfxFile">Path of the KFX file to open.</param>
        /// <param name="height">Expected cover image height.</param>
        /// <param name="width">Expected cover image width.</param>
        public void CoverImageTest(string kfxFile, int height, int width)
        {
            // Dispose the stream when the test finishes so the file handle is not leaked
            using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx = new KfxContainer(fs);
            var coverImage = kfx.CoverImage;

            Assert.NotNull(coverImage);
            Assert.AreEqual(height, coverImage.Height);
            Assert.AreEqual(width, coverImage.Width);
        }
Exemple #5
0
        /// <summary>
        /// Verifies the number of content chunks in <paramref name="kfxFile"/> and the values that
        /// <c>FindInChunks</c> yields when searching those chunks for <paramref name="search"/>.
        /// </summary>
        /// <param name="kfxFile">Path of the KFX file to open.</param>
        /// <param name="search">Text passed to <c>FindInChunks</c>.</param>
        /// <param name="firstOffset">Expected first value returned by the search.</param>
        /// <param name="lastOffset">Expected last value returned by the search.</param>
        /// <param name="chunkCount">Expected number of content chunks.</param>
        /// <param name="sum">Expected sum of all values returned by the search.</param>
        public void ContentTest(string kfxFile, string search, int firstOffset, int lastOffset, int chunkCount, long sum)
        {
            // Dispose the stream when the test finishes so the file handle is not leaked
            using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx = new KfxContainer(fs);
            var contentChunks = kfx.GetContentChunks();
            var testSearch = FindInChunks(contentChunks, search).ToArray();

            Assert.AreEqual(chunkCount, contentChunks.Count);
            Assert.AreEqual(firstOffset, testSearch.First());
            Assert.AreEqual(lastOffset, testSearch.Last());
            Assert.AreEqual(sum, testSearch.Sum());
        }
Exemple #6
0
        /// <summary>
        /// Finds the occurrences of <paramref name="term"/> in <paramref name="paragraph"/>, picking the
        /// search implementation from the runtime type of <paramref name="metadata"/>.
        /// </summary>
        /// <param name="metadata">Book metadata; only its runtime type is inspected here.</param>
        /// <param name="term">Term to search for.</param>
        /// <param name="paragraph">Paragraph to search in.</param>
        /// <returns>The occurrences found, or an empty set when <c>term.Match</c> is false.</returns>
        public HashSet <Occurrence> FindOccurrences(IMetadata metadata, Term term, Paragraph paragraph)
        {
            // Terms that are not flagged for matching never produce occurrences
            if (!term.Match)
            {
                return(new HashSet <Occurrence>());
            }

            // KFX containers use the two-argument overload; Mobi metadata and any other
            // metadata type fall through to the legacy search
            return(metadata switch
            {
                MobiMetadata _ => FindOccurrencesLegacy(term, paragraph),
                KfxContainer _ => FindOccurrences(term, paragraph),
                _ => FindOccurrencesLegacy(term, paragraph)
            });
Exemple #7
0
        /// <summary>
        /// Verifies the book and container metadata parsed from <paramref name="kfxFile"/> against
        /// the fixed values expected for the test book.
        /// </summary>
        /// <param name="kfxFile">Path of the KFX file to open.</param>
        public void GetKfxContainerMetadataTest(string kfxFile)
        {
            // Dispose the stream when the test finishes so the file handle is not leaked
            using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx = new KfxContainer(fs);

            Assert.AreEqual("B018LJYLS8", kfx.Asin);
            Assert.AreEqual("Shelley, Mary W.", kfx.Author);
            Assert.AreEqual("EBOK", kfx.CdeContentType);
            Assert.AreEqual(4096, kfx.ContainerInfo.ChunkSize);
            Assert.AreEqual(0, kfx.ContainerInfo.CompressionType);
            Assert.AreEqual(YjContainer.ContainerFormat.KfxMain, kfx.ContainerInfo.ContainerFormat);
            Assert.AreEqual("CR!ZLWPJZFVMQ5HGT49NILVDTAKVNRN", kfx.ContainerInfo.ContainerId);
            Assert.AreEqual(0, kfx.ContainerInfo.DrmScheme);
            Assert.AreEqual("CONT", kfx.ContainerInfo.Header.Signature);
            Assert.AreEqual(2, kfx.ContainerInfo.Header.Version);
            Assert.AreEqual("KPR-3.28.1", kfx.ContainerInfo.KfxGenApplicationVersion);
            Assert.AreEqual("kfxlib-20181220", kfx.ContainerInfo.KfxGenPackageVersion);
            Assert.AreEqual("Frankenstein", kfx.Title);
        }
        /// <summary>
        /// Opens <paramref name="file"/> and builds the metadata object that matches its extension.
        /// </summary>
        /// <param name="file">Path of the book file (.azw3, .mobi or .kfx; the extension is matched case-insensitively).</param>
        /// <returns>A <c>Metadata</c> for .azw3/.mobi files or a <c>KfxContainer</c> for .kfx files.</returns>
        /// <exception cref="NotSupportedException">The file extension is not one of the supported formats.</exception>
        public static IMetadata Load(string file)
        {
            // NOTE(review): the stream is disposed as soon as this method returns, which assumes the
            // metadata types finish reading from it inside their constructors - confirm.
            using var fs = new FileStream(file, FileMode.Open, FileAccess.Read);

            IMetadata metadata;

            // Normalise the extension so that e.g. "BOOK.MOBI" or "book.Kfx" is not rejected as unsupported
            switch (Path.GetExtension(file).ToLowerInvariant())
            {
            case ".azw3":
            case ".mobi":
                metadata = new Metadata(fs);
                break;

            case ".kfx":
                metadata = new KfxContainer(fs);
                break;

            default:
                throw new NotSupportedException("Unsupported book format");
            }

            return(metadata);
        }
Exemple #9
0
        /// <summary>
        /// Builds an X-Ray file from the parameters given and returns the path at which the file has been saved (or null if something failed)
        /// </summary>
        /// <param name="request">Build parameters; this method reads its book path, data URL, Amazon TLD, topic and alias-splitting settings.</param>
        /// <param name="cancellationToken">Token passed to the metadata load, X-Ray creation, location scan and export steps.</param>
        /// <returns>
        /// The path of the saved X-Ray file, or null when the metadata could not be loaded, no data source
        /// could be determined, no terms were available, the build was cancelled, or an error occurred.
        /// </returns>
        public async Task <string> BuildAsync([NotNull] Request request, CancellationToken cancellationToken)
        {
            using var metadata = await GetAndValidateMetadataAsync(request.BookPath, cancellationToken);

            if (metadata == null)
            {
                return(null);
            }

            // Use the Roentgen source when no data URL was given (or it equals SecondarySourceRoentgen.FakeUrl),
            // otherwise let the factory infer the source from the URL
            var dataSource = string.IsNullOrEmpty(request.DataUrl) || request.DataUrl == SecondarySourceRoentgen.FakeUrl
                ? _secondaryDataSourceFactory.Get(SecondaryDataSourceFactory.Enum.Roentgen)
                : _secondaryDataSourceFactory.GetInferredSource(request.DataUrl);

            if (dataSource == null)
            {
                _logger.Log("Data source could not be determined from the given path.");
                return(null);
            }

            Core.XRay.XRay xray;
            try
            {
                // The Amazon TLD falls back to "com" when the request does not specify one
                xray = await _xrayService.CreateXRayAsync(request.DataUrl, metadata.DbName, metadata.UniqueId, metadata.Asin, request.AmazonTld ?? "com", request.IncludeTopics, dataSource, _progress, cancellationToken);

                // Nothing to build if the data source returned no terms
                if (xray.Terms.Count == 0)
                {
                    _logger.Log($"No terms were available on {dataSource.Name}, cancelling the build...");
                    return(null);
                }

                var aliasPath = _directoryService.GetAliasPath(xray.Asin);
                _xrayService.ExportAndDisplayTerms(xray, dataSource, false, request.SplitAliases);

                // Aliases already present on the terms take precedence; the alias file is only
                // loaded when no term has aliases and the file exists
                if (xray.Terms.Any(term => term.Aliases?.Count > 0))
                {
                    _logger.Log("Character aliases read from the XML file.");
                }
                else if (!File.Exists(aliasPath))
                {
                    _logger.Log("Aliases file not found.");
                }
                else
                {
                    _aliasesRepository.LoadAliasesForXRay(xray);
                    _logger.Log($"Character aliases read from {aliasPath}.");
                }

                _logger.Log("Initial X-Ray built, adding locations and chapters...");
                // Expand the X-Ray on a worker task, using the code path that matches the metadata type;
                // any other metadata type is rejected with NotSupportedException
                Task buildTask = metadata switch
                {
                    // ReSharper disable AccessToDisposedClosure
                    MobiMetadata _ => Task.Run(() => _xrayService.ExpandFromRawMl(xray, metadata, metadata.GetRawMlStream(), true, true, 25, true, null, _progress, cancellationToken, true, false), cancellationToken),
                    KfxContainer kfx => Task.Run(() => _kfxXrayService.AddLocations(xray, kfx, true, 25, _progress, cancellationToken), cancellationToken),
                    _ => throw new NotSupportedException()
                };
                // The task is awaited before this method returns, so the closures above do not outlive
                // the disposable metadata they capture
                await buildTask.ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                _logger.Log("Build canceled.");
                return(null);
            }
            catch (Exception ex)
            {
                // Any other failure is logged with its stack trace and reported to the caller as null
                _logger.Log($"An error occurred while building the X-Ray:\r\n{ex.Message}\r\n{ex.StackTrace}");
                return(null);
            }

            _logger.Log("Saving X-Ray to file...");
            var xrayPath = _directoryService.GetArtifactPath(ArtifactType.XRay, metadata, Path.GetFileNameWithoutExtension(request.BookPath), true);

            try
            {
                // Export through the exporter the factory returns for the Sqlite option
                var xrayExporter = _xrayExporterFactory.Get(XRayExporterFactory.Enum.Sqlite);
                xrayExporter.Export(xray, xrayPath, _progress, cancellationToken);
            }
            catch (OperationCanceledException)
            {
                _logger.Log("Building canceled.");
                return(null);
            }
            catch (Exception ex)
            {
                // TODO: Add option to retry maybe?
                _logger.Log($"An error occurred while creating the X-Ray file. Is it opened in another program?\r\n{ex.Message}");
                return(null);
            }

            _logger.Log($"X-Ray file created successfully!\r\nSaved to {xrayPath}");

            return(xrayPath);
        }
Exemple #10
0
        /// <summary>
        /// Scans the content chunks of the KFX book, recording where each matchable term (and its aliases)
        /// occurs, creating or updating one excerpt per chunk, and marking excerpts that contain a notable clip.
        /// Logs the names of any matchable terms that were never found.
        /// </summary>
        /// <param name="xray">X-Ray whose start/end locations, term occurrences, excerpts and notable count are updated.</param>
        /// <param name="kfx">Book container the content chunks are read from.</param>
        /// <param name="skipNoLikes">When true, a notable clip with zero likes does not create a new excerpt.</param>
        /// <param name="minClipLen">Notable clips shorter than this do not create a new excerpt.</param>
        /// <param name="progress">Optional progress reporter; advanced once per content chunk.</param>
        /// <param name="token">Checked before each content chunk is processed.</param>
        public void AddLocations(XRay xray,
                                 KfxContainer kfx,
                                 bool skipNoLikes,
                                 int minClipLen,
                                 IProgressBar progress,
                                 CancellationToken token)
        {
            _logger.Log("Scanning book content...");
            var contentChunks = kfx.GetContentChunks();

            // Set start and end of content
            // TODO Figure out how to identify the first *actual* bit of content after the TOC
            var last = contentChunks.Last();

            xray.Srl = 1;
            xray.Erl = last.Pid + last.Length - 1;

            var offset    = 0;
            var excerptId = 0;

            progress?.Set(0, contentChunks.Count);
            foreach (var contentChunk in contentChunks)
            {
                token.ThrowIfCancellationRequested();

                // Excerpts are keyed by the offset at which their chunk starts; copy it so the
                // lambdas below never observe the running total after it has been advanced
                var chunkStart = offset;

                if (contentChunk.ContentText != null)
                {
                    foreach (var character in xray.Terms.Where(term => term.Match))
                    {
                        // A term may have no alias collection at all
                        var knownAliases = character.Aliases ?? Enumerable.Empty<string>();

                        // If the aliases are not supposed to be in regex format, escape them
                        var aliases = character.RegexAliases
                            ? knownAliases
                            : knownAliases.Select(Regex.Escape);

                        var searchList = new[] { character.TermName }.Concat(aliases).ToArray();

                        //Search content for character name and aliases, respecting the case setting
                        var regexOptions = character.MatchCase || character.RegexAliases
                            ? RegexOptions.None
                            : RegexOptions.IgnoreCase;

                        // Group match lengths by their absolute position in the book
                        var highlights = searchList
                                         .Select(search => Regex.Matches(contentChunk.ContentText, $@"{Quotes}?\b{search}{_punctuationMarks}", regexOptions))
                                         .SelectMany(matches => matches.Cast <Match>())
                                         .ToLookup(match => chunkStart + match.Index, match => match.Length);

                        if (highlights.Count == 0)
                        {
                            continue;
                        }

                        // Each occurrence is stored as a [position, length] pair
                        var highlightOccurrences = highlights.SelectMany(highlightGroup => highlightGroup.Select(highlight => new[] { highlightGroup.Key, highlight }));
                        character.Occurrences.AddRange(highlightOccurrences);

                        // Relate the term to the excerpt covering this chunk, creating the excerpt if needed
                        var chunkExcerpt = xray.Excerpts.FirstOrDefault(e => e.Start == chunkStart);
                        if (chunkExcerpt == null)
                        {
                            chunkExcerpt = new Excerpt
                            {
                                Id     = excerptId++,
                                Start  = chunkStart,
                                Length = contentChunk.Length
                            };
                            chunkExcerpt.RelatedEntities.Add(character.Id);
                            xray.Excerpts.Add(chunkExcerpt);
                        }
                        else if (!chunkExcerpt.RelatedEntities.Contains(character.Id))
                        {
                            chunkExcerpt.RelatedEntities.Add(character.Id);
                        }
                    }

                    // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                    if (xray.NotableClips != null)
                    {
                        foreach (var quote in xray.NotableClips)
                        {
                            if (contentChunk.ContentText.IndexOf(quote.Text, StringComparison.Ordinal) < 0)
                            {
                                continue;
                            }

                            // See if an excerpt already exists for this chunk. Excerpts start at the chunk
                            // offset, so the lookup must use that offset rather than the quote's index
                            // inside the chunk text.
                            var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == chunkStart);
                            if (excerpt == null)
                            {
                                if (skipNoLikes && quote.Likes == 0 ||
                                    quote.Text.Length < minClipLen)
                                {
                                    continue;
                                }
                                excerpt = new Excerpt
                                {
                                    Id         = excerptId++,
                                    Start      = chunkStart,
                                    Length     = contentChunk.Length,
                                    Notable    = true,
                                    Highlights = quote.Likes
                                };
                                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                                // TODO: also add other related entities
                                xray.Excerpts.Add(excerpt);
                            }
                            else if (!excerpt.RelatedEntities.Contains(0))
                            {
                                // Mark the existing excerpt as notable, without duplicating the marker
                                excerpt.RelatedEntities.Add(0);
                            }

                            xray.FoundNotables++;
                        }
                    }
                }

                // Count every chunk, including those without text, so progress reaches the total set above
                progress?.Add(1);

                offset += contentChunk.Length;
            }

            // Collect the names of matchable terms that were not located anywhere in the book
            var missingOccurrences = xray.Terms
                                     .Where(term => term.Match && term.Occurrences.Count == 0)
                                     .Select(term => term.TermName)
                                     .ToArray();

            if (!missingOccurrences.Any())
            {
                return;
            }

            var termList = string.Join(", ", missingOccurrences);

            _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
        }