/// <summary>
/// Walks every paragraph of the KFX book, recording where each matching term occurs and
/// attaching excerpts (including downloaded notable clips) to <paramref name="xray"/>.
/// </summary>
/// <param name="xray">X-Ray whose terms, excerpts and start/end locations are updated in place.</param>
/// <param name="kfx">Book container the paragraphs are read from.</param>
/// <param name="skipNoLikes">Forwarded to the notable clip processing.</param>
/// <param name="minClipLen">Forwarded to the notable clip processing.</param>
/// <param name="progress">Optional progress reporter; advanced once per paragraph.</param>
/// <param name="token">Checked at the start of every paragraph; cancels the scan when signalled.</param>
public void AddLocations(XRay xray, KfxContainer kfx, bool skipNoLikes, int minClipLen, IProgressBar progress, CancellationToken token)
{
    _logger.Log("Scanning book content...");
    var paragraphs = _paragraphsService.GetParagraphs(kfx).ToArray();

    // Content range: starts at 1 and ends at the last location covered by the final paragraph
    // TODO Figure out how to identify the first *actual* bit of content after the TOC
    var finalParagraph = paragraphs.Last();
    xray.Srl = 1;
    xray.Erl = finalParagraph.Location + finalParagraph.Length - 1;

    progress?.Set(0, paragraphs.Length);
    foreach (var paragraph in paragraphs)
    {
        token.ThrowIfCancellationRequested();

        foreach (var entity in xray.Terms)
        {
            // Only terms flagged for matching are searched
            if (!entity.Match)
            {
                continue;
            }

            var found = _termsService.FindOccurrences(kfx, entity, paragraph);
            if (found.Any())
            {
                entity.Occurrences.UnionWith(found);
                var paragraphSpan = new IndexLength(paragraph.Location, paragraph.Length);
                ExcerptHelper.EnhanceOrAddExcerpts(xray.Excerpts, entity.Id, paragraphSpan);
            }
        }

        // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
        if (xray.NotableClips != null)
        {
            ExcerptHelper.ProcessNotablesForParagraph(
                paragraph.ContentText,
                paragraph.Location,
                xray.NotableClips,
                xray.Excerpts,
                skipNoLikes,
                minClipLen);
        }

        progress?.Add(1);
    }

    // Report every matchable term that was never located so the user can add aliases for it
    var unlocatedTerms = xray.Terms
        .Where(term => term.Match && term.Occurrences.Count == 0)
        .Select(term => term.TermName)
        .ToArray();
    if (unlocatedTerms.Length > 0)
    {
        var termList = string.Join(", ", unlocatedTerms);
        _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
    }
}
/// <summary>
/// Verifies that the default table of contents of <paramref name="kfxFile"/> exists and
/// contains <paramref name="tocLength"/> entries.
/// </summary>
public void DefaultTocTest(string kfxFile, int tocLength)
{
    // Dispose the stream when the test ends so the file handle is not leaked between test cases
    using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
    var kfx = new KfxContainer(fs);

    var toc = kfx.GetDefaultToc();

    Assert.NotNull(toc);
    Assert.AreEqual(tocLength, toc.Count);
}
/// <summary>
/// Verifies that the page count reported for <paramref name="kfxFile"/> equals <paramref name="pages"/>.
/// </summary>
public void GetPageCountTest(string kfxFile, int pages)
{
    // Dispose the stream when the test ends so the file handle is not leaked between test cases
    using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
    var kfx = new KfxContainer(fs);

    var pageCount = kfx.GetPageCount();

    Assert.NotNull(pageCount);
    Assert.AreEqual(pages, pageCount);
}
/// <summary>
/// Verifies that <paramref name="kfxFile"/> exposes a cover image with the expected
/// <paramref name="height"/> and <paramref name="width"/>.
/// </summary>
public void CoverImageTest(string kfxFile, int height, int width)
{
    // Dispose the stream when the test ends so the file handle is not leaked between test cases
    using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
    var kfx = new KfxContainer(fs);

    var coverImage = kfx.CoverImage;

    Assert.NotNull(coverImage);
    Assert.AreEqual(height, coverImage.Height);
    Assert.AreEqual(width, coverImage.Width);
}
/// <summary>
/// Verifies the content chunks of <paramref name="kfxFile"/>: the number of chunks, and the first,
/// last and summed values returned by FindInChunks for <paramref name="search"/>.
/// </summary>
public void ContentTest(string kfxFile, string search, int firstOffset, int lastOffset, int chunkCount, long sum)
{
    // Dispose the stream when the test ends so the file handle is not leaked between test cases
    using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
    var kfx = new KfxContainer(fs);

    var contentChunks = kfx.GetContentChunks();
    var testSearch = FindInChunks(contentChunks, search).ToArray();

    Assert.AreEqual(chunkCount, contentChunks.Count);
    Assert.AreEqual(firstOffset, testSearch.First());
    Assert.AreEqual(lastOffset, testSearch.Last());
    Assert.AreEqual(sum, testSearch.Sum());
}
/// <summary>
/// Finds the occurrences of <paramref name="term"/> within <paramref name="paragraph"/>, choosing the
/// search routine from the runtime type of <paramref name="metadata"/>.
/// </summary>
/// <param name="metadata">Book metadata; a KfxContainer uses the two-argument FindOccurrences overload, while MobiMetadata and any other type use FindOccurrencesLegacy.</param>
/// <param name="term">Term to search for; when its Match flag is false no search is performed.</param>
/// <param name="paragraph">Paragraph to search.</param>
/// <returns>The occurrences found, or an empty set when the term is not flagged for matching.</returns>
public HashSet <Occurrence> FindOccurrences(IMetadata metadata, Term term, Paragraph paragraph) { if (!term.Match) { return(new HashSet <Occurrence>()); } return(metadata switch { MobiMetadata _ => FindOccurrencesLegacy(term, paragraph), KfxContainer _ => FindOccurrences(term, paragraph), _ => FindOccurrencesLegacy(term, paragraph) });
/// <summary>
/// Verifies the book-level and container-level metadata parsed from <paramref name="kfxFile"/>
/// against the values expected for the Frankenstein sample book.
/// </summary>
public void GetKfxContainerMetadataTest(string kfxFile)
{
    // Dispose the stream when the test ends so the file handle is not leaked between test cases
    using var fs = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
    var kfx = new KfxContainer(fs);

    Assert.AreEqual("B018LJYLS8", kfx.Asin);
    Assert.AreEqual("Shelley, Mary W.", kfx.Author);
    Assert.AreEqual("EBOK", kfx.CdeContentType);
    Assert.AreEqual(4096, kfx.ContainerInfo.ChunkSize);
    Assert.AreEqual(0, kfx.ContainerInfo.CompressionType);
    Assert.AreEqual(YjContainer.ContainerFormat.KfxMain, kfx.ContainerInfo.ContainerFormat);
    Assert.AreEqual("CR!ZLWPJZFVMQ5HGT49NILVDTAKVNRN", kfx.ContainerInfo.ContainerId);
    Assert.AreEqual(0, kfx.ContainerInfo.DrmScheme);
    Assert.AreEqual("CONT", kfx.ContainerInfo.Header.Signature);
    Assert.AreEqual(2, kfx.ContainerInfo.Header.Version);
    Assert.AreEqual("KPR-3.28.1", kfx.ContainerInfo.KfxGenApplicationVersion);
    Assert.AreEqual("kfxlib-20181220", kfx.ContainerInfo.KfxGenPackageVersion);
    Assert.AreEqual("Frankenstein", kfx.Title);
}
/// <summary>
/// Opens the book at <paramref name="file"/> and parses its metadata with the reader that matches the file extension.
/// </summary>
/// <param name="file">Path to a .azw3, .mobi or .kfx book. The extension is matched case-insensitively.</param>
/// <returns>A Metadata instance for .azw3/.mobi files, or a KfxContainer for .kfx files.</returns>
/// <exception cref="NotSupportedException">The file extension is not one of the supported book formats.</exception>
public static IMetadata Load(string file)
{
    // The stream is opened before the extension is checked, so a missing file is reported
    // as an I/O error regardless of its extension.
    // NOTE(review): the stream is disposed when this method returns; this assumes the metadata
    // readers finish with it during construction - confirm.
    using var fs = new FileStream(file, FileMode.Open, FileAccess.Read);

    // Normalise the extension so "BOOK.KFX" or "Book.Azw3" load the same way as lower-case names
    var extension = Path.GetExtension(file)?.ToLowerInvariant();

    IMetadata metadata;
    switch (extension)
    {
        case ".azw3":
        case ".mobi":
            metadata = new Metadata(fs);
            break;
        case ".kfx":
            metadata = new KfxContainer(fs);
            break;
        default:
            throw new NotSupportedException("Unsupported book format");
    }

    return metadata;
}
/// <summary>
/// Builds an X-Ray file for the book described by <paramref name="request"/> and returns the path
/// the file was saved to, or null when any step fails or the build is cancelled.
/// </summary>
/// <param name="request">Book path, data URL and build options.</param>
/// <param name="cancellationToken">Cancels the download, the content scan or the export.</param>
/// <returns>Path of the saved X-Ray file, or null if something failed.</returns>
public async Task<string> BuildAsync([NotNull] Request request, CancellationToken cancellationToken)
{
    using var metadata = await GetAndValidateMetadataAsync(request.BookPath, cancellationToken);
    if (metadata == null)
    {
        return null;
    }

    // No URL (or the placeholder Roentgen URL) means the Roentgen source; otherwise infer it from the URL
    var useRoentgen = string.IsNullOrEmpty(request.DataUrl) || request.DataUrl == SecondarySourceRoentgen.FakeUrl;
    var dataSource = useRoentgen
        ? _secondaryDataSourceFactory.Get(SecondaryDataSourceFactory.Enum.Roentgen)
        : _secondaryDataSourceFactory.GetInferredSource(request.DataUrl);
    if (dataSource == null)
    {
        _logger.Log("Data source could not be determined from the given path.");
        return null;
    }

    Core.XRay.XRay xray;
    try
    {
        xray = await _xrayService.CreateXRayAsync(
            request.DataUrl,
            metadata.DbName,
            metadata.UniqueId,
            metadata.Asin,
            request.AmazonTld ?? "com",
            request.IncludeTopics,
            dataSource,
            _progress,
            cancellationToken);
        if (xray.Terms.Count == 0)
        {
            _logger.Log($"No terms were available on {dataSource.Name}, cancelling the build...");
            return null;
        }

        var aliasPath = _directoryService.GetAliasPath(xray.Asin);
        _xrayService.ExportAndDisplayTerms(xray, dataSource, false, request.SplitAliases);

        // Aliases already present on the terms take precedence over the aliases file
        var termsAlreadyHaveAliases = xray.Terms.Any(term => term.Aliases?.Count > 0);
        if (termsAlreadyHaveAliases)
        {
            _logger.Log("Character aliases read from the XML file.");
        }
        else if (!File.Exists(aliasPath))
        {
            _logger.Log("Aliases file not found.");
        }
        else
        {
            _aliasesRepository.LoadAliasesForXRay(xray);
            _logger.Log($"Character aliases read from {aliasPath}.");
        }

        _logger.Log("Initial X-Ray built, adding locations and chapters...");

        // Expand the X-Ray on a worker thread using the routine that matches the book format.
        // The metadata is only disposed after the task has been awaited below.
        // ReSharper disable AccessToDisposedClosure
        Task expansion;
        if (metadata is MobiMetadata)
        {
            expansion = Task.Run(
                () => _xrayService.ExpandFromRawMl(xray, metadata, metadata.GetRawMlStream(), true, true, 25, true, null, _progress, cancellationToken, true, false),
                cancellationToken);
        }
        else if (metadata is KfxContainer kfx)
        {
            expansion = Task.Run(
                () => _kfxXrayService.AddLocations(xray, kfx, true, 25, _progress, cancellationToken),
                cancellationToken);
        }
        else
        {
            throw new NotSupportedException();
        }
        // ReSharper restore AccessToDisposedClosure

        await expansion.ConfigureAwait(false);
    }
    catch (OperationCanceledException)
    {
        _logger.Log("Build canceled.");
        return null;
    }
    catch (Exception ex)
    {
        _logger.Log($"An error occurred while building the X-Ray:\r\n{ex.Message}\r\n{ex.StackTrace}");
        return null;
    }

    _logger.Log("Saving X-Ray to file...");
    var bookName = Path.GetFileNameWithoutExtension(request.BookPath);
    var xrayPath = _directoryService.GetArtifactPath(ArtifactType.XRay, metadata, bookName, true);

    try
    {
        _xrayExporterFactory
            .Get(XRayExporterFactory.Enum.Sqlite)
            .Export(xray, xrayPath, _progress, cancellationToken);
    }
    catch (OperationCanceledException)
    {
        _logger.Log("Building canceled.");
        return null;
    }
    catch (Exception ex)
    {
        // TODO: Add option to retry maybe?
        _logger.Log($"An error occurred while creating the X-Ray file. Is it opened in another program?\r\n{ex.Message}");
        return null;
    }

    _logger.Log($"X-Ray file created successfully!\r\nSaved to {xrayPath}");
    return xrayPath;
}
/// <summary>
/// Scans the content chunks of a KFX book, recording where each matching term (or one of its aliases)
/// occurs and creating or updating excerpts on <paramref name="xray"/>, including downloaded notable clips.
/// </summary>
/// <param name="xray">X-Ray whose terms, excerpts, notable count and start/end locations are updated in place.</param>
/// <param name="kfx">Book container the content chunks are read from.</param>
/// <param name="skipNoLikes">When true, notable clips with zero likes do not create new excerpts.</param>
/// <param name="minClipLen">Notable clips shorter than this do not create new excerpts.</param>
/// <param name="progress">Optional progress reporter; advanced once per content chunk.</param>
/// <param name="token">Checked at the start of every chunk; cancels the scan when signalled.</param>
public void AddLocations(XRay xray, KfxContainer kfx, bool skipNoLikes, int minClipLen, IProgressBar progress, CancellationToken token)
{
    _logger.Log("Scanning book content...");
    var contentChunks = kfx.GetContentChunks();

    // Set start and end of content
    // TODO Figure out how to identify the first *actual* bit of content after the TOC
    var last = contentChunks.Last();
    xray.Srl = 1;
    xray.Erl = last.Pid + last.Length - 1;

    // Running position of the current chunk, accumulated from the chunk lengths
    var offset = 0;
    var excerptId = 0;
    progress?.Set(0, contentChunks.Count);
    foreach (var contentChunk in contentChunks)
    {
        token.ThrowIfCancellationRequested();
        if (contentChunk.ContentText != null)
        {
            // Copy of the running offset so the lambdas below do not capture the variable mutated by the loop
            var chunkStart = offset;

            foreach (var character in xray.Terms.Where(term => term.Match))
            {
                // If the aliases are not supposed to be in regex format, escape them
                var aliases = character.RegexAliases
                    ? character.Aliases
                    : character.Aliases.Select(Regex.Escape);
                var searchList = new[] { character.TermName }.Concat(aliases).ToArray();

                // Search content for character name and aliases, respecting the case setting
                var regexOptions = character.MatchCase || character.RegexAliases
                    ? RegexOptions.None
                    : RegexOptions.IgnoreCase;

                // Key: position of the match offset by the chunk start; values: lengths of the matches found there
                var highlights = searchList
                    .Select(search => Regex.Matches(contentChunk.ContentText, $@"{Quotes}?\b{search}{_punctuationMarks}", regexOptions))
                    .SelectMany(matches => matches.Cast<Match>())
                    .ToLookup(match => chunkStart + match.Index, match => match.Length);
                if (highlights.Count == 0)
                {
                    continue;
                }

                var highlightOccurrences = highlights.SelectMany(highlightGroup =>
                    highlightGroup.Select(highlight => new[] { highlightGroup.Key, highlight }));
                character.Occurrences.AddRange(highlightOccurrences);

                // Attach the character to the excerpt covering this chunk, creating the excerpt if needed
                var existingExcerpt = xray.Excerpts.FirstOrDefault(t => t.Start.Equals(chunkStart));
                if (existingExcerpt != null)
                {
                    if (!existingExcerpt.RelatedEntities.Contains(character.Id))
                    {
                        existingExcerpt.RelatedEntities.Add(character.Id);
                    }
                }
                else
                {
                    var newExcerpt = new Excerpt
                    {
                        Id = excerptId++,
                        Start = chunkStart,
                        Length = contentChunk.Length
                    };
                    newExcerpt.RelatedEntities.Add(character.Id);
                    xray.Excerpts.Add(newExcerpt);
                }
            }

            // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
            if (xray.NotableClips != null)
            {
                foreach (var quote in xray.NotableClips)
                {
                    var index = contentChunk.ContentText.IndexOf(quote.Text, StringComparison.Ordinal);
                    if (index <= -1)
                    {
                        continue;
                    }

                    // See if an excerpt already exists for this chunk. Excerpts are keyed by the chunk's
                    // start offset, not by the position of the quote inside the chunk text.
                    var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == chunkStart);
                    if (excerpt == null)
                    {
                        if (skipNoLikes && quote.Likes == 0 || quote.Text.Length < minClipLen)
                        {
                            continue;
                        }

                        excerpt = new Excerpt
                        {
                            Id = excerptId++,
                            Start = chunkStart,
                            Length = contentChunk.Length,
                            Notable = true,
                            Highlights = quote.Likes
                        };
                        excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                        // TODO: also add other related entities
                        xray.Excerpts.Add(excerpt);
                    }
                    else if (!excerpt.RelatedEntities.Contains(0))
                    {
                        // Mark the existing excerpt as notable only once, even if several clips match this chunk
                        excerpt.RelatedEntities.Add(0);
                    }

                    xray.FoundNotables++;
                }
            }
        }

        // Count every chunk, including those without text, so progress reaches the total set above
        progress?.Add(1);
        offset += contentChunk.Length;
    }

    var missingOccurrences = xray.Terms
        .Where(term => term.Match && term.Occurrences.Count == 0)
        .Select(term => term.TermName)
        .ToArray();
    if (!missingOccurrences.Any())
    {
        return;
    }

    var termList = string.Join(", ", missingOccurrences);
    _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
}