Ejemplo n.º 1
0
        public void ContentTest(string kfxFile, string search, int firstOffset, int lastOffset, int chunkCount, long sum)
        {
            var fs            = new FileStream(kfxFile, FileMode.Open, FileAccess.Read);
            var kfx           = new KfxContainer(fs);
            var contentChunks = kfx.GetContentChunks();
            var testSearch    = FindInChunks(contentChunks, search).ToArray();

            Assert.AreEqual(chunkCount, contentChunks.Count);
            Assert.AreEqual(firstOffset, testSearch.First());
            Assert.AreEqual(lastOffset, testSearch.Last());
            Assert.AreEqual(sum, testSearch.Sum());
        }
Ejemplo n.º 2
0
        public void AddLocations(XRay xray,
                                 KfxContainer kfx,
                                 bool skipNoLikes,
                                 int minClipLen,
                                 IProgressBar progress,
                                 CancellationToken token)
        {
            _logger.Log("Scanning book content...");
            var contentChunks = kfx.GetContentChunks();

            // Set start and end of content
            // TODO Figure out how to identify the first *actual* bit of content after the TOC
            var last = contentChunks.Last();

            xray.Srl = 1;
            xray.Erl = last.Pid + last.Length - 1;

            var offset    = 0;
            var excerptId = 0;

            progress?.Set(0, contentChunks.Count);
            foreach (var contentChunk in contentChunks)
            {
                token.ThrowIfCancellationRequested();

                if (contentChunk.ContentText != null)
                {
                    foreach (var character in xray.Terms.Where(term => term.Match))
                    {
                        // If the aliases are not supposed to be in regex format, escape them
                        var aliases = character.RegexAliases
                            ? character.Aliases
                            : character.Aliases.Select(Regex.Escape);

                        var searchList = new[] { character.TermName }.Concat(aliases).ToArray();

                        //Search content for character name and aliases, respecting the case setting
                        var regexOptions = character.MatchCase || character.RegexAliases
                            ? RegexOptions.None
                            : RegexOptions.IgnoreCase;

                        var currentOffset = offset;
                        var highlights    = searchList
                                            .Select(search => Regex.Matches(contentChunk.ContentText, $@"{Quotes}?\b{search}{_punctuationMarks}", regexOptions))
                                            .SelectMany(matches => matches.Cast <Match>())
                                            .ToLookup(match => currentOffset + match.Index, match => match.Length);

                        if (highlights.Count == 0)
                        {
                            continue;
                        }

                        var highlightOccurrences = highlights.SelectMany(highlightGroup => highlightGroup.Select(highlight => new[] { highlightGroup.Key, highlight }));
                        character.Occurrences.AddRange(highlightOccurrences);

                        // Check excerpts
                        var exCheck = xray.Excerpts.Where(t => t.Start.Equals(offset)).ToArray();
                        if (exCheck.Length > 0)
                        {
                            if (!exCheck[0].RelatedEntities.Contains(character.Id))
                            {
                                exCheck[0].RelatedEntities.Add(character.Id);
                            }
                        }
                        else
                        {
                            var newExcerpt = new Excerpt
                            {
                                Id     = excerptId++,
                                Start  = offset,
                                Length = contentChunk.Length
                            };
                            newExcerpt.RelatedEntities.Add(character.Id);
                            xray.Excerpts.Add(newExcerpt);
                        }
                    }

                    // Attempt to match downloaded notable clips, not worried if no matches occur as some will be added later anyway
                    if (xray.NotableClips != null)
                    {
                        foreach (var quote in xray.NotableClips)
                        {
                            var index = contentChunk.ContentText.IndexOf(quote.Text, StringComparison.Ordinal);
                            if (index <= -1)
                            {
                                continue;
                            }

                            // See if an excerpt already exists at this location
                            var excerpt = xray.Excerpts.FirstOrDefault(e => e.Start == index);
                            if (excerpt == null)
                            {
                                if (skipNoLikes && quote.Likes == 0 ||
                                    quote.Text.Length < minClipLen)
                                {
                                    continue;
                                }
                                excerpt = new Excerpt
                                {
                                    Id         = excerptId++,
                                    Start      = offset,
                                    Length     = contentChunk.Length,
                                    Notable    = true,
                                    Highlights = quote.Likes
                                };
                                excerpt.RelatedEntities.Add(0); // Mark the excerpt as notable
                                // TODO: also add other related entities
                                xray.Excerpts.Add(excerpt);
                            }
                            else
                            {
                                excerpt.RelatedEntities.Add(0);
                            }

                            xray.FoundNotables++;
                        }
                    }

                    progress?.Add(1);
                }

                offset += contentChunk.Length;
            }

            var missingOccurrences = xray.Terms
                                     .Where(term => term.Match && term.Occurrences.Count == 0)
                                     .Select(term => term.TermName)
                                     .ToArray();

            if (!missingOccurrences.Any())
            {
                return;
            }

            var termList = string.Join(", ", missingOccurrences);

            _logger.Log($"\r\nNo locations were found for the following terms. You should add aliases for them using the book as a reference:\r\n{termList}\r\n");
        }