コード例 #1
0
 /// <summary>
 /// Stubs <c>GetCharacters</c> on <paramref name="cvInfo"/> so that a request for exactly one verse
 /// reference whose start and end verse both equal <paramref name="verse"/> (in the given book,
 /// chapter and versification) returns the supplied speaking modes.
 /// </summary>
 /// <param name="cvInfo">The mock/stub object to configure.</param>
 /// <param name="bookNum">Book number the stubbed call must be made with.</param>
 /// <param name="chapter">Chapter number the stubbed call must be made with.</param>
 /// <param name="verse">Verse that the single requested verse reference must start and end on.</param>
 /// <param name="versification">Versification the stubbed call must be made with.</param>
 /// <param name="result">Speaking modes to return (copied into a new <c>HashSet</c>).</param>
 /// <param name="includeAlternatesAndRareQuotes">Value the corresponding argument of the stubbed call must have.</param>
 private void StubGetCharactersForSingleVerse(ICharacterVerseInfo cvInfo, int bookNum, int chapter, int verse, ScrVers versification,
                                              CharacterSpeakingMode[] result, bool includeAlternatesAndRareQuotes = false)
 {
     // The argument constraints must be created inside the stubbed call itself, so they stay in the lambda.
     var stubbedCall = cvInfo.Stub(info => info.GetCharacters(
         Arg.Is(bookNum),
         Arg.Is(chapter),
         Arg<IReadOnlyCollection<IVerse>>.Matches(a => a.Single().StartVerse == verse && a.Single().EndVerse == verse),
         Arg.Is(versification),
         Arg.Is(includeAlternatesAndRareQuotes),
         Arg.Is(false)));

     stubbedCall.Return(new HashSet<CharacterSpeakingMode>(result));
 }
コード例 #2
0
ファイル: QuoteParser.cs プロジェクト: irahopkinson/Glyssen
 /// <summary>
 /// Creates a parser for the given blocks of the specified book.
 /// </summary>
 /// <param name="cvInfo">Character-verse information stored for later use by the parser.</param>
 /// <param name="bookId">Book ID; also converted to a book number via <c>BCVRef.BookToNumber</c>.</param>
 /// <param name="blocks">The input blocks to be parsed.</param>
 /// <param name="versification">Versification to use; when null, <c>ScrVers.English</c> is used.</param>
 public QuoteParser(ICharacterVerseInfo cvInfo, string bookId, IEnumerable<Block> blocks, ScrVers versification = null)
 {
     // Collaborators and input.
     m_cvInfo = cvInfo;
     m_inputBlocks = blocks;

     // Book identity (both the ID and its numeric equivalent are kept).
     m_bookId = bookId;
     m_bookNum = BCVRef.BookToNumber(bookId);

     // Fall back to the English versification when the caller does not supply one.
     m_versification = versification ?? ScrVers.English;

     // Done last, after all fields above have been assigned.
     GetRegExesForSplittingQuotes();
 }
コード例 #3
0
 /// <summary>
 /// Creates a character assigner that uses the supplied character-verse information.
 /// </summary>
 /// <param name="cvInfo">The character-verse information to store for later use.</param>
 public CharacterAssigner(ICharacterVerseInfo cvInfo)
 {
     this.m_cvInfo = cvInfo;
 }
コード例 #4
0
        /// <summary>
        /// Tries to work out which of <c>QuoteSystem.UniquelyGuessableSystems</c> is used in the given books.
        /// Each candidate system is scored against the text of verses for which <paramref name="cvInfo"/>
        /// reports an expected quote: points are added for finding the system's quotation dash, first-level
        /// opener/closer and second-level opener/closer. Once enough verses have been sampled, low-scoring
        /// systems are eliminated and, if possible, a winner is returned early.
        /// </summary>
        /// <typeparam name="T">Book type; must supply <c>BookId</c> and <c>GetVerseText</c> (via <c>IScrBook</c>).</typeparam>
        /// <param name="cvInfo">Supplies the expected quotes per book (<c>GetAllQuoteInfo</c>) and is asked
        /// (<c>GetCharacters</c>) whether following verses have any characters when searching for a closing quote.</param>
        /// <param name="bookList">Books to analyze. If empty, <c>QuoteSystem.Default</c> is returned.</param>
        /// <param name="versification">Versification passed to <c>cvInfo.GetCharacters</c>.</param>
        /// <param name="certain">Set to false on entry. Set to true when exactly one competitor survives the
        /// viability threshold, or when no quotation-dash contender reaches the failure threshold (unless it is
        /// subsequently reset to false because several multi-level systems remain tied).</param>
        /// <param name="worker">Optional worker to which per-book progress is reported;
        /// <c>ReportProgressComplete</c> is called with it before every return.</param>
        /// <returns>The guessed quote system; the result of <c>BestGuess</c> if the time limit is exceeded or
        /// all books are processed without a decision.</returns>
        public static QuoteSystem Guess <T>(ICharacterVerseInfo cvInfo, List <T> bookList, ScrVers versification, out bool certain, BackgroundWorker worker = null) where T : IScrBook
        {
            certain = false;
            var bookCount = bookList.Count;

            if (bookCount == 0)
            {
                ReportProgressComplete(worker);
                return(QuoteSystem.Default);
            }
            // Every candidate starts with a score of 0; quotation-dash hits are additionally counted
            // (only for systems that define a quotation dash marker).
            var scores = QuoteSystem.UniquelyGuessableSystems.ToDictionary(s => s, s => 0);
            var quotationDashCounts = QuoteSystem.UniquelyGuessableSystems.Where(s => !String.IsNullOrEmpty(s.QuotationDashMarker))
                                      .ToDictionary(s => s, s => 0);
            var viableSystems       = scores.Keys.ToList();
            int totalVersesAnalyzed = 0;
            int totalDialoqueQuoteVersesAnalyzed = 0;
            // Per-book cap on analyzed verses; once exceeded, only dialogue quotes are still examined for
            // that book (see the first check in the quote loop). Integer division: fewer books => larger cap.
            int maxNonDialogueSamplesPerBook     = BCVRef.LastBook * kMinSample / bookCount;
            int booksProcessed = 0;

            int  bestScore     = 0;
            bool foundEndQuote = false;
            bool foundSecondLevelQuoteCloser = false;

            // ReSharper disable once InconsistentNaming
            int kVerseValue = Math.Min(kStartQuoteValue + kEndQuoteValue, kQuotationDashValue);

            // Cache of the text of the verses following the current quote, shared by all quote systems.
            List <string> followingVerses = new List <string>(kMaxFollowingVersesToSearchForEndQuote);

            var stopwatch = new Stopwatch();

            stopwatch.Start();

            // Start with the New Testament because that's where most of the dialogue quotes are, and it makes guessing A LOT faster!
            // (SkipWhile/TakeWhile split the list at the first book numbered 40 or higher; presumably the list
            // is in canonical order - TODO confirm.)
            foreach (var book in bookList.SkipWhile(b => BCVRef.BookToNumber(b.BookId) < 40).Union(bookList.TakeWhile(b => BCVRef.BookToNumber(b.BookId) < 40)))
            {
                if (worker != null)
                {
                    worker.ReportProgress(MathUtilities.Percent(++booksProcessed, bookCount));
                }

                int versesAnalyzedForCurrentBook = 0;
                int prevQuoteChapter             = -1;
                int prevQuoteVerse = -1;

                foreach (var quote in cvInfo.GetAllQuoteInfo(book.BookId).Where(q => q.IsExpected))
                {
                    // Sample budget for this book is used up: ignore further non-dialogue quotes.
                    if (versesAnalyzedForCurrentBook > maxNonDialogueSamplesPerBook && !quote.IsDialogue)
                    {
                        continue;
                    }

                    // Skip a quote in the same verse as, or the verse right after, the previous quote of this
                    // chapter (only the verse tracker is advanced, so a run of consecutive verses is skipped).
                    if (quote.Chapter == prevQuoteChapter && (quote.Verse == prevQuoteVerse || quote.Verse == prevQuoteVerse + 1))
                    {
                        prevQuoteVerse = quote.Verse;
                        continue;
                    }
                    var text = book.GetVerseText(quote.Chapter, quote.Verse);
                    followingVerses.Clear();
                    int maxFollowingVersesToSearch = kMaxFollowingVersesToSearchForEndQuote;
#if SHOWTESTINFO
                    if (quote.IsDialogue)
                    {
                        Debug.WriteLine("Evaluating {0} {1}:{2} - contents (DIALOGUE=TRUE): {3}", book.BookId, quote.Chapter, quote.Verse, text);
                    }
                    else
                    {
                        Debug.WriteLine("Evaluating {0} {1}:{2} - contents: {3}", book.BookId, quote.Chapter, quote.Verse, text);
                    }
#endif
                    foreach (var quoteSystem in viableSystems)
                    {
                        int ichStartQuote = text.IndexOf(quoteSystem.FirstLevel.Open, StringComparison.Ordinal);
                        // Index of the second-level opener in this verse (-1 until one is looked for and found).
                        int i2            = -1;

                        if (quote.IsDialogue && !string.IsNullOrEmpty(quoteSystem.QuotationDashMarker))
                        {
                            int i = text.IndexOf(quoteSystem.QuotationDashMarker, StringComparison.Ordinal);
                            if (i >= 0 && (ichStartQuote < 0 || i < ichStartQuote))
                            {
                                // Found a dialogue quote marker earlier in the text.
                                IncrementScore(scores, quoteSystem, kQuotationDashValue, ref bestScore);
                                quotationDashCounts[quoteSystem]++;
                                continue;
                            }
                        }
                        // An opener that starts in the last two character positions of the verse is ignored.
                        if (ichStartQuote >= 0 && ichStartQuote < text.Length - 2)
                        {
                            IncrementScore(scores, quoteSystem, kStartQuoteValue, ref bestScore);

                            if (quoteSystem.NormalLevels.Count > 1)
                            {
                                i2 = text.IndexOf(quoteSystem.NormalLevels[1].Open, ichStartQuote + 1, StringComparison.Ordinal);
                                if (i2 > ichStartQuote)
                                {
#if SHOWTESTINFO
                                    Debug.WriteLine("Found 2nd-level opener (" + quoteSystem.NormalLevels[1].Open + ") for system " + quoteSystem);
#endif
                                    IncrementScore(scores, quoteSystem, kStartLevel2QuoteValue, ref bestScore);
                                    if (i2 < text.Length - 2 && text.IndexOf(quoteSystem.NormalLevels[1].Close, i2 + 1, StringComparison.Ordinal) > i2)
                                    {
#if SHOWTESTINFO
                                        Debug.WriteLine("Found 2nd-level closer (" + quoteSystem.NormalLevels[1].Close + ") for system " + quoteSystem);
#endif
                                        foundSecondLevelQuoteCloser = true;
                                        IncrementScore(scores, quoteSystem, kEndLevel2QuoteValue, ref bestScore);
                                    }
                                }
                            }

                            if (text.IndexOf(quoteSystem.FirstLevel.Close, ichStartQuote + 1, StringComparison.Ordinal) > ichStartQuote)
                            {
                                foundEndQuote = true;
                                IncrementScore(scores, quoteSystem, kEndQuoteValue, ref bestScore);
                            }
                            else
                            {
                                // No first-level closer in this verse: look in the following verses, stopping
                                // at the first verse for which cvInfo reports no characters.
                                for (int i = 1; i <= maxFollowingVersesToSearch; i++)
                                {
                                    if (!cvInfo.GetCharacters(book.BookId, quote.Chapter, quote.Verse + i, versification: versification).Any())
                                    {
                                        break;
                                    }
                                    // Reuse the verse text if an earlier quote system already fetched it.
                                    string followingText;
                                    if (followingVerses.Count >= i)
                                    {
                                        followingText = followingVerses[i - 1];
                                    }
                                    else
                                    {
                                        followingText = book.GetVerseText(quote.Chapter, quote.Verse + i);
                                        followingVerses.Add(followingText);
                                    }
                                    if (i2 >= 0 && followingText.IndexOf(quoteSystem.NormalLevels[1].Close, StringComparison.Ordinal) >= 0)
                                    {
#if SHOWTESTINFO
                                        Debug.WriteLine("Found 2nd-level closer (" + quoteSystem.NormalLevels[1].Close + ") in subsequent verse for system " + quoteSystem);
#endif
                                        foundSecondLevelQuoteCloser = true;
                                        IncrementScore(scores, quoteSystem, kEndLevel2QuoteValue, ref bestScore);
                                    }
                                    // NOTE(review): this test is "> 0" whereas the second-level test above is ">= 0",
                                    // so a first-level closer at the very start of a following verse does not count
                                    // - confirm this is intentional.
                                    if (followingText.IndexOf(quoteSystem.FirstLevel.Close, StringComparison.Ordinal) > 0)
                                    {
                                        foundEndQuote = true;
                                        IncrementScore(scores, quoteSystem, kEndQuoteValue, ref bestScore);
                                        break;
                                    }
                                }
                            }
                            // Subsequent quote systems search no further than the verses already fetched for this quote.
                            maxFollowingVersesToSearch = followingVerses.Count;
                        }
                    }
                    totalVersesAnalyzed++;
                    if (quote.IsDialogue)
                    {
                        totalDialoqueQuoteVersesAnalyzed++;
                    }
                    versesAnalyzedForCurrentBook++;

                    // Only attempt a decision once the minimum samples have been gathered: enough verses overall,
                    // at least one end quote, enough dialogue verses (unless no viable system has a quotation
                    // dash), and either a second-level closer, enough non-dialogue verses, or only single-level
                    // systems left.
                    if (totalVersesAnalyzed >= kMinSample && foundEndQuote &&
                        (totalDialoqueQuoteVersesAnalyzed >= kMinQuotationDashSample ||
                         viableSystems.TrueForAll(s => String.IsNullOrEmpty(s.QuotationDashMarker))) &&
                        (foundSecondLevelQuoteCloser || (totalVersesAnalyzed - totalDialoqueQuoteVersesAnalyzed) >= kMinSampleToAttemptToGetSecondLevel || viableSystems.TrueForAll(s => s.NormalLevels.Count == 1)))
                    {
                        // A system stays in contention only if its score exceeds both an absolute floor
                        // (based on the number of verses analyzed) and a fraction of the current best score.
                        var minViabilityScore = Math.Max(totalVersesAnalyzed * kVerseValue * kMinPercent,
                                                         bestScore * kMaxCompetitorPercent);
                        var competitors = viableSystems.Where(system => scores[system] > minViabilityScore).ToList();

                        if (competitors.Any())
                        {
#if SHOWTESTINFO
                            Debug.WriteLine("STATISTICS:");
                            foreach (var system in competitors)
                            {
                                Debug.WriteLine(system.Name + "(" + system + ")\tScore: " + scores[system]);
                                if (!String.IsNullOrEmpty(system.QuotationDashMarker))
                                {
                                    Debug.WriteLine("\tPercentage matches of total Dialogue quotes analyzed: " +
                                                    (100.0 * quotationDashCounts[system]) / totalDialoqueQuoteVersesAnalyzed);
                                }
                            }
#endif

                            if (competitors.Count == 1)
                            {
                                certain = true;
                                ReportProgressComplete(worker);
                                return(competitors[0]);
                            }

                            // Permanently drop the systems that fell below the viability threshold.
                            viableSystems = viableSystems.Where(competitors.Contains).ToList();
                            // All remaining competitors share the same first-level marks, so they can only be
                            // told apart by quotation dash usage and by their deeper levels.
                            if (competitors.TrueForAll(c => c.FirstLevel.Open == competitors[0].FirstLevel.Open &&
                                                       c.FirstLevel.Close == competitors[0].FirstLevel.Close))
                            {
                                var contendersWithQDash        = competitors.Where(c => !String.IsNullOrEmpty(c.QuotationDashMarker)).ToList();
                                var failureThresholdForQDCount = kQuotationDashFailPercent * totalDialoqueQuoteVersesAnalyzed;
                                if (contendersWithQDash.TrueForAll(c => quotationDashCounts[c] < failureThresholdForQDCount))
                                {
#if SHOWTESTINFO
                                    Debug.Write("No systems with QD over minimum threshold (" + failureThresholdForQDCount +
                                                "). Competitors reduced from " + competitors.Count);
#endif
                                    competitors = competitors.Where(c => String.IsNullOrEmpty(c.QuotationDashMarker)).ToList();
#if SHOWTESTINFO
                                    Debug.WriteLine(" to " + competitors.Count);
#endif

                                    // We're probably (unless we reset this to false below) down to either a single contender (in
                                    // which case we can be pretty certain) or two contenders, in which case we can safely use the
                                    // one with multiple levels filled in (since there will be no harm done even if the data only
                                    // has 1st-level quotes).
                                    // NOTE(review): if the filter above leaves no competitors, nothing is returned here and
                                    // "certain" appears to remain true for a later BestGuess return - confirm this is intended.
                                    certain = true;
                                }
                                else
                                {
#if SHOWTESTINFO
                                    Debug.WriteLine("Only considering contenders with QD. Of " + competitors.Count + " competitors, there are " +
                                                    contendersWithQDash.Count + " contenders with QD count over minimum threshold (" +
                                                    failureThresholdForQDCount + ").");
#endif
                                    // Keep only quotation-dash contenders that hold the best score and whose dash
                                    // count reaches the minimum share of the dialogue verses analyzed.
                                    var minQDCount = kMinQuotationDashPercent * totalDialoqueQuoteVersesAnalyzed;
                                    competitors = contendersWithQDash.Where(c => scores[c] == bestScore &&
                                                                            quotationDashCounts[c] >= minQDCount).ToList();
#if SHOWTESTINFO
                                    switch (competitors.Count)
                                    {
                                    case 0:
                                        Debug.WriteLine("Of those, none had the best score (" + bestScore +
                                                        ") and had a QD count above the minimum (" + minQDCount + ")");
                                        break;

                                    case 1:
                                        Debug.WriteLine("Of those, one had the best score (" + bestScore + ") and had a QD count above the minimum (" +
                                                        minQDCount + ")");
                                        break;

                                    default:
                                        Debug.WriteLine("Of those, " + competitors.Count + " were tied for the best score (" + bestScore +
                                                        ") and had a QD count above the minimum (" + minQDCount + ")");
                                        break;
                                    }
#endif
                                }

                                if (competitors.Any())
                                {
                                    // If there are multiple systems with 2nd and 3rd levels specified, discard those options since
                                    // we didn't find anything in the data to help us choose among the options.
                                    if (competitors.Count(qs => qs.NormalLevels.Count > 1) > 1)
                                    {
                                        var bestSystems = competitors.Where(c => scores[c] == bestScore).ToList();
                                        if (bestSystems.Count == 1)
                                        {
#if SHOWTESTINFO
                                            Debug.Write("Multiple systems with 2nd and 3rd levels specified. Taking system with best score: " + bestSystems[0]);
#endif
                                            competitors = bestSystems;
                                        }
                                        else
                                        {
#if SHOWTESTINFO
                                            Debug.Write("Multiple systems with 2nd and 3rd levels specified. Competitors reduced from " +
                                                        competitors.Count);
#endif
                                            competitors = competitors.Where(qs => qs.NormalLevels.Count == 1).ToList();
#if SHOWTESTINFO
                                            Debug.WriteLine(" to " + competitors.Count);
#endif

                                            certain = false;
                                        }
                                    }

                                    if (competitors.Any())
                                    {
                                        ReportProgressComplete(worker);

                                        if (competitors.Count == 1)
                                        {
                                            return(competitors[0]);
                                        }
#if SHOWTESTINFO
                                        Debug.WriteLine("SURVIVORS:");
                                        foreach (var system in competitors)
                                        {
                                            Debug.WriteLine(system.Name + "(" + system + ")\tScore: " + scores[system]);
                                        }
#endif
                                        // Prefer a survivor with more than one level defined; otherwise take the first.
                                        return(competitors.FirstOrDefault(qs => qs.NormalLevels.Count > 1) ?? competitors.First());
                                    }
                                }
                            }
                            // Still have multiple systems in contention with different first-level start & end markers;
                            // we haven't seen enough evidence to pick a clear winner.
                        }
#if SHOWTESTINFO
                        else
                        {
                            Debug.WriteLine("NO COMPETITORS. Total verses analyzed = " + totalVersesAnalyzed + ". Best Score = " + bestScore +
                                            ". Minimum viability score = " + minViabilityScore);
                        }
#endif
                    }

                    // Give up and return the best guess so far once the time budget is exhausted.
                    if (stopwatch.ElapsedMilliseconds > kMaxTimeLimit)
                    {
#if SHOWTESTINFO
                        Debug.WriteLine("Time-out guessing quote system.");
#endif
                        ReportProgressComplete(worker);
                        return(BestGuess(viableSystems, scores, bestScore, foundEndQuote));
                    }

                    prevQuoteChapter = quote.Chapter;
                    prevQuoteVerse   = quote.Verse;
                }
            }
            // All books processed without reaching a decision.
            ReportProgressComplete(worker);
            return(BestGuess(viableSystems, scores, bestScore, foundEndQuote));
        }
コード例 #5
0
 /// <summary>
 /// Convenience overload that stubs <c>GetCharacters</c> for a single verse so that it returns
 /// exactly one speaking mode for the named character.
 /// </summary>
 /// <param name="cvInfo">The mock/stub object to configure.</param>
 /// <param name="bookNum">Book number the stubbed call must be made with.</param>
 /// <param name="chapter">Chapter number the stubbed call must be made with.</param>
 /// <param name="verse">Verse the stubbed call must be made for.</param>
 /// <param name="versification">Versification the stubbed call must be made with.</param>
 /// <param name="singleCharacterToReturn">Character passed as the first constructor argument of the
 /// single <c>CharacterSpeakingMode</c> returned (the remaining arguments are null, null, false).</param>
 private void StubGetCharactersForSingleVerse(ICharacterVerseInfo cvInfo, int bookNum, int chapter, int verse, ScrVers versification,
                                              string singleCharacterToReturn)
 {
     var onlySpeakingMode = new CharacterSpeakingMode(singleCharacterToReturn, null, null, false);
     var speakingModes = new[] { onlySpeakingMode };

     // Delegate to the array-based overload.
     StubGetCharactersForSingleVerse(cvInfo, bookNum, chapter, verse, versification, speakingModes);
 }
コード例 #6
0
ファイル: Block.cs プロジェクト: irahopkinson/Glyssen
 /// <summary>
 /// Looks up the characters that <paramref name="cvInfo"/> reports for this block's chapter and initial
 /// verse range and returns the first one whose <c>Character</c> equals this block's <c>CharacterId</c>.
 /// </summary>
 /// <param name="cvInfo">Source of character-verse information.</param>
 /// <param name="bookNumber">Number of the book this block belongs to.</param>
 /// <param name="scrVers">Versification passed through to <c>GetCharacters</c>.</param>
 /// <returns>The first matching entry, or the default value (via <c>FirstOrDefault</c>) if none matches.</returns>
 public CharacterVerse GetMatchingCharacter(ICharacterVerseInfo cvInfo, int bookNumber, Paratext.ScrVers scrVers)
 {
     var candidates = cvInfo.GetCharacters(
         bookNumber,
         ChapterNumber,
         InitialStartVerseNumber,
         InitialEndVerseNumber,
         versification: scrVers);

     return candidates.FirstOrDefault(candidate => candidate.Character == CharacterId);
 }