/// <summary>
/// Produces the best guess for an occurrence, taking its position within its segment into
/// account: a wordform at index 0 whose baseline text is not already lowercase is first
/// looked up by its lowercased form.
/// </summary>
/// <param name="occurrence">The occurrence to find a guess for.</param>
/// <returns>
/// The guess found for the lowercased wordform when there is one; null when the occurrence's
/// BaselineWs is -1; otherwise the result of GetBestGuess for the occurrence's own analysis
/// and baseline writing system.
/// </returns>
public IAnalysis GetBestGuess(AnalysisOccurrence occurrence)
{
    // TODO: make it look for the first word in the sentence...may not be at Index 0!
    var startsSegmentAsWordform = occurrence.Analysis is IWfiWordform && occurrence.Index == 0;
    if (startsSegmentAsWordform)
    {
        ITsString baseline = occurrence.BaselineText;
        CoreWritingSystemDefinition wsDefinition =
            Cache.ServiceLocator.WritingSystemManager.Get(baseline.get_WritingSystemAt(0));
        string originalForm = baseline.Text;
        string loweredForm = UnicodeString.ToLower(originalForm, wsDefinition.IcuLocale);

        // Only worth a repository lookup when lowercasing actually changed the text.
        if (loweredForm != originalForm)
        {
            ITsString loweredTss = TsStringUtils.MakeString(
                loweredForm, TsStringUtils.GetWsAtOffset(baseline, 0));
            var wordformRepository = Cache.ServiceLocator.GetInstance<IWfiWordformRepository>();

            IWfiWordform loweredWordform;
            IAnalysis loweredGuess;
            if (wordformRepository.TryGetObject(loweredTss, out loweredWordform)
                && TryGetBestGuess(loweredWordform, occurrence.BaselineWs, out loweredGuess))
            {
                return loweredGuess;
            }
        }
    }

    // A BaselineWs of -1 happens with empty translation lines; there is nothing to guess.
    return occurrence.BaselineWs == -1
        ? null
        : GetBestGuess(occurrence.Analysis, occurrence.BaselineWs);
}
/// <summary>
/// Returns the sequence of IAnalysis objects running from this occurrence through
/// <paramref name="point2"/>, both ends included. <paramref name="point2"/> must not
/// come before this occurrence.
/// </summary>
/// <param name="point2">The occurrence that ends the range.</param>
/// <returns>
/// The analyses between the two occurrences (inclusive) when both are in the same segment
/// or in different segments with the same owner.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="point2"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="point2"/> is not valid, or it precedes this occurrence.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The two occurrences cannot be shown to belong to the same text.
/// </exception>
/// <exception cref="NotImplementedException">
/// The two occurrences are in different paragraphs of the same text.
/// </exception>
public IEnumerable<IAnalysis> GetAdvancingOccurrencesInclusiveOf(AnalysisOccurrence point2)
{
    // ReferenceEquals avoids going through any user-defined equality on AnalysisOccurrence.
    if (ReferenceEquals(point2, null))
    {
        throw new ArgumentNullException("point2");
    }
    if (!point2.IsValid)
    {
        throw new ArgumentException("Invalid Analysis Occurrence");
    }

    if (Segment == point2.Segment)
    {
        // Case 1: Two points in same ISegment object
        if (Index > point2.Index)
        {
            throw new ArgumentException("Second AnalysisOccurrence is before the first!");
        }
        // Copy out sub-array of Analyses between the two occurrences' indices
        return Segment.AnalysesRS.ToList().GetRange(Index, point2.Index - Index + 1);
    }

    if (Segment.Owner == point2.Segment.Owner)
    {
        // Case 2: Two points in different ISegment objects, but with the same owner
        if (Segment.IndexInOwner > point2.Segment.IndexInOwner)
        {
            throw new ArgumentException("Second AnalysisOccurrence is before the first!");
        }
        // Need to copy out end of first segment, any segments in between and beginning of second segment
        return CopyOutAnalysesFromMultipleSegments(point2);
    }

    // Case 3: different owners. Check each cast before using it so that an unexpected
    // owner type is reported as a bad argument rather than a NullReferenceException.
    var para1 = Segment.Owner as IStTxtPara;
    var para2 = point2.Segment.Owner as IStTxtPara;
    var text1 = para1 == null ? null : para1.Owner as IStText;
    if (text1 == null || para2 == null || !text1.ParagraphsOS.Contains(para2))
    {
        throw new ArgumentOutOfRangeException("point2", "AnalysisOccurrences are not within the same Text!");
    }
    throw new NotImplementedException("So far we only handle this in same Segment or Paragraph.");
}
/// <summary>
/// Copies out the IAnalysis objects from this occurrence to the end of its ISegment, all
/// analyses of any intervening ISegments, and the analyses of the parameter's ISegment up
/// to and including the parameter's index.
/// Assumes the parameter's segment has the same owner as this occurrence's segment!
/// </summary>
/// <param name="point2">The occurrence that ends the range.</param>
/// <returns>The collected analyses, in segment order.</returns>
/// <exception cref="InvalidOperationException">
/// This occurrence's segment is not owned by an IStTxtPara, or that paragraph has no
/// segment sequence.
/// </exception>
private IEnumerable<IAnalysis> CopyOutAnalysesFromMultipleSegments(AnalysisOccurrence point2)
{
    // Validate the cast before dereferencing it; previously a failed cast threw before
    // the null check could run.
    var para = Segment.Owner as IStTxtPara;
    var paraSegs = para == null ? null : para.SegmentsOS;
    if (paraSegs == null)
    {
        throw new InvalidOperationException("Unexpected error!");
    }

    // The segment indices do not change during the loop, so read them once.
    var firstSegIndex = Segment.IndexInOwner;
    var lastSegIndex = point2.Segment.IndexInOwner;

    var result = new List<IAnalysis>();
    for (var i = firstSegIndex; i <= lastSegIndex; i++)
    {
        var analyses = paraSegs[i].AnalysesRS;
        if (i == firstSegIndex)
        {
            // Copy out end of the first segment, starting at this occurrence
            result.AddRange(analyses.ToList().GetRange(Index, analyses.Count - Index));
        }
        else if (i == lastSegIndex)
        {
            // Copy out beginning of the last segment, through point2's index
            result.AddRange(analyses.ToList().GetRange(0, point2.Index + 1));
        }
        else
        {
            // Copy out all of an intervening segment
            result.AddRange(analyses);
        }
    }
    return result;
}
/// <summary>
/// Determines whether this AnalysisOccurrence comes later in the text than
/// <paramref name="otherPoint"/>. Positions are compared by paragraph index, then by
/// segment index, then by index within the segment.
/// </summary>
/// <param name="otherPoint">The occurrence to compare against.</param>
/// <returns>true if this occurrence is strictly after <paramref name="otherPoint"/>.</returns>
/// <exception cref="ArgumentException">
/// The owners of the two occurrences' paragraphs have different Hvos.
/// </exception>
public bool IsAfter(AnalysisOccurrence otherPoint)
{
    if (Paragraph.Owner.Hvo != otherPoint.Paragraph.Owner.Hvo)
    {
        throw new ArgumentException("The two points are not from the same text!");
    }

    // Paragraph position decides first.
    var thisParaIndex = Paragraph.IndexInOwner;
    var thatParaIndex = otherPoint.Paragraph.IndexInOwner;
    if (thisParaIndex != thatParaIndex)
    {
        return thisParaIndex > thatParaIndex;
    }

    // Same paragraph: segment position decides next.
    var thisSegIndex = Segment.IndexInOwner;
    var thatSegIndex = otherPoint.Segment.IndexInOwner;
    if (thisSegIndex != thatSegIndex)
    {
        return thisSegIndex > thatSegIndex;
    }

    // Same segment: compare positions within it.
    return Index > otherPoint.Index;
}
/// <summary>
/// Exercises StTextAnnotationNavigator.GetNextWordformOrDefault and
/// GetNextWordformOrStartingWordform from several starting occurrences, including the
/// last occurrence used by this fixture.
/// </summary>
public void AdvanceThroughWordformInThePara_Using_GetNextWordformOrDefault()
{
    // ---------------- GetNextWordformOrDefault ----------------

    // Start before the end of a sentence.
    var nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[2]);
    Assert.AreEqual(m_expectedOccurrences[3], nav.GetNextWordformOrDefault(null));

    nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[3]);
    Assert.AreEqual(m_expectedOccurrences[5], nav.GetNextWordformOrDefault(null));

    nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[6]);
    Assert.AreEqual(m_expectedOccurrences[8], nav.GetNextWordformOrDefault(null));

    // Position the navigator at the last occurrence of the stText; with nothing beyond
    // it, the (null) default should come back.
    var navAtEnd = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[18]);
    Assert.AreEqual(null, navAtEnd.GetNextWordformOrDefault(null));

    // JohnT: this is to test that it returns an arbitrary default passed in, if there are
    // no more. This dummy analysis occurrence is probably not in a valid state, so don't
    // use it for other things or be too surprised if improved validation detects a problem.
    // The same navigator instance is deliberately reused here.
    var fallback = new AnalysisOccurrence(m_para0.SegmentsOS[0], 18);
    Assert.AreEqual(fallback, navAtEnd.GetNextWordformOrDefault(fallback));

    // A fresh navigator at the last occurrence still yields the null default.
    navAtEnd = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[18]);
    Assert.AreEqual(null, navAtEnd.GetNextWordformOrDefault(null));

    // ---------------- GetNextWordformOrStartingWordform ----------------

    nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[2]);
    Assert.AreEqual(m_expectedOccurrences[3], nav.GetNextWordformOrStartingWordform());

    nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[3]);
    Assert.AreEqual(m_expectedOccurrences[5], nav.GetNextWordformOrStartingWordform());

    nav = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[6]);
    Assert.AreEqual(m_expectedOccurrences[8], nav.GetNextWordformOrStartingWordform());

    // Positioned at the last occurrence of the stText: with nothing beyond it, the
    // starting occurrence itself is expected back.
    navAtEnd = new SegmentServices.StTextAnnotationNavigator(m_expectedOccurrences[18]);
    Assert.AreEqual(m_expectedOccurrences[18], navAtEnd.GetNextWordformOrStartingWordform());
}
/// <summary>
/// PreviousAnalysisOccurrence from an occurrence at the beginning of a segment should
/// yield the occurrence the fixture lists immediately before it.
/// </summary>
public void PreviousAnOc_BeginningOfSegment()
{
    // Arrange: start on the PunctuationForm at the segment beginning.
    var start = m_expectedAnOcsPara0[5];
    var before = m_expectedAnOcsPara0[4];
    var expected = new AnalysisOccurrence(before.Segment, before.Index);

    // Act
    var found = start.PreviousAnalysisOccurrence();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// NextWordform from an occurrence near the end of a paragraph should skip the following
/// PunctuationForm and land on the expected occurrence in the next paragraph.
/// </summary>
public void NextWordform_AlmostEndOfParagraph()
{
    // Arrange: the next occurrence is a PunctuationForm, so the search has to move on
    // to the next paragraph.
    var start = m_expectedAnOcsPara0[6];
    var target = m_expectedAnOcs[8];
    var expected = new AnalysisOccurrence(target.Segment, target.Index);

    // Act
    var found = start.NextWordform();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// NextAnalysisOccurrence from the last occurrence of a paragraph should yield the
/// expected occurrence that follows it in the fixture's overall list.
/// </summary>
public void NextAnOc_EndOfParagraph()
{
    // Arrange: start on the PunctuationForm at the segment/paragraph end.
    var start = m_expectedAnOcsPara0[7];
    var target = m_expectedAnOcs[8];
    var expected = new AnalysisOccurrence(target.Segment, target.Index);

    // Act
    var found = start.NextAnalysisOccurrence();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// NextAnalysisOccurrence from the first expected occurrence should yield the occurrence
/// at the following index of the same segment.
/// </summary>
public void NextAnOc_StartOfSegment()
{
    // Arrange
    var start = m_expectedAnOcsPara0[0];
    var expected = new AnalysisOccurrence(start.Segment, start.Index + 1);

    // Act
    var found = start.NextAnalysisOccurrence();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// PreviousAnalysisOccurrence from the first occurrence of a paragraph should step back
/// across the paragraph boundary to the preceding expected occurrence.
/// </summary>
public void PreviousAnOc_BeginningOfParagraph()
{
    // Arrange: the previous occurrence lives in the previous segment, across a
    // paragraph boundary.
    var start = m_expectedAnOcs[8];
    var before = m_expectedAnOcs[7];
    var expected = new AnalysisOccurrence(before.Segment, before.Index);

    // Act
    var found = start.PreviousAnalysisOccurrence();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// PreviousWordform from an occurrence at the beginning of a segment should skip the
/// preceding PunctuationForm and land on the wordform before it.
/// </summary>
public void PreviousWordform_BeginningOfSegment()
{
    // Arrange: the search has to move to the previous segment, and the occurrence
    // immediately before the start is a PunctuationForm.
    var start = m_expectedAnOcsPara0[5];
    var target = m_expectedAnOcsPara0[3];
    var expected = new AnalysisOccurrence(target.Segment, target.Index);

    // Act
    var found = start.PreviousWordform();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// PreviousWordform from the first occurrence of a paragraph should cross the paragraph
/// boundary, skip the PunctuationForm found there, and land on the wordform before it.
/// </summary>
public void PreviousWordform_BeginningOfParagraph()
{
    // Arrange: the search has to move to the previous segment across a paragraph
    // boundary, and the occurrence immediately before the start is a PunctuationForm.
    var start = m_expectedAnOcs[8];
    var target = m_expectedAnOcs[6];
    var expected = new AnalysisOccurrence(target.Segment, target.Index);

    // Act
    var found = start.PreviousWordform();

    // Assert
    VerifyAnalysisOccurrence(expected, found);
}
/// <summary>
/// Builds phrases out of adjacent occurrences with MakePhraseWithNextWord, breaks them
/// again with BreakPhrase, and checks segment contents and wordform validity at each
/// step. Statement order matters: every step mutates the first segment's analyses.
/// </summary>
public void MakeAndBreakPhrase()
{
    UndoableUnitOfWorkHelper.Do("doit", "undoit", Cache.ActionHandlerAccessor, () =>
    {
        // Text under test: "the book is red. the pages in the book are the color of the paper."
        IStTxtPara para = MakeSimpleParsedText();
        Assert.AreEqual(5, para.SegmentsOS[0].AnalysesRS.Count,
            "check preconditions -- includes final punctuation");

        // Give "book" and "is" analysis-WS forms so the merged form can be checked.
        var bookOccurrence = new AnalysisOccurrence(para.SegmentsOS[0], 1);
        bookOccurrence.Analysis.Wordform.Form.AnalysisDefaultWritingSystem =
            TsStringUtils.MakeString("bookA", Cache.DefaultAnalWs);
        var isOccurrence = new AnalysisOccurrence(para.SegmentsOS[0], 2);
        isOccurrence.Analysis.Wordform.Form.AnalysisDefaultWritingSystem =
            TsStringUtils.MakeString("isA", Cache.DefaultAnalWs);

        // First merge: "book" + "is".
        var twoWordPhrase = bookOccurrence.MakePhraseWithNextWord();
        Assert.AreEqual(4, para.SegmentsOS[0].AnalysesRS.Count);
        Assert.AreEqual("book is", twoWordPhrase.BaselineText.Text);
        Assert.AreEqual("bookA isA",
            twoWordPhrase.Analysis.Wordform.Form.AnalysisDefaultWritingSystem.Text);
        var theOccurrence = new AnalysisOccurrence(para.SegmentsOS[0], 0);
        Assert.AreEqual(theOccurrence.BaselineWs, twoWordPhrase.BaselineWs);

        // Second merge: "book is" + "red".
        var threeWordPhrase = twoWordPhrase.MakePhraseWithNextWord();
        Assert.AreEqual(3, para.SegmentsOS[0].AnalysesRS.Count);
        Assert.AreEqual("book is red", threeWordPhrase.BaselineText.Text);
        Assert.AreEqual(theOccurrence.BaselineWs, threeWordPhrase.BaselineWs);

        // No further merge is expected to be possible from here.
        Assert.IsNull(threeWordPhrase.MakePhraseWithNextWord());
        Assert.IsFalse(threeWordPhrase.CanMakePhraseWithNextWord());

        // Breaking the phrase restores the five original analyses.
        var mergedAnalysis = threeWordPhrase.Analysis;
        threeWordPhrase.BreakPhrase();
        Assert.AreEqual(5, para.SegmentsOS[0].AnalysesRS.Count,
            "break phrase should have restored all wordforms");
        var restoredForms = new[] { "the", "book", "is", "red", "." };
        for (var i = 0; i < restoredForms.Length; i++)
        {
            Assert.AreEqual(restoredForms[i],
                new AnalysisOccurrence(para.SegmentsOS[0], i).BaselineText.Text);
        }
        Assert.IsFalse(mergedAnalysis.IsValidObject);

        // This checks that we do NOT delete a broken phrase when there are other references.
        theOccurrence.MakePhraseWithNextWord();
        var laterTheBook = new AnalysisOccurrence(para.SegmentsOS[1], 3).MakePhraseWithNextWord();
        laterTheBook.BreakPhrase();
        Assert.AreEqual("the book", theOccurrence.BaselineText.Text);
        Assert.IsTrue(theOccurrence.Analysis.IsValidObject);
    });
}
/// <summary>
/// Parses m_txtPara with a ParagraphParser, then fills m_occurrences with one
/// AnalysisOccurrence per analysis in the paragraph's first segment.
/// </summary>
private void ParseText()
{
    using (var pp = new ParagraphParser(Cache))
    {
        pp.Parse(m_txtPara);
    }

    var seg = m_txtPara.SegmentsOS[0];
    // Only the number of analyses is needed, so read Count instead of copying the
    // analyses into a throwaway array.
    var analysisCount = seg.AnalysesRS.Count;
    m_occurrences = new AnalysisOccurrence[analysisCount];
    for (var i = 0; i < analysisCount; i++)
    {
        m_occurrences[i] = new AnalysisOccurrence(seg, i);
    }
}
/// <summary>
/// Returns the IAnalysis objects from this occurrence through <paramref name="point2"/>
/// (both ends included), leaving out every IPunctuationForm.
/// </summary>
/// <param name="point2">The occurrence that ends the range.</param>
/// <returns>
/// The result of GetAdvancingOccurrencesInclusiveOf with punctuation forms filtered out.
/// </returns>
public IEnumerable<IAnalysis> GetAdvancingWordformsInclusiveOf(AnalysisOccurrence point2)
{
    return GetAdvancingOccurrencesInclusiveOf(point2)
        .Where(analysis => !(analysis is IPunctuationForm));
}
/// <summary>
/// Tries to get the best guess for the given occurrence by calling
/// GetBestGuess(AnalysisOccurrence).
/// </summary>
/// <param name="occurrence">The occurrence to find a guess for.</param>
/// <param name="bestGuess">
/// Receives whatever GetBestGuess returned; this is only a usable guess when the method
/// returns true.
/// </param>
/// <returns>
/// true when <paramref name="bestGuess"/> is a non-null analysis that is not a NullWAG;
/// false otherwise.
/// </returns>
public bool TryGetBestGuess(AnalysisOccurrence occurrence, out IAnalysis bestGuess)
{
    bestGuess = GetBestGuess(occurrence);
    // GetBestGuess can return null (e.g. when the occurrence's BaselineWs is -1); a null
    // result is not a successful guess any more than a NullWAG is.
    return bestGuess != null && !(bestGuess is NullWAG);
}
/// <summary>
/// Asserts that two occurrences refer to the same position: their segments have the same
/// Hvo and their indices are equal.
/// </summary>
/// <param name="expectedNext">The occurrence the test expects.</param>
/// <param name="actual">The occurrence produced by the code under test.</param>
private static void VerifyAnalysisOccurrence(AnalysisOccurrence expectedNext, AnalysisOccurrence actual)
{
    // Segments are compared by Hvo rather than by object reference.
    var expectedSegmentHvo = expectedNext.Segment.Hvo;
    var actualSegmentHvo = actual.Segment.Hvo;
    Assert.AreEqual(expectedSegmentHvo, actualSegmentHvo,
        "Analysis Occurrence has the wrong ISegment");

    var expectedIndex = expectedNext.Index;
    var actualIndex = actual.Index;
    Assert.AreEqual(expectedIndex, actualIndex,
        "Analysis Occurrence has the wrong index");
}
/// <summary>
/// More efficient equality if the other argument is known to be an AnalysisOccurrence.
/// Two occurrences are equal when they have the same Segment and the same Index.
/// </summary>
/// <param name="other">The occurrence to compare with; may be null.</param>
/// <returns>
/// true if <paramref name="other"/> is non-null and has the same Segment and Index as
/// this occurrence; false otherwise.
/// </returns>
public bool Equals(AnalysisOccurrence other)
{
    // A null argument is simply "not equal" rather than a NullReferenceException.
    // ReferenceEquals is used so the check cannot route through a user-defined ==
    // operator on this type.
    return !ReferenceEquals(other, null)
        && other.Segment == Segment
        && other.Index == Index;
}