/// <summary>
/// Walks the paragraph's segments, decides which ones were removed by the edit described in
/// m_paraDiffInfo, discards the references of each such segment, and finally removes the whole
/// run of deleted segments from m_para.SegmentsOS.
/// Sets m_cSegsDeleted, and (when anything was deleted) m_iFirstDeletedSegment; may also set
/// m_punctRemainingFromLastDeletedSegment when punctuation survives past a deleted segment break.
/// </summary>
private void DiscardDeletedSegments()
{
    // Index into m_para.SegmentsOS of the first segment found to be deleted (-1 until one is found).
    int firstDeleted = -1;
    m_cSegsDeleted = 0;
    // Offset in the old contents of the first character following the deleted text.
    int editLimInOld = m_paraDiffInfo.IchFirstDiff + m_paraDiffInfo.CchDeleteFromOld;
    int segIndex = 0;
    foreach (ISegment seg in m_para.SegmentsOS)
    {
        bool discard = false;
        // Non-null only when the segment is discarded but leaves punctuation behind.
        ITsString leftoverPunct = null;
        int segStart = seg.BeginOffset;

        // Only segments that begin at or after the first changed character can be deleted.
        if (segStart >= m_paraDiffInfo.IchFirstDiff)
        {
            var oldSegEnd = m_oldEndOffsets[segIndex];
            if (oldSegEnd <= editLimInOld)
            {
                // Both segment boundaries lie within the deleted text.
                discard = true;
            }
            else if (segStart < editLimInOld)
            {
                // The segment starts inside the deleted range but extends beyond it.
                // Look at what is left of it after the deletion: the text is trimmed of
                // leading and trailing whitespace to see whether anything meaningful survives.
                var tail = m_oldContents.Substring(editLimInOld, oldSegEnd - editLimInOld);
                var tailBldr = tail.GetBldr();
                tailBldr.Replace(0, tail.Length, tail.Text.Trim(), null);
                var trimmedTail = tailBldr.GetString();

                if (trimmedTail.Length == 0)
                {
                    // Only whitespace remains outside the deleted text, so every
                    // non-whitespace character of the segment is gone: delete it.
                    discard = true;
                }
                else
                {
                    // Something survives. Find the segment break (EOS) positions in the
                    // segment's old text.
                    var oldSegText = m_oldContents.Substring(segStart, oldSegEnd - segStart);
                    var eosFinder = new SegmentCollector(oldSegText, m_para.Cache.WritingSystemFactory);
                    // Note: this will not choose TE footnote markers for EosPositions (TsStringUtils.kChObject).
                    eosFinder.Run();

                    // Segment-relative index of the last deleted character.
                    int lastDeletedInSeg = editLimInOld - segStart - 1;

                    // No EOS positions means this is the last segment of the paragraph and it
                    // has no segment break character; a first EOS beyond the deletion means
                    // the break survived. In both cases the segment is not deleted here
                    // (it could be merged or kept).
                    if (eosFinder.EosPositions.Count > 0 && eosFinder.EosPositions[0] <= lastDeletedInSeg)
                    {
                        // The segment break itself was deleted, so the segment goes, but the
                        // surviving text (already known to be non-empty after trimming) must be
                        // kept as a punctuation analysis for the merged segments. Example: for
                        // ".)" where ")" is not deleted, a PunctuationForm for ")" has to be
                        // inserted when m_oldAnalyses is created.
                        discard = true;
                        leftoverPunct = trimmedTail;
                    }
                }
            }
        }

        if (discard)
        {
            if (m_cSegsDeleted == 0)
            {
                firstDeleted = segIndex;
            }
            m_cSegsDeleted++;
            // Discard any IAnalysisReferences wholly contained in deleted text.
            DiscardDeletedRefs(seg);

            if (leftoverPunct != null)
            {
                IPunctuationForm punctForm = WfiWordformServices.FindOrCreatePunctuationform(m_para.Cache, leftoverPunct);
                m_punctRemainingFromLastDeletedSegment = new List<IAnalysis> { punctForm };
            }
        }

        segIndex++;
    }

    if (m_cSegsDeleted != 0)
    {
        m_iFirstDeletedSegment = firstDeleted;
        // Remove from the paragraph all the segments that were found to be deleted.
        m_para.SegmentsOS.Replace(m_iFirstDeletedSegment, m_cSegsDeleted, new ICmObject[0]);
        // Fix any tags that just lost their Begin or End Segment (won't be both).
        // After the removal above, firstDeleted is the index of the first segment following
        // the deleted range.
        FixSingleHangingReference(firstDeleted);
    }
}
/// <summary>
/// Runs a SegmentCollector over the given text and hands back what it found.
/// Very similar to CollectSegments on the base class, but it does not make even dummy
/// annotations, just TsStringSegments.
/// </summary>
/// <param name="tssText">The text to run the SegmentCollector over.</param>
/// <param name="ichMinSegBreaks">Receives the collector's EosPositions list.</param>
/// <returns>The collector's Segments list.</returns>
internal List<TsStringSegment> CollectTempSegmentAnnotations(ITsString tssText, out List<int> ichMinSegBreaks)
{
    var segmentFinder = new SegmentCollector(tssText, m_cache.WritingSystemFactory);
    segmentFinder.Run();

    ichMinSegBreaks = segmentFinder.EosPositions;
    var foundSegments = segmentFinder.Segments;
    return foundSegments;
}