Ejemplo n.º 1
0
		/// <summary>
		/// Works out which segments of m_para were removed by the deletion described in
		/// m_paraDiffInfo, discards their references, and removes them from m_para.SegmentsOS.
		/// </summary>
		/// <remarks>
		/// Resets m_cSegsDeleted and counts the deleted segments into it. When at least one
		/// segment is deleted, m_iFirstDeletedSegment is set to the index of the first one and
		/// FixSingleHangingReference is called with that index. May also set
		/// m_punctRemainingFromLastDeletedSegment when a deleted segment leaves non-white text
		/// behind after the deleted range.
		/// </remarks>
		private void DiscardDeletedSegments()
		{
			m_cSegsDeleted = 0;
			// Index (into m_para.SegmentsOS) of the first segment found to be deleted; -1 until one is found.
			int iFirstRemoved = -1;
			// Offset in the old contents of the first character following the deleted text.
			int ichLimOfDeletion = m_paraDiffInfo.IchFirstDiff + m_paraDiffInfo.CchDeleteFromOld;

			int iSeg = -1;
			foreach (ISegment seg in m_para.SegmentsOS)
			{
				iSeg++;

				// Segments that begin before the first difference are never candidates.
				if (seg.BeginOffset < m_paraDiffInfo.IchFirstDiff)
					continue;

				int ichOldEnd = m_oldEndOffsets[iSeg];
				if (ichOldEnd <= ichLimOfDeletion)
				{
					// Both boundaries of the segment lie within the deleted text.
					NoteSegmentDeleted(seg, iSeg, ref iFirstRemoved);
					continue;
				}

				// From here on the segment ends after the deleted range. It is only of interest
				// if it also starts inside that range.
				if (seg.BeginOffset >= ichLimOfDeletion)
					continue;

				// The part of the segment that survives the deletion, with leading and trailing
				// white space stripped (interior white space is kept).
				var survivingText = m_oldContents.Substring(ichLimOfDeletion, ichOldEnd - ichLimOfDeletion);
				var survivorBldr = survivingText.GetBldr();
				survivorBldr.Replace(0, survivingText.Length, survivingText.Text.Trim(), null);
				var trimmedSurvivor = survivorBldr.GetString();

				if (trimmedSurvivor.Length == 0)
				{
					// Only white space is left outside the deleted text, so every non-white
					// character of the segment was deleted: the segment goes too.
					NoteSegmentDeleted(seg, iSeg, ref iFirstRemoved);
					continue;
				}

				// Something other than white space survives. The segment is deleted only if its
				// first end-of-sentence position falls inside the deleted portion.
				var oldSegText = m_oldContents.Substring(seg.BeginOffset, ichOldEnd - seg.BeginOffset);
				var eosFinder = new SegmentCollector(oldSegText, m_para.Cache.WritingSystemFactory);
				//Note: this will not choose TE footnote makers for EOSPositions. TsStringUtils.kChObject
				eosFinder.Run();

				// Index, relative to the start of the segment, of the last deleted character.
				int ichLastDeletedInSeg = ichLimOfDeletion - seg.BeginOffset - 1;

				// No EOS position at all (per the original notes: a paragraph-final segment
				// without a segment break character), or the first one lies beyond the deletion:
				// do not delete the segment. It could be merged or kept.
				if (eosFinder.EosPositions.Count == 0 || eosFinder.EosPositions[0] > ichLastDeletedInSeg)
					continue;

				NoteSegmentDeleted(seg, iSeg, ref iFirstRemoved);

				// The text left over past the deletion (for example the ')' of ".)" when only
				// the '.' was deleted) is remembered as a punctuation form so that it can be
				// inserted when the old analyses are assembled.
				if (survivingText.Length > 0 && trimmedSurvivor.Length > 0)
				{
					IPunctuationForm leftoverPunct = WfiWordformServices.FindOrCreatePunctuationform(m_para.Cache, trimmedSurvivor);
					m_punctRemainingFromLastDeletedSegment = new List<IAnalysis> {leftoverPunct};
				}
			}

			if (m_cSegsDeleted != 0)
			{
				m_iFirstDeletedSegment = iFirstRemoved;

				// Remove the deleted segments from the paragraph as a single run beginning at
				// the first deleted index.
				m_para.SegmentsOS.Replace(m_iFirstDeletedSegment, m_cSegsDeleted, new ICmObject[0]);

				// Fix any tags that just lost their Begin or End Segment (won't be both).
				// After the removal above, iFirstRemoved is the index of the first segment
				// following the deleted range.
				FixSingleHangingReference(iFirstRemoved);
			}
		}

		/// <summary>
		/// Records that the segment at index iSeg is being deleted: remembers the index if it is
		/// the first deletion, bumps m_cSegsDeleted, and discards the segment's references.
		/// </summary>
		/// <param name="seg">The segment being deleted.</param>
		/// <param name="iSeg">Index of <paramref name="seg"/> in m_para.SegmentsOS.</param>
		/// <param name="iFirstRemoved">Set to <paramref name="iSeg"/> when no segment has been deleted yet.</param>
		private void NoteSegmentDeleted(ISegment seg, int iSeg, ref int iFirstRemoved)
		{
			if (m_cSegsDeleted == 0)
				iFirstRemoved = iSeg;
			m_cSegsDeleted++;
			DiscardDeletedRefs(seg); // Discard any IAnalysisReferences wholly contained in deleted text
		}
Ejemplo n.º 2
0
		/// <summary>
		/// Runs a SegmentCollector over the given text and hands back what it found.
		/// Very similar to CollectSegments on the base class, but does not make even dummy
		/// annotations, just TsStringSegments.
		/// </summary>
		/// <param name="tssText">The text to break into segments.</param>
		/// <param name="ichMinSegBreaks">Receives the collector's EosPositions list.</param>
		/// <returns>The collector's Segments list.</returns>
		internal List<TsStringSegment> CollectTempSegmentAnnotations(ITsString tssText, out List<int> ichMinSegBreaks)
		{
			var segmentFinder = new SegmentCollector(tssText, m_cache.WritingSystemFactory);
			segmentFinder.Run();

			ichMinSegBreaks = segmentFinder.EosPositions;
			return segmentFinder.Segments;
		}