コード例 #1
0
		/// <summary>
		/// Builds a temporary CmBaseAnnotation "rt" element for the given character range and
		/// stores its UTF-8 bytes in the xfic list held by <paramref name="context"/>.
		/// </summary>
		/// <param name="fWfic">True to use the twfic annotation definition guid; false to use the pfic one.</param>
		/// <param name="beginOffset">Written as the BeginOffset value; also the key under which the xfic is stored.</param>
		/// <param name="endOffset">Written as the EndOffset value.</param>
		/// <param name="context">Supplies the paragraph guid and the paragraph's xfic list.</param>
		private static void CreateTemporaryXfic(bool fWfic, int beginOffset, int endOffset, ParagraphContext context)
		{
			var ownerParaGuid = context.ParaGuid;
			var paraXfics = context.ParaXfics;

			// Temporary wfic when fWfic is set, temporary pfic otherwise.
			var annotationDefnGuid = fWfic
				? DataMigrationServices.kTwficAnnDefnGuid
				: DataMigrationServices.kPficAnnDefnGuid;

			// A new "old style" xfic is built as an XElement (not a dto), to try to maintain analysis indices.
			var tempXficGuid = Guid.NewGuid().ToString().ToLower();
			const int contentsFlid = StTxtParaTags.kflidContents;

			// The InstanceOf reference uses the temporary marker guid (ktempXficInstanceOfGuid).
			var cmAnnotation = new XElement("CmAnnotation",
				new XElement("AnnotationType",
					DataMigrationServices.CreateReferenceObjSurElement(annotationDefnGuid)),
				new XElement("InstanceOf",
					DataMigrationServices.CreateReferenceObjSurElement(ktempXficInstanceOfGuid)));

			var cmBaseAnnotation = new XElement("CmBaseAnnotation",
				new XElement("BeginOffset", new XAttribute("val", beginOffset)),
				new XElement("EndOffset", new XAttribute("val", endOffset)),
				new XElement("Flid", new XAttribute("val", contentsFlid)),
				new XElement("BeginObject",
					DataMigrationServices.CreateReferenceObjSurElement(ownerParaGuid)));

			var xficElement = new XElement("rt",
				new XAttribute("class", "CmBaseAnnotation"),
				new XAttribute("guid", tempXficGuid),
				new XElement("CmObject"),
				cmAnnotation,
				cmBaseAnnotation);

			var xficBytes = Encoding.UTF8.GetBytes(xficElement.ToString());
			paraXfics.Add(beginOffset, xficBytes);
		}
コード例 #2
0
		/// <summary>
		/// Creates one temporary xfic per entry in <paramref name="neededXficForms"/>.
		/// </summary>
		/// <remarks>
		/// Xfics were 'optional' and usually not maintained in the db, so temporary dummy ones are
		/// added to the paragraph's xfic list in order to get the correct Begin/EndAnalysisIndex
		/// for chart and tagging objects. They carry a temporary InstanceOf guid that is stripped
		/// out after those objects have been created.
		/// </remarks>
		/// <param name="neededXficForms">Tuples of (begin offset, end offset, is-wfic flag).</param>
		/// <param name="context">Paragraph context passed through to <c>CreateTemporaryXfic</c>.</param>
		private static void CreateTemporaryXfics(IEnumerable<Tuple<int, int, bool>> neededXficForms,
			ParagraphContext context)
		{
			foreach (var neededForm in neededXficForms)
			{
				var begin = neededForm.Item1;
				var end = neededForm.Item2;
				var isWfic = neededForm.Item3;
				CreateTemporaryXfic(isWfic, begin, end, context);
			}
		}
コード例 #3
0
		/// <summary>
		/// Concatenates the paragraph's run strings, normalizes the result to NFD, and hands it to
		/// <c>ParseTextAndCheckForXfics</c>.
		/// </summary>
		/// <param name="runs">The run strings of the paragraph; may be null or empty, in which case nothing is done.</param>
		/// <param name="context">Paragraph context passed through to the parser step.</param>
		private static void EnsureAllXfics(IList<string> runs, ParagraphContext context)
		{
			// No <Run> elements means there cannot be any xfics.
			var hasRuns = runs != null && runs.Count != 0;
			if (!hasRuns)
				return;

			var joinedRuns = string.Concat(runs);
			var decomposedText = Icu.Normalize(joinedRuns, Icu.UNormalizationMode.UNORM_NFD);
			ParseTextAndCheckForXfics(decomposedText, context);
		}
コード例 #4
0
		/// <summary>
		/// Runs an <c>XficParser</c> over <paramref name="text"/> together with the paragraph's
		/// existing xfics, and creates temporary xfics for every form the parser reports as needed.
		/// </summary>
		/// <param name="text">The paragraph text to parse.</param>
		/// <param name="context">Supplies the paragraph's xfic list and receives any temporary xfics.</param>
		private static void ParseTextAndCheckForXfics(string text, ParagraphContext context)
		{
			List<Tuple<int, int, bool>> missingXficForms;
			var parser = new XficParser(text, context.ParaXfics);
			parser.Run(out missingXficForms);

			// Only do further work when the parser reported at least one needed form.
			if (missingXficForms.Count != 0)
				CreateTemporaryXfics(missingXficForms, context);
		}
コード例 #5
0
		/// <summary>
		/// For every StTxtPara DTO that has a Contents element, converts the paragraph's old segment
		/// annotations into new "Segment" DTOs and inserts a Segments property listing them into
		/// the paragraph's XML.
		/// </summary>
		/// <param name="dtoRepos">Repository the paragraphs are read from; new Segment DTOs are added to it
		/// and modified paragraphs are reported to it.</param>
		/// <param name="oldCCAs">Not referenced in this method.</param>
		/// <param name="halfBakedCcwgItems">Items whose first AppliesTo reference begins in the current
		/// paragraph are passed on to <c>AddSegmentAnalyses</c>.</param>
		/// <param name="paraToOldSegments">Old segments per lowercase paragraph guid, keyed by an int
		/// (a recovered segment is stored under key 0). The entry for a paragraph is removed once
		/// its segments have been converted.</param>
		/// <param name="paraToOldXfics">Old xfics per lowercase paragraph guid. The entry for a paragraph
		/// is removed once its segments have been converted.</param>
		/// <param name="ccaGuidMap">Maps an old segment guid to the guid used for the new Segment. A
		/// recovery segment created here is mapped to itself.</param>
		/// <param name="oldTextTags">Passed through to <c>AddSegmentAnalyses</c>.</param>
		/// <param name="freeTrans">Passed through to <c>AddFreeTranslation</c>.</param>
		/// <param name="litTrans">Passed through to <c>AddLiteralTranslation</c>.</param>
		/// <param name="notes">Passed through to <c>AddNotes</c>.</param>
		private static void ProcessParagraphs(
			IDomainObjectDTORepository dtoRepos,
			IDictionary<string, byte[]> oldCCAs,
			IEnumerable<KeyValuePair<byte[], XElement>> halfBakedCcwgItems,
			IDictionary<string, SortedList<int, byte[]>> paraToOldSegments,
			IDictionary<string, SortedList<int, byte[]>> paraToOldXfics,
			IDictionary<Guid, Guid> ccaGuidMap,
			ICollection<byte[]> oldTextTags,
			Dictionary<string, List<byte[]>> freeTrans,
			Dictionary<string, List<byte[]>> litTrans,
			Dictionary<string, List<byte[]>> notes)
		{
			var dtos = dtoRepos.AllInstancesWithSubclasses("StTxtPara");
			foreach (var currentParaDto in dtos)
			{
				// If it has no contents, then skip it.
				var stTxtParaBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsStTxtPara);
				if (!stTxtParaBounds.IsValid)
					continue;
				var contentsBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsContents,
					stTxtParaBounds.BeginTagOffset, stTxtParaBounds.EndTagOffset);
				if (!contentsBounds.IsValid)
					continue;

				// Mark the paragraph as needing retokenization.
				MarkParaAsNeedingTokenization(dtoRepos, currentParaDto);

				// The lookup dictionaries are queried with the lowercase form of the guid.
				var currentParaGuid = currentParaDto.Guid.ToLower();
				SortedList<int, byte[]> xficsForCurrentPara;
				// A miss leaves xficsForCurrentPara null, which the code below checks for.
				paraToOldXfics.TryGetValue(currentParaGuid, out xficsForCurrentPara);

				SortedList<int, byte[]> segsForCurrentPara;
				if (!paraToOldSegments.TryGetValue(currentParaGuid, out segsForCurrentPara)
					&& xficsForCurrentPara != null
					&& xficsForCurrentPara.Count > 0)
				{
					// We have no segments at all, but there are xfics, so try to recover the broken data,
					// as much as possible.
					// Need to create a new old segment XElement (not dto), to try to keep old data.
					var guidBrandNewSeg = Guid.NewGuid();
					var brandNewSegGuid = guidBrandNewSeg.ToString().ToLower();
					ccaGuidMap.Add(guidBrandNewSeg, guidBrandNewSeg);
					segsForCurrentPara = new SortedList<int, byte[]>();
					paraToOldSegments.Add(currentParaGuid, segsForCurrentPara);
					var bldr = new StringBuilder();
					// The start tag must be closed with '>' so the recovered segment is well-formed XML.
					bldr.AppendFormat("<rt guid=\"{0}\">", brandNewSegGuid);
					bldr.Append("<CmObject/>");
					bldr.Append("<CmBaseAnnotation>");
					bldr.Append("<BeginOffset val=\"0\"/>");
					// The recovered segment spans from offset 0 to int.MaxValue.
					bldr.AppendFormat("<EndOffset val=\"{0}\"/>", int.MaxValue);
					bldr.Append("</CmBaseAnnotation>");
					bldr.Append("</rt>");
					segsForCurrentPara.Add(0, Encoding.UTF8.GetBytes(bldr.ToString()));
				}

				// If the para has no segs or xfics, skip the following work.
				if (segsForCurrentPara == null)
					continue;

				if (xficsForCurrentPara != null && xficsForCurrentPara.Count > 0 && segsForCurrentPara.Count > 0)
				{
					// We have both segments and xfics. Check for odd case (like FWR-3081)
					// where the first segment starts AFTER the first xfic, and add a new
					// segment that covers the text up to the first current segment.
					if (xficsForCurrentPara.First().Key < segsForCurrentPara.First().Key)
						AddExtraInitialSegment(currentParaGuid, ccaGuidMap, paraToOldSegments);
				}
				var halfBakedCcwgItemsForCurrentPara = new List<KeyValuePair<byte[], XElement>>();
				List<string> writingSystems;
				var runs = GetParagraphContentRuns(currentParaDto.XmlBytes, out writingSystems);
				// We may well have segments with no xfics, for example, Scripture that has segmented BT.
				if (xficsForCurrentPara != null)
				{
					// Since pfics/wfics were 'optional' and usually not maintained in the db,
					// we need to make sure there is a dummy one in xficsForCurrentPara
					// in order to get the correct Begin/EndAnalysisIndex for chart and tagging objects
					// It turns out we don't need to worry about ws and exact begin/end character offsets.
					// All we need to end up with correct indices is the correct NUMBER of xfics.
					var context = new ParagraphContext(currentParaGuid, xficsForCurrentPara);
					EnsureAllXfics(runs, context);

					// Find any 'halfbaked' items for the current paragraph.
					// Get the para for the first objsur's guid (some twfic ann),
					// in the CmIndirectAnnotation's AppliesTo prop.
					foreach (var kvp in halfBakedCcwgItems)
					{
						var refs = GetAppliesToObjsurGuids(kvp.Key);
						if (refs == null || refs.Count == 0)
							continue;
						var guid = refs[0];
						var dto = dtoRepos.GetDTO(guid);
						var guidBegin = GetBeginObjectGuid(dto.XmlBytes);
						if (guidBegin == currentParaGuid)
							halfBakedCcwgItemsForCurrentPara.Add(kvp);
					}
				}
				var bldrSegmentsElement = new StringBuilder();
				var numberOfOldSegmentsInCurrentPara = segsForCurrentPara.Values.Count;
				// 1-based counter, compared against the count to detect the last old segment.
				var currentOldSegmentIdx = 1;
				foreach (var currentOldSegInCurrentPara in segsForCurrentPara.Values)
				{
					var isLastOldSegment = (currentOldSegmentIdx++ == numberOfOldSegmentsInCurrentPara);
					var oldSegGuid = GetGuid(currentOldSegInCurrentPara);
					var guidOldSeg = new Guid(oldSegGuid);
					// The indexer throws if the old segment guid has no entry in ccaGuidMap.
					var newSegGuid = ccaGuidMap[guidOldSeg].ToString().ToLowerInvariant();
					// Add it to Segments prop of currentParaElement,
					var objsur = DataMigrationServices.CreateOwningObjSurElement(newSegGuid);
					bldrSegmentsElement.AppendLine(objsur.ToString());

					// Create new XElement for new segment.
					var newSegmentElement =
						new XElement("rt",
							new XAttribute("class", "Segment"),
							new XAttribute("guid", newSegGuid),
							new XAttribute("ownerguid", currentParaDto.Guid.ToLower()),
							new XElement("CmObject"),
							new XElement("Segment",
								AddBeginOffset(GetBeginOffset(currentOldSegInCurrentPara)),
								AddFreeTranslation(oldSegGuid, freeTrans),
								AddLiteralTranslation(oldSegGuid, litTrans),
								AddNotes(dtoRepos, newSegGuid, oldSegGuid, notes),
								AddSegmentAnalyses(dtoRepos,
									halfBakedCcwgItemsForCurrentPara,
									currentOldSegInCurrentPara,
									xficsForCurrentPara,
									oldTextTags,
									newSegGuid,
									isLastOldSegment,
									currentParaDto)));
					newSegmentElement = DeleteTemporaryAnalyses(newSegmentElement);
					// Create a new Segment instance DTO from the 'newSegmentElement',
					// and add it to repos.
					var newSegDto = new DomainObjectDTO(newSegGuid, "Segment", newSegmentElement.ToString());
					dtoRepos.Add(newSegDto);
				}

				// This paragraph's old data has been converted; currentParaGuid is already lowercase.
				paraToOldSegments.Remove(currentParaGuid);
				paraToOldXfics.Remove(currentParaGuid);

				if (bldrSegmentsElement.Length == 0)
					continue;
				bldrSegmentsElement.Insert(0, "<Segments>");
				bldrSegmentsElement.Append("</Segments>");

				// Add paraSegmentsElement to current para.
				var segBytes = Encoding.UTF8.GetBytes(bldrSegmentsElement.ToString());
				var xmlNew = new List<byte>(currentParaDto.XmlBytes.Length + segBytes.Length);
				xmlNew.AddRange(currentParaDto.XmlBytes);
				// Bounds are recomputed from the DTO's current bytes before inserting.
				// NOTE(review): presumably because MarkParaAsNeedingTokenization may have changed XmlBytes - confirm.
				stTxtParaBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsStTxtPara);
				xmlNew.InsertRange(stTxtParaBounds.EndTagOffset, segBytes);
				// Tell DTO repos about the modification.
				DataMigrationServices.UpdateDTO(dtoRepos, currentParaDto, xmlNew.ToArray());
			}
		}