/// <summary>
/// Builds one temporary wfic or pfic annotation element for the paragraph described by
/// <paramref name="context"/> and stores its UTF-8 bytes in the paragraph's xfic
/// collection, keyed by <paramref name="beginOffset"/>.
/// </summary>
/// <param name="fWfic">True to create a temporary wfic; false to create a temporary pfic.</param>
/// <param name="beginOffset">Value written to the BeginOffset element; also the key used when adding.</param>
/// <param name="endOffset">Value written to the EndOffset element.</param>
/// <param name="context">Supplies the paragraph guid and the xfic collection to add to.</param>
private static void CreateTemporaryXfic(bool fWfic, int beginOffset, int endOffset, ParagraphContext context)
{
    var ownerParaGuid = context.ParaGuid;
    var paraXfics = context.ParaXfics;

    // The annotation type decides whether this dummy counts as a wfic or a pfic.
    var annotationDefnGuid = fWfic
        ? DataMigrationServices.kTwficAnnDefnGuid
        : DataMigrationServices.kPficAnnDefnGuid;

    // This is a new "old style" xfic XElement (not a dto); it exists only to try
    // to maintain analysis indices.
    var tempXficGuid = Guid.NewGuid().ToString().ToLower();

    // CmAnnotation part: the annotation type plus the temporary InstanceOf marker guid.
    var annotationPart = new XElement("CmAnnotation",
        new XElement("AnnotationType",
            DataMigrationServices.CreateReferenceObjSurElement(annotationDefnGuid)),
        new XElement("InstanceOf",
            DataMigrationServices.CreateReferenceObjSurElement(ktempXficInstanceOfGuid)));

    // CmBaseAnnotation part: offsets into the paragraph's Contents field.
    var baseAnnotationPart = new XElement("CmBaseAnnotation",
        new XElement("BeginOffset", new XAttribute("val", beginOffset)),
        new XElement("EndOffset", new XAttribute("val", endOffset)),
        new XElement("Flid", new XAttribute("val", StTxtParaTags.kflidContents)),
        new XElement("BeginObject",
            DataMigrationServices.CreateReferenceObjSurElement(ownerParaGuid)));

    var tempXfic = new XElement("rt",
        new XAttribute("class", "CmBaseAnnotation"),
        new XAttribute("guid", tempXficGuid),
        new XElement("CmObject"),
        annotationPart,
        baseAnnotationPart);

    paraXfics.Add(beginOffset, Encoding.UTF8.GetBytes(tempXfic.ToString()));
}
/// <summary>
/// Creates one temporary xfic for every entry in <paramref name="neededXficForms"/>.
/// </summary>
/// <param name="neededXficForms">
/// Tuples of (begin offset, end offset, is-wfic flag), passed on to CreateTemporaryXfic.
/// </param>
/// <param name="context">The paragraph context the temporary xfics are added to.</param>
private static void CreateTemporaryXfics(IEnumerable<Tuple<int, int, bool>> neededXficForms, ParagraphContext context)
{
    // Xfics were 'optional' and usually not maintained in the db, so a temporary
    // dummy one has to be present in the paragraph's xfic collection in order to get
    // the correct Begin/EndAnalysisIndex for chart and tagging objects.
    // N.B. A temporary InstanceOf guid is used; it is stripped out after these
    // objects have been created.
    foreach (var neededForm in neededXficForms)
    {
        var beginOffset = neededForm.Item1;
        var endOffset = neededForm.Item2;
        var isWfic = neededForm.Item3;
        CreateTemporaryXfic(isWfic, beginOffset, endOffset, context);
    }
}
/// <summary>
/// Concatenates the paragraph's runs, normalizes the result to NFD, and hands it to
/// ParseTextAndCheckForXfics so that any missing xfics get created.
/// </summary>
/// <param name="runs">Text of the paragraph's runs, in order; may be null or empty.</param>
/// <param name="context">The paragraph context passed through to the parser step.</param>
private static void EnsureAllXfics(IList<string> runs, ParagraphContext context)
{
    // No <Run> elements; can't have any xfics.
    if (runs == null)
    {
        return;
    }
    if (runs.Count == 0)
    {
        return;
    }

    var paragraphText = new StringBuilder();
    foreach (var run in runs)
    {
        paragraphText.Append(run);
    }

    var normalizedText = Icu.Normalize(paragraphText.ToString(), Icu.UNormalizationMode.UNORM_NFD);
    ParseTextAndCheckForXfics(normalizedText, context);
}
/// <summary>
/// Runs an XficParser over <paramref name="text"/> and the paragraph's existing xfics,
/// then creates a temporary xfic for each form the parser reports as needed.
/// </summary>
/// <param name="text">The paragraph text to parse.</param>
/// <param name="context">Supplies the existing xfics and receives any temporary ones.</param>
private static void ParseTextAndCheckForXfics(string text, ParagraphContext context)
{
    List<Tuple<int, int, bool>> missingXficForms;
    var parser = new XficParser(text, context.ParaXfics);
    parser.Run(out missingXficForms);

    // Nothing to add when the parser reports no needed forms.
    if (missingXficForms.Count > 0)
    {
        CreateTemporaryXfics(missingXficForms, context);
    }
}
/// <summary>
/// Walks every StTxtPara DTO and, for each one that has contents and old segment
/// annotations (or xfics from which a segment can be recovered), creates new Segment
/// DTOs and inserts a Segments element into the paragraph's XML.
/// </summary>
/// <param name="dtoRepos">Repository that supplies the paragraphs and receives new/updated DTOs.</param>
/// <param name="oldCCAs">Not used by this method; retained so the signature stays unchanged.</param>
/// <param name="halfBakedCcwgItems">Candidate items; those whose first AppliesTo target begins in the current paragraph are passed to AddSegmentAnalyses.</param>
/// <param name="paraToOldSegments">Old segments per lower-cased paragraph guid; the entry is removed once the paragraph is processed.</param>
/// <param name="paraToOldXfics">Old xfics per lower-cased paragraph guid; the entry is removed once the paragraph is processed.</param>
/// <param name="ccaGuidMap">Maps an old segment guid to the guid used for the new Segment.</param>
/// <param name="oldTextTags">Passed through to AddSegmentAnalyses.</param>
/// <param name="freeTrans">Passed through to AddFreeTranslation.</param>
/// <param name="litTrans">Passed through to AddLiteralTranslation.</param>
/// <param name="notes">Passed through to AddNotes.</param>
private static void ProcessParagraphs(
    IDomainObjectDTORepository dtoRepos,
    IDictionary<string, byte[]> oldCCAs,
    IEnumerable<KeyValuePair<byte[], XElement>> halfBakedCcwgItems,
    IDictionary<string, SortedList<int, byte[]>> paraToOldSegments,
    IDictionary<string, SortedList<int, byte[]>> paraToOldXfics,
    IDictionary<Guid, Guid> ccaGuidMap,
    ICollection<byte[]> oldTextTags,
    Dictionary<string, List<byte[]>> freeTrans,
    Dictionary<string, List<byte[]>> litTrans,
    Dictionary<string, List<byte[]>> notes)
{
    var dtos = dtoRepos.AllInstancesWithSubclasses("StTxtPara");
    foreach (var currentParaDto in dtos)
    {
        // If it has no contents, then skip it.
        var stTxtParaBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsStTxtPara);
        if (!stTxtParaBounds.IsValid)
            continue;
        var contentsBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsContents,
            stTxtParaBounds.BeginTagOffset, stTxtParaBounds.EndTagOffset);
        if (!contentsBounds.IsValid)
            continue;

        // Mark the paragraph as needing retokenization.
        MarkParaAsNeedingTokenization(dtoRepos, currentParaDto);

        // Lower-cased once and reused for every lookup, removal and the ownerguid attribute.
        var currentParaGuid = currentParaDto.Guid.ToLowerInvariant();

        SortedList<int, byte[]> xficsForCurrentPara;
        paraToOldXfics.TryGetValue(currentParaGuid, out xficsForCurrentPara);

        SortedList<int, byte[]> segsForCurrentPara;
        if (!paraToOldSegments.TryGetValue(currentParaGuid, out segsForCurrentPara)
            && xficsForCurrentPara != null && xficsForCurrentPara.Count > 0)
        {
            // We have no segments at all, but there are xfics, so try to recover the
            // broken data, as much as possible.
            // Need to create a new old segment element (not dto), to try and keep old data.
            var guidBrandNewSeg = Guid.NewGuid();
            var brandNewSegGuid = guidBrandNewSeg.ToString().ToLowerInvariant();
            // The recovered segment keeps its own guid as the new Segment's guid.
            ccaGuidMap.Add(guidBrandNewSeg, guidBrandNewSeg);
            segsForCurrentPara = new SortedList<int, byte[]>();
            paraToOldSegments.Add(currentParaGuid, segsForCurrentPara);

            var bldr = new StringBuilder();
            // The start tag is closed with '>' so the generated bytes are well-formed XML.
            bldr.AppendFormat("<rt guid=\"{0}\">", brandNewSegGuid);
            bldr.Append("<CmObject/>");
            bldr.Append("<CmBaseAnnotation>");
            bldr.Append("<BeginOffset val=\"0\"/>");
            // int.MaxValue as the end offset: the segment is open-ended.
            bldr.AppendFormat("<EndOffset val=\"{0}\"/>", int.MaxValue);
            bldr.Append("</CmBaseAnnotation>");
            bldr.Append("</rt>");
            segsForCurrentPara.Add(0, Encoding.UTF8.GetBytes(bldr.ToString()));
        }

        // If the para has no segs or xfics, skip the following work.
        if (segsForCurrentPara == null)
            continue;

        if (xficsForCurrentPara != null && xficsForCurrentPara.Count > 0 && segsForCurrentPara.Count > 0)
        {
            // We have both segments and xfics. Check for odd case (like FWR-3081)
            // where the first segment starts AFTER the first xfic, and add a new
            // segment that covers the text up to the first current segment.
            // Both lists are sorted by offset, so index 0 is the smallest key.
            if (xficsForCurrentPara.Keys[0] < segsForCurrentPara.Keys[0])
                AddExtraInitialSegment(currentParaGuid, ccaGuidMap, paraToOldSegments);
        }

        var halfBakedCcwgItemsForCurrentPara = new List<KeyValuePair<byte[], XElement>>();
        // The writing systems are returned by the helper but are not needed here.
        List<string> writingSystems;
        var runs = GetParagraphContentRuns(currentParaDto.XmlBytes, out writingSystems);

        // We may well have segments with no xfics, for example, Scripture that has segmented BT.
        if (xficsForCurrentPara != null)
        {
            // Since pfics/wfics were 'optional' and usually not maintained in the db,
            // we need to make sure there is a dummy one in xficsForCurrentPara
            // in order to get the correct Begin/EndAnalysisIndex for chart and tagging objects.
            // It turns out we don't need to worry about ws and exact begin/end character offsets.
            // All we need to end up with correct indices is the correct NUMBER of xfics.
            var context = new ParagraphContext(currentParaGuid, xficsForCurrentPara);
            EnsureAllXfics(runs, context);

            // Find any 'halfbaked' items for the current paragraph.
            // Get the para for the first objsur's guid (some twfic ann),
            // in the CmIndirectAnnotation's AppliesTo prop.
            foreach (var kvp in halfBakedCcwgItems)
            {
                var refs = GetAppliesToObjsurGuids(kvp.Key);
                if (refs == null || refs.Count == 0)
                    continue;
                var guid = refs[0];
                var dto = dtoRepos.GetDTO(guid);
                var guidBegin = GetBeginObjectGuid(dto.XmlBytes);
                if (guidBegin == currentParaGuid)
                    halfBakedCcwgItemsForCurrentPara.Add(kvp);
            }
        }

        var bldrSegmentsElement = new StringBuilder();
        var numberOfOldSegmentsInCurrentPara = segsForCurrentPara.Count;
        var currentOldSegmentIdx = 1;
        foreach (var currentOldSegInCurrentPara in segsForCurrentPara.Values)
        {
            var isLastOldSegment = (currentOldSegmentIdx++ == numberOfOldSegmentsInCurrentPara);
            var oldSegGuid = GetGuid(currentOldSegInCurrentPara);
            var guidOldSeg = new Guid(oldSegGuid);
            var newSegGuid = ccaGuidMap[guidOldSeg].ToString().ToLowerInvariant();

            // Add it to Segments prop of currentParaElement.
            var objsur = DataMigrationServices.CreateOwningObjSurElement(newSegGuid);
            bldrSegmentsElement.AppendLine(objsur.ToString());

            // Create new XElement for new segment.
            // The child-building helpers are called in this exact order on purpose;
            // some of them receive the repository and may modify it.
            var newSegmentElement = new XElement("rt",
                new XAttribute("class", "Segment"),
                new XAttribute("guid", newSegGuid),
                new XAttribute("ownerguid", currentParaGuid),
                new XElement("CmObject"),
                new XElement("Segment",
                    AddBeginOffset(GetBeginOffset(currentOldSegInCurrentPara)),
                    AddFreeTranslation(oldSegGuid, freeTrans),
                    AddLiteralTranslation(oldSegGuid, litTrans),
                    AddNotes(dtoRepos, newSegGuid, oldSegGuid, notes),
                    AddSegmentAnalyses(dtoRepos,
                        halfBakedCcwgItemsForCurrentPara,
                        currentOldSegInCurrentPara,
                        xficsForCurrentPara,
                        oldTextTags,
                        newSegGuid,
                        isLastOldSegment,
                        currentParaDto)));
            newSegmentElement = DeleteTemporaryAnalyses(newSegmentElement);

            // Create a new Segment instance DTO from the 'newSegmentElement',
            // and add it to repos.
            var newSegDto = new DomainObjectDTO(newSegGuid, "Segment", newSegmentElement.ToString());
            dtoRepos.Add(newSegDto);
        }

        // This paragraph's old data has been consumed.
        paraToOldSegments.Remove(currentParaGuid);
        paraToOldXfics.Remove(currentParaGuid);

        if (bldrSegmentsElement.Length == 0)
            continue;
        bldrSegmentsElement.Insert(0, "<Segments>");
        bldrSegmentsElement.Append("</Segments>");

        // Add paraSegmentsElement to current para.
        var segBytes = Encoding.UTF8.GetBytes(bldrSegmentsElement.ToString());
        var xmlNew = new List<byte>(currentParaDto.XmlBytes.Length + segBytes.Length);
        xmlNew.AddRange(currentParaDto.XmlBytes);
        // Bounds are recomputed from the DTO's current bytes rather than reusing the
        // ones taken at the top of the loop, since the DTO has been passed to helpers
        // in between that may have changed it.
        stTxtParaBounds = new ElementBounds(currentParaDto.XmlBytes, s_tagsStTxtPara);
        xmlNew.InsertRange(stTxtParaBounds.EndTagOffset, segBytes);

        // Tell DTO repos about the modification.
        DataMigrationServices.UpdateDTO(dtoRepos, currentParaDto, xmlNew.ToArray());
    }
}