// Checks that requesting records with an empty record marker is rejected
// with an ArgumentException.
public void Empty_String_Parameter_Throws()
{
    using (var emptyFile = new TempFile(""))
    using (var splitter = new FastXmlElementSplitter(emptyFile.Path))
    {
        // The enumeration has to be forced with ToList, otherwise nothing is evaluated
        // and the exception never surfaces.
        Assert.Throws<ArgumentException>(
            () => splitter.GetSecondLevelElementBytes("").ToList());
    }
}
/// <summary>
/// Event handler that asks the user for a data file, splits it into records with
/// <c>FastXmlElementSplitter</c>, and writes the number of records to the console.
/// <c>Close()</c> is called afterwards whether or not a file was selected.
/// </summary>
/// <param name="sender">Source of the event (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>Any exception has its message written to the console and is then rethrown.</remarks>
private void LoadDataFile(object sender, EventArgs e)
{
    try
    {
        string chosenPathname = null;
        using (var dialog = new OpenFileDialog())
        {
            var answer = dialog.ShowDialog(this);
            if (answer == DialogResult.OK)
            {
                chosenPathname = dialog.FileName;
            }
        }

        if (!string.IsNullOrEmpty(chosenPathname))
        {
            // Both markers stay null for any extension that is not listed below.
            string optionalFirstMarker = null;
            string mainMarker = null;
            var lowerCaseExtension = Path.GetExtension(chosenPathname).ToLowerInvariant();
            if (lowerCaseExtension == ".lift")
            {
                optionalFirstMarker = "header";
                mainMarker = "entry";
            }
            else if (lowerCaseExtension == ".chorusnotes")
            {
                mainMarker = "annotation";
            }
            else if (lowerCaseExtension == ".fwdata")
            {
                optionalFirstMarker = "AdditionalFields";
                mainMarker = "rt";
            }

            using (var splitter = new FastXmlElementSplitter(chosenPathname))
            {
                bool sawOptionalFirstElement;
                var records = splitter.GetSecondLevelElementBytes(optionalFirstMarker, mainMarker, out sawOptionalFirstElement);
                Console.WriteLine("Records: " + records.Count());
            }
        }

        Close();
    }
    catch (Exception err)
    {
        Console.WriteLine(err.Message);
        throw;
    }
}
// Checks that a file whose root element has no children produces zero records
// instead of an error.
public void No_Records_Without_Children_Is_Fine()
{
    const string rootOnlyXml = @"<?xml version='1.0' encoding='utf-8'?> <classdata />";

    using (var scratchFile = TempFile.WithExtension(".ClassData"))
    {
        File.WriteAllText(scratchFile.Path, rootOnlyXml, Encoding.UTF8);

        using (var splitter = new FastXmlElementSplitter(scratchFile.Path))
        {
            var recordCount = splitter.GetSecondLevelElementBytes("rt").Count();
            Assert.AreEqual(0, recordCount);
        }
    }
}
/// <summary>
/// Reads the records twice, first as byte arrays and then as strings, and asserts that both
/// passes return <paramref name="expectedCount"/> items, that each string equals the bytes at
/// the same index decoded with <paramref name="enc"/>, and that each string parses as an
/// XML element.
/// </summary>
/// <param name="fastXmlElementSplitter">Splitter that is already opened on the file under test.</param>
/// <param name="expectedCount">Number of records both passes must return.</param>
/// <param name="firstElementMarker">Marker of the optional first element, passed through to the splitter.</param>
/// <param name="recordMarker">Marker of the main records, passed through to the splitter.</param>
/// <param name="enc">Encoding used to turn the byte form of a record into text.</param>
private static void ProcessContent(FastXmlElementSplitter fastXmlElementSplitter, int expectedCount, string firstElementMarker, string recordMarker, Encoding enc)
{
    bool sawOptionalFirstElement;

    var recordsAsBytes = fastXmlElementSplitter
        .GetSecondLevelElementBytes(firstElementMarker, recordMarker, out sawOptionalFirstElement)
        .ToList();
    Assert.AreEqual(expectedCount, recordsAsBytes.Count);

    var recordsAsText = fastXmlElementSplitter
        .GetSecondLevelElementStrings(firstElementMarker, recordMarker, out sawOptionalFirstElement)
        .ToList();
    Assert.AreEqual(expectedCount, recordsAsText.Count);

    var index = 0;
    foreach (var recordText in recordsAsText)
    {
        var decodedBytes = enc.GetString(recordsAsBytes[index]);
        Assert.AreEqual(recordText, decodedBytes);

        // Parsing is the check here: it throws if the record is not well-formed XML.
        XElement.Parse(recordText);
        index++;
    }
}
// Checks that a plain text file (not XML at all) is rejected with an ArgumentException.
public void Not_Xml_Throws()
{
    const string plainText = "Some random text file.";

    using (var scratchFile = TempFile.WithExtension(".txt"))
    {
        File.WriteAllText(scratchFile.Path, plainText, Encoding.UTF8);

        using (var splitter = new FastXmlElementSplitter(scratchFile.Path))
        {
            // An earlier version was expected to throw XmlException, but the splitter does not
            // parse XML thoroughly enough to report that with confidence.
            // ToList forces the enumeration to actually run.
            Assert.Throws<ArgumentException>(
                () => splitter.GetSecondLevelElementBytes("rt").ToList());
        }
    }
}
/// <summary>
/// Compares each record of the rebuilt fwdata file with the record of the same guid in the
/// ".orig" copy and appends one line to <paramref name="sb"/> for every problem found:
/// records that differ, records that exist only in the rebuilt file, and records that
/// exist only in the original.
/// </summary>
/// <param name="verifyTimer">
/// Stopwatch that is running while the files are read and compared; it is stopped around the
/// forced garbage collection between the two passes and at the end of the method.
/// </param>
/// <param name="sb">Receives the problem descriptions.</param>
/// <remarks>
/// For records that differ, the original and rebuilt bytes are also written to
/// "-SRC.txt" / "-TRG.txt" files in the working directory via <c>WriteProblemDataFile</c>.
/// </remarks>
private void Verify(Stopwatch verifyTimer, StringBuilder sb)
{
    GC.Collect(2, GCCollectionMode.Forced);
    verifyTimer.Start();
    GetFreshMdc(); // Want it fresh.

    // Pass 1: index the original records by lower-cased guid. The optional first element
    // (custom properties) has no guid, so it is stored under the AdditionalFields tag.
    var origData = new Dictionary<string, byte[]>(StringComparer.InvariantCultureIgnoreCase);
    using (var fastSplitterOrig = new FastXmlElementSplitter(_srcFwdataPathname + ".orig"))
    {
        var isFirstOrigRecord = true;
        foreach (var origRecord in fastSplitterOrig.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
        {
            // Only the very first record is tested for being the optional first element.
            if (isFirstOrigRecord)
            {
                isFirstOrigRecord = false;
                if (FLExProjectSplitter.IsOptionalFirstElement(origRecord))
                {
                    origData.Add(SharedConstants.AdditionalFieldsTag, origRecord);
                    continue;
                }
            }

            var origAttributes = XmlUtils.GetAttributes(origRecord, new HashSet<string> { SharedConstants.GuidStr });
            origData.Add(origAttributes[SharedConstants.GuidStr].ToLowerInvariant(), origRecord);
        }
    }

    // Keep the forced collection out of the measured time.
    verifyTimer.Stop();
    GC.Collect(2, GCCollectionMode.Forced);
    verifyTimer.Start();

    // Pass 2: walk the rebuilt file and tick off every record found in the original.
    using (var fastSplitterNew = new FastXmlElementSplitter(_srcFwdataPathname))
    {
        // NB: The main input file *does* have to deal with the optional first element.
        var isFirstNewRecord = true;
        foreach (var newRecordAsBytes in fastSplitterNew.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
        {
            var isCustomPropertiesRecord = isFirstNewRecord && FLExProjectSplitter.IsOptionalFirstElement(newRecordAsBytes);
            isFirstNewRecord = false;

            byte[] origRecAsBytes;
            string srcGuid = null;
            if (isCustomPropertiesRecord)
            {
                // Report a missing counterpart instead of failing with KeyNotFoundException.
                if (!origData.TryGetValue(SharedConstants.AdditionalFieldsTag, out origRecAsBytes))
                {
                    sb.Append("The rebuilt fwdata file has custom properties, but the original does not.");
                    sb.AppendLine();
                    continue;
                }
                origData.Remove(SharedConstants.AdditionalFieldsTag);
            }
            else
            {
                var attrValues = XmlUtils.GetAttributes(newRecordAsBytes, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class });
                srcGuid = attrValues[SharedConstants.GuidStr];

                // Report a missing counterpart instead of failing with KeyNotFoundException.
                if (!origData.TryGetValue(srcGuid, out origRecAsBytes))
                {
                    sb.AppendFormat("Object with guid '{0}' is in the rebuilt fwdata file, but not in the original.", srcGuid);
                    sb.AppendLine();
                    continue;
                }
                origData.Remove(srcGuid);

                if (attrValues[SharedConstants.Class] == "WfiWordform")
                {
                    // The original's Checksum value is forced to "0" before comparing.
                    // NOTE(review): presumably the round trip resets it in the rebuilt file - confirm.
                    var wfElement = Utilities.CreateFromBytes(origRecAsBytes);
                    var csProp = wfElement.Element("Checksum");
                    if (csProp != null)
                    {
                        csProp.Attribute(SharedConstants.Val).Value = "0";
                        origRecAsBytes = SharedConstants.Utf8.GetBytes(wfElement.ToString());
                    }
                }
            }

            // Comparing the raw bytes avoids building XML nodes for records that are identical.
            if (XmlUtilities.AreXmlElementsEqual(origRecAsBytes, newRecordAsBytes))
                continue;

            // A null guid means the record is the custom properties element.
            var problemFilePrefix = srcGuid == null ? "CustomProperties" : srcGuid;
            WriteProblemDataFile(Path.Combine(_workingDir, problemFilePrefix + "-SRC.txt"), origRecAsBytes);
            WriteProblemDataFile(Path.Combine(_workingDir, problemFilePrefix + "-TRG.txt"), newRecordAsBytes);
            if (srcGuid == null)
            {
                sb.Append("Main src and trg custom properties are different in the resulting xml.");
            }
            else
            {
                sb.AppendFormat("Main src and trg object with guid '{0}' are different in the resulting xml.", srcGuid);
            }
            sb.AppendLine();
        }
    }

    // Whatever is still in the dictionary was never seen in the rebuilt file.
    if (origData.Count > 0)
    {
        sb.AppendFormat("Hmm, there are {0} more <rt> elements in the original than in the rebuilt fwdata file.", origData.Count);
        sb.AppendLine();
        foreach (var attrs in origData.Values.Select(byteData => XmlUtils.GetAttributes(byteData, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class })))
        {
            sb.AppendFormat("\t\t'{0}' of class '{1}' is not in rebuilt file.", attrs[SharedConstants.GuidStr], attrs[SharedConstants.Class]);
            sb.AppendLine();
        }
    }
    verifyTimer.Stop();
}
/// <summary>
/// Copies the fwdata file to a ".orig" backup, runs
/// <c>FLExProjectSplitter.PushHumptyOffTheWall</c> on it, optionally runs every resulting data
/// file through <c>XmlMergeService.RemoveAmbiguousChildren</c>, and finally runs
/// <c>FLExProjectUnifier.PutHumptyTogetherAgain</c>.
/// </summary>
/// <param name="breakupTimer">Runs only during the PushHumptyOffTheWall call.</param>
/// <param name="restoreTimer">Runs only during the PutHumptyTogetherAgain call.</param>
/// <param name="ambiguousTimer">Runs only while the data files are checked for ambiguous children.</param>
/// <param name="sbValidation">Receives the description and HTML details of every warning the check collected.</param>
private void RoundTripData(Stopwatch breakupTimer, Stopwatch restoreTimer, Stopwatch ambiguousTimer, StringBuilder sbValidation)
{
    File.Copy(_srcFwdataPathname, _srcFwdataPathname + ".orig", true); // Keep it safe.
    GetFreshMdc(); // Want it fresh.

    breakupTimer.Start();
    FLExProjectSplitter.PushHumptyOffTheWall(new NullProgress(), _srcFwdataPathname);
    breakupTimer.Stop();
    GC.Collect(2, GCCollectionMode.Forced);

    if (_cbCheckAmbiguousElements.Checked)
    {
        // Gather every file below the four data folders, leaving out the chorusnotes files.
        var allDataFiles = new HashSet<string>();
        foreach (var folderName in new[] { "Linguistics", "Anthropology", "Other", "General" })
        {
            var folderPath = Path.Combine(_workingDir, folderName);
            if (!Directory.Exists(folderPath))
            {
                continue;
            }

            allDataFiles.UnionWith(
                Directory.GetFiles(folderPath, "*.*", SearchOption.AllDirectories)
                    .Where(pathname => !pathname.ToLowerInvariant().EndsWith("chorusnotes")));
        }

        var mergeOrder = new MergeOrder(null, null, null, new NullMergeSituation())
        {
            EventListener = new ChangeAndConflictAccumulator()
        };
        var merger = FieldWorksMergeServices.CreateXmlMergerForFieldWorksData(mergeOrder, MetadataCache.MdCache);

        ambiguousTimer.Start();
        foreach (var dataFile in allDataFiles)
        {
            // The extension (without its leading dot) selects the record element name and,
            // for some file types, an optional first element name.
            string optionalElementName = null;
            string mainRecordName = null;
            var extensionWithoutDot = Path.GetExtension(dataFile).Substring(1);
            switch (extensionWithoutDot)
            {
                case SharedConstants.Style:
                    mainRecordName = SharedConstants.StStyle;
                    break;
                case SharedConstants.List:
                    mainRecordName = SharedConstants.CmPossibilityList;
                    break;
                case SharedConstants.langproj:
                    mainRecordName = SharedConstants.LangProject;
                    break;
                case SharedConstants.Annotation:
                    mainRecordName = SharedConstants.CmAnnotation;
                    break;
                case SharedConstants.Filter:
                    mainRecordName = SharedConstants.CmFilter;
                    break;
                case SharedConstants.orderings:
                    mainRecordName = SharedConstants.VirtualOrdering;
                    break;
                case SharedConstants.pictures:
                    mainRecordName = SharedConstants.CmPicture;
                    break;
                case SharedConstants.ArchivedDraft:
                    mainRecordName = SharedConstants.ScrDraft;
                    break;
                case SharedConstants.ImportSetting:
                    mainRecordName = SharedConstants.ScrImportSet;
                    break;
                case SharedConstants.Srs:
                    mainRecordName = SharedConstants.ScrRefSystem;
                    break;
                case SharedConstants.Trans:
                    mainRecordName = SharedConstants.Scripture;
                    break;
                case SharedConstants.bookannotations:
                    mainRecordName = SharedConstants.ScrBookAnnotations;
                    break;
                case SharedConstants.book:
                    mainRecordName = SharedConstants.ScrBook;
                    break;
                case SharedConstants.Ntbk:
                    optionalElementName = SharedConstants.Header;
                    mainRecordName = SharedConstants.RnGenericRec;
                    break;
                case SharedConstants.Reversal:
                    optionalElementName = SharedConstants.Header;
                    mainRecordName = SharedConstants.ReversalIndexEntry;
                    break;
                case SharedConstants.Lexdb:
                    optionalElementName = SharedConstants.Header;
                    mainRecordName = SharedConstants.LexEntry;
                    break;
                case SharedConstants.TextInCorpus:
                    mainRecordName = SharedConstants.Text;
                    break;
                case SharedConstants.Inventory:
                    optionalElementName = SharedConstants.Header;
                    mainRecordName = SharedConstants.WfiWordform;
                    break;
                case SharedConstants.DiscourseExt:
                    optionalElementName = SharedConstants.Header;
                    mainRecordName = SharedConstants.DsChart;
                    break;
                case SharedConstants.Featsys:
                    mainRecordName = SharedConstants.FsFeatureSystem;
                    break;
                case SharedConstants.Phondata:
                    mainRecordName = SharedConstants.PhPhonData;
                    break;
                case SharedConstants.Morphdata:
                    mainRecordName = SharedConstants.MoMorphData;
                    break;
                case SharedConstants.Agents:
                    mainRecordName = SharedConstants.CmAgent;
                    break;
            }

            using (var fastSplitter = new FastXmlElementSplitter(dataFile))
            {
                bool sawOptionalFirstElement;
                var records = fastSplitter.GetSecondLevelElementBytes(optionalElementName, mainRecordName, out sawOptionalFirstElement);
                foreach (var record in records)
                {
                    var recordNode = CreateXmlNodeFromBytes(record);
                    XmlMergeService.RemoveAmbiguousChildren(merger.EventListener, merger.MergeStrategies, recordNode);
                }
            }
        }
        ambiguousTimer.Stop();

        // Copy everything the listener collected into the validation report.
        var accumulator = (ChangeAndConflictAccumulator)merger.EventListener;
        foreach (var warning in accumulator.Warnings)
        {
            sbValidation.AppendLine(warning.Description);
            sbValidation.AppendLine();
            sbValidation.AppendLine(warning.HtmlDetails);
            sbValidation.AppendLine();
        }
        GC.Collect(2, GCCollectionMode.Forced);
    }

    restoreTimer.Start();
    FLExProjectUnifier.PutHumptyTogetherAgain(new NullProgress(), _srcFwdataPathname);
    restoreTimer.Stop();
    GC.Collect(2, GCCollectionMode.Forced);
}
/// <summary>
/// Returns <c>MetadataCache.TestOnlyNewCache</c> upgraded to the model version recorded in the
/// working directory and loaded with the custom property information from the custom
/// properties file.
/// </summary>
/// <remarks>
/// When the model version file does not exist yet, it is written from the fwdata file, and a
/// custom properties file is written as well: from the optional first element when the
/// fwdata file has one, otherwise as an empty file.
/// </remarks>
/// <returns>The upgraded metadata cache.</returns>
private MetadataCache GetFreshMdc()
{
    var mdc = MetadataCache.TestOnlyNewCache;
    var modelVersionPathname = Path.Combine(_workingDir, SharedConstants.ModelVersionFilename);
    if (!File.Exists(modelVersionPathname))
    {
        FLExProjectSplitter.WriteVersionFile(_srcFwdataPathname);
        using (var fastSplitter = new FastXmlElementSplitter(_srcFwdataPathname))
        {
            var haveWrittenCustomFile = false;
            bool foundOptionalFirstElement;
            // NB: The main input file *does* have to deal with the optional first element.
            foreach (var record in fastSplitter.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag, out foundOptionalFirstElement))
            {
                if (foundOptionalFirstElement)
                {
                    // Write custom properties file with custom properties.
                    FileWriterService.WriteCustomPropertyFile(mdc, _workingDir, record);
                    haveWrittenCustomFile = true;
                }

                // Only the first record can be the optional first element.
                break;
            }

            if (!haveWrittenCustomFile)
            {
                // Write empty custom properties file. This also covers a file with no records
                // at all, in which case the loop body above never runs.
                FileWriterService.WriteCustomPropertyFile(Path.Combine(_workingDir, SharedConstants.CustomPropertiesFilename), null);
            }
        }
    }

    // The version number is taken as the second piece left after splitting the file's
    // text on '{', ':' and '}'.
    var modelData = File.ReadAllText(modelVersionPathname);
    var versionPieces = modelData.Split(new[] { "{", ":", "}" }, StringSplitOptions.RemoveEmptyEntries);
    mdc.UpgradeToVersion(Int32.Parse(versionPieces[1]));

    var customPropPathname = Path.Combine(_workingDir, SharedConstants.CustomPropertiesFilename);
    mdc.AddCustomPropInfo(new MergeOrder(
        customPropPathname, customPropPathname, customPropPathname,
        new MergeSituation(customPropPathname, "", "", "", "", MergeOrder.ConflictHandlingModeChoices.WeWin)));
    return mdc;
}
/// <summary>
/// Reads every record of the fwdata file. The optional first element (custom properties) is
/// used to register each custom field with <paramref name="mdc"/>; every other record is
/// handed to <c>CacheDataRecord</c>.
/// </summary>
/// <param name="mdc">Metadata cache that receives the custom property definitions.</param>
/// <param name="srcFwdataPathname">Pathname of the fwdata file to read.</param>
/// <param name="unownedObjects">Passed through to <c>CacheDataRecord</c>.</param>
/// <param name="classData">Passed through to <c>CacheDataRecord</c>.</param>
/// <param name="guidToClassMapping">Passed through to <c>CacheDataRecord</c>.</param>
private static void TokenizeFile(MetadataCache mdc, string srcFwdataPathname, Dictionary<string, SortedDictionary<string, byte[]>> unownedObjects, Dictionary<string, SortedDictionary<string, byte[]>> classData, Dictionary<string, string> guidToClassMapping)
{
    using (var splitter = new FastXmlElementSplitter(srcFwdataPathname))
    {
        // NB: The main input file *does* have to deal with the optional first element.
        bool isCustomPropertiesRecord;
        var records = splitter.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag, out isCustomPropertiesRecord);
        foreach (var record in records)
        {
            if (!isCustomPropertiesRecord)
            {
                CacheDataRecord(unownedObjects, classData, guidToClassMapping, record);
                continue;
            }

            // Add custom property info to MDC, since it may need to be sorted in the data files.
            var sortedCustomProps = DataSortingService.SortCustomPropertiesRecord(SharedConstants.Utf8.GetString(record));
            foreach (var customField in sortedCustomProps.Elements(SharedConstants.CustomField))
            {
                var owningClassName = customField.Attribute(SharedConstants.Class).Value;
                var fieldName = customField.Attribute(SharedConstants.Name).Value;
                var typeAttribute = customField.Attribute("type");

                // Write the adjusted type back into the element when it differs.
                var adjustedType = MetadataCache.AdjustedPropertyType(typeAttribute.Value);
                if (adjustedType != typeAttribute.Value)
                {
                    typeAttribute.Value = adjustedType;
                }

                mdc.AddCustomPropInfo(
                    owningClassName,
                    new FdoPropertyInfo(fieldName, typeAttribute.Value, true));
            }
            mdc.ResetCaches();

            // Every record after this one is an ordinary data record.
            isCustomPropertiesRecord = false;
        }
    }
    GC.Collect(2, GCCollectionMode.Forced);
}
/// <summary>
/// Reads every record of the main file. The optional first element is written out as the
/// custom properties file; every other record is handed to <c>CacheDataRecord</c>. When the
/// file has no optional first element, an empty custom properties file is written instead.
/// </summary>
/// <param name="mainFilePathname">Pathname of the main file; its directory receives the custom properties file.</param>
/// <param name="classData">Passed through to <c>CacheDataRecord</c>.</param>
/// <param name="wellUsedElements">Passed through to <c>CacheDataRecord</c>.</param>
/// <returns>The guid-to-class-name dictionary that was passed to <c>CacheDataRecord</c>.</returns>
private static Dictionary<string, string> WriteOrCacheProperties(string mainFilePathname, Dictionary<string, SortedDictionary<string, byte[]>> classData, Dictionary<string, XElement> wellUsedElements)
{
    var pathRoot = Path.GetDirectoryName(mainFilePathname);
    var mdc = MetadataCache.MdCache;

    // Key is the guid of the object, and value is the class name.
    var guidToClassMapping = new Dictionary<string, string>();

    using (var splitter = new FastXmlElementSplitter(mainFilePathname))
    {
        var customFileWritten = false;

        // NB: The main input file *does* have to deal with the optional first element.
        bool isOptionalFirstElement;
        var records = splitter.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag, out isOptionalFirstElement);
        foreach (var record in records)
        {
            if (!isOptionalFirstElement)
            {
                CacheDataRecord(record, wellUsedElements, classData, guidToClassMapping);
                continue;
            }

            // Write custom properties file with custom properties.
            FileWriterService.WriteCustomPropertyFile(mdc, pathRoot, record);
            isOptionalFirstElement = false;
            customFileWritten = true;
        }

        if (!customFileWritten)
        {
            // Write empty custom properties file.
            var customPropertiesPathname = Path.Combine(pathRoot, SharedConstants.CustomPropertiesFilename);
            FileWriterService.WriteCustomPropertyFile(customPropertiesPathname, null);
        }
    }

    return guidToClassMapping;
}