/// <summary>
/// Splits the XML read from <c>_data</c> into chunks and hands each chunk to
/// <c>ExportXmlFile</c>. Every chunk starts with the same header (everything
/// written before the root element's children) and is exported with the root
/// element's closing tag as footer.
/// </summary>
/// <param name="splitType">
/// <c>ElementCount</c> splits after <paramref name="splitSize"/> counted nodes;
/// <c>Filesize</c> splits once the buffered output (header excluded) reaches
/// <paramref name="splitSize"/> * 1000 characters.
/// </param>
/// <param name="splitSize">Threshold whose meaning depends on <paramref name="splitType"/>.</param>
/// <param name="fileSuffixXpath">Not used by this method.</param>
/// <exception cref="XmlException">The reader reached the end of the input without finding an element.</exception>
public void Split(ESplitType splitType, int splitSize, string fileSuffixXpath)
{
    _data.Read();

    // Copy everything that precedes the first element.
    // WriteNode is a no-op once the reader is at EOF, so without this guard an
    // input with no element would spin here forever.
    while (_data.NodeType != XmlNodeType.Element)
    {
        if (_data.EOF)
        {
            throw new XmlException("No root element found in the data to split.");
        }

        _output.WriteNode(_data, true);
    }

    // Prepare the header and footer reused by every chunk.
    // The ">" is appended by hand - presumably the writer has not yet emitted
    // the end of the start tag at this point (TODO confirm).
    _output.WriteStartElement(_data.Name);
    _output.WriteAttributes(_data, true);
    var header = _outputBuilder.ToString() + ">";
    var footer = "</" + _data.Name + ">";

    // Step past the root start tag.
    _data.Read();

    int elementCounter = 0;
    int splitCount = 0;

    // File name without its extension. Path.GetFileNameWithoutExtension also copes
    // with names that contain no '.', where Substring(0, LastIndexOf(".")) throws.
    FileInfo dataFI = new FileInfo(_dataPath);
    string outputFileName = Path.GetFileNameWithoutExtension(dataFI.Name);

    // Never assigned in this method; forwarded to ExportXmlFile as null.
    StreamWriter writer = null;

    // Computed as long so that a large splitSize cannot overflow int.
    long sizeThreshold = (long)splitSize * 1000;

    while (!_data.EOF)
    {
        // Only count the nodes that interest us.
        if (!IgnorableNodeType(_data.NodeType))
        {
            elementCounter++;
        }

        // Copies the current node and advances the reader past it.
        _output.WriteNode(_data, true);

        bool countReached = splitType == ESplitType.ElementCount
            && elementCounter >= splitSize;
        bool sizeReached = splitType == ESplitType.Filesize
            && (_outputBuilder.Length - header.Length) >= sizeThreshold;

        if (countReached || sizeReached)
        {
            ExportXmlFile(footer, writer, outputFileName, splitCount);

            // Reset the counters and start the next chunk with the header only.
            splitCount++;
            elementCounter = 0;
            _outputBuilder.Length = 0;
            _outputBuilder.Append(header);
        }
    }

    // Export whatever is left, with an empty footer.
    // NOTE(review): after a split the builder is re-seeded with the header, so this
    // check is presumably always true and a trailing near-empty chunk can be exported - confirm intent.
    if (_outputBuilder.Length > 0)
    {
        ExportXmlFile("", writer, outputFileName, splitCount);
    }

    SplitTotal = splitCount;
}
/// <summary>
/// Loads the config file at <c>_configPath</c>, reads the <c>/config/split</c> element
/// into the static split settings, and creates the input and output chunk directories.
/// </summary>
/// <remarks>
/// <c>type</c> and <c>size</c> are required attributes; <c>inputdirectoryname</c>,
/// <c>outputdirectoryname</c> and <c>filesuffix</c> only overwrite the corresponding
/// fields when present.
/// </remarks>
/// <exception cref="Exception">
/// The split element is missing, a required attribute is missing, the type is not
/// recognised, or the size is not an integer.
/// </exception>
private static void GetSplitConfig()
{
    var config = new System.Xml.XmlDocument();
    config.Load(_configPath);

    var split = config.SelectSingleNode("/config/split");
    if (split == null)
    {
        throw new Exception("No <split> element defined in config.");
    }

    var typeAttribute = split.Attributes["type"];
    var sizeAttribute = split.Attributes["size"];
    if (typeAttribute == null || sizeAttribute == null)
    {
        // Only these two attributes are mandatory; the directory names are optional.
        throw new Exception("type and size attributes must both be defined for <split> element in config");
    }

    // Invariant lower-casing: with ToLower() the match depends on the current culture
    // (e.g. "FILESIZE" does not lower-case to "filesize" under Turkish casing rules).
    switch (typeAttribute.Value.ToLowerInvariant())
    {
        case "elementcount":
            _splitType = ESplitType.ElementCount;
            break;
        case "filesize":
            _splitType = ESplitType.Filesize;
            break;
        default:
            throw new Exception("Unrecognized split type --> must be elementcount or filesize");
    }

    // The config is machine-readable, so parse the number culture-independently.
    if (!int.TryParse(
            sizeAttribute.Value,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out _splitSize))
    {
        throw new Exception("size attribute not integer value");
    }

    // Optional attributes: keep the current field value when the attribute is absent.
    var inputDirectoryAttribute = split.Attributes["inputdirectoryname"];
    if (inputDirectoryAttribute != null)
    {
        _inputDirectoryName = inputDirectoryAttribute.Value;
    }

    var outputDirectoryAttribute = split.Attributes["outputdirectoryname"];
    if (outputDirectoryAttribute != null)
    {
        _outputDirectoryName = outputDirectoryAttribute.Value;
    }

    var fileSuffixAttribute = split.Attributes["filesuffix"];
    if (fileSuffixAttribute != null)
    {
        _fileSuffixXpath = fileSuffixAttribute.Value;
    }

    // Input chunks go into a sub-folder next to the data file.
    FileInfo dataFileInfo = new FileInfo(_dataPath);
    _inputChunkingFolder = Path.Combine(dataFileInfo.Directory.FullName, _inputDirectoryName);
    Directory.CreateDirectory(_inputChunkingFolder);

    // Output chunks go into a sub-folder of the output path.
    _outputChunkingFolder = Path.Combine(_outputPath, _outputDirectoryName);
    Directory.CreateDirectory(_outputChunkingFolder);
}
/// <summary>
/// Reads the split settings from the <c>/config/split</c> element of the config file at
/// <c>_configPath</c>, stores them in the static split fields, and creates the folders
/// that receive the input and output chunks.
/// </summary>
/// <remarks>
/// Required attributes: <c>type</c> (elementcount or filesize, case-insensitive) and
/// <c>size</c> (integer). Optional attributes: <c>inputdirectoryname</c>,
/// <c>outputdirectoryname</c>, <c>filesuffix</c>.
/// </remarks>
/// <exception cref="Exception">
/// The split element or a required attribute is missing, the type is unknown,
/// or the size cannot be parsed as an integer.
/// </exception>
private static void GetSplitConfig()
{
    var config = new System.Xml.XmlDocument();
    config.Load(_configPath);

    var split = config.SelectSingleNode("/config/split");
    if (split == null)
    {
        throw new Exception("No <split> element defined in config.");
    }

    var attributes = split.Attributes;
    var typeAttribute = attributes["type"];
    var sizeAttribute = attributes["size"];
    if (typeAttribute == null || sizeAttribute == null)
    {
        // The message names exactly the attributes this check requires.
        throw new Exception("type and size attributes must both be defined for <split> element in config");
    }

    // ToLowerInvariant keeps the comparison independent of the current culture;
    // ToLower() would mis-case 'I' under Turkish casing rules and reject "FILESIZE".
    string splitTypeName = typeAttribute.Value.ToLowerInvariant();
    switch (splitTypeName)
    {
        case "elementcount":
            _splitType = ESplitType.ElementCount;
            break;
        case "filesize":
            _splitType = ESplitType.Filesize;
            break;
        default:
            throw new Exception("Unrecognized split type --> must be elementcount or filesize");
    }

    // Parse with the invariant culture: the config value is not user-locale text.
    bool sizeParsed = int.TryParse(
        sizeAttribute.Value,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out _splitSize);
    if (!sizeParsed)
    {
        throw new Exception("size attribute not integer value");
    }

    // Optional overrides - each field is only touched when its attribute exists.
    var inputDirectoryAttribute = attributes["inputdirectoryname"];
    if (inputDirectoryAttribute != null)
    {
        _inputDirectoryName = inputDirectoryAttribute.Value;
    }

    var outputDirectoryAttribute = attributes["outputdirectoryname"];
    if (outputDirectoryAttribute != null)
    {
        _outputDirectoryName = outputDirectoryAttribute.Value;
    }

    var fileSuffixAttribute = attributes["filesuffix"];
    if (fileSuffixAttribute != null)
    {
        _fileSuffixXpath = fileSuffixAttribute.Value;
    }

    // Folder for the chunks of the original data: a sub-folder of the data file's directory.
    FileInfo dataFileInfo = new FileInfo(_dataPath);
    _inputChunkingFolder = Path.Combine(dataFileInfo.Directory.FullName, _inputDirectoryName);
    Directory.CreateDirectory(_inputChunkingFolder);

    // Folder for the output chunks: a sub-folder of the output path.
    _outputChunkingFolder = Path.Combine(_outputPath, _outputDirectoryName);
    Directory.CreateDirectory(_outputChunkingFolder);
}
/// <summary>
/// Reads the XML document from <c>_data</c> and exports it in chunks through
/// <c>ExportXmlFile</c>. Each chunk begins with the captured header (prolog plus the
/// root start tag) and is exported together with the root element's closing tag.
/// </summary>
/// <param name="splitType">
/// Selects the split criterion: <c>ElementCount</c> compares the number of counted
/// nodes with <paramref name="splitSize"/>; <c>Filesize</c> compares the buffered
/// output length (without the header) with <paramref name="splitSize"/> * 1000.
/// </param>
/// <param name="splitSize">Threshold for the criterion chosen by <paramref name="splitType"/>.</param>
/// <param name="fileSuffixXpath">Not used by this method.</param>
/// <exception cref="XmlException">The input ended before any element was found.</exception>
public void Split(ESplitType splitType, int splitSize, string fileSuffixXpath)
{
    _data.Read();

    // Capture all of the items before the first element.
    // Stop at EOF: WriteNode does nothing there, so the loop would otherwise never end.
    while (_data.NodeType != XmlNodeType.Element)
    {
        if (_data.EOF)
        {
            throw new XmlException("No root element found in the data to split.");
        }

        _output.WriteNode(_data, true);
    }

    // Header and footer sections shared by all chunks.
    // The trailing ">" is added manually - presumably the writer has not flushed the
    // end of the start tag into the builder yet (TODO confirm).
    string rootName = _data.Name;
    _output.WriteStartElement(rootName);
    _output.WriteAttributes(_data, true);
    var header = _outputBuilder.ToString() + ">";
    var footer = "</" + rootName + ">";

    // Skip past the root start tag.
    _data.Read();

    int elementCounter = 0;
    int splitCount = 0;

    // Strip the extension via Path so an extension-less file name no longer throws
    // (LastIndexOf(".") returned -1 and Substring(0, -1) failed).
    string outputFileName = Path.GetFileNameWithoutExtension(new FileInfo(_dataPath).Name);

    // Stays null throughout; passed to ExportXmlFile as-is.
    StreamWriter writer = null;

    // long arithmetic: splitSize * 1000 can exceed int.MaxValue.
    long maxChunkLength = (long)splitSize * 1000;

    while (!_data.EOF)
    {
        // Only count the nodes that interest us.
        if (!IgnorableNodeType(_data.NodeType))
        {
            elementCounter++;
        }

        // Copy the current node from the reader; this also advances the reader.
        _output.WriteNode(_data, true);

        bool splitNow;
        if (splitType == ESplitType.ElementCount)
        {
            splitNow = elementCounter >= splitSize;
        }
        else if (splitType == ESplitType.Filesize)
        {
            splitNow = (_outputBuilder.Length - header.Length) >= maxChunkLength;
        }
        else
        {
            splitNow = false;
        }

        if (splitNow)
        {
            ExportXmlFile(footer, writer, outputFileName, splitCount);

            // Reset the counters and re-seed the builder with the header for the next chunk.
            splitCount++;
            elementCounter = 0;
            _outputBuilder.Length = 0;
            _outputBuilder.Append(header);
        }
    }

    // If there is anything left, export it with an empty footer.
    // NOTE(review): the builder always holds at least the header after a split, so this
    // condition looks like it is always satisfied - confirm whether a header-only chunk is wanted.
    if (_outputBuilder.Length > 0)
    {
        ExportXmlFile("", writer, outputFileName, splitCount);
    }

    SplitTotal = splitCount;
}