Example #1
0
        /// <summary>
        /// Copies the XML read from <c>_data</c> into consecutive chunks and hands each
        /// chunk to <c>ExportXmlFile</c> whenever the requested threshold is reached.
        /// The root start tag (with its attributes) is captured once as a header and is
        /// re-appended to the buffer at the start of every new chunk.
        /// </summary>
        /// <param name="splitType">
        /// <c>ElementCount</c> splits after <paramref name="splitSize"/> counted nodes;
        /// <c>Filesize</c> splits once the buffered text (excluding the header) reaches
        /// <paramref name="splitSize"/> * 1000 characters.
        /// </param>
        /// <param name="splitSize">Threshold value interpreted according to <paramref name="splitType"/>.</param>
        /// <param name="fileSuffixXpath">Not used by this method.</param>
        /// <exception cref="InvalidOperationException">The reader reached EOF before any element was found.</exception>
        public void Split(ESplitType splitType, int splitSize, string fileSuffixXpath)
        {
            _data.Read();

            //Capture all of the items before the first element
            while (_data.NodeType != XmlNodeType.Element)
            {
                //WriteNode cannot advance a reader that is already at EOF, so stop instead of looping forever
                if (_data.EOF)
                    throw new InvalidOperationException("No root element found in the input document.");

                _output.WriteNode(_data, true);
            }

            //Prepare the document header and footer sections for use later...
            //NOTE(review): assumes _output writes into _outputBuilder - confirm against the constructor
            _output.WriteStartElement(_data.Name);
            _output.WriteAttributes(_data, true);
            var header = _outputBuilder.ToString() + ">";
            var footer = "</" + _data.Name + ">";

            //skip past the first node
            _data.Read();

            int elementCounter = 0;
            int splitCount = 0;

            //GetFileNameWithoutExtension also copes with file names that contain no '.'
            FileInfo dataFI = new FileInfo(_dataPath);
            string outputFileName = Path.GetFileNameWithoutExtension(dataFI.Name);

            //Widen before multiplying so that a large splitSize cannot overflow int
            long sizeThreshold = (long)splitSize * 1000;

            //Never assigned in this method; ExportXmlFile is always given null
            StreamWriter writer = null;
            while (!_data.EOF)
            {
                //Only count the nodes that interest us...
                if (!IgnorableNodeType(_data.NodeType))
                    elementCounter++;

                //copy everything from the reader (this also advances the reader)
                _output.WriteNode(_data, true);

                bool countReached = splitType == ESplitType.ElementCount && elementCounter >= splitSize;
                bool sizeReached = splitType == ESplitType.Filesize && (_outputBuilder.Length - header.Length) >= sizeThreshold;

                if (countReached || sizeReached)
                {
                    //construct the final XML string
                    ExportXmlFile(footer, writer, outputFileName, splitCount);

                    //reset counters
                    splitCount++;
                    elementCounter = 0;

                    //reset string builder so the next chunk starts with the root start tag
                    _outputBuilder.Length = 0;
                    _outputBuilder.Append(header);
                }
            }

            //If there is anything left, export it - no footer required!
            //NOTE(review): right after a split the buffer still holds the header, so this also
            //runs when no further content followed the last split - confirm that is intended
            if (_outputBuilder.Length > 0)
                ExportXmlFile("", writer, outputFileName, splitCount);

            SplitTotal = splitCount;
        }
Example #2
0
        /// <summary>
        /// Reads the &lt;split&gt; element from the XML config at <c>_configPath</c>, stores the
        /// split type, split size and optional directory / suffix settings in the static
        /// fields, and creates the input and output chunk folders.
        /// </summary>
        /// <exception cref="Exception">
        /// The &lt;split&gt; element is missing, its <c>type</c> or <c>size</c> attribute is missing,
        /// <c>type</c> is not a recognized keyword, or <c>size</c> is not an integer.
        /// </exception>
        private static void GetSplitConfig()
        {
            var config = new System.Xml.XmlDocument();

            config.Load(_configPath);

            var split = config.SelectSingleNode("/config/split");

            if (split == null)
            {
                throw new Exception("No <split> element defined in config.");
            }

            //look each attribute up once instead of re-indexing the collection
            var attributes = split.Attributes;
            var typeAttribute = attributes["type"];
            var sizeAttribute = attributes["size"];

            //only type and size are mandatory; the remaining attributes are optional
            if (typeAttribute == null || sizeAttribute == null)
            {
                throw new Exception("type, size, directoryname attributes were not defined for <split> element in config");
            }

            //get config
            //ToLowerInvariant keeps the keyword match independent of the current culture
            //(e.g. under tr-TR "FILESIZE".ToLower() does not yield "filesize")
            switch (typeAttribute.Value.ToLowerInvariant())
            {
            case "elementcount":
                _splitType = ESplitType.ElementCount;
                break;

            case "filesize":
                _splitType = ESplitType.Filesize;
                break;

            default:
                throw new Exception("Unrecognized split type --> must be elementcount or filesize");
            }

            //config values are machine-readable, so parse them culture-invariantly
            if (!int.TryParse(sizeAttribute.Value, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out _splitSize))
            {
                throw new Exception("size attribute not integer value");
            }

            //now all config parameters obtained need to:
            //1) create a folder to place all of the chunks of original data
            //2) find the folder for the output and name appropriately for each one

            var inputDirectoryAttribute = attributes["inputdirectoryname"];
            if (inputDirectoryAttribute != null)
            {
                _inputDirectoryName = inputDirectoryAttribute.Value;
            }

            var outputDirectoryAttribute = attributes["outputdirectoryname"];
            if (outputDirectoryAttribute != null)
            {
                _outputDirectoryName = outputDirectoryAttribute.Value;
            }

            var fileSuffixAttribute = attributes["filesuffix"];
            if (fileSuffixAttribute != null)
            {
                _fileSuffixXpath = fileSuffixAttribute.Value;
            }

            //input chunks folder - placed inside the folder that contains _dataPath
            FileInfo dataFileInfo = new FileInfo(_dataPath);
            _inputChunkingFolder = Path.Combine(dataFileInfo.Directory.FullName, _inputDirectoryName);
            Directory.CreateDirectory(_inputChunkingFolder);

            //rdf output chunks folder - placed inside _outputPath
            _outputChunkingFolder = Path.Combine(_outputPath, _outputDirectoryName);
            Directory.CreateDirectory(_outputChunkingFolder);
        }
Example #3
0
        /// <summary>
        /// Loads the config document at <c>_configPath</c>, reads the /config/split element into
        /// the static split settings, and creates the input and output chunk directories.
        /// </summary>
        /// <exception cref="Exception">
        /// The &lt;split&gt; element or its <c>type</c> / <c>size</c> attribute is missing, the
        /// <c>type</c> keyword is unknown, or <c>size</c> cannot be parsed as an integer.
        /// </exception>
        private static void GetSplitConfig()
        {
            var config = new System.Xml.XmlDocument();
            config.Load(_configPath);

            var split = config.SelectSingleNode("/config/split");

            if (split == null)
                throw new Exception("No <split> element defined in config.");

            //fetch each attribute once; a null entry means the attribute is absent
            var typeAttribute            = split.Attributes["type"];
            var sizeAttribute            = split.Attributes["size"];
            var inputDirectoryAttribute  = split.Attributes["inputdirectoryname"];
            var outputDirectoryAttribute = split.Attributes["outputdirectoryname"];
            var fileSuffixAttribute      = split.Attributes["filesuffix"];

            //only type and size are required, the other three are optional overrides
            if (typeAttribute == null || sizeAttribute == null)
                throw new Exception("type, size, directoryname attributes were not defined for <split> element in config");

            //get config
            //ordinal, case-insensitive matching keeps the keywords culture-independent
            //(a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under tr-TR)
            string splitTypeName = typeAttribute.Value;
            if (string.Equals(splitTypeName, "elementcount", StringComparison.OrdinalIgnoreCase))
                _splitType = ESplitType.ElementCount;
            else if (string.Equals(splitTypeName, "filesize", StringComparison.OrdinalIgnoreCase))
                _splitType = ESplitType.Filesize;
            else
                throw new Exception("Unrecognized split type --> must be elementcount or filesize");

            //the size is machine-readable config data, so parse it with the invariant culture
            if (!int.TryParse(sizeAttribute.Value, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out _splitSize))
                throw new Exception("size attribute not integer value");

            //now all config parameters obtained need to:
            //1) create a folder to place all of the chunks of original data
            //2) find the folder for the output and name appropriately for each one

            if (inputDirectoryAttribute != null)
                _inputDirectoryName = inputDirectoryAttribute.Value;
            if (outputDirectoryAttribute != null)
                _outputDirectoryName = outputDirectoryAttribute.Value;
            if (fileSuffixAttribute != null)
                _fileSuffixXpath = fileSuffixAttribute.Value;

            //input chunks folder - lives next to the file named by _dataPath
            FileInfo dataFileInfo = new FileInfo(_dataPath);
            _inputChunkingFolder = Path.Combine(dataFileInfo.Directory.FullName, _inputDirectoryName);
            Directory.CreateDirectory(_inputChunkingFolder);

            //rdf output chunks folder - lives under _outputPath
            _outputChunkingFolder = Path.Combine(_outputPath, _outputDirectoryName);
            Directory.CreateDirectory(_outputChunkingFolder);
        }
Example #4
0
        /// <summary>
        /// Splits the XML supplied by <c>_data</c> into chunks. Nodes are copied to
        /// <c>_output</c> one at a time; when the threshold for the chosen split type is
        /// reached the buffered chunk is passed to <c>ExportXmlFile</c> and the buffer is
        /// reset to the captured root start tag.
        /// </summary>
        /// <param name="splitType">
        /// <c>ElementCount</c>: split once <paramref name="splitSize"/> non-ignorable nodes were copied.
        /// <c>Filesize</c>: split once the buffer, not counting the header, holds at least
        /// <paramref name="splitSize"/> * 1000 characters.
        /// </param>
        /// <param name="splitSize">Threshold value; its unit depends on <paramref name="splitType"/>.</param>
        /// <param name="fileSuffixXpath">Not used by this method.</param>
        /// <exception cref="InvalidOperationException">The input ended before a root element was found.</exception>
        public void Split(ESplitType splitType, int splitSize, string fileSuffixXpath)
        {
            _data.Read();

            //Capture all of the items before the first element
            while (_data.NodeType != XmlNodeType.Element)
            {
                //At EOF WriteNode no longer moves the reader, which would make this loop endless
                if (_data.EOF)
                {
                    throw new InvalidOperationException("No root element found in the input document.");
                }

                _output.WriteNode(_data, true);
            }

            //Prepare the document header and footer sections for use later...
            //NOTE(review): relies on _output emitting into _outputBuilder - confirm where both are created
            _output.WriteStartElement(_data.Name);
            _output.WriteAttributes(_data, true);
            var header = _outputBuilder.ToString() + ">";
            var footer = "</" + _data.Name + ">";

            //skip past the first node
            _data.Read();

            int elementCounter = 0;
            int splitCount     = 0;

            //Path.GetFileNameWithoutExtension does not throw when the name has no '.'
            FileInfo dataFI         = new FileInfo(_dataPath);
            string   outputFileName = Path.GetFileNameWithoutExtension(dataFI.Name);

            //Computed as long so that splitSize * 1000 cannot wrap around
            long sizeThreshold = (long)splitSize * 1000;

            //Stays null for the whole method; it is only forwarded to ExportXmlFile
            StreamWriter writer = null;

            while (!_data.EOF)
            {
                //Only count the nodes that interest us...
                if (!IgnorableNodeType(_data.NodeType))
                {
                    elementCounter++;
                }

                //copy everything from the reader (WriteNode also moves the reader forward)
                _output.WriteNode(_data, true);

                bool countReached = splitType == ESplitType.ElementCount && elementCounter >= splitSize;
                bool sizeReached  = splitType == ESplitType.Filesize && (_outputBuilder.Length - header.Length) >= sizeThreshold;

                if (countReached || sizeReached)
                {
                    //construct the final XML string
                    ExportXmlFile(footer, writer, outputFileName, splitCount);

                    //reset counters
                    splitCount++;
                    elementCounter = 0;

                    //reset string builder; every chunk begins with the root start tag
                    _outputBuilder.Length = 0;
                    _outputBuilder.Append(header);
                }
            }

            //If there is anything left, export it - no footer required!
            //NOTE(review): the buffer is never empty after a split because the header is re-appended,
            //so a trailing chunk is exported even when nothing followed the last split - confirm intent
            if (_outputBuilder.Length > 0)
            {
                ExportXmlFile("", writer, outputFileName, splitCount);
            }

            SplitTotal = splitCount;
        }