コード例 #1
0
        /// <summary>
        /// Streams the XML document in <paramref name="input"/> and yields one row for every
        /// element whose name equals <c>rowPath</c>. Extract is called at least once per instance.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: nothing runs, including the schema check, until the
        /// returned sequence is enumerated. Parsing is a three-state machine: looking for a row
        /// element, looking for a column element inside the current row, and accumulating the
        /// inner XML of the current column element. Row and column end tags are matched by
        /// name and by nesting depth, so a nested element that reuses the row or column name
        /// does not close the row or column early.
        /// </remarks>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>IEnumerable of IRow, one IRow per completed row element.</returns>
        /// <exception cref="ArgumentException">
        /// A requested column is not of type string, or the document ended while a row or
        /// column was still open.
        /// </exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));

            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            // The schema does not change while parsing, so collect the column names once
            // instead of re-scanning the schema for every XML node.
            var schemaColumnNames = new HashSet<string>(output.Schema.Select(c => c.Name));

            // Depth of the currently open row / column element. An end tag only closes the
            // row / column when it sits at the same depth as the tag that opened it.
            int rowDepth = -1;
            int columnDepth = -1;

            var state = new ParseState();

            state.ClearAndJump(ParseLocation.Row);
            using (var reader = XmlReader.Create(input.BaseStream))
            {
                while (reader.Read())
                {
                    switch (state.Location)
                    {
                    case ParseLocation.Row:
                        // when looking for a new row, we are only interested in elements
                        // whose name matches the requested row element
                        if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                        {
                            // when found, clear the IUpdatableRow's memory
                            // (there is no provided Clear method)
                            for (int i = 0; i < output.Schema.Count; i++)
                            {
                                output.Set<string>(i, null);
                            }

                            rowDepth = reader.Depth;
                            state.ClearAndJump(ParseLocation.Column);
                        }

                        break;

                    case ParseLocation.Column:
                        // When looking for a new column, we are interested in elements
                        // whose name is a key in the columnPaths map or
                        // whose name is in the requested output schema.
                        // This indicates a column whose value needs to be read,
                        // so prepare for reading it by clearing elementValue.
                        if (reader.NodeType == XmlNodeType.Element &&
                            (this.columnPaths.ContainsKey(reader.Name) ||
                             schemaColumnNames.Contains(reader.Name)))
                        {
                            if (reader.IsEmptyElement)
                            {
                                // An empty element has no content and no end tag, so store
                                // whatever the (freshly cleared) state holds -- presumably an
                                // empty string -- and keep looking for the next column.
                                // The target column is the mapped name, or the element name
                                // itself when the map lookup yields null.
                                output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                                state.ClearAndJump(ParseLocation.Column);
                            }
                            else
                            {
                                state.Location    = ParseLocation.Data;
                                state.ElementName = reader.Name;
                                columnDepth       = reader.Depth;
                                state.ClearElementValue();
                            }
                        }
                        else if (reader.NodeType == XmlNodeType.EndElement &&
                                 reader.Depth == rowDepth &&
                                 reader.Name == this.rowPath)
                        {
                            // The other interesting case is the end element that closes
                            // the current row element (same name, same depth). This
                            // indicates the end of a row, so yield the now-complete row
                            // and jump to looking for another row.
                            yield return output.AsReadOnly();

                            state.ClearAndJump(ParseLocation.Row);
                        }

                        break;

                    case ParseLocation.Data:
                        // Most of the code for reading the value of a column
                        // deals with re-creating the inner XML from discrete elements.
                        // The only jump occurs when the reader hits the end element
                        // that closes the current column. In this case, we
                        // need to write the accumulated value to the appropriate
                        // column in the output row.
                        switch (reader.NodeType)
                        {
                        case XmlNodeType.EndElement:
                            // Requiring the same depth keeps a nested element that reuses
                            // the column's name from ending the column prematurely.
                            if (reader.Depth == columnDepth && reader.Name == state.ElementName)
                            {
                                output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                state.ClearAndJump(ParseLocation.Column);
                            }
                            else
                            {
                                state.ElementWriter.WriteEndElement();
                            }

                            break;

                        case XmlNodeType.Element:
                            state.ElementWriter.WriteStartElement(reader.Name);
                            state.ElementWriter.WriteAttributes(reader, false);
                            if (reader.IsEmptyElement)
                            {
                                // No EndElement node follows an empty element,
                                // so close it in the output right away.
                                state.ElementWriter.WriteEndElement();
                            }

                            break;

                        case XmlNodeType.CDATA:
                            state.ElementWriter.WriteCData(reader.Value);
                            break;

                        case XmlNodeType.Comment:
                            state.ElementWriter.WriteComment(reader.Value);
                            break;

                        case XmlNodeType.ProcessingInstruction:
                            state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                            break;

                        default:
                            // Every other node type is copied as string content.
                            state.ElementWriter.WriteString(reader.Value);
                            break;
                        }

                        break;

                    default:
                        throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
                    }
                }

                // Ending anywhere but the "looking for a row" state means a row or
                // column element was still open when the input ran out.
                if (state.Location != ParseLocation.Row)
                {
                    throw new ArgumentException("XML document ended without proper closing tags");
                }
            }
        }
コード例 #2
0
ファイル: XmlExtractor.cs プロジェクト: hughwasos/usql
        /// <summary>
        /// Streams the XML document in <paramref name="input"/> and yields one row for every
        /// element whose name equals <c>rowPath</c>. Extract is called at least once per instance.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: nothing runs, including the schema check, until the
        /// returned sequence is enumerated. Parsing is a three-state machine: looking for a row
        /// element, looking for a column element inside the current row, and accumulating the
        /// inner XML of the current column element. Row and column end tags are matched by
        /// name and by nesting depth, so a nested element that reuses the row or column name
        /// does not close the row or column early.
        /// </remarks>
        /// <param name="input">Wrapper for a Stream</param>
        /// <param name="output">IUpdatableRow uses a mutable builder pattern --
        /// set individual fields with IUpdatableRow.Set, then build an immutable IRow by
        /// calling IUpdatableRow.AsReadOnly.</param>
        /// <returns>IEnumerable of IRow, one IRow per completed row element.</returns>
        /// <exception cref="ArgumentException">
        /// A requested column is not of type string, or the document ended while a row or
        /// column was still open.
        /// </exception>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
        {
            // Make sure that all requested columns are of type string
            IColumn column = output.Schema.FirstOrDefault(col => col.Type != typeof(string));
            if (column != null)
            {
                throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name));
            }

            // The schema does not change while parsing, so collect the column names once
            // instead of re-scanning the schema for every XML node.
            var schemaColumnNames = new HashSet<string>(output.Schema.Select(c => c.Name));

            // Depth of the currently open row / column element. An end tag only closes the
            // row / column when it sits at the same depth as the tag that opened it.
            int rowDepth = -1;
            int columnDepth = -1;

            var state = new ParseState();
            state.ClearAndJump(ParseLocation.Row);
            using (var reader = XmlReader.Create(input.BaseStream))
            {
                while (reader.Read())
                {
                    switch (state.Location)
                    {
                        case ParseLocation.Row:
                            // when looking for a new row, we are only interested in elements
                            // whose name matches the requested row element
                            if (reader.NodeType == XmlNodeType.Element && reader.Name == this.rowPath)
                            {
                                // when found, clear the IUpdatableRow's memory
                                // (there is no provided Clear method)
                                for (int i = 0; i < output.Schema.Count; i++)
                                {
                                    output.Set<string>(i, null);
                                }

                                rowDepth = reader.Depth;
                                state.ClearAndJump(ParseLocation.Column);
                            }

                            break;
                        case ParseLocation.Column:
                            // When looking for a new column, we are interested in elements
                            // whose name is a key in the columnPaths map or
                            // whose name is in the requested output schema.
                            // This indicates a column whose value needs to be read,
                            // so prepare for reading it by clearing elementValue.
                            if (reader.NodeType == XmlNodeType.Element
                                && (this.columnPaths.ContainsKey(reader.Name)
                                    || schemaColumnNames.Contains(reader.Name)))
                            {
                                if (reader.IsEmptyElement)
                                {
                                    // An empty element has no content and no end tag, so store
                                    // whatever the (freshly cleared) state holds -- presumably an
                                    // empty string -- and keep looking for the next column.
                                    // The target column is the mapped name, or the element name
                                    // itself when the map lookup yields null.
                                    output.Set(this.columnPaths[reader.Name] ?? reader.Name, state.ReadElementValue());
                                    state.ClearAndJump(ParseLocation.Column);
                                }
                                else
                                {
                                    state.Location = ParseLocation.Data;
                                    state.ElementName = reader.Name;
                                    columnDepth = reader.Depth;
                                    state.ClearElementValue();
                                }
                            }
                            else if (reader.NodeType == XmlNodeType.EndElement
                                     && reader.Depth == rowDepth
                                     && reader.Name == this.rowPath)
                            {
                                // The other interesting case is the end element that closes
                                // the current row element (same name, same depth). This
                                // indicates the end of a row, so yield the now-complete row
                                // and jump to looking for another row.
                                yield return output.AsReadOnly();
                                state.ClearAndJump(ParseLocation.Row);
                            }

                            break;
                        case ParseLocation.Data:
                            // Most of the code for reading the value of a column
                            // deals with re-creating the inner XML from discrete elements.
                            // The only jump occurs when the reader hits the end element
                            // that closes the current column. In this case, we
                            // need to write the accumulated value to the appropriate
                            // column in the output row.
                            switch (reader.NodeType)
                            {
                                case XmlNodeType.EndElement:
                                    // Requiring the same depth keeps a nested element that reuses
                                    // the column's name from ending the column prematurely.
                                    if (reader.Depth == columnDepth && reader.Name == state.ElementName)
                                    {
                                        output.Set(this.columnPaths[state.ElementName] ?? state.ElementName, state.ReadElementValue());
                                        state.ClearAndJump(ParseLocation.Column);
                                    }
                                    else
                                    {
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.Element:
                                    state.ElementWriter.WriteStartElement(reader.Name);
                                    state.ElementWriter.WriteAttributes(reader, false);
                                    if (reader.IsEmptyElement)
                                    {
                                        // No EndElement node follows an empty element,
                                        // so close it in the output right away.
                                        state.ElementWriter.WriteEndElement();
                                    }

                                    break;
                                case XmlNodeType.CDATA:
                                    state.ElementWriter.WriteCData(reader.Value);
                                    break;
                                case XmlNodeType.Comment:
                                    state.ElementWriter.WriteComment(reader.Value);
                                    break;
                                case XmlNodeType.ProcessingInstruction:
                                    state.ElementWriter.WriteProcessingInstruction(reader.Name, reader.Value);
                                    break;
                                default:
                                    // Every other node type is copied as string content.
                                    state.ElementWriter.WriteString(reader.Value);
                                    break;
                            }

                            break;
                        default:
                            throw new NotImplementedException("StreamFromXml has not implemented a new member of the ParseLocation enum");
                    }
                }

                // Ending anywhere but the "looking for a row" state means a row or
                // column element was still open when the input ran out.
                if (state.Location != ParseLocation.Row)
                {
                    throw new ArgumentException("XML document ended without proper closing tags");
                }
            }
        }