/// <summary> Reads the data for a single MARC record from a MarcXML node reader </summary>
/// <param name="nodeReader"> XML Node Reader </param>
/// <param name="record"> Record into which to read the contents of the MarcXML file </param>
/// <returns> TRUE if successful, otherwise FALSE </returns>
/// <remarks> Element names are matched both with and without the "marc:" prefix.  Fields whose
/// tag attribute is missing or non-numeric are skipped so that the remainder of an invalid
/// record can still be read.  Any exception raised while reading results in FALSE. </remarks>
public static bool ReadMarcInfo(XmlTextReader nodeReader, MarcRecord record)
{
    try
    {
        // Move to the record node
        MoveToNode(nodeReader, "record");

        while (nodeReader.Read())
        {
            // The closing record tag ends this record.  The prefix must be stripped here as
            // well, otherwise a prefixed </marc:record> is missed and reading would continue
            // into whatever follows this record in the document.
            if ((nodeReader.NodeType == XmlNodeType.EndElement) && (StripMarcPrefix(nodeReader.Name) == "record"))
            {
                return true;
            }

            if (nodeReader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            switch (StripMarcPrefix(nodeReader.Name))
            {
                case "leader":
                    record.Leader = ReadMarcXmlElementValue(nodeReader);
                    break;

                case "controlfield":
                    ReadMarcXmlControlField(nodeReader, record);
                    break;

                case "datafield":
                    ReadMarcXmlDataField(nodeReader, record);
                    break;
            }
        }

        return true;
    }
    catch (Exception)
    {
        // Contract of this method: any failure while reading is reported as FALSE
        return false;
    }
}

/// <summary> Removes the "marc:" prefix from an XML node name </summary>
private static string StripMarcPrefix(string nodeName)
{
    return nodeName.Trim().Replace("marc:", "");
}

/// <summary> Returns the value of the node that follows the current element's start tag,
/// or the empty string if the element is self-closing </summary>
private static string ReadMarcXmlElementValue(XmlTextReader nodeReader)
{
    // The reader may currently sit on one of the element's attributes
    nodeReader.MoveToElement();

    // A self-closing element has no content; reading on would swallow the next node
    if (nodeReader.IsEmptyElement)
    {
        return String.Empty;
    }

    nodeReader.Read();
    return nodeReader.Value;
}

/// <summary> Reads one controlfield element and adds it to the record </summary>
private static void ReadMarcXmlControlField(XmlTextReader nodeReader, MarcRecord record)
{
    // The tag should always be numeric per the schema, but this is relaxed for
    // invalid MARC so the rest of the data can be successfully read.
    int tag;
    if (nodeReader.MoveToAttribute("tag") && Int32.TryParse(nodeReader.Value, out tag))
    {
        record.AddField(tag, ReadMarcXmlElementValue(nodeReader));
    }
}

/// <summary> Reads one datafield element, including its subfields, and adds it to the record </summary>
private static void ReadMarcXmlDataField(XmlTextReader nodeReader, MarcRecord record)
{
    // Must be captured before moving onto the attributes
    bool isEmptyElement = nodeReader.IsEmptyElement;

    // Set the default indicators
    char ind1 = ' ';
    char ind2 = ' ';

    // The tag is tracked per field so a missing attribute never reuses an earlier field's tag
    int tag = 0;
    bool tagIsValid = false;

    // Get the indicators and tag if they exist
    while (nodeReader.MoveToNextAttribute())
    {
        string attributeValue = nodeReader.Value;
        switch (nodeReader.Name.Trim())
        {
            case "ind1":
                if (attributeValue.Length > 0)
                {
                    ind1 = attributeValue[0];
                }
                break;

            case "ind2":
                if (attributeValue.Length > 0)
                {
                    ind2 = attributeValue[0];
                }
                break;

            case "tag":
                tagIsValid = Int32.TryParse(attributeValue, out tag);
                break;
        }
    }

    // Same relaxed handling as for control fields: skip a field without a usable tag.
    // Its subfield elements are then simply ignored by the caller's read loop.
    if (!tagIsValid)
    {
        return;
    }

    // Add this datafield
    MarcField newField = record.AddField(tag, ind1, ind2);

    // A self-closing datafield has no subfields and no end tag to wait for
    if (isEmptyElement)
    {
        return;
    }

    // Now, add each subfield
    while (nodeReader.Read())
    {
        string nodeName = nodeReader.Name.Replace("marc:", "");

        if ((nodeReader.NodeType == XmlNodeType.EndElement) && (nodeName == "datafield"))
        {
            break;
        }

        if ((nodeReader.NodeType == XmlNodeType.Element) && (nodeName == "subfield"))
        {
            // Get the code; prefer the "code" attribute, falling back to the first
            // attribute present (whatever its name) when there is no "code" attribute
            string code = nodeReader.GetAttribute("code");
            if ((code == null) && nodeReader.MoveToFirstAttribute())
            {
                code = nodeReader.Value;
            }
            char subfield = String.IsNullOrEmpty(code) ? ' ' : code[0];

            // Get the value
            string dataValue = ReadMarcXmlElementValue(nodeReader);

            // Save this subfield
            newField.Add_Subfield(subfield, dataValue);

            // The 260 field additionally accumulates its subfields into the control field value
            if (tag == 260)
            {
                newField.ControlFieldValue = newField.ControlFieldValue + "|" + subfield + " " + dataValue + " ";
            }
        }
    }
}
/// <summary> Parses the next record from the underlying binary reader: a 24 character leader,
/// a directory of 12 digit entries (3 digit tag, 4 digit field length, 5 digit starting
/// position) and then the variable field data up to the end-of-record marker </summary>
/// <returns> The parsed record, or NULL (after setting EofFlag and closing the reader) when the
/// end of the stream is reached while reading the leader </returns>
/// <exception cref="ApplicationException"> Thrown when the leader is cut short by a separator.
/// Also thrown for invalid directory characters, unresolvable directory positions and an
/// unexpected end of stream, unless ActionOnError is StoreInRecord, in which case those
/// problems are recorded on the returned record instead. </exception>
private MarcRecord parse_next_record()
{
    // Create the MARC record to return and the collection of field data, keyed by start index.
    // NOTE(review): start positions are narrowed to short throughout (the directory entry and
    // dictionary key types require it), so offsets beyond short.MaxValue wrap - confirm whether
    // records that large need to be supported.
    var thisRecord = new MarcRecord();
    var fieldDatas = new Dictionary<short, ParserVariableFieldData>();

    try
    {
        // Used to detect the end of the file while reading the leader
        long fileLength = _reader.BaseStream.Length;

        var leaderBuilder = new StringBuilder(30);

        // Read to first character (returns -1, rather than throwing, on an exhausted stream)
        int result = _reader.Read();
        bool eof = false;

        // Read the leader directly into a string, since this will not have specially coded characters
        int count = 0;
        while ((!eof) && (result != EndOfRecord) && (result != RecordSeperator) && (count < 24))
        {
            // Want to skip any special characters at the beginning (like encoding characters)
            if ((result >= 0) && (result < 127))
            {
                leaderBuilder.Append((char)result);
                count++;
            }

            // Read the next byte, or flag the end of the file
            if (_reader.BaseStream.Position < fileLength)
            {
                result = _reader.ReadByte();
            }
            else
            {
                eof = true;
            }
        }

        // Nothing (more) to read: flag it, close the reader and end processing of the file
        if (eof)
        {
            EofFlag = true;
            Close();
            return null;
        }

        // Ensure the leader was correctly retrieved
        if (leaderBuilder.Length < 24)
        {
            throw new ApplicationException(
                "Error reading leader. Either end of file, group seperator, or record seperator found prematurely.");
        }

        thisRecord.Leader = leaderBuilder.ToString();

        // Leader position 9 selects the character encoding of the variable fields
        RecordCharacterEncoding encoding = RecordCharacterEncoding.Unrecognized;
        switch (thisRecord.Leader[9])
        {
            case ' ':
                encoding = RecordCharacterEncoding.Marc;
                break;

            case 'a':
                encoding = RecordCharacterEncoding.Unicode;
                break;
        }

        // Now, read in all the directory information
        var directoryEntries = new List<ParserDirectoryEntry>();
        int tag = 0;
        int fieldLength = 0;
        int startingPosition = 0;
        count = 0;
        while ((result != EndOfRecord) && (result != RecordSeperator))
        {
            // Every directory character must be a digit; anything else counts as zero
            int digit = 0;
            if ((result >= '0') && (result <= '9'))
            {
                digit = result - '0';
            }
            else if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
            {
                thisRecord.AddError(MarcRecordParsingErrorTypeEnum.InvalidDirectoryEncountered,
                                    "Found invalid (non-numeric) character in a directory entry.");
            }
            else
            {
                throw new ApplicationException("Found invalid (non-numeric) character in a directory entry.");
            }

            // Accumulate into the part of the entry this position belongs to
            if (count < 3)
            {
                tag = (tag * 10) + digit;
            }
            else if (count < 7)
            {
                fieldLength = (fieldLength * 10) + digit;
            }
            else
            {
                startingPosition = (startingPosition * 10) + digit;
            }

            // ReadByte (unlike Read) throws EndOfStreamException on truncated input, so a
            // truncated directory is reported through the handler below rather than leaving
            // this loop spinning on an end-of-stream marker that never matches a terminator.
            result = _reader.ReadByte();
            count++;

            // If this directory entry has been completely read, save it
            // and reset the values for the next entry
            if (count == 12)
            {
                directoryEntries.Add(new ParserDirectoryEntry((short)tag, (short)fieldLength, (short)startingPosition));
                tag = 0;
                fieldLength = 0;
                startingPosition = 0;
                count = 0;
            }
        }

        // Read all the data from the variable fields.  Bytes are accumulated raw and only
        // converted to text once a whole field has been collected.
        short lastFieldStartIndex = 0;
        using (var byteFieldBuilder = new MemoryStream())
        {
            int startIndex = 0;
            count = 0;
            result = _reader.ReadByte();
            while (result != EndOfRecord)
            {
                if (result == RecordSeperator)
                {
                    // End of a field: convert its bytes according to the record's encoding
                    byte[] fieldAsByteArray = byteFieldBuilder.ToArray();
                    string fieldAsString = (encoding == RecordCharacterEncoding.Marc)
                        ? ConvertMarcBytesToUnicodeString(fieldAsByteArray)
                        : Encoding.UTF8.GetString(fieldAsByteArray);

                    // Empty the buffer for the next field
                    byteFieldBuilder.SetLength(0);

                    // Add the field to the list of variable data
                    fieldDatas.Add((short)startIndex, new ParserVariableFieldData((short)startIndex, fieldAsString));

                    // This may be the last field, so save this index
                    lastFieldStartIndex = (short)startIndex;

                    // The next field starts right after this separator
                    startIndex = count + 1;
                }
                else
                {
                    byteFieldBuilder.WriteByte((byte)result);
                }

                result = _reader.ReadByte();
                count++;
            }
        }

        // Now, step through the directory, retrieve each pre-converted field data,
        // and finish parsing
        int directoryErrorCorrection = 0;
        foreach (ParserDirectoryEntry directoryEntry in directoryEntries)
        {
            ParserVariableFieldData fieldData;
            if (!fieldDatas.TryGetValue((short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData))
            {
                // The directory does not line up with the data; search forward for the field
                int correctionBeforeSearch = directoryErrorCorrection;
                while ((!fieldDatas.ContainsKey((short)(directoryEntry.StartingPosition + directoryErrorCorrection)))
                       && (lastFieldStartIndex > directoryEntry.StartingPosition + directoryErrorCorrection))
                {
                    directoryErrorCorrection += 1;
                }

                if (!fieldDatas.TryGetValue((short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData))
                {
                    if (ActionOnError != ActionOnErrorEncounteredEnum.StoreInRecord)
                    {
                        throw new ApplicationException(
                            "Field indexes and directory information cannot be resolved with one another.");
                    }

                    // Record the problem and move on to the next entry.  There is no data for
                    // this entry, and the failed search must not skew the following lookups.
                    thisRecord.AddError(MarcRecordParsingErrorTypeEnum.DirectoryFieldMismatchUnhandled);
                    directoryErrorCorrection = correctionBeforeSearch;
                    continue;
                }

                // This worked, but add a warning none-the-less
                thisRecord.AddWarning(MarcRecordParsingWarningTypeEnum.DirectoryFieldMismatchHandled);
            }

            var variableFieldData = fieldData.FieldData;

            // Two leading characters followed by a unit separator are the indicators
            var indicator = "";
            if ((variableFieldData.Length > 3) && (variableFieldData[2] == UnitSeperator))
            {
                indicator = variableFieldData.Substring(0, 2);
                variableFieldData = variableFieldData.Substring(2);
            }

            // Is this split into separate subfields?
            if ((variableFieldData.Length > 1) && (variableFieldData[0] == UnitSeperator))
            {
                var newField = new MarcField { Tag = Convert.ToInt32(directoryEntry.Tag), Indicators = indicator };

                foreach (string thisSubfield in variableFieldData.Substring(1).Split(UnitSeperator))
                {
                    // Consecutive or trailing unit separators yield empty pieces with no code
                    if (thisSubfield.Length == 0)
                    {
                        continue;
                    }

                    // First character is the subfield code, the rest is its data
                    newField.Add_Subfield(thisSubfield[0], thisSubfield.Substring(1));
                }

                thisRecord.AddField(newField);
            }
            else
            {
                // No subfields, so the whole value belongs to the field itself
                thisRecord.AddField(Convert.ToInt32(directoryEntry.Tag), variableFieldData);
            }
        }

        // If this was MARC8 encoding originally, change the encoding specified in the
        // leader, since this was converted to Unicode
        if (encoding == RecordCharacterEncoding.Marc)
        {
            thisRecord.Leader = thisRecord.Leader.Substring(0, 9) + "a" + thisRecord.Leader.Substring(10);
        }
    }
    catch (EndOfStreamException ex)
    {
        if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
        {
            thisRecord.AddError(MarcRecordParsingErrorTypeEnum.UnexpectedEndOfStreamEncountered);
        }
        else
        {
            throw new ApplicationException(
                "Unexpected end of stream encountered! Input stream may be invalid format or truncated.", ex);
        }
    }

    return thisRecord;
}