/// <summary>
/// Reads one MARC record from the underlying reader: the 24 character leader,
/// the directory (12 digit entries), and the variable fields, then matches each
/// directory entry to its field data and adds the resulting fields to the record.
/// </summary>
/// <returns>
/// The parsed record, or <c>null</c> when the end of the stream was reached while
/// reading the leader (in which case <c>EofFlag</c> is set and the reader is closed).
/// When <c>ActionOnError</c> is <c>StoreInRecord</c>, parsing problems are recorded on
/// the returned record instead of being thrown.
/// </returns>
/// <exception cref="ApplicationException">
/// Thrown when the leader is incomplete, or (unless errors are being stored in the
/// record) when the directory contains non-numeric characters, when the directory
/// cannot be reconciled with the field data, or when the stream ends unexpectedly.
/// </exception>
private MarcRecord parse_next_record()
{
    // Create the MARC record to return and the collection of raw field data,
    // keyed by the offset at which each field starts within the data area
    var thisRecord = new MarcRecord();
    var fieldDatas = new Dictionary<short, ParserVariableFieldData>();

    try
    {
        // Used to detect the end of the file while reading the leader
        long fileLength = _reader.BaseStream.Length;

        var leaderBuilder = new StringBuilder(30);

        // Read the first character. Read() (rather than ReadByte()) is kept here on
        // purpose: it returns -1 at the end of the stream instead of throwing, which
        // lets the loop below flag a clean end-of-file.
        int result = _reader.Read();
        bool eof = false;

        // Read the leader directly into a string, since it holds no specially
        // coded characters
        int count = 0;
        while ((!eof) && (result != EndOfRecord) && (result != RecordSeperator) && (count < 24))
        {
            // Skip any special characters at the beginning (like encoding characters)
            if (result < 127)
            {
                leaderBuilder.Append((char)result);
                count++;
            }

            // Read the next character, or flag the end of the file
            if (_reader.BaseStream.Position < fileLength)
            {
                result = _reader.ReadByte();
            }
            else
            {
                eof = true;
            }
        }

        // End of file reached while reading the leader: no more records
        if (eof)
        {
            EofFlag = true;
            Close();
            return null;
        }

        // Ensure the leader was correctly retrieved
        if (leaderBuilder.Length < 24)
        {
            throw new ApplicationException(
                "Error reading leader. Either end of file, group seperator, or record seperator found prematurely.");
        }

        thisRecord.Leader = leaderBuilder.ToString();

        // Leader position 9 identifies the character encoding of the record
        RecordCharacterEncoding encoding = RecordCharacterEncoding.Unrecognized;
        switch (thisRecord.Leader[9])
        {
            case ' ':
                encoding = RecordCharacterEncoding.Marc;
                break;

            case 'a':
                encoding = RecordCharacterEncoding.Unicode;
                break;
        }

        // Now, read in all the directory information. Each entry is 12 digits:
        // 3 for the tag, 4 for the field length and 5 for the starting position.
        // NOTE(review): the values are narrowed to short when the entry is created,
        // so starting positions above short.MaxValue wrap around - confirm whether
        // ParserDirectoryEntry can be widened.
        var directoryEntries = new List<ParserDirectoryEntry>();
        count = 0;
        int tag = 0;
        int fieldLength = 0;
        int startingPosition = 0;
        while ((result != EndOfRecord) && (result != RecordSeperator))
        {
            // Only the ASCII digits are valid here; anything else counts as zero
            int digit = 0;
            if ((result >= '0') && (result <= '9'))
            {
                digit = result - '0';
            }
            else if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
            {
                thisRecord.AddError(MarcRecordParsingErrorTypeEnum.InvalidDirectoryEncountered,
                                    "Found invalid (non-numeric) character in a directory entry.");
            }
            else
            {
                throw new ApplicationException("Found invalid (non-numeric) character in a directory entry.");
            }

            // Accumulate into the proper value, depending on how far into this
            // directory entry the reader has gotten
            if (count < 3)
            {
                tag = (tag * 10) + digit;
            }
            else if (count < 7)
            {
                fieldLength = (fieldLength * 10) + digit;
            }
            else if (count < 12)
            {
                startingPosition = (startingPosition * 10) + digit;
            }

            // Read the next byte. ReadByte() throws EndOfStreamException on a
            // truncated directory (handled below); Read() would return -1 forever
            // and this loop would never terminate when errors are being stored.
            result = _reader.ReadByte();
            count++;

            // If this directory entry has been completely read, save it
            // and reset the values for the next entry
            if (count == 12)
            {
                directoryEntries.Add(new ParserDirectoryEntry((short)tag, (short)fieldLength, (short)startingPosition));
                tag = 0;
                fieldLength = 0;
                startingPosition = 0;
                count = 0;
            }
        }

        // Use a memory stream to accumulate bytes, since the field can only be
        // decoded once all of its bytes are available
        var byteFieldBuilder = new MemoryStream();

        // Read all the data from the variable fields
        count = 0;
        var startIndex = 0;
        short lastFieldStartIndex = 0;

        // If the directory was ended by the record terminator there is no field data
        // at all; reading on would consume bytes belonging to the next record.
        if (result != EndOfRecord)
        {
            // Field data is raw bytes, so read it byte-wise (a character-decoding
            // read could swallow several bytes at once and skew the offsets)
            result = _reader.ReadByte();
            while (result != EndOfRecord)
            {
                if (result == RecordSeperator)
                {
                    // End of this field: decode the accumulated bytes
                    byte[] fieldAsByteArray = byteFieldBuilder.ToArray();

                    string fieldAsString;
                    switch (encoding)
                    {
                        case RecordCharacterEncoding.Marc:
                            fieldAsString = ConvertMarcBytesToUnicodeString(fieldAsByteArray);
                            break;

                        default:
                            fieldAsString = Encoding.UTF8.GetString(fieldAsByteArray);
                            break;
                    }

                    // Start a fresh buffer for the next field
                    byteFieldBuilder = new MemoryStream();

                    // Add the field to the list of variable data
                    fieldDatas.Add((short)startIndex, new ParserVariableFieldData((short)startIndex, fieldAsString));

                    // This may be the last field, so save this index
                    lastFieldStartIndex = (short)startIndex;

                    // The next field starts right after this separator
                    startIndex = count + 1;
                }
                else
                {
                    byteFieldBuilder.WriteByte((byte)result);
                }

                result = _reader.ReadByte();
                count++;
            }
        }

        // Now, step through the directory, retrieve each pre-converted field data,
        // and finish parsing. The correction is the offset by which directory
        // positions have been found to lag behind the actual field positions.
        int directoryErrorCorrection = 0;
        foreach (ParserDirectoryEntry directoryEntry in directoryEntries)
        {
            ParserVariableFieldData fieldData;
            if (!fieldDatas.TryGetValue((short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData))
            {
                // Scan forward for the next position at which a field actually starts
                int previousCorrection = directoryErrorCorrection;
                while ((!fieldDatas.ContainsKey((short)(directoryEntry.StartingPosition + directoryErrorCorrection)))
                       && (lastFieldStartIndex > directoryEntry.StartingPosition + directoryErrorCorrection))
                {
                    directoryErrorCorrection += 1;
                }

                if (!fieldDatas.TryGetValue((short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData))
                {
                    if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
                    {
                        thisRecord.AddError(MarcRecordParsingErrorTypeEnum.DirectoryFieldMismatchUnhandled);

                        // There is no data for this entry, so skip it (indexing the
                        // dictionary here would throw despite the error being stored)
                        // and do not let the failed scan skew the following entries.
                        directoryErrorCorrection = previousCorrection;
                        continue;
                    }

                    throw new ApplicationException(
                        "Field indexes and directory information cannot be resolved with one another.");
                }

                // This worked, but add a warning none-the-less
                thisRecord.AddWarning(MarcRecordParsingWarningTypeEnum.DirectoryFieldMismatchHandled);
            }

            var variableFieldData = fieldData.FieldData;

            // Two leading characters followed by a unit separator are the indicators
            var indicator = "";
            if ((variableFieldData.Length > 3) && (variableFieldData[2] == UnitSeperator))
            {
                indicator = variableFieldData.Substring(0, 2);
                variableFieldData = variableFieldData.Substring(2);
            }

            // Is this split into separate subfields?
            if ((variableFieldData.Length > 1) && (variableFieldData[0] == UnitSeperator))
            {
                var subfields = variableFieldData.Substring(1).Split(UnitSeperator);

                var newField = new MarcField
                {
                    Tag = Convert.ToInt32(directoryEntry.Tag),
                    Indicators = indicator
                };

                foreach (string thisSubfield in subfields)
                {
                    // Two adjacent separators yield an empty piece with neither a
                    // subfield code nor data; there is nothing to add for it
                    if (thisSubfield.Length == 0)
                    {
                        continue;
                    }

                    // First character is the subfield code, the rest is its data
                    newField.Add_Subfield(thisSubfield[0], thisSubfield.Substring(1));
                }

                thisRecord.AddField(newField);
            }
            else
            {
                // No subfield structure: store the data directly under the tag
                thisRecord.AddField(Convert.ToInt32(directoryEntry.Tag), variableFieldData);
            }
        }

        // If this was MARC8 encoding originally, change the encoding specified in the
        // leader, since the field data was converted to Unicode
        if (encoding == RecordCharacterEncoding.Marc)
        {
            thisRecord.Leader = thisRecord.Leader.Substring(0, 9) + "a" + thisRecord.Leader.Substring(10);
        }
    }
    catch (EndOfStreamException)
    {
        // The stream ended in the middle of a record
        if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
        {
            thisRecord.AddError(MarcRecordParsingErrorTypeEnum.UnexpectedEndOfStreamEncountered);
        }
        else
        {
            throw new ApplicationException(
                "Unexpected end of stream encountered! Input stream may be invalid format or truncated.");
        }
    }

    return thisRecord;
}