示例#1
0
        /// <summary>
        /// Reads the next MARC record from <c>_reader</c>: the 24-character leader, the
        /// directory (a run of 12-digit entries: 3-digit tag, 4-digit field length,
        /// 5-digit starting position), and finally the variable field data, which is
        /// decoded and matched back to the directory entries.
        /// </summary>
        /// <remarks>
        /// Field bytes are decoded with <c>ConvertMarcBytesToUnicodeString</c> when leader
        /// position 9 is a space, and as UTF-8 otherwise. When the record was decoded from
        /// the MARC encoding, leader position 9 is rewritten to <c>'a'</c> on the returned record.
        /// </remarks>
        /// <returns>
        /// The parsed record, or <c>null</c> when the end of the file is reached while the
        /// leader is being read (in that case <c>EofFlag</c> is set and <c>Close()</c> is called).
        /// </returns>
        /// <exception cref="ApplicationException">
        /// Thrown when the leader is shorter than 24 characters, and - unless
        /// <c>ActionOnError</c> is <c>StoreInRecord</c>, in which case the problem is recorded
        /// on the returned record instead - when the directory contains a non-numeric
        /// character, when a directory entry cannot be matched to any field, or when the
        /// stream ends unexpectedly.
        /// </exception>
        private MarcRecord parse_next_record()
        {
            const int LeaderLength         = 24;
            const int DirectoryEntryLength = 12;

            // The record to return, plus the decoded fields keyed by their start offset
            var thisRecord = new MarcRecord();
            var fieldDatas = new Dictionary<short, ParserVariableFieldData>();

            try
            {
                // Used to detect the end of the file while reading the leader
                long fileLength = _reader.BaseStream.Length;

                var leaderBuilder = new StringBuilder(30);

                // Read to first character
                int  result = _reader.Read();
                bool eof    = false;

                // ---- Leader -------------------------------------------------------------
                // Read directly as characters; stops early on a separator or end of file.
                int leaderCount = 0;
                while ((!eof) && (result != EndOfRecord) && (result != RecordSeperator) &&
                       (leaderCount < LeaderLength))
                {
                    // Skip any special characters at the beginning (like encoding characters)
                    if (result < 127)
                    {
                        leaderBuilder.Append((char)result);
                        leaderCount++;
                    }

                    // Advance; after the final leader character this leaves 'result'
                    // holding the first character of the directory.
                    if (_reader.BaseStream.Position < fileLength)
                    {
                        result = _reader.ReadByte();
                    }
                    else
                    {
                        eof = true;
                    }
                }

                // End of file reached: flag it, release the reader and signal "no more records"
                if (eof)
                {
                    EofFlag = true;
                    Close();
                    return null;
                }

                // Ensure the leader was correctly retrieved
                if (leaderBuilder.Length < LeaderLength)
                {
                    throw new ApplicationException(
                              "Error reading leader.  Either end of file, group seperator, or record seperator found prematurely.");
                }

                thisRecord.Leader = leaderBuilder.ToString();

                // Leader position 9 declares the character encoding of the field data
                RecordCharacterEncoding encoding = RecordCharacterEncoding.Unrecognized;
                switch (thisRecord.Leader[9])
                {
                case ' ':
                    encoding = RecordCharacterEncoding.Marc;
                    break;

                case 'a':
                    encoding = RecordCharacterEncoding.Unicode;
                    break;
                }

                // ---- Directory ----------------------------------------------------------
                var directoryEntries = new List<ParserDirectoryEntry>();
                int entryOffset      = 0;
                int tag              = 0;
                int fieldLength      = 0;
                int startingPosition = 0;
                while ((result != EndOfRecord) && (result != RecordSeperator))
                {
                    // A negative value means the reader ran out of input (presumably
                    // BinaryReader.Read(), which returns -1 instead of throwing). Without
                    // this check a truncated directory would never hit a separator and the
                    // loop would spin forever when errors are stored rather than thrown.
                    if (result < 0)
                    {
                        throw new EndOfStreamException();
                    }

                    // Directory entries consist solely of ASCII digits; an invalid
                    // character is reported and then treated as zero.
                    int digit = 0;
                    if ((result >= '0') && (result <= '9'))
                    {
                        digit = result - '0';
                    }
                    else if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
                    {
                        thisRecord.AddError(MarcRecordParsingErrorTypeEnum.InvalidDirectoryEncountered,
                                            "Found invalid (non-numeric) character in a directory entry.");
                    }
                    else
                    {
                        throw new ApplicationException("Found invalid (non-numeric) character in a directory entry.");
                    }

                    // Accumulate into the tag (offsets 0-2), the field length (3-6)
                    // or the starting position (7-11) of the current entry.
                    if (entryOffset < 3)
                    {
                        tag = (tag * 10) + digit;
                    }
                    else if (entryOffset < 7)
                    {
                        fieldLength = (fieldLength * 10) + digit;
                    }
                    else
                    {
                        startingPosition = (startingPosition * 10) + digit;
                    }

                    // Read the next character
                    result = _reader.Read();
                    entryOffset++;

                    // A complete entry has been read: save it and reset for the next one
                    if (entryOffset == DirectoryEntryLength)
                    {
                        // NOTE(review): values above short.MaxValue wrap in these casts - confirm
                        // whether records that large need to be supported.
                        directoryEntries.Add(new ParserDirectoryEntry((short)tag, (short)fieldLength,
                                                                      (short)startingPosition));
                        tag              = 0;
                        fieldLength      = 0;
                        startingPosition = 0;
                        entryOffset      = 0;
                    }
                }

                // ---- Variable fields ----------------------------------------------------
                // Accumulate raw bytes per field; they are decoded only once the whole
                // field has been read, according to the encoding declared in the leader.
                var fieldBytes = new List<byte>();

                int   bytesRead           = 0;
                int   startIndex          = 0;
                short lastFieldStartIndex = 0;
                result = _reader.Read();
                while (result != EndOfRecord)
                {
                    if (result == RecordSeperator)
                    {
                        // End of a field: decode what has been accumulated
                        byte[] fieldAsByteArray = fieldBytes.ToArray();
                        fieldBytes.Clear();

                        string fieldAsString;
                        switch (encoding)
                        {
                        case RecordCharacterEncoding.Marc:
                            fieldAsString = ConvertMarcBytesToUnicodeString(fieldAsByteArray);
                            break;

                        default:
                            fieldAsString = Encoding.UTF8.GetString(fieldAsByteArray);
                            break;
                        }

                        // Store the field under the offset at which it started
                        fieldDatas.Add((short)startIndex,
                                       new ParserVariableFieldData((short)startIndex, fieldAsString));

                        // This may be the last field, so remember where it started
                        lastFieldStartIndex = (short)startIndex;

                        // The next field starts right after this separator
                        startIndex = bytesRead + 1;
                    }
                    else
                    {
                        fieldBytes.Add((byte)result);
                    }

                    // ReadByte throws EndOfStreamException on a truncated record,
                    // which is handled by the catch block below.
                    result = _reader.ReadByte();
                    bytesRead++;
                }

                // ---- Match directory entries to decoded fields --------------------------
                // 'directoryErrorCorrection' is a running offset applied to every directory
                // position once the directory and the actual field offsets have drifted apart.
                int directoryErrorCorrection = 0;
                foreach (ParserDirectoryEntry directoryEntry in directoryEntries)
                {
                    ParserVariableFieldData fieldData;
                    bool found = fieldDatas.TryGetValue(
                        (short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData);

                    if (!found)
                    {
                        int correctionBeforeSearch = directoryErrorCorrection;

                        // Probe forward one position at a time, up to the start of the last field
                        while ((!found) &&
                               (lastFieldStartIndex > directoryEntry.StartingPosition + directoryErrorCorrection))
                        {
                            directoryErrorCorrection += 1;
                            found = fieldDatas.TryGetValue(
                                (short)(directoryEntry.StartingPosition + directoryErrorCorrection), out fieldData);
                        }

                        if (!found)
                        {
                            if (ActionOnError != ActionOnErrorEncounteredEnum.StoreInRecord)
                            {
                                throw new ApplicationException(
                                          "Field indexes and directory information cannot be resolved with one another.");
                            }

                            thisRecord.AddError(
                                MarcRecordParsingErrorTypeEnum.DirectoryFieldMismatchUnhandled);

                            // There is no data for this entry, so skip it rather than index the
                            // dictionary with a missing key. Undo the failed probe so it does
                            // not push every following entry past the last field as well.
                            directoryErrorCorrection = correctionBeforeSearch;
                            continue;
                        }

                        // This worked, but add a warning none-the-less
                        thisRecord.AddWarning(
                            MarcRecordParsingWarningTypeEnum.DirectoryFieldMismatchHandled);
                    }

                    string variableFieldData = fieldData.FieldData;

                    // Two leading characters followed by a unit separator are the indicators
                    var indicator = "";
                    if ((variableFieldData.Length > 3) && (variableFieldData[2] == UnitSeperator))
                    {
                        indicator         = variableFieldData.Substring(0, 2);
                        variableFieldData = variableFieldData.Substring(2);
                    }

                    // Is this split into separate subfields?
                    if ((variableFieldData.Length > 1) && (variableFieldData[0] == UnitSeperator))
                    {
                        var subfields = variableFieldData.Substring(1).Split(UnitSeperator);

                        var newField = new MarcField
                        {
                            Tag        = Convert.ToInt32(directoryEntry.Tag),
                            Indicators = indicator
                        };

                        foreach (string thisSubfield in subfields)
                        {
                            // Adjacent or trailing unit separators yield empty segments that
                            // carry neither a subfield code nor data; skip them.
                            if (thisSubfield.Length == 0)
                            {
                                continue;
                            }

                            // First character is the subfield code, the rest is its data
                            newField.Add_Subfield(thisSubfield[0], thisSubfield.Substring(1));
                        }

                        thisRecord.AddField(newField);
                    }
                    else
                    {
                        // No subfield structure: store the whole value under the tag
                        thisRecord.AddField(Convert.ToInt32(directoryEntry.Tag), variableFieldData);
                    }
                }

                // The field data was converted to Unicode, so a record that arrived in the
                // MARC encoding must now declare 'a' at leader position 9.
                if (encoding == RecordCharacterEncoding.Marc)
                {
                    thisRecord.Leader = thisRecord.Leader.Substring(0, 9) + "a" + thisRecord.Leader.Substring(10);
                }
            }
            catch (EndOfStreamException)
            {
                if (ActionOnError == ActionOnErrorEncounteredEnum.StoreInRecord)
                {
                    thisRecord.AddError(MarcRecordParsingErrorTypeEnum.UnexpectedEndOfStreamEncountered);
                }
                else
                {
                    throw new ApplicationException(
                              "Unexpected end of stream encountered!  Input stream may be invalid format or truncated.");
                }
            }

            return thisRecord;
        }