/// <summary>
/// Creates a deep copy of this record.
/// </summary>
/// <returns>
/// A new <see cref="Record"/> that carries this record's leader, every one of its
/// warnings, and a clone of each of its fields.
/// </returns>
public Record Clone()
{
    Record copy = new Record { Leader = this.Leader };

    // Warnings are re-added one at a time through AddWarnings.
    foreach (string warning in Warnings)
    {
        copy.AddWarnings(warning);
    }

    // Each field is cloned, so the copy does not hold this record's field instances.
    foreach (Field field in Fields)
    {
        copy.Fields.Add(field.Clone());
    }

    return copy;
}
/// <summary>
/// Creates a deep copy of this record.
/// </summary>
/// <returns>
/// A new <see cref="Record"/> that carries this record's leader, every one of its
/// warnings, and a clone of each of its fields.
/// </returns>
public Record Clone()
{
    Record copy = new Record();
    copy.leader = this.leader;

    // Warnings are re-added one at a time through AddWarnings.
    foreach (string warning in this.warnings)
    {
        copy.AddWarnings(warning);
    }

    // Each field is cloned, so the copy does not hold this record's field instances.
    foreach (Field field in this.fields)
    {
        copy.fields.Add(field.Clone());
    }

    return copy;
}
//Private utility functions
#region Private utility functions

/// <summary>
/// Decodes the raw MARC record stored at the specified index of <c>rawSource</c> into a <see cref="MARC.Record"/>.
/// Recoverable problems (bad length prefix, wrong base address, malformed directory entries,
/// invalid tags or indicators, missing terminators) are recorded on the returned record through
/// <c>AddWarnings</c> instead of stopping the decode.
/// </summary>
/// <param name="index">The index of the record to retrieve.</param>
/// <returns>The decoded record, including any warnings gathered while decoding.</returns>
/// <exception cref="InvalidDataException">The raw record does not end with <c>END_OF_RECORD</c>.</exception>
private Record decode(int index)
{
    string raw = rawSource[index];
    Record marc = new Record();
    // The record is expected to open with its length written as exactly five digits.
    Match match = Regex.Match(raw, "^(\\d{5})");
    int recordLength = 0;
    // Running total of the field-length corrections made in the field loop below.
    // It is added to every later directory offset and subtracted (twice) when computing fieldStart.
    int totalExtraBytesRead = 0;

    // Store record length
    if (match.Captures.Count == 0)
    {
        marc.AddWarnings("MARC record length is not numeric or incorrect number of characters.");
        // Fall back to whatever digits precede the first non-digit. If they parse,
        // left-pad raw with zeros so that leading digit run becomes five characters wide.
        string[] split = Regex.Split(raw, "[^0-9]");
        if (Int32.TryParse(split[0], out recordLength))
        {
            string padding = "";
            padding = padding.PadLeft(5 - split[0].Length, '0');
            raw = padding + raw;
        }
    }
    else
    {
        recordLength = Convert.ToInt32(match.Captures[0].Value);
    }

    if (recordLength != raw.Length)
    {
        //Check if there are multi-byte characters in the string
        System.Globalization.StringInfo stringInfo = new System.Globalization.StringInfo(raw);
        // Two candidate explanations for the mismatch: chars beyond the text-element count,
        // and UTF-8 bytes beyond the char count.
        int extraBytes = raw.Length - stringInfo.LengthInTextElements;
        int extraBytes2 = Encoding.UTF8.GetByteCount(raw) - raw.Length;

        // Every branch leaves recordLength equal to raw.Length; only the unexplained case warns.
        if (recordLength == extraBytes + raw.Length)
        {
            recordLength -= extraBytes;
        }
        else if (recordLength == extraBytes2 + raw.Length)
        {
            recordLength -= extraBytes2;
        }
        else
        {
            marc.AddWarnings("MARC record length does not match actual length.");
            recordLength = raw.Length;
        }
    }

    // A missing record terminator is the one format problem treated as fatal here.
    if (!raw.EndsWith(END_OF_RECORD.ToString()))
    {
        throw new InvalidDataException("MARC record ends with an invalid terminator");
    }

    //Store leader
    marc.Leader = raw.Substring(0, LEADER_LEN);

    //Bytes 12-16 of leader give offset to the body of the record
    // NOTE(review): this conversion is unguarded, so non-numeric text at positions 12-16 throws FormatException.
    int dataStart = Convert.ToInt32(raw.Substring(12, 5));

    //Verify data start matches the first end of field marker
    if (raw.IndexOf(END_OF_FIELD) + 1 != dataStart)
    {
        // Trust the position of the first END_OF_FIELD over the value written in the leader.
        dataStart = raw.IndexOf(END_OF_FIELD) + 1;
        marc.AddWarnings("Leader specifies incorrect base address of data.");
    }

    //Immediately after the leader comes the directory (no separator)
    string directory = raw.Substring(LEADER_LEN, dataStart - LEADER_LEN - 1);

    //Character after the directory should be END_OF_FIELD
    if (raw.Substring(dataStart - 1, 1) != END_OF_FIELD.ToString())
    {
        marc.AddWarnings("No directory found.");
    }

    //All directory entries must be DIRECTORY_ENTRY_LEN long, so length % DIRECTORY_ENTRY_LEN should be 0
    if (directory.Length % DIRECTORY_ENTRY_LEN != 0)
    {
        marc.AddWarnings("Invalid directory length.");
    }

    //Go through all the fields
    // Integer division: a trailing partial directory entry is ignored.
    int fieldCount = directory.Length / DIRECTORY_ENTRY_LEN;

    for (int i = 0; i < fieldCount; i++)
    {
        // Directory entry layout as read here: 3-character tag, 4-character length, 5-character offset.
        string tag = directory.Substring(i * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN).Substring(0, 3);
        int fieldLength = 0;
        int fieldOffset = 0;
        // On a parse failure the value stays 0 and decoding continues with a warning.
        try
        {
            fieldLength = Convert.ToInt32(directory.Substring(i * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN).Substring(3, 4));
        }
        catch (FormatException)
        {
            marc.AddWarnings("Invalid Directory Tag Length for tag " + tag + ".");
        }

        try
        {
            fieldOffset = Convert.ToInt32(directory.Substring(i * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN).Substring(7, 5)) + totalExtraBytesRead;
        }
        catch (FormatException)
        {
            marc.AddWarnings("Invalid Directory Offset for tag " + tag + ".");
        }

        //Check Directory validity

        //If a tag isn't valid, default it to ZZZ. This should at least make the record valid enough to be readable and not throw exceptions
        if (!Field.ValidateTag(tag))
        {
            marc.AddWarnings("Invalid tag " + tag + " in directory.");
            tag = "ZZZ";
        }

        if (fieldOffset + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
        }

        // When the offset parsed, fieldOffset already includes +totalExtraBytesRead,
        // so the net shift applied to the directory offset here is -totalExtraBytesRead.
        int fieldStart = dataStart + fieldOffset - (totalExtraBytesRead * 2);
        if (fieldStart > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " starts past the end of the record. Skipping tag and all proceeding tags.");
            break;
        }
        else if (fieldStart + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
            // Clamp the field so the slice below stays inside the record.
            fieldLength = recordLength - fieldStart;
        }

        string tagData = raw.Substring(fieldStart, fieldLength);

        //Check if there are multi-byte characters in the string
        System.Globalization.StringInfo stringInfo = new System.Globalization.StringInfo(tagData);
        int extraBytes = fieldLength - stringInfo.LengthInTextElements;
        int extraBytes2 = Encoding.UTF8.GetByteCount(tagData) - fieldLength;
        int endOfFieldIndex = tagData.IndexOf(END_OF_FIELD);

        // If the first END_OF_FIELD is not the last character of the slice, the slice is shortened
        // and the amount removed is remembered in totalExtraBytesRead for the following fields.
        if (tagData.Length - 1 != endOfFieldIndex)
        {
            int differenceLength = tagData.Length - 1 - endOfFieldIndex;
            if (differenceLength != extraBytes && differenceLength != extraBytes2)
            {
                // The overshoot matches neither estimate: cut the field right after its first END_OF_FIELD.
                fieldLength -= differenceLength;
                totalExtraBytesRead += differenceLength;
                tagData = raw.Substring(fieldStart, endOfFieldIndex + 1);
            }
            else
            {
                // The overshoot matches an estimate: shrink by the text-element estimate when it is
                // positive, otherwise by the UTF-8 estimate when that one is positive.
                if (extraBytes > 0)
                {
                    fieldLength -= extraBytes;
                    totalExtraBytesRead += extraBytes;
                    tagData = raw.Substring(fieldStart, fieldLength);
                }
                else if (extraBytes2 > 0)
                {
                    fieldLength -= extraBytes2;
                    totalExtraBytesRead += extraBytes2;
                    tagData = raw.Substring(fieldStart, fieldLength);
                }
            }
        }

        // Only inspect the last character when there is one; an empty field just gets a warning.
        if (fieldLength > 0)
        {
            string endCharacter = tagData.Substring(tagData.Length - 1, 1);
            if (endCharacter == END_OF_FIELD.ToString())
            {
                //Get rid of the end of tag character
                tagData = tagData.Remove(tagData.Length - 1);
                fieldLength--;
            }
            else
            {
                marc.AddWarnings("Field for tag " + tag + " does not end with an end of field character.");
            }
        }
        else
        {
            marc.AddWarnings("Field for tag " + tag + " has a length of 0.");
        }

        // All-digit tags below 10 become control fields; every other tag becomes a data field.
        match = Regex.Match(tag, "^\\d+$");
        if (match.Captures.Count > 0 && Convert.ToInt32(tag) < 10)
        {
            marc.Fields.Add(new ControlField(tag, tagData));
        }
        else
        {
            // The text before the first SUBFIELD_INDICATOR holds the indicators; the rest are subfields.
            List <string> rawSubfields = new List <string>(tagData.Split(SUBFIELD_INDICATOR));
            string indicators = rawSubfields[0];
            rawSubfields.RemoveAt(0);
            char ind1;
            char ind2;

            if (indicators.Length != 2)
            {
                marc.AddWarnings("Invalid indicator length. Forced indicators to blanks for tag " + tag + ".");
                ind1 = ind2 = ' ';
            }
            else
            {
                // Indicators are lower-cased before validation; an invalid one is replaced by a blank.
                ind1 = char.ToLower(indicators[0]);

                if (!DataField.ValidateIndicator(ind1))
                {
                    ind1 = ' ';
                    marc.AddWarnings("Invalid first indicator. Forced first indicator to blank for tag " + tag + ".");
                }

                ind2 = char.ToLower(indicators[1]);

                if (!DataField.ValidateIndicator(ind2))
                {
                    ind2 = ' ';
                    marc.AddWarnings("Invalid second indicator. Forced second indicator to blank for tag " + tag + ".");
                }
            }

            //Split the subfield data into subfield name and data pairs
            List <Subfield> subfieldData = new List <Subfield>();
            foreach (string subfield in rawSubfields)
            {
                if (subfield.Length > 0)
                {
                    // First character is the subfield code, the remainder is its data.
                    subfieldData.Add(new Subfield(subfield[0], subfield.Substring(1)));
                }
                else
                {
                    marc.AddWarnings("No subfield data found in tag " + tag + ".");
                }
            }

            if (subfieldData.Count == 0)
            {
                marc.AddWarnings("No subfield data found in tag " + tag + ".");
            }

            // The data field is added even when it ended up with no subfields.
            marc.Fields.Add(new DataField(tag, subfieldData, ind1, ind2));
        }
    }
    return(marc);
}
public void WarningsTest()
{
    // Arrange: the warning text and a fresh record to receive it.
    const string expected = "IT'S ALL BROKEN! SOUND THE ALARM!";
    Record target = new Record();

    // Act: add the warning, then read the warnings list back.
    target.AddWarnings(expected);
    List<string> actual = target.Warnings;

    // Assert: the first stored warning is the text that was added.
    Assert.AreEqual(expected, actual[0]);
}
/// <summary>
/// Decodes the raw MARC record at the specified index of <c>_rawSource</c> into a <see cref="MARC.Record"/>.
/// Recoverable format problems (bad length prefix, malformed directory entries, invalid tags or
/// indicators, empty or unterminated fields) are recorded on the returned record through
/// <c>AddWarnings</c> instead of stopping the decode.
/// </summary>
/// <param name="index">The index of the record to retrieve.</param>
/// <returns>The decoded record, including any warnings gathered while decoding.</returns>
/// <exception cref="InvalidDataException">The raw record does not end with <c>END_OF_RECORD</c>.</exception>
private Record Decode(int index)
{
    string raw = _rawSource[index];
    Record marc = new Record();
    int recordLength = 0;

    // Store record length: the record must open with its length written as exactly five digits.
    Match match = Regex.Match(raw, "^(\\d{5})");
    if (match.Success)
    {
        recordLength = Convert.ToInt32(match.Value);
    }
    else
    {
        marc.AddWarnings("MARC record length is not numeric.");
    }

    // From here on recordLength always equals raw.Length, so it is a safe bound for slicing.
    if (recordLength != raw.Length)
    {
        marc.AddWarnings("MARC record length does not match actual length.");
        recordLength = raw.Length;
    }

    // A missing record terminator is the one format problem treated as fatal here.
    if (!raw.EndsWith(END_OF_RECORD.ToString()))
    {
        throw new InvalidDataException("MARC record ends with an invalid terminator");
    }

    //Store leader
    marc.Leader = raw.Substring(0, LEADER_LEN);

    //Bytes 12-16 of leader give offset to the body of the record
    int dataStart = Convert.ToInt32(raw.Substring(12, 5));

    //Immediately after the leader comes the directory (no separator)
    string directory = raw.Substring(LEADER_LEN, dataStart - LEADER_LEN - 1);

    //Character after the directory should be END_OF_FIELD
    if (raw.Substring(dataStart - 1, 1) != END_OF_FIELD.ToString())
    {
        marc.AddWarnings("No directory found.");
    }

    //All directory entries must be DIRECTORY_ENTRY_LEN long, so length % DIRECTORY_ENTRY_LEN should be 0
    if (directory.Length % DIRECTORY_ENTRY_LEN != 0)
    {
        marc.AddWarnings("Invalid directory length.");
    }

    //Go through all the fields
    // Integer division: a trailing partial directory entry is ignored.
    int fieldCount = directory.Length / DIRECTORY_ENTRY_LEN;

    for (int i = 0; i < fieldCount; i++)
    {
        // Slice the directory entry once. Layout as read here: 3-character tag, 4-character length, 5-character offset.
        string entry = directory.Substring(i * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN);
        string tag = entry.Substring(0, 3);

        // TryParse leaves the value at 0 on failure, which is what decoding continues with.
        int fieldLength;
        if (!int.TryParse(entry.Substring(3, 4), out fieldLength))
        {
            marc.AddWarnings("Invalid Directory Tag Length for tag " + tag + ".");
        }

        int fieldOffset;
        if (!int.TryParse(entry.Substring(7, 5), out fieldOffset))
        {
            marc.AddWarnings("Invalid Directory Offset for tag " + tag + ".");
        }

        //Check Directory validity

        //If a tag isn't valid, default it to ZZZ. This should at least make the record valid enough to be readable and not throw exceptions
        if (!Field.ValidateTag(tag))
        {
            marc.AddWarnings("Invalid tag " + tag + " in directory.");
            tag = "ZZZ";
        }

        if (fieldOffset + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
        }

        int fieldStart = dataStart + fieldOffset;
        if (fieldStart > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " starts past the end of the record. Skipping tag and all proceeding tags.");
            break;
        }
        else if (fieldStart + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
            // Clamp the field so the slice below stays inside the record.
            fieldLength = recordLength - fieldStart;
        }

        string tagData = raw.Substring(fieldStart, fieldLength);

        if (tagData.Length == 0)
        {
            // An empty slice (zero or unparsable directory length, or a field starting exactly at the
            // end of the record) has no last character; inspecting it would throw
            // ArgumentOutOfRangeException, so warn and carry on with the empty data.
            marc.AddWarnings("Field for tag " + tag + " has a length of 0.");
        }
        else if (tagData.Substring(tagData.Length - 1, 1) == END_OF_FIELD.ToString())
        {
            //Get rid of the end of tag character
            tagData = tagData.Remove(tagData.Length - 1);
            fieldLength--;
        }
        else
        {
            marc.AddWarnings("Field for tag " + tag + " does not end with an end of field character.");
        }

        // All-digit tags below 10 become control fields; every other tag becomes a data field.
        if (Regex.IsMatch(tag, "^\\d+$") && Convert.ToInt32(tag) < 10)
        {
            marc.Fields.Add(new ControlField(tag, tagData));
        }
        else
        {
            // The text before the first SUBFIELD_INDICATOR holds the indicators; the rest are subfields.
            List<string> rawSubfields = new List<string>(tagData.Split(SUBFIELD_INDICATOR));
            string indicators = rawSubfields[0];
            rawSubfields.RemoveAt(0);
            char ind1;
            char ind2;

            if (indicators.Length != 2)
            {
                marc.AddWarnings("Invalid indicator length. Forced indicators to blanks for tag " + tag + ".");
                ind1 = ind2 = ' ';
            }
            else
            {
                // Indicators are lower-cased before validation; an invalid one is replaced by a blank.
                ind1 = char.ToLower(indicators[0]);
                if (!DataField.ValidateIndicator(ind1))
                {
                    ind1 = ' ';
                    marc.AddWarnings("Invalid first indicator. Forced first indicator to blank for tag " + tag + ".");
                }

                ind2 = char.ToLower(indicators[1]);
                if (!DataField.ValidateIndicator(ind2))
                {
                    ind2 = ' ';
                    marc.AddWarnings("Invalid second indicator. Forced second indicator to blank for tag " + tag + ".");
                }
            }

            //Split the subfield data into subfield name and data pairs
            List<Subfield> subfieldData = new List<Subfield>();
            foreach (string subfield in rawSubfields)
            {
                if (subfield.Length > 0)
                {
                    // First character is the subfield code, the remainder is its data.
                    subfieldData.Add(new Subfield(subfield[0], subfield.Substring(1)));
                }
                else
                {
                    marc.AddWarnings("No subfield data found in tag " + tag + ".");
                }
            }

            if (subfieldData.Count == 0)
            {
                marc.AddWarnings("No subfield data found in tag " + tag + ".");
            }

            // The data field is added even when it ended up with no subfields.
            marc.Fields.Add(new DataField(tag, subfieldData, ind1, ind2));
        }
    }

    return marc;
}
/// <summary>
/// Decodes the raw MARC record at the specified index of <c>_rawSource</c> into a <see cref="MARC.Record"/>.
/// Recoverable format problems (bad length prefix, malformed directory entries, invalid tags or
/// indicators, empty or unterminated fields) are recorded on the returned record through
/// <c>AddWarnings</c> instead of stopping the decode.
/// </summary>
/// <param name="index">The index of the record to retrieve.</param>
/// <returns>The decoded record, including any warnings gathered while decoding.</returns>
/// <exception cref="InvalidDataException">The raw record does not end with <c>END_OF_RECORD</c>.</exception>
private Record Decode(int index)
{
    string raw = _rawSource[index];
    Record marc = new Record();
    int recordLength = 0;

    // Store record length: the record must open with its length written as exactly five digits.
    Match match = Regex.Match(raw, "^(\\d{5})");
    if (match.Success)
    {
        recordLength = Convert.ToInt32(match.Value);
    }
    else
    {
        marc.AddWarnings("MARC record length is not numeric.");
    }

    // From here on recordLength always equals raw.Length, so it is a safe bound for slicing.
    if (recordLength != raw.Length)
    {
        marc.AddWarnings("MARC record length does not match actual length.");
        recordLength = raw.Length;
    }

    // A missing record terminator is the one format problem treated as fatal here.
    if (!raw.EndsWith(END_OF_RECORD.ToString()))
    {
        throw new InvalidDataException("MARC record ends with an invalid terminator");
    }

    //Store leader
    marc.Leader = raw.Substring(0, LEADER_LEN);

    //Bytes 12-16 of leader give offset to the body of the record
    int dataStart = Convert.ToInt32(raw.Substring(12, 5));

    //Immediately after the leader comes the directory (no separator)
    string directory = raw.Substring(LEADER_LEN, dataStart - LEADER_LEN - 1);

    //Character after the directory should be END_OF_FIELD
    if (raw.Substring(dataStart - 1, 1) != END_OF_FIELD.ToString())
    {
        marc.AddWarnings("No directory found.");
    }

    //All directory entries must be DIRECTORY_ENTRY_LEN long, so length % DIRECTORY_ENTRY_LEN should be 0
    if (directory.Length % DIRECTORY_ENTRY_LEN != 0)
    {
        marc.AddWarnings("Invalid directory length.");
    }

    //Go through all the fields
    // Integer division: a trailing partial directory entry is ignored.
    int fieldCount = directory.Length / DIRECTORY_ENTRY_LEN;

    for (int i = 0; i < fieldCount; i++)
    {
        // Slice the directory entry once. Layout as read here: 3-character tag, 4-character length, 5-character offset.
        string entry = directory.Substring(i * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN);
        string tag = entry.Substring(0, 3);

        // TryParse leaves the value at 0 on failure, which is what decoding continues with.
        int fieldLength;
        if (!int.TryParse(entry.Substring(3, 4), out fieldLength))
        {
            marc.AddWarnings("Invalid Directory Tag Length for tag " + tag + ".");
        }

        int fieldOffset;
        if (!int.TryParse(entry.Substring(7, 5), out fieldOffset))
        {
            marc.AddWarnings("Invalid Directory Offset for tag " + tag + ".");
        }

        //Check Directory validity

        //If a tag isn't valid, default it to ZZZ. This should at least make the record valid enough to be readable and not throw exceptions
        if (!Field.ValidateTag(tag))
        {
            marc.AddWarnings("Invalid tag " + tag + " in directory.");
            tag = "ZZZ";
        }

        if (fieldOffset + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
        }

        int fieldStart = dataStart + fieldOffset;
        if (fieldStart > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " starts past the end of the record. Skipping tag and all proceeding tags.");
            break;
        }
        else if (fieldStart + fieldLength > recordLength)
        {
            marc.AddWarnings("Directory entry for tag " + tag + " runs past the end of the record.");
            // Clamp the field so the slice below stays inside the record.
            fieldLength = recordLength - fieldStart;
        }

        string tagData = raw.Substring(fieldStart, fieldLength);

        if (tagData.Length == 0)
        {
            // An empty slice (zero or unparsable directory length, or a field starting exactly at the
            // end of the record) has no last character; inspecting it would throw
            // ArgumentOutOfRangeException, so warn and carry on with the empty data.
            marc.AddWarnings("Field for tag " + tag + " has a length of 0.");
        }
        else if (tagData.Substring(tagData.Length - 1, 1) == END_OF_FIELD.ToString())
        {
            //Get rid of the end of tag character
            tagData = tagData.Remove(tagData.Length - 1);
            fieldLength--;
        }
        else
        {
            marc.AddWarnings("Field for tag " + tag + " does not end with an end of field character.");
        }

        // All-digit tags below 10 become control fields; every other tag becomes a data field.
        if (Regex.IsMatch(tag, "^\\d+$") && Convert.ToInt32(tag) < 10)
        {
            marc.Fields.Add(new ControlField(tag, tagData));
        }
        else
        {
            // The text before the first SUBFIELD_INDICATOR holds the indicators; the rest are subfields.
            List<string> rawSubfields = new List<string>(tagData.Split(SUBFIELD_INDICATOR));
            string indicators = rawSubfields[0];
            rawSubfields.RemoveAt(0);
            char ind1;
            char ind2;

            if (indicators.Length != 2)
            {
                marc.AddWarnings("Invalid indicator length. Forced indicators to blanks for tag " + tag + ".");
                ind1 = ind2 = ' ';
            }
            else
            {
                // Indicators are lower-cased before validation; an invalid one is replaced by a blank.
                ind1 = char.ToLower(indicators[0]);
                if (!DataField.ValidateIndicator(ind1))
                {
                    ind1 = ' ';
                    marc.AddWarnings("Invalid first indicator. Forced first indicator to blank for tag " + tag + ".");
                }

                ind2 = char.ToLower(indicators[1]);
                if (!DataField.ValidateIndicator(ind2))
                {
                    ind2 = ' ';
                    marc.AddWarnings("Invalid second indicator. Forced second indicator to blank for tag " + tag + ".");
                }
            }

            //Split the subfield data into subfield name and data pairs
            List<Subfield> subfieldData = new List<Subfield>();
            foreach (string subfield in rawSubfields)
            {
                if (subfield.Length > 0)
                {
                    // First character is the subfield code, the remainder is its data.
                    subfieldData.Add(new Subfield(subfield[0], subfield.Substring(1)));
                }
                else
                {
                    marc.AddWarnings("No subfield data found in tag " + tag + ".");
                }
            }

            if (subfieldData.Count == 0)
            {
                marc.AddWarnings("No subfield data found in tag " + tag + ".");
            }

            // The data field is added even when it ended up with no subfields.
            marc.Fields.Add(new DataField(tag, subfieldData, ind1, ind2));
        }
    }

    return marc;
}
/// <summary>
/// Produces a deep copy of this record.
/// </summary>
/// <returns>
/// A new <see cref="Record"/> with the same leader, the same warnings re-added in order,
/// and a clone of every field.
/// </returns>
public Record Clone()
{
    Record duplicate = new Record { Leader = this.Leader };

    // Carry the warnings over through AddWarnings, one at a time.
    foreach (string message in this.Warnings)
    {
        duplicate.AddWarnings(message);
    }

    // Clone each field so the duplicate does not hold this record's field instances.
    foreach (Field original in this.Fields)
    {
        duplicate.Fields.Add(original.Clone());
    }

    return duplicate;
}