// Builds an ISO2709 record from an in-memory ("internal format") MARC record.
//
// The input is read as a 24-byte leader followed by fields. Each field is a 3-byte
// field name plus its content; fields are delimited by FLDEND, and the last field may
// omit the terminator. One directory entry (name + 4-digit length + 5-digit start
// offset) is generated per field, and the leader's record length and base address are
// rewritten to match the generated directory and data area.
// parameters:
//      baMARC      [in] internal-format record, already converted to a byte array
//                  with the appropriate Encoding
//      baResult    [out] ISO2709 record; null when -1 is returned
// return:
//      -1  error (baMARC is null or shorter than the 24-byte leader)
//      0   succeed
// exceptions:
//      Exception   either of the first two bytes is 0, which is treated as
//                  UCS-2 (UTF-16) input
public static int BuildISO2709Record(byte[] baMARC,
    out byte[] baResult)
{
    baResult = null;

    if (baMARC == null)
    {
        return -1;
    }

    if (baMARC.Length < 24)
    {
        return -1;
    }

    // 2018/3/8
    if (baMARC[0] == 0 || baMARC[1] == 0)
    {
        throw new Exception("ISO2709 格式无法使用编码方式 UCS-2 (UTF-16)");
    }

    MarcHeaderStruct header = new MarcHeaderStruct(baMARC);

    // Both areas start out empty (not null), so a record that consists of the leader
    // only still produces a well-formed result instead of a NullReferenceException.
    List<byte> directory = new List<byte>();   // directory area
    List<byte> body = new List<byte>();        // data area

    int nLen = baMARC.Length;
    int nFldStart = 0;      // offset of the current field inside the data area
    int nStartPos = 24;     // fields begin right after the leader

    while (true)
    {
        bool bEnd = false;
        int nFldLen;

        int nPos = ByteArray.IndexOf(baMARC, (byte)FLDEND, nStartPos);
        if (nPos == -1)
        {
            // No further terminator: the remainder of the buffer is the last field.
            nFldLen = nLen - nStartPos;
            bEnd = true;
        }
        else
        {
            // The terminator is counted as part of the field.
            nFldLen = nPos - nStartPos + 1;
        }

        // A segment too short to hold a 3-byte field name is skipped.
        if (nFldLen >= 3)
        {
            int nContentLen = nFldLen - 3;

            byte[] baFldName = new byte[3];
            Array.Copy(baMARC, nStartPos, baFldName, 0, 3);

            byte[] baFldContent = new byte[nContentLen];
            Array.Copy(baMARC, nStartPos + 3, baFldContent, 0, nContentLen);

            // PadLeft only pads, it never truncates.
            // NOTE(review): a content length above 9999 or a start offset above 99999
            // yields a directory entry wider than 12 bytes; this is not rejected here.
            string strFldLen = Convert.ToString(nContentLen).PadLeft(4, '0');
            string strFldStart = Convert.ToString(nFldStart).PadLeft(5, '0');

            nFldStart += nContentLen;

            directory.AddRange(baFldName);
            directory.AddRange(Encoding.UTF8.GetBytes(strFldLen));
            directory.AddRange(Encoding.UTF8.GetBytes(strFldStart));

            body.AddRange(baFldContent);
        }

        if (bEnd)
        {
            break;
        }

        nStartPos = nPos + 1;
    }

    // leader + directory + directory terminator + data area + record terminator
    header.RecLength = 24 + directory.Count + 1 + body.Count + 1;
    // The data area begins right after the directory terminator.
    header.BaseAddress = 24 + directory.Count + 1;

    baResult = ByteArray.Add(baResult, header.GetBytes());
    if (directory.Count > 0)
    {
        baResult = ByteArray.Add(baResult, directory.ToArray());
    }
    baResult = ByteArray.Add(baResult, (byte)FLDEND);
    if (body.Count > 0)
    {
        baResult = ByteArray.Add(baResult, body.ToArray());
    }
    baResult = ByteArray.Add(baResult, (byte)RECEND);

    return 0;
}
// Converts an ISO2709 record into an array of fields, following the directory.
//
// Every element of aResult is a byte[] holding one field. The first element is the
// leader and is always 24 bytes (any 0 byte in it is replaced by '*'). Each following
// element is the 3-byte field name followed by the field content without its
// terminating FLDEND.
// parameters:
//      encoding        encoding of the record; must not be Encoding.Unicode
//      s               [in] the ISO2709 record
//      aResult         [out] the leader and the fields
//      strErrorInfo    [out] error description when -2 is returned
// return:
//      -2      MARC format error (strErrorInfo is set)
//      other   on success, a running byte count: 24 for the leader plus, for each
//              directory entry processed, 3 plus the length declared in that entry
// exceptions:
//      Exception   encoding is UCS-2; ForceCvt2709ToFieldArray() must be used instead
public static int Cvt2709ToFieldArray(
    Encoding encoding,  // 2007/7/11
    byte[] s,
    out List<byte[]> aResult,
    out string strErrorInfo)
{
    strErrorInfo = "";
    aResult = new List<byte[]>();

    int maxbytes = 2000000;   // roughly 2000K, a guard against hostile input

    if (encoding.Equals(Encoding.Unicode) == true)
    {
        throw new Exception("UCS2编码方式应当使用 ForceCvt2709ToFieldArray(),而不是 Cvt2709ToFieldArray()");
    }

    MarcHeaderStruct header = new MarcHeaderStruct(encoding, s);

    {
        // Emit the leader.
        byte[] tarray = new byte[24];
        Array.Copy(s, 0, tarray, 0, 24);

        // 2014/5/9
        // Guard against 0 bytes appearing inside the leader.
        for (int j = 0; j < tarray.Length; j++)
        {
            if (tarray[j] == 0)
            {
                tarray[j] = (byte)'*';
            }
        }

        aResult.Add(tarray);
    }

    int somaxlen = s.Length;
    int reclen, baseaddr, lenoffld, startposoffld;
    int t = 24;
    int offs = 24;

    try
    {
        reclen = header.RecLength;
    }
    catch (FormatException ex)
    {
        strErrorInfo = "头标区开始5字符 '" + header.RecLengthString + "' 不是纯数字 :" + ex.Message;
        goto ERROR2;
    }

    if (reclen > somaxlen)
    {
        strErrorInfo = "头标区头5字符表示的记录长度"
            + Convert.ToString(reclen)
            + "大于源缓冲区整个内容的长度"
            + Convert.ToString(somaxlen);
        goto ERROR2;
    }

    if (reclen < 24)
    {
        strErrorInfo = "头标区头5字符表示的记录长度"
            + Convert.ToString(reclen)
            + "小于24";
        goto ERROR2;
    }

    if (s[reclen - 1] != RECEND)
    {
        // The byte at the declared end of the record is not the record terminator.
        strErrorInfo = "头标区声称的结束位置不是MARC记录结束符";
        goto ERROR2;
    }

    // The record terminator must not occur anywhere before the declared end.
    for (int i = 0; i < reclen - 1; i++)
    {
        if (s[i] == RECEND)
        {
            strErrorInfo = "记录内容中不能有记录结束符";
            goto ERROR2;
        }
    }

    try
    {
        baseaddr = header.BaseAddress;
    }
    catch (FormatException ex)
    {
        strErrorInfo = "头标区数据基地址5字符 '" + header.BaseAddressString + " '不是纯数字 :" + ex.Message;
        goto ERROR2;
    }

    if (baseaddr > somaxlen)
    {
        strErrorInfo = "数据基地址值 "
            + Convert.ToString(baseaddr)
            + " 已经超出源缓冲区整个内容的长度 "
            + Convert.ToString(somaxlen);
        goto ERROR2;
    }

    if (baseaddr <= 24)
    {
        // The base address is too small.
        strErrorInfo = "数据基地址值 "
            + Convert.ToString(baseaddr)
            + " 小于24";
        goto ERROR2;
    }

    if (s[baseaddr - 1] != FLDEND)
    {
        // The directory must be closed by FLDEND right before the data area.
        strErrorInfo = "没有在目次区尾部位置"
            + Convert.ToString(baseaddr)
            + "找到FLDEND符号";
        goto ERROR2;
    }

    try
    {
        lenoffld = header.WidthOfFieldLength;
    }
    catch (FormatException ex)
    {
        strErrorInfo = "头标区目次区字段长度1字符 '" + header.WidthOfFieldLengthString + " '不是纯数字 :" + ex.Message;
        goto ERROR2;
    }

    try
    {
        startposoffld = header.WidthOfStartPositionOfField;
    }
    catch (FormatException ex)
    {
        strErrorInfo = "头标区目次区字段起始位置1字符 '" + header.WidthOfStartPositionOfFieldString + " '不是纯数字 :" + ex.Message;
        goto ERROR2;
    }

    if (lenoffld <= 0 || lenoffld > 30)
    {
        strErrorInfo = "目次区中字段长度值占用字符数 "
            + Convert.ToString(lenoffld)
            + " 不正确,应在1和29之间...";
        goto ERROR2;
    }

    if (lenoffld != 4)
    {
        // 2001/5/15
        strErrorInfo = "目次区中字段长度值占用字符数 "
            + Convert.ToString(lenoffld)
            + " 不正确,应为4...";
        goto ERROR2;
    }

    lenoffld = 4;

    if (startposoffld <= 0 || startposoffld > 30)
    {
        strErrorInfo = "目次区中字段起始位置值占用字符数 "
            + Convert.ToString(startposoffld)
            + " 不正确,应在1到29之间...";
        goto ERROR2;
    }

    // Whatever in-range width the leader declares, 5 is what gets used.
    startposoffld = 5;

    // Walk the directory: name (3) + field length (lenoffld) + start position (startposoffld).
    while (true)
    {
        if (s[offs] == FLDEND)
        {
            break;  // end of the directory
        }

        if (offs + 3 >= baseaddr)
        {
            break;
        }
        if (t + 3 >= maxbytes)
        {
            break;
        }

        // Field name.
        MyByteList baField = new MyByteList();
        baField.AddRange(s, offs, 3);
        t += 3;

        // Field length.
        offs += 3;
        if (offs + lenoffld >= baseaddr)
        {
            break;
        }
        int len = MarcHeaderStruct.IntValue(s, offs, lenoffld);

        // Start position of the field content, relative to the base address.
        offs += lenoffld;
        if (offs + startposoffld >= baseaddr)
        {
            break;
        }
        int startpos = MarcHeaderStruct.IntValue(s, offs, startposoffld);

        offs += startposoffld;
        if (offs >= baseaddr)
        {
            break;
        }

        if (t + len >= maxbytes)
        {
            break;
        }

        int fieldStart = baseaddr + startpos;
        int fieldEnd = fieldStart + len - 1;    // index where the field's FLDEND is expected

        // The directory values come from the record itself. Make sure they stay
        // inside the buffer before indexing with them, so that a malformed record
        // is reported as a format error rather than an IndexOutOfRangeException.
        if (startpos < 0 || len < 0 || fieldEnd >= somaxlen)
        {
            strErrorInfo = "目次区中字段的起始位置或长度超出了源缓冲区的范围";
            goto ERROR2;
        }

        // The byte before the field must terminate the previous field (or the directory).
        if (s[fieldStart - 1] != FLDEND)
        {
            strErrorInfo = "缺乏字段结束符";
            goto ERROR2;
        }

        // The last byte of the field must be its own terminator.
        if (s[fieldEnd] != FLDEND)
        {
            strErrorInfo = "缺乏字段结束符";
            goto ERROR2;
        }

        // Copy the content without the trailing FLDEND.
        baField.AddRange(s, fieldStart, len == 0 ? len : len - 1);
        t += len;

        aResult.Add(baField.GetByteArray());
    }

    if (t + 1 >= maxbytes)
    {
        // The record is too large.
        strErrorInfo = "记录太大";
        goto ERROR2;
    }

    Debug.Assert(t != -2, "");
    return t;

ERROR2:
    // For debugging.
    Debug.Assert(false, "");
    return -2;  // MARC format error
}
// Forcibly converts an ISO2709 record into an array of fields.
//
// The addresses and lengths stored in the directory are ignored; only the 3-byte
// field names are taken from it. Field contents are then assigned to those names in
// order, by splitting the data that follows the directory on FLDEND.
// Every element of aResult is a byte[] holding one field. The first element is the
// leader and is always present, including when the record ends before any field data.
// parameters:
//      encoding        [in,out] encoding of the record. 2007/7/11: may be changed inside
//                      this function: UCS-2 input is re-encoded and encoding is set to UTF-8
//      s               [in] the ISO2709 record
//      aResult         [out] the leader and the fields
//      strErrorInfo    [out] always set to "" by this function
// return:
//      0   this implementation always returns 0
public static int ForceCvt2709ToFieldArray(
    ref Encoding encoding,
    byte[] s,
    out List<byte[]> aResult,
    out string strErrorInfo)
{
    strErrorInfo = "";
    aResult = new List<byte[]>();

    Debug.Assert(s != null, "");

    List<MyByteList> results = new List<MyByteList>();

    if (encoding.Equals(Encoding.Unicode) == true)
    {
        // Re-encode as UTF-8 so the byte-oriented scan below can be used.
        string strRecord = encoding.GetString(s);
        s = Encoding.UTF8.GetBytes(strRecord);
        encoding = Encoding.UTF8;
    }

    MarcHeaderStruct header = null;
    try
    {
        header = new MarcHeaderStruct(encoding, s);
    }
    catch (ArgumentException)
    {
        // Per the original note: be tolerant of input shorter than 24 characters
        // by substituting a placeholder leader.
        header = new MarcHeaderStruct(Encoding.ASCII, Encoding.ASCII.GetBytes("012345678901234567890123"));
    }
    header.ForceUNIMARCHeader();    // force certain positions to their default values

    results.Add(header.GetByteList());

    int somaxlen = s.Length;
    int offs = 24;

    // Scan the directory in fixed 3 + 4 + 5 byte steps, keeping only the field names.
    while (true)
    {
        bool bFound = false;
        int j;
        for (j = offs; j < offs + 3 + 4 + 5; j++)
        {
            if (j >= somaxlen)
            {
                break;
            }
            if (s[j] == FLDEND)
            {
                bFound = true;
                break;
            }
        }

        if (j >= somaxlen)
        {
            // Ran off the end of the buffer.
            offs = j;
            break;
        }

        if (bFound == true && j <= offs + 3)
        {
            // FLDEND within the first four bytes of the entry: end of the directory.
            offs = j + 1;
            break;
        }

        // Field name.
        MyByteList name = new MyByteList();
        name.AddRange(s, offs, 3);
        results.Add(name);

        // Skip name, length and start position.
        offs += 3 + 4 + 5;

        if (bFound == true)
        {
            // FLDEND inside this entry: the data area starts right after it.
            offs = j + 1;
            break;
        }
    }

    // When the buffer ends here the loop below does nothing. The leader and any field
    // names collected so far are still returned; the original returned early at this
    // point and left aResult empty.

    int nFieldNumber = 1;
    MyByteList baField = null;

    // Append the field contents to the corresponding names.
    for (; offs < somaxlen; offs++)
    {
        byte c = s[offs];
        if (c == RECEND)
        {
            break;
        }

        if (c == FLDEND)
        {
            nFieldNumber++;
            baField = null;
        }
        else
        {
            if (baField == null)
            {
                // More content segments than directory names: pad with "???" names
                // so that the index stays in range.
                while (nFieldNumber >= results.Count)
                {
                    MyByteList temp = new MyByteList();
                    temp.Add((byte)'?');
                    temp.Add((byte)'?');
                    temp.Add((byte)'?');
                    results.Add(temp);
                }
                baField = results[nFieldNumber];
            }

            baField.Add(c);
        }
    }

    aResult = new List<byte[]>();
    foreach (MyByteList list in results)
    {
        aResult.Add(list.GetByteArray());
    }

    return 0;
}