Exemple #1
0
        // Build an ISO 2709 record from an internal-format ("in-machine") record.
        //
        // The input is read as 24 leading bytes (passed to MarcHeaderStruct) followed by
        // fields separated by FLDEND bytes; the first 3 bytes of each field are its name.
        // The output is laid out as:
        //		header.GetBytes()	header with RecLength and BaseAddress rewritten
        //		directory		per field: 3-byte name, 4-digit content length,
        //					5-digit content start offset (both zero-padded)
        //		FLDEND
        //		field contents		each content keeps its trailing FLDEND when the
        //					input had one
        //		RECEND
        // Segments that cannot supply a full 3-byte name are skipped.
        // NOTE(review): lengths above 9999 or offsets above 99999 are not truncated, so
        // they would widen the directory entry; callers are assumed to stay within limits.
        // parameters:
        //		baMARC		[in] internal-format record, already converted to a byte
        //				array with a suitable Encoding object
        //		baResult	[out] the ISO 2709 record; null when -1 is returned
        // return:
        //		-1	error (baMARC is null or shorter than 24 bytes)
        //		0	succeed
        // exceptions:
        //		Exception	one of the first two bytes is 0, which is taken to mean
        //				the bytes are UCS-2 (UTF-16) encoded
        public static int BuildISO2709Record(byte[] baMARC,
                                             out byte[] baResult)
        {
            baResult = null;

            if (baMARC == null || baMARC.Length < 24)
            {
                return -1;
            }

            // 2018/3/8
            // A zero byte at the very start is treated as a sign of UCS-2 input.
            if (baMARC[0] == 0 ||
                baMARC[1] == 0)
            {
                throw new Exception("ISO2709 格式无法使用编码方式 UCS-2 (UTF-16)");
            }

            MarcHeaderStruct header = new MarcHeaderStruct(baMARC);

            // Growable accumulators: they start empty (never null), so a record without
            // any usable field still produces a well-formed result, and appending does
            // not re-copy everything gathered so far.
            List<byte> directory = new List<byte>();    // directory area
            List<byte> body      = new List<byte>();    // data area

            int nLen      = baMARC.Length;
            int nStartPos = 24;     // start of the current segment in baMARC
            int nFldStart = 0;      // start offset of the current content inside the data area

            for (;;)
            {
                int  nPos = ByteArray.IndexOf(baMARC, (byte)FLDEND, nStartPos);
                bool bEnd = (nPos == -1);

                // Segment length; it includes the FLDEND terminator when one was found.
                int nFldLen = bEnd ? nLen - nStartPos : nPos - nStartPos + 1;

                // Bytes available for the name, i.e. excluding the terminator. Requiring
                // 3 of them keeps a FLDEND byte from being copied into the directory as
                // part of a field name.
                int nNameBytes = bEnd ? nFldLen : nFldLen - 1;

                if (nNameBytes >= 3)
                {
                    int nContentLen = nFldLen - 3;

                    byte[] baFldName = new byte[3];
                    Array.Copy(baMARC, nStartPos, baFldName, 0, 3);

                    byte[] baFldContent = new byte[nContentLen];
                    Array.Copy(baMARC, nStartPos + 3, baFldContent, 0, nContentLen);

                    // Equivalent of "%04d" / "%05d" formatting.
                    string strFldLen   = Convert.ToString(nContentLen).PadLeft(4, '0');
                    string strFldStart = Convert.ToString(nFldStart).PadLeft(5, '0');

                    directory.AddRange(baFldName);
                    directory.AddRange(Encoding.UTF8.GetBytes(strFldLen));
                    directory.AddRange(Encoding.UTF8.GetBytes(strFldStart));

                    body.AddRange(baFldContent);

                    nFldStart += nContentLen;
                }

                if (bEnd)
                {
                    break;
                }
                nStartPos = nPos + 1;
            }

            // 24 header bytes + directory + FLDEND + data area + RECEND
            int nRecLen = directory.Count + 1
                          + body.Count + 1 + 24;

            header.RecLength   = nRecLen;
            header.BaseAddress = 24 + directory.Count + 1;

            List<byte> result = new List<byte>(nRecLen);
            result.AddRange(header.GetBytes());
            result.AddRange(directory);
            result.Add((byte)FLDEND);
            result.AddRange(body);
            result.Add((byte)RECEND);

            baResult = result.ToArray();
            return 0;
        }
Exemple #2
0
        // Convert an ISO 2709 record into an array of fields, driven by the directory.
        //
        // Each element of aResult is a byte[] holding one field. The first element is the
        // leader and is always 24 bytes (0 bytes in it are replaced with '*'). Every
        // following element is the 3-byte field name followed by the field content
        // without its trailing FLDEND.
        // parameters:
        //		encoding	encoding passed to MarcHeaderStruct for reading the
        //				leader; must not be Encoding.Unicode
        //		s		the bytes of the ISO 2709 record
        //		aResult		[out] the fields; on error it may hold the leader and
        //				the fields converted before the error was detected
        //		strErrorInfo	[out] description of the problem when -2 is returned
        // return:
        //	-1	general error (reserved; not returned by the current code)
        //	-2	MARC format error
        //	>=24	success; 24 plus, for every directory entry processed, 3 plus the
        //		field length stated in the directory
        // exceptions:
        //	Exception	encoding is Encoding.Unicode
        public static int Cvt2709ToFieldArray(
            Encoding encoding,         // 2007/7/11
            byte[] s,
            out List <byte[]> aResult, // out
            out string strErrorInfo)
        {
            strErrorInfo = "";
            aResult      = new List <byte[]>();

            // About 2000K; caps the amount of data converted, to guard against attacks.
            int maxbytes = 2000000;

            if (encoding.Equals(Encoding.Unicode) == true)
            {
                throw new Exception("UCS2编码方式应当使用 ForceCvt2709ToFieldArray(),而不是 Cvt2709ToFieldArray()");
            }

            MarcHeaderStruct header = new MarcHeaderStruct(encoding, s);

            {
                // Output the leader.
                byte[] tarray = new byte[24];
                Array.Copy(s, 0, tarray, 0, 24);

                // 2014/5/9
                // Guard against 0 bytes appearing in the leader.
                for (int j = 0; j < tarray.Length; j++)
                {
                    if (tarray[j] == 0)
                    {
                        tarray[j] = (byte)'*';
                    }
                }

                aResult.Add(tarray);
            }

            int somaxlen;
            int reclen, baseaddr, lenoffld, startposoffld;
            int len, startpos;
            int offs = 0;
            int t    = 0;
            int i;

            somaxlen = s.Length;

            // ---- record length (leader positions read through header.RecLength) ----
            try
            {
                reclen = header.RecLength;
            }
            catch (FormatException ex)
            {
                strErrorInfo = "头标区开始5字符 '" + header.RecLengthString + "' 不是纯数字 :" + ex.Message;
                goto ERROR2;
            }
            if (reclen > somaxlen)
            {
                strErrorInfo = "头标区头5字符表示的记录长度"
                               + Convert.ToString(reclen)
                               + "大于源缓冲区整个内容的长度"
                               + Convert.ToString(somaxlen);
                goto ERROR2;
            }
            if (reclen < 24)
            {
                strErrorInfo = "头标区头5字符表示的记录长度"
                               + Convert.ToString(reclen)
                               + "小于24";
                goto ERROR2;
            }

            // The record must end with RECEND at the position the leader claims ...
            if (s[reclen - 1] != RECEND)
            {
                strErrorInfo = "头标区声称的结束位置不是MARC记录结束符";
                goto ERROR2;  // wrong terminator
            }

            // ... and must not contain RECEND anywhere before that position.
            for (i = 0; i < reclen - 1; i++)
            {
                if (s[i] == RECEND)
                {
                    strErrorInfo = "记录内容中不能有记录结束符";
                    goto ERROR2;
                }
            }

            // ---- base address of data ----
            try
            {
                baseaddr = header.BaseAddress;
            }
            catch (FormatException ex)
            {
                strErrorInfo = "头标区数据基地址5字符 '" + header.BaseAddressString + " '不是纯数字 :" + ex.Message;
                goto ERROR2;
            }

            if (baseaddr > somaxlen)
            {
                strErrorInfo = "数据基地址值 "
                               + Convert.ToString(baseaddr)
                               + " 已经超出源缓冲区整个内容的长度 "
                               + Convert.ToString(somaxlen);
                goto ERROR2;
            }
            if (baseaddr <= 24)
            {
                strErrorInfo = "数据基地址值 "
                               + Convert.ToString(baseaddr)
                               + " 小于24";
                goto ERROR2;  // base address too small
            }
            // The byte just before the data area must be the FLDEND closing the directory.
            if (s[baseaddr - 1] != FLDEND)
            {
                strErrorInfo = "没有在目次区尾部位置" + Convert.ToString(baseaddr) + "找到FLDEND符号";
                goto ERROR2;
            }

            // ---- widths of the directory entry parts ----
            try
            {
                lenoffld = header.WidthOfFieldLength;
            }
            catch (FormatException ex)
            {
                strErrorInfo = "头标区目次区字段长度1字符 '" + header.WidthOfFieldLengthString + " '不是纯数字 :" + ex.Message;
                goto ERROR2;
            }

            try
            {
                startposoffld = header.WidthOfStartPositionOfField;
            }
            catch (FormatException ex)
            {
                strErrorInfo = "头标区目次区字段起始位置1字符 '" + header.WidthOfStartPositionOfFieldString + " '不是纯数字 :" + ex.Message;
                goto ERROR2;
            }

            if (lenoffld <= 0 || lenoffld > 30)
            {
                strErrorInfo = "目次区中字段长度值占用字符数 "
                               + Convert.ToString(lenoffld)
                               + " 不正确,应在1和29之间...";
                goto ERROR2;
            }

            if (lenoffld != 4)
            {   // 2001/5/15
                strErrorInfo = "目次区中字段长度值占用字符数 "
                               + Convert.ToString(lenoffld)
                               + " 不正确,应为4...";
                goto ERROR2;
            }

            lenoffld = 4;
            if (startposoffld <= 0 || startposoffld > 30)
            {
                strErrorInfo = "目次区中字段起始位置值占用字符数 "
                               + Convert.ToString(startposoffld)
                               + " 不正确,应在1到29之间...";
                goto ERROR2;
            }

            // Whatever in-range width the leader states, a 5-character width is used.
            startposoffld = 5;

            // ---- walk the directory ----
            // offs is the read position inside the directory; t counts converted bytes.
            t    = 24;
            offs = 24;
            MyByteList baField = null;

            for (;;)
            {
                if (s[offs] == FLDEND)
                {
                    break;  // end of the directory
                }
                // Field name.
                if (offs + 3 >= baseaddr)
                {
                    break;
                }
                if (t + 3 >= maxbytes)
                {
                    break;
                }

                baField = new MyByteList();
                baField.AddRange(s, offs, 3);
                t += 3;

                // Field length.
                offs += 3;
                if (offs + lenoffld >= baseaddr)
                {
                    break;
                }
                try
                {
                    len = MarcHeaderStruct.IntValue(s, offs, lenoffld);
                }
                catch (FormatException ex)
                {
                    // NOTE(review): assumes IntValue reports non-digit input with
                    // FormatException, like the header properties above — confirm.
                    strErrorInfo = "目次区中字段长度不是纯数字 :" + ex.Message;
                    goto ERROR2;
                }

                // Start position of the field content, relative to baseaddr.
                offs += lenoffld;
                if (offs + startposoffld >= baseaddr)
                {
                    break;
                }
                try
                {
                    startpos = MarcHeaderStruct.IntValue(s, offs, startposoffld);
                }
                catch (FormatException ex)
                {
                    // NOTE(review): same assumption about IntValue as above.
                    strErrorInfo = "目次区中字段起始位置不是纯数字 :" + ex.Message;
                    goto ERROR2;
                }

                offs += startposoffld;
                if (offs >= baseaddr)
                {
                    break;
                }

                // Field content.
                if (t + len >= maxbytes)
                {
                    break;
                }

                // len and startpos come straight from the input. Make sure the range
                // they describe lies inside the buffer before indexing into it, so a
                // malformed directory yields -2 rather than an out-of-range exception.
                if (len < 0 || startpos < 0
                    || baseaddr + startpos + len > somaxlen)
                {
                    strErrorInfo = "目次区中字段起始位置 "
                                   + Convert.ToString(startpos)
                                   + " 或字段长度 "
                                   + Convert.ToString(len)
                                   + " 超出源缓冲区整个内容的长度 "
                                   + Convert.ToString(somaxlen);
                    goto ERROR2;
                }

                // The byte before the content must be a FLDEND (end of the directory
                // or of the preceding field) ...
                if (s[baseaddr + startpos - 1] != FLDEND)
                {
                    strErrorInfo = "缺乏字段结束符";
                    goto ERROR2;
                }

                // ... and the content itself must end with a FLDEND.
                if (s[baseaddr + startpos + len - 1] != FLDEND)
                {
                    strErrorInfo = "缺乏字段结束符";
                    goto ERROR2;
                }

                // Copy the content without its trailing FLDEND.
                baField.AddRange(s, baseaddr + startpos, len == 0 ? len : len - 1);
                t += len;

                aResult.Add(baField.GetByteArray());
                baField = null;
            }

            if (t + 1 >= maxbytes)
            {
                strErrorInfo = "记录太大";
                goto ERROR2;  // not enough room in the target
            }

            Debug.Assert(t != -2, "");
            return t;

ERROR2:
            // For debugging.
            Debug.Assert(false, "");
            return -2;  // MARC format error
        }
Exemple #3
0
        // Forcibly convert an ISO 2709 record into an array of fields.
        //
        // The algorithm ignores the addresses and lengths stored in the directory and
        // takes only the 3-character field names from it. The bytes after the directory
        // are then split at FLDEND and appended, in order, to those names; when there
        // are more contents than names, the extra fields are named "???".
        //
        // Each element of aResult is a byte[] holding one field. The first element is the
        // header as returned by header.GetByteList() (per the original note, always
        // 24 bytes); it is present even when the input holds no field data.
        // parameters:
        //		encoding	[in,out] encoding of s. When it is Encoding.Unicode the
        //				record is re-encoded as UTF-8 for processing and this
        //				argument is changed to Encoding.UTF8
        //		s		the bytes of the ISO 2709 record
        //		aResult		[out] the fields
        //		strErrorInfo	[out] always an empty string in the current code
        // return:
        //	-1	general error (reserved; not returned by the current code)
        //	-2	MARC format error (reserved; not returned by the current code)
        //	0	succeed
        public static int ForceCvt2709ToFieldArray(
            ref Encoding encoding,  // 2007/7/11 may be changed inside the function
            byte[] s,
            out List <byte[]> aResult,
            out string strErrorInfo)
        {
            strErrorInfo = "";
            aResult      = new List <byte[]>();

            Debug.Assert(s != null, "");

            List <MyByteList> results = new List <MyByteList>();

            if (encoding.Equals(Encoding.Unicode) == true)
            {
                // Switch to UTF-8 for processing.
                string strRecord = encoding.GetString(s);

                s        = Encoding.UTF8.GetBytes(strRecord);
                encoding = Encoding.UTF8;
            }

            MarcHeaderStruct header = null;

            try
            {
                header = new MarcHeaderStruct(encoding, s);
            }
            catch (ArgumentException)
            {
                // Be tolerant of input shorter than 24 characters: use a placeholder header.
                header = new MarcHeaderStruct(Encoding.ASCII, Encoding.ASCII.GetBytes("012345678901234567890123"));
            }
            header.ForceUNIMARCHeader();        // force certain positions to their default values

            results.Add(header.GetByteList());

            int somaxlen = s.Length;

            // ---- directory: collect the field names ----
            // Entries are stepped over as 3 (name) + 4 (length) + 5 (start) bytes.
            int offs = 24;

            for (;;)
            {
                // Look for a FLDEND inside the current entry.
                bool bFound = false;
                int  j;
                for (j = offs; j < offs + 3 + 4 + 5; j++)
                {
                    if (j >= somaxlen)
                    {
                        break;
                    }
                    if (s[j] == FLDEND)
                    {
                        bFound = true;
                        break;
                    }
                }

                // Ran off the end of the buffer: nothing more to read.
                if (j >= somaxlen)
                {
                    offs = j;
                    break;
                }

                // FLDEND within the first 3 bytes (or right after them): there is no
                // further name to take; the data area starts after the FLDEND.
                if (bFound == true && j <= offs + 3)
                {
                    offs = j + 1;
                    break;
                }

                // Store the field name as a new field.
                MyByteList baName = new MyByteList();
                baName.AddRange(s, offs, 3);
                results.Add(baName);

                // FLDEND inside the rest of the entry: this was the last entry.
                if (bFound == true)
                {
                    offs = j + 1;
                    break;
                }

                offs += 3 + 4 + 5;
            }

            // ---- data area: append contents to the names, in order ----
            // When offs is already at the end of the buffer this loop does nothing and
            // the header and names gathered so far are still returned below.
            int        nFieldNumber = 1;    // index into results; 0 is the header
            MyByteList baField      = null;

            for (; offs < somaxlen; offs++)
            {
                byte c = s[offs];
                if (c == RECEND)
                {
                    break;
                }
                if (c == FLDEND)
                {
                    nFieldNumber++;
                    baField = null;
                    continue;
                }

                if (baField == null)
                {
                    // Make sure the index is in range: add placeholder names as needed.
                    while (nFieldNumber >= results.Count)
                    {
                        MyByteList temp = new MyByteList();
                        temp.Add((byte)'?');
                        temp.Add((byte)'?');
                        temp.Add((byte)'?');
                        results.Add(temp);
                    }
                    baField = results[nFieldNumber];
                }

                baField.Add(c);
            }

            foreach (MyByteList list in results)
            {
                aResult.Add(list.GetByteArray());
            }

            return 0;
        }