Пример #1
0
        /// <summary>
        /// Reads a piece descriptor from <paramref name="buf"/>: a short descriptor value,
        /// followed by an int file position (<c>fc</c>), followed by a short that is wrapped
        /// in a <see cref="PropertyModifier"/>.
        /// </summary>
        /// <param name="buf">Buffer holding the little-endian encoded descriptor.</param>
        /// <param name="offset">Index in <paramref name="buf"/> where the descriptor starts.</param>
        public PieceDescriptor(byte[] buf, int offset)
        {
            // Positions of the second and third fields relative to the first one.
            int fcOffset  = offset + LittleEndianConsts.SHORT_SIZE;
            int prmOffset = fcOffset + LittleEndianConsts.INT_SIZE;

            descriptor = LittleEndian.GetShort(buf, offset);
            fc         = LittleEndian.GetInt(buf, fcOffset);
            prm        = new PropertyModifier(LittleEndian.GetShort(buf, prmOffset));

            // The piece is treated as unicode when bit 0x40000000 of fc is clear.
            unicode = (fc & 0x40000000) == 0;
            if (!unicode)
            {
                // Strip the flag bit and halve the value to get the FC in the doc stream.
                fc &= ~(0x40000000);
                fc /= 2;
            }
        }
Пример #2
0
        /// <summary>
        /// Builds a piece descriptor by decoding three consecutive little-endian fields
        /// from <paramref name="buf"/>: a short descriptor, an int <c>fc</c> and a short
        /// used to create the <see cref="PropertyModifier"/>.
        /// </summary>
        /// <param name="buf">Source bytes.</param>
        /// <param name="offset">Position of the first field inside <paramref name="buf"/>.</param>
        public PieceDescriptor(byte[] buf, int offset)
        {
            int cursor = offset;

            descriptor = LittleEndian.GetShort(buf, cursor);
            cursor += LittleEndianConsts.SHORT_SIZE;

            fc = LittleEndian.GetInt(buf, cursor);
            cursor += LittleEndianConsts.INT_SIZE;

            prm = new PropertyModifier(LittleEndian.GetShort(buf, cursor));

            // see if this piece uses unicode: it does when bit 0x40000000 is not set.
            bool flagBitSet = (fc & 0x40000000) != 0;
            unicode = !flagBitSet;

            if (flagBitSet)
            {
                // Remove the flag bit, then halve: this yields the FC in the doc stream.
                fc &= ~(0x40000000);
                fc /= 2;
            }
        }
Пример #3
0
        /// <summary>
        /// Compares this instance with <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">Object to compare against; may be null.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="obj"/> is this very instance, or is an object of
        /// exactly the same runtime type whose <c>value</c> field matches; otherwise <c>false</c>.
        /// </returns>
        public override bool Equals(Object obj)
        {
            // Same reference: trivially equal.
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Null or a different runtime type can never be equal.
            if (obj == null || this.GetType() != obj.GetType())
            {
                return false;
            }

            PropertyModifier that = (PropertyModifier)obj;
            return value == that.value;
        }
Пример #4
0
        /// <summary>
        /// Rebuilds <c>_textRuns</c> so that it holds one CHPX per range delimited by the
        /// start/end boundaries of the current runs, then merges adjacent runs whose
        /// grpprl bytes are equal.
        /// </summary>
        /// <remarks>
        /// Steps performed:
        /// 1. When <paramref name="complexFileTable"/> is given, a CHPX is appended for every
        ///    text piece whose PRM is complex and whose referenced grpprl contains at least
        ///    one sprm of type <c>SprmOperation.TYPE_CHP</c>.
        /// 2. All run boundaries are collected, sorted and de-duplicated (zero is dropped).
        /// 3. For each range between consecutive boundaries the overlapping CHPX are found;
        ///    a single exactly matching CHPX is reused, several are concatenated in their
        ///    original list order, and a range without any CHPX gets an empty one.
        /// 4. Neighbouring runs with identical grpprl are merged.
        /// </remarks>
        /// <param name="complexFileTable">
        /// Source of additional sprm buffers and text pieces; may be null, in which case
        /// step 1 is skipped.
        /// </param>
        public void Rebuild(ComplexFileTable complexFileTable)
        {
            long start = DateTime.Now.Ticks;

            if (complexFileTable != null)
            {
                SprmBuffer[] sprmBuffers = complexFileTable.GetGrpprls();

                // adding CHPX from fast-saved SPRMs
                foreach (TextPiece textPiece in complexFileTable.GetTextPieceTable()
                         .TextPieces)
                {
                    PropertyModifier prm = textPiece.PieceDescriptor.Prm;
                    if (!prm.IsComplex())
                    {
                        continue;
                    }
                    int igrpprl = prm.GetIgrpprl();

                    if (igrpprl < 0 || igrpprl >= sprmBuffers.Length)
                    {
                        logger.Log(POILogger.WARN, textPiece
                                   + "'s PRM references to unknown grpprl");
                        continue;
                    }

                    bool       hasChp     = false;
                    SprmBuffer sprmBuffer = sprmBuffers[igrpprl];

                    // HasNext() has to be the loop condition: with an empty condition the
                    // loop would keep calling Next() past the end of a buffer that holds
                    // no CHP sprm.
                    for (SprmIterator iterator = sprmBuffer.Iterator(); iterator.HasNext();)
                    {
                        SprmOperation sprmOperation = iterator.Next();
                        if (sprmOperation.Type == SprmOperation.TYPE_CHP)
                        {
                            hasChp = true;
                            break;
                        }
                    }

                    if (hasChp)
                    {
                        // Clone so the new CHPX does not share the table's buffer instance.
                        SprmBuffer newSprmBuffer = (SprmBuffer)sprmBuffer.Clone();

                        CHPX chpx = new CHPX(textPiece.Start,
                                             textPiece.End, newSprmBuffer);
                        _textRuns.Add(chpx);
                    }
                }
                logger.Log(POILogger.DEBUG,
                           "Merged with CHPX from complex file table in ",
                           (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond,
                           " ms (", _textRuns.Count,
                           " elements in total)");
                start = DateTime.Now.Ticks;
            }

            List <CHPX> oldChpxSortedByStartPos = new List <CHPX>(_textRuns);

            oldChpxSortedByStartPos.Sort(
                (IComparer <CHPX>)PropertyNode.CHPXComparator.instance);

            logger.Log(POILogger.DEBUG, "CHPX sorted by start position in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
            start = DateTime.Now.Ticks;

            // Remember the position of every CHPX in the current list so that overlapping
            // runs can later be applied in that order.
            Dictionary <CHPX, int> chpxToFileOrder = new Dictionary <CHPX, int>();

            int counter = 0;

            foreach (CHPX chpx in _textRuns)
            {
                // Indexer instead of Add(): Add() throws on a duplicate key.
                // NOTE(review): whether two distinct CHPX can compare equal depends on
                // CHPX.Equals/GetHashCode, which are not visible here.
                chpxToFileOrder[chpx] = counter++;
            }


            logger.Log(POILogger.DEBUG, "CHPX's order map created in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
            start = DateTime.Now.Ticks;

            // Gather all start/end positions, sort them, then keep each distinct non-zero
            // value once. Duplicates must not survive: a repeated boundary would yield an
            // empty range [x; x) and a spurious empty CHPX further below.
            List <int> allBoundaries = new List <int>(_textRuns.Count * 2);

            foreach (CHPX chpx in _textRuns)
            {
                allBoundaries.Add(chpx.Start);
                allBoundaries.Add(chpx.End);
            }
            allBoundaries.Sort();

            List <int> textRunsBoundariesList = new List <int>(allBoundaries.Count);

            foreach (int candidate in allBoundaries)
            {
                if (candidate == 0)
                {
                    continue;
                }

                int keptCount = textRunsBoundariesList.Count;
                if (keptCount > 0 && textRunsBoundariesList[keptCount - 1] == candidate)
                {
                    continue;
                }
                textRunsBoundariesList.Add(candidate);
            }


            logger.Log(POILogger.DEBUG, "Texts CHPX boundaries collected in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
            start = DateTime.Now.Ticks;

            List <CHPX> newChpxs         = new List <CHPX>();
            int         lastTextRunStart = 0;

            // The comparer only depends on the order map, so one instance serves all ranges.
            CHPXToFileComparer chpxFileOrderComparator = new CHPXToFileComparer(chpxToFileOrder);

            foreach (int boundary in textRunsBoundariesList)
            {
                int startInclusive = lastTextRunStart;
                int endExclusive   = boundary;
                lastTextRunStart = endExclusive;

                // Find where to start scanning the start-sorted list, then step back to
                // the last CHPX that starts before the boundary.
                int startPosition = BinarySearch(oldChpxSortedByStartPos, boundary);
                startPosition = Math.Abs(startPosition);
                while (startPosition >= oldChpxSortedByStartPos.Count)
                {
                    startPosition--;
                }
                while (startPosition > 0 &&
                       oldChpxSortedByStartPos[startPosition].Start >= boundary)
                {
                    startPosition--;
                }

                List <CHPX> chpxs = new List <CHPX>();
                for (int c = startPosition; c < oldChpxSortedByStartPos.Count; c++)
                {
                    CHPX chpx = oldChpxSortedByStartPos[c];

                    if (boundary < chpx.Start)
                    {
                        break;
                    }

                    // Keep the CHPX only if it overlaps [startInclusive; endExclusive).
                    int left  = Math.Max(startInclusive, chpx.Start);
                    int right = Math.Min(endExclusive, chpx.End);

                    if (left < right)
                    {
                        chpxs.Add(chpx);
                    }
                }

                if (chpxs.Count == 0)
                {
                    logger.Log(POILogger.WARN, "Text piece [",
                               startInclusive, "; ",
                               endExclusive,
                               ") has no CHPX. Creating new one.");
                    // create it manually
                    CHPX chpx = new CHPX(startInclusive, endExclusive,
                                         new SprmBuffer(0));
                    newChpxs.Add(chpx);
                    continue;
                }

                if (chpxs.Count == 1)
                {
                    // can we reuse existing?
                    CHPX existing = chpxs[0];
                    if (existing.Start == startInclusive &&
                        existing.End == endExclusive)
                    {
                        newChpxs.Add(existing);
                        continue;
                    }
                }

                // Restore the original list order before concatenating the grpprls.
                chpxs.Sort(chpxFileOrderComparator);

                SprmBuffer sprmBuffer = new SprmBuffer(0);
                foreach (CHPX chpx in chpxs)
                {
                    sprmBuffer.Append(chpx.GetGrpprl(), 0);
                }
                CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
                newChpxs.Add(newChpx);
            }
            this._textRuns = new List <CHPX>(newChpxs);

            logger.Log(POILogger.DEBUG, "CHPX rebuilded in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
                       _textRuns.Count, " elements)");
            start = DateTime.Now.Ticks;

            // Compact: walk backwards over each adjacent pair and fold the later run into
            // the earlier one when they touch and carry identical grpprl bytes. Going
            // backwards keeps the indices of not-yet-visited elements stable on removal.
            for (int i = _textRuns.Count - 2; i >= 0; i--)
            {
                CHPX earlier = _textRuns[i];
                CHPX later   = _textRuns[i + 1];

                if (earlier.End == later.Start &&
                    Arrays
                    .Equals(earlier.GetGrpprl(), later.GetGrpprl()))
                {
                    earlier.End = later.End;
                    // Remove by index: avoids an equality-based search of the list.
                    _textRuns.RemoveAt(i + 1);
                }
            }

            logger.Log(POILogger.DEBUG, "CHPX compacted in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
                       _textRuns.Count, " elements)");
        }
Пример #5
0
        /// <summary>
        /// Rebuilds <c>_paragraphs</c> from the document text: one PAPX is produced for every
        /// range that ends at a paragraph-terminating character.
        /// </summary>
        /// <remarks>
        /// Steps performed:
        /// 1. When <paramref name="complexFileTable"/> is given, a PAPX is appended for every
        ///    text piece whose PRM is complex and whose referenced grpprl contains at least
        ///    one sprm of type <c>SprmOperation.TYPE_PAP</c>.
        /// 2. The PAPX are sorted with <c>PropertyNode.PAPXComparator</c> and their position
        ///    in the current list is recorded.
        /// 3. <paramref name="docText"/> is scanned; at every character with code 13, 7 or 12
        ///    a range ending right after that character is closed. PAPX whose end lies
        ///    within the scanned text are collected for it: an exactly matching single PAPX
        ///    is reused, several are concatenated in original list order, and a range with
        ///    none gets an empty PAPX.
        /// Text after the last terminator character does not produce a PAPX.
        /// </remarks>
        /// <param name="docText">Document text that is scanned for paragraph terminators.</param>
        /// <param name="complexFileTable">
        /// Source of additional sprm buffers and text pieces; may be null, in which case
        /// step 1 is skipped.
        /// </param>
        public void Rebuild(StringBuilder docText,
                            ComplexFileTable complexFileTable)
        {
            long start = DateTime.Now.Ticks;

            if (complexFileTable != null)
            {
                SprmBuffer[] sprmBuffers = complexFileTable.GetGrpprls();

                // adding PAPX from fast-saved SPRMs
                foreach (TextPiece textPiece in complexFileTable.GetTextPieceTable()
                         .TextPieces)
                {
                    PropertyModifier prm = textPiece.PieceDescriptor.Prm;
                    if (!prm.IsComplex())
                    {
                        continue;
                    }
                    int igrpprl = prm.GetIgrpprl();

                    if (igrpprl < 0 || igrpprl >= sprmBuffers.Length)
                    {
                        logger.Log(POILogger.WARN, textPiece
                                   + "'s PRM references to unknown grpprl");
                        continue;
                    }

                    bool       hasPap     = false;
                    SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
                    for (SprmIterator iterator = sprmBuffer.Iterator(); iterator
                         .HasNext();)
                    {
                        SprmOperation sprmOperation = iterator.Next();
                        if (sprmOperation.Type == SprmOperation.TYPE_PAP)
                        {
                            hasPap = true;
                            break;
                        }
                    }

                    if (hasPap)
                    {
                        // Copy the sprm bytes into a fresh buffer for the new PAPX.
                        SprmBuffer newSprmBuffer = new SprmBuffer(2);
                        newSprmBuffer.Append(sprmBuffer.ToByteArray());

                        PAPX papx = new PAPX(textPiece.Start,
                                             textPiece.End, newSprmBuffer);
                        _paragraphs.Add(papx);
                    }
                }

                logger.Log(POILogger.DEBUG,
                           "Merged (?) with PAPX from complex file table in ",
                           (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond,
                           " ms (", _paragraphs.Count,
                           " elements in total)");
                start = DateTime.Now.Ticks;
            }

            List <PAPX> oldPapxSortedByEndPos = new List <PAPX>(_paragraphs);

            oldPapxSortedByEndPos.Sort(
                (IComparer <PAPX>)PropertyNode.PAPXComparator.instance);

            logger.Log(POILogger.DEBUG, "PAPX sorted by end position in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
            start = DateTime.Now.Ticks;

            // Remember the position of every PAPX in the current list so that the PAPX of
            // one paragraph can later be applied in that order.
            Dictionary <PAPX, int> papxToFileOrder = new Dictionary <PAPX, int>();
            int counter = 0;

            foreach (PAPX papx in _paragraphs)
            {
                papxToFileOrder[papx] = counter++;
            }

            logger.Log(POILogger.DEBUG, "PAPX's order map created in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
            start = DateTime.Now.Ticks;

            List <PAPX> newPapxs      = new List <PAPX>();
            int         lastParStart  = 0;
            int         lastPapxIndex = 0;

            // The comparer only depends on the order map, so one instance serves all paragraphs.
            PAPXToFileComparer papxFileOrderComparator = new PAPXToFileComparer(papxToFileOrder);

            for (int charIndex = 0; charIndex < docText.Length; charIndex++)
            {
                // Only characters 13, 7 and 12 close a paragraph range.
                char c = docText[charIndex];
                if (c != 13 && c != 7 && c != 12)
                {
                    continue;
                }

                int startInclusive = lastParStart;
                int endExclusive   = charIndex + 1;

                bool        broken = false;
                List <PAPX> papxs  = new List <PAPX>();
                for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
                     .Count; papxIndex++)
                {
                    broken = false;
                    PAPX papx = oldPapxSortedByEndPos[papxIndex];


                    // First PAPX that ends beyond the current character: resume the scan
                    // from it for the next paragraph.
                    if (papx.End - 1 > charIndex)
                    {
                        lastPapxIndex = papxIndex;
                        broken        = true;
                        break;
                    }

                    papxs.Add(papx);
                }
                if (!broken)
                {
                    // All PAPX consumed: keep pointing at the last one. Clamp to zero so an
                    // empty PAPX list does not produce index -1, which would make the next
                    // paragraph's scan throw on oldPapxSortedByEndPos[-1].
                    lastPapxIndex = Math.Max(0, oldPapxSortedByEndPos.Count - 1);
                }

                if (papxs.Count == 0)
                {
                    logger.Log(POILogger.WARN, "Paragraph [",
                               startInclusive, "; ",
                               endExclusive,
                               ") has no PAPX. Creating new one.");
                    // create it manually
                    PAPX papx = new PAPX(startInclusive, endExclusive,
                                         new SprmBuffer(2));
                    newPapxs.Add(papx);

                    lastParStart = endExclusive;
                    continue;
                }

                if (papxs.Count == 1)
                {
                    // can we reuse existing?
                    PAPX existing = papxs[0];
                    if (existing.Start == startInclusive &&
                        existing.End == endExclusive)
                    {
                        newPapxs.Add(existing);
                        lastParStart = endExclusive;
                        continue;
                    }
                }

                // restore file order of PAPX
                papxs.Sort(papxFileOrderComparator);

                // The first PAPX seeds the buffer (cloned); the grpprl of every further
                // PAPX is appended to it.
                SprmBuffer sprmBuffer = null;
                foreach (PAPX papx in papxs)
                {
                    if (sprmBuffer == null)
                    {
                        sprmBuffer = (SprmBuffer)papx.GetSprmBuf().Clone();
                    }

                    else
                    {
                        sprmBuffer.Append(papx.GetGrpprl(), 2);
                    }
                }
                PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
                newPapxs.Add(newPapx);

                lastParStart = endExclusive;
            }
            this._paragraphs = new List <PAPX>(newPapxs);

            logger.Log(POILogger.DEBUG, "PAPX rebuilded from document text in ",
                       (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
                       _paragraphs.Count, " elements)");
        }