/// <summary>
/// Builds a piece descriptor from three consecutive little-endian fields in
/// <paramref name="buf"/>: a short (descriptor) at <paramref name="offset"/>,
/// an int (fc) immediately after it, and a short wrapped in a
/// <c>PropertyModifier</c> (prm) after that.
/// </summary>
/// <param name="buf">Buffer holding the encoded descriptor.</param>
/// <param name="offset">Index in <paramref name="buf"/> of the first field.</param>
public PieceDescriptor(byte[] buf, int offset)
{
    // Bit of the raw fc that selects the encoding branch below.
    const int encodingFlag = 0x40000000;

    int fcOffset = offset + LittleEndianConsts.SHORT_SIZE;
    int prmOffset = fcOffset + LittleEndianConsts.INT_SIZE;

    descriptor = LittleEndian.GetShort(buf, offset);
    fc = LittleEndian.GetInt(buf, fcOffset);
    prm = new PropertyModifier(LittleEndian.GetShort(buf, prmOffset));

    // see if this piece uses unicode: it does when the flag bit is clear.
    bool flagIsClear = (fc & encodingFlag) == 0;
    unicode = flagIsClear;
    if (!flagIsClear)
    {
        // Strip the flag and halve the remainder; this gives the FC in the doc stream.
        fc &= ~encodingFlag;
        fc /= 2;
    }
}
/// <summary>
/// Compares this modifier with <paramref name="obj"/>. They are equal when they are
/// the same instance, or when <paramref name="obj"/> is non-null, has exactly the
/// same runtime type, and carries the same <c>value</c>.
/// </summary>
/// <param name="obj">Object to compare against; may be null.</param>
/// <returns><c>true</c> when the two objects are considered equal.</returns>
public override bool Equals(Object obj)
{
    if (ReferenceEquals(this, obj))
    {
        return true;
    }

    bool sameRuntimeType = obj != null && this.GetType() == obj.GetType();
    if (!sameRuntimeType)
    {
        return false;
    }

    PropertyModifier that = (PropertyModifier)obj;
    return this.value == that.value;
}
/// <summary>
/// Rebuilds <c>_textRuns</c> so that it holds one CHPX per interval between
/// consecutive CHPX boundaries.
/// <para>
/// Steps: (1) when <paramref name="complexFileTable"/> is given, append a CHPX for
/// every text piece whose PRM points at a grpprl containing a CHP sprm; (2) collect
/// the distinct non-zero start/end positions of all runs; (3) for each interval
/// between consecutive boundaries, reuse the single exactly-matching run or build a
/// new run by appending the grpprls of all overlapping runs in their original list
/// order; (4) merge adjacent runs whose grpprls are identical.
/// </para>
/// </summary>
/// <param name="complexFileTable">
/// Source of fast-saved sprms; may be null, in which case step (1) is skipped.
/// </param>
public void Rebuild(ComplexFileTable complexFileTable)
{
    // Timings are taken in ticks and converted to milliseconds when logged.
    long start = DateTime.Now.Ticks;

    if (complexFileTable != null)
    {
        SprmBuffer[] sprmBuffers = complexFileTable.GetGrpprls();

        // adding CHPX from fast-saved SPRMs
        foreach (TextPiece textPiece in complexFileTable.GetTextPieceTable().TextPieces)
        {
            PropertyModifier prm = textPiece.PieceDescriptor.Prm;
            if (!prm.IsComplex())
            {
                continue;
            }

            int igrpprl = prm.GetIgrpprl();
            if (igrpprl < 0 || igrpprl >= sprmBuffers.Length)
            {
                logger.Log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }

            bool hasChp = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            // HasNext() is the loop condition, so a buffer without any CHP sprm
            // ends the scan instead of calling Next() past the last operation.
            for (SprmIterator iterator = sprmBuffer.Iterator(); iterator.HasNext();)
            {
                SprmOperation sprmOperation = iterator.Next();
                if (sprmOperation.Type == SprmOperation.TYPE_CHP)
                {
                    hasChp = true;
                    break;
                }
            }

            if (hasChp)
            {
                SprmBuffer newSprmBuffer = (SprmBuffer)sprmBuffer.Clone();
                CHPX chpx = new CHPX(textPiece.Start, textPiece.End, newSprmBuffer);
                _textRuns.Add(chpx);
            }
        }

        logger.Log(POILogger.DEBUG, "Merged with CHPX from complex file table in ",
            (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
            _textRuns.Count, " elements in total)");
        start = DateTime.Now.Ticks;
    }

    List<CHPX> oldChpxSortedByStartPos = new List<CHPX>(_textRuns);
    oldChpxSortedByStartPos.Sort((IComparer<CHPX>)PropertyNode.CHPXComparator.instance);

    logger.Log(POILogger.DEBUG, "CHPX sorted by start position in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
    start = DateTime.Now.Ticks;

    // Remember each run's position in the current list so that overlapping runs
    // can later be applied in that order.
    Dictionary<CHPX, int> chpxToFileOrder = new Dictionary<CHPX, int>();
    int counter = 0;
    foreach (CHPX chpx in _textRuns)
    {
        // Indexer instead of Add: Add throws if two runs compare equal as keys.
        chpxToFileOrder[chpx] = counter++;
    }

    logger.Log(POILogger.DEBUG, "CHPX's order map created in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
    start = DateTime.Now.Ticks;

    // Gather every start/end position, then keep each distinct non-zero value once.
    // Without de-duplication a repeated boundary b would yield an empty interval
    // [b; b) and a spurious zero-length CHPX.
    List<int> allBoundaries = new List<int>();
    foreach (CHPX chpx in _textRuns)
    {
        allBoundaries.Add(chpx.Start);
        allBoundaries.Add(chpx.End);
    }
    allBoundaries.Sort();

    List<int> textRunsBoundariesList = new List<int>(allBoundaries.Count);
    foreach (int candidate in allBoundaries)
    {
        if (candidate == 0)
        {
            continue;
        }

        int lastKept = textRunsBoundariesList.Count - 1;
        if (lastKept >= 0 && textRunsBoundariesList[lastKept] == candidate)
        {
            continue;
        }

        textRunsBoundariesList.Add(candidate);
    }

    logger.Log(POILogger.DEBUG, "Texts CHPX boundaries collected in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
    start = DateTime.Now.Ticks;

    List<CHPX> newChpxs = new List<CHPX>();
    int lastTextRunStart = 0;
    foreach (int boundary in textRunsBoundariesList)
    {
        int startInclusive = lastTextRunStart;
        int endExclusive = boundary;
        lastTextRunStart = endExclusive;

        // Find where to start scanning the start-sorted list: take the search
        // result, clamp it into range, then step back while the run at that
        // position starts at or after the boundary.
        int startPosition = BinarySearch(oldChpxSortedByStartPos, boundary);
        startPosition = Math.Abs(startPosition);
        while (startPosition >= oldChpxSortedByStartPos.Count)
        {
            startPosition--;
        }
        while (startPosition > 0
            && oldChpxSortedByStartPos[startPosition].Start >= boundary)
        {
            startPosition--;
        }

        // Collect every run that overlaps [startInclusive; endExclusive).
        List<CHPX> chpxs = new List<CHPX>();
        for (int c = startPosition; c < oldChpxSortedByStartPos.Count; c++)
        {
            CHPX chpx = oldChpxSortedByStartPos[c];
            if (boundary < chpx.Start)
            {
                break;
            }

            int left = Math.Max(startInclusive, chpx.Start);
            int right = Math.Min(endExclusive, chpx.End);
            if (left < right)
            {
                chpxs.Add(chpx);
            }
        }

        if (chpxs.Count == 0)
        {
            logger.Log(POILogger.WARN, "Text piece [", startInclusive, "; ",
                endExclusive, ") has no CHPX. Creating new one.");
            // create it manually
            CHPX chpx = new CHPX(startInclusive, endExclusive, new SprmBuffer(0));
            newChpxs.Add(chpx);
            continue;
        }

        if (chpxs.Count == 1)
        {
            // can we reuse existing?
            CHPX existing = chpxs[0];
            if (existing.Start == startInclusive && existing.End == endExclusive)
            {
                newChpxs.Add(existing);
                continue;
            }
        }

        // Restore the original list order before concatenating the grpprls.
        CHPXToFileComparer chpxFileOrderComparator = new CHPXToFileComparer(chpxToFileOrder);
        chpxs.Sort(chpxFileOrderComparator);

        SprmBuffer sprmBuffer = new SprmBuffer(0);
        foreach (CHPX chpx in chpxs)
        {
            sprmBuffer.Append(chpx.GetGrpprl(), 0);
        }

        CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
        newChpxs.Add(newChpx);
    }
    this._textRuns = new List<CHPX>(newChpxs);

    logger.Log(POILogger.DEBUG, "CHPX rebuilded in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
        _textRuns.Count, " elements)");
    start = DateTime.Now.Ticks;

    // Compact: walk backwards over adjacent pairs and fold the later run into the
    // earlier one when they touch and carry identical grpprls. Walking backwards
    // keeps the remaining indices valid after RemoveAt, and a folded run is
    // compared again with its own predecessor on the next step.
    for (int index = _textRuns.Count - 1; index > 0; index--)
    {
        CHPX previous = _textRuns[index - 1];
        CHPX current = _textRuns[index];

        if (previous.End == current.Start
            && Arrays.Equals(previous.GetGrpprl(), current.GetGrpprl()))
        {
            previous.End = current.End;
            _textRuns.RemoveAt(index);
        }
    }

    logger.Log(POILogger.DEBUG, "CHPX compacted in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
        _textRuns.Count, " elements)");
}
/// <summary>
/// Rebuilds <c>_paragraphs</c> so that it holds one PAPX per paragraph found in
/// <paramref name="docText"/>.
/// <para>
/// Steps: (1) when <paramref name="complexFileTable"/> is given, append a PAPX for
/// every text piece whose PRM points at a grpprl containing a PAP sprm; (2) sort a
/// copy of the list with <c>PropertyNode.PAPXComparator</c>; (3) scan the text and,
/// at every character with code 13, 7 or 12, close a paragraph
/// [previous end; index + 1) and give it the PAPX entries consumed up to that
/// character: the single exactly-matching entry is reused, several entries are
/// merged in their original list order, and an empty PAPX is created when there
/// is none.
/// </para>
/// </summary>
/// <param name="docText">Document text that is scanned for paragraph ends.</param>
/// <param name="complexFileTable">
/// Source of fast-saved sprms; may be null, in which case step (1) is skipped.
/// </param>
public void Rebuild(StringBuilder docText, ComplexFileTable complexFileTable)
{
    // Timings are taken in ticks and converted to milliseconds when logged.
    long start = DateTime.Now.Ticks;

    if (complexFileTable != null)
    {
        SprmBuffer[] sprmBuffers = complexFileTable.GetGrpprls();

        // adding PAPX from fast-saved SPRMs
        foreach (TextPiece textPiece in complexFileTable.GetTextPieceTable().TextPieces)
        {
            PropertyModifier prm = textPiece.PieceDescriptor.Prm;
            if (!prm.IsComplex())
            {
                continue;
            }

            int igrpprl = prm.GetIgrpprl();
            if (igrpprl < 0 || igrpprl >= sprmBuffers.Length)
            {
                logger.Log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
                continue;
            }

            bool hasPap = false;
            SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
            for (SprmIterator iterator = sprmBuffer.Iterator(); iterator.HasNext();)
            {
                SprmOperation sprmOperation = iterator.Next();
                if (sprmOperation.Type == SprmOperation.TYPE_PAP)
                {
                    hasPap = true;
                    break;
                }
            }

            if (hasPap)
            {
                SprmBuffer newSprmBuffer = new SprmBuffer(2);
                newSprmBuffer.Append(sprmBuffer.ToByteArray());

                PAPX papx = new PAPX(textPiece.Start, textPiece.End, newSprmBuffer);
                _paragraphs.Add(papx);
            }
        }

        logger.Log(POILogger.DEBUG, "Merged (?) with PAPX from complex file table in ",
            (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
            _paragraphs.Count, " elements in total)");
        start = DateTime.Now.Ticks;
    }

    List<PAPX> oldPapxSortedByEndPos = new List<PAPX>(_paragraphs);
    oldPapxSortedByEndPos.Sort((IComparer<PAPX>)PropertyNode.PAPXComparator.instance);

    logger.Log(POILogger.DEBUG, "PAPX sorted by end position in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
    start = DateTime.Now.Ticks;

    // Remember each entry's position in the current list so that merged entries
    // can later be applied in that order.
    Dictionary<PAPX, int> papxToFileOrder = new Dictionary<PAPX, int>();
    int counter = 0;
    foreach (PAPX papx in _paragraphs)
    {
        papxToFileOrder[papx] = counter++;
    }

    logger.Log(POILogger.DEBUG, "PAPX's order map created in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms");
    start = DateTime.Now.Ticks;

    List<PAPX> newPapxs = new List<PAPX>();
    int lastParStart = 0;
    int lastPapxIndex = 0;

    for (int charIndex = 0; charIndex < docText.Length; charIndex++)
    {
        char c = docText[charIndex];
        // Only characters 13, 7 and 12 close a paragraph.
        if (c != 13 && c != 7 && c != 12)
        {
            continue;
        }

        int startInclusive = lastParStart;
        int endExclusive = charIndex + 1;

        // Consume sorted entries until one ends beyond the current character.
        bool broken = false;
        List<PAPX> papxs = new List<PAPX>();
        for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos.Count; papxIndex++)
        {
            PAPX papx = oldPapxSortedByEndPos[papxIndex];

            if (papx.End - 1 > charIndex)
            {
                lastPapxIndex = papxIndex;
                broken = true;
                break;
            }

            papxs.Add(papx);
        }
        if (!broken)
        {
            // All entries were consumed: resume from the last one next time.
            // Clamp to 0 so an empty list never produces index -1, which would
            // throw on the next paragraph.
            lastPapxIndex = Math.Max(0, oldPapxSortedByEndPos.Count - 1);
        }

        if (papxs.Count == 0)
        {
            logger.Log(POILogger.WARN, "Paragraph [", startInclusive, "; ",
                endExclusive, ") has no PAPX. Creating new one.");
            // create it manually
            PAPX papx = new PAPX(startInclusive, endExclusive, new SprmBuffer(2));
            newPapxs.Add(papx);

            lastParStart = endExclusive;
            continue;
        }

        if (papxs.Count == 1)
        {
            // can we reuse existing?
            PAPX existing = papxs[0];
            if (existing.Start == startInclusive && existing.End == endExclusive)
            {
                newPapxs.Add(existing);
                lastParStart = endExclusive;
                continue;
            }
        }

        PAPXToFileComparer papxFileOrderComparator = new PAPXToFileComparer(papxToFileOrder);
        // restore file order of PAPX
        papxs.Sort(papxFileOrderComparator);

        // The first entry's buffer is cloned; the grpprl of every further entry
        // is appended to that clone.
        SprmBuffer sprmBuffer = null;
        foreach (PAPX papx in papxs)
        {
            if (sprmBuffer == null)
            {
                sprmBuffer = (SprmBuffer)papx.GetSprmBuf().Clone();
            }
            else
            {
                sprmBuffer.Append(papx.GetGrpprl(), 2);
            }
        }

        PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
        newPapxs.Add(newPapx);

        lastParStart = endExclusive;
    }
    this._paragraphs = new List<PAPX>(newPapxs);

    logger.Log(POILogger.DEBUG, "PAPX rebuilded from document text in ",
        (DateTime.Now.Ticks - start) / TimeSpan.TicksPerMillisecond, " ms (",
        _paragraphs.Count, " elements)");
}