// Hands nodeIn to fst.addNode and wraps the returned value in a fresh
// CompiledNode. When the byte position of `bytes` changed during that call,
// the returned value is also stored in lastFrozenNode. nodeIn is cleared
// before returning. tailLength is not read by this implementation.
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength)
{
    long positionBefore = bytes.getPosition();

    //TODO: deduphash
    long address = fst.addNode(this, nodeIn);
    Debug.Assert(address != -2);

    long positionAfter = bytes.getPosition();
    if (positionAfter != positionBefore)
    {
        // The FST added a new node:
        Debug.Assert(positionAfter > positionBefore);
        lastFrozenNode = address;
    }

    nodeIn.clear();

    return new CompiledNode { node = address };
}
// Serializes a new node by appending its bytes to the end
// of the current byte[].
//
// A node without arcs writes nothing and yields FINAL_END_NODE or
// NON_FINAL_END_NODE depending on nodeIn.isFinal. Otherwise every arc is
// written to builder.bytes as: one flags byte, the label, and (unless the
// target is a stop node or BIT_TARGET_NEXT is set) the target as a VLong.
// Arcs carrying an output or a final output are not supported yet and raise
// NotImplementedException. The return value is the byte position of the last
// byte written for the node, after builder.bytes.reverse has been applied to
// the node's byte range.
public long addNode(Builder<T> builder, UnCompiledNode<T> nodeIn)
{
    T noOutput = outputs.getNoOutput();

    if (nodeIn.numArcs == 0)
    {
        return nodeIn.isFinal ? FINAL_END_NODE : NON_FINAL_END_NODE;
    }

    long nodeStart = builder.bytes.getPosition();

    bool fixedLength = shouldExpandNodeWithFixedLengthArcs(builder, nodeIn);
    if (fixedLength && builder.numBytesPerArc.Length < nodeIn.numArcs)
    {
        // Grow both per-arc scratch arrays together so they stay the same length.
        builder.numBytesPerArc = new int[ArrayUtil.oversize(nodeIn.numArcs, 4)];
        builder.numLabelBytesPerArc = new int[builder.numBytesPerArc.Length];
    }

    builder.arcCount += nodeIn.numArcs;

    int finalArcIndex = nodeIn.numArcs - 1;
    long arcStart = builder.bytes.getPosition();
    int widestArc = 0;
    int widestArcSansLabel = 0;

    for (int i = 0; i < nodeIn.numArcs; i++)
    {
        Arc<T> arc = nodeIn.arcs[i];
        CompiledNode target = (CompiledNode)arc.target;
        int flags = 0;

        if (i == finalArcIndex)
        {
            flags += BIT_LAST_ARC;
        }

        if (!fixedLength && builder.lastFrozenNode == target.node)
        {
            // TODO: for better perf (but more RAM used) we
            // could avoid this except when arc is "near" the
            // last arc:
            flags += BIT_TARGET_NEXT;
        }

        if (!arc.isFinal)
        {
            Debug.Assert(noOutput.Equals(arc.nextFinalOutput));
        }
        else
        {
            flags += BIT_FINAL_ARC;
            if (!noOutput.Equals(arc.nextFinalOutput))
            {
                flags += BIT_ARC_HAS_FINAL_OUTPUT;
            }
        }

        bool targetHasArcs = target.node > 0;
        if (!targetHasArcs)
        {
            flags += BIT_STOP_NODE;
        }

        if (!noOutput.Equals(arc.output))
        {
            flags += BIT_ARC_HAS_OUTPUT;
        }

        // Flags byte first, then the label; the label's byte count is measured
        // from the position difference around writeLabel.
        builder.bytes.writeByte((byte)flags);
        long labelStart = builder.bytes.getPosition();
        writeLabel(builder.bytes, arc.label);
        int labelBytes = (int)(builder.bytes.getPosition() - labelStart);

        if (!noOutput.Equals(arc.output))
        {
            //TODO: outputs.write(arc.output, builder.bytes);
            throw new NotImplementedException();
        }

        if (!noOutput.Equals(arc.nextFinalOutput))
        {
            //TODO: outputs.writeFinalOutput(arc.nextFinalOutput, builder.bytes);
            throw new NotImplementedException();
        }

        // The target is written only when it has arcs and BIT_TARGET_NEXT is not set.
        if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0)
        {
            Debug.Assert(target.node > 0);
            builder.bytes.writeVLong(target.node);
        }

        // just write the arcs "like normal" on first pass, but record how many bytes each one took
        // and max byte size:
        if (fixedLength)
        {
            int arcBytes = (int)(builder.bytes.getPosition() - arcStart);
            builder.numBytesPerArc[i] = arcBytes;
            builder.numLabelBytesPerArc[i] = labelBytes;
            arcStart = builder.bytes.getPosition();
            widestArc = Math.Max(widestArc, arcBytes);
            widestArcSansLabel = Math.Max(widestArcSansLabel, arcBytes - labelBytes);
        }
    }

    if (fixedLength)
    {
        Debug.Assert(widestArc > 0);

        // 2nd pass just "expands" all arcs to take up a fixed byte size
        int labelRange = nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label + 1;
        Debug.Assert(labelRange > 0);

        if (shouldExpandNodeWithDirectAddressing(builder, nodeIn, widestArc, widestArcSansLabel, labelRange))
        {
            //writeNodeForDirectAddressing(builder, nodeIn, startAddress, maxBytesPerArcWithoutLabel, labelRange);
            //builder.directAddressingNodeCount++;
            throw new NotImplementedException();
        }

        writeNodeForBinarySearch(builder, nodeIn, nodeStart, widestArc);
        builder.binarySearchNodeCount++;
    }

    long nodeAddress = builder.bytes.getPosition() - 1;
    builder.bytes.reverse(nodeStart, nodeAddress);
    builder.nodeCount++;
    return nodeAddress;
}