/// <summary>
/// Build a new sequence on top of an already-loaded content byte array.
/// <para />
/// Every byte of the array (indexes 0 through length-1) is treated as
/// content: the line map is computed over the range
/// <c>[0, input.Length)</c> and one hash per line is derived from it.
/// </summary>
/// <param name="input">
/// the content array. The reference is stored as-is (no copy is made).
/// The array is never modified, so passing through cached arrays is safe.
/// </param>
internal RawText(byte[] input)
{
    // Order matters: the line map is built from the stored content, and
    // the hashes are computed from the stored content plus the line map.
    this.content = input;
    this.lines = RawParseUtils.lineMap(this.content, 0, this.content.Length);
    this.hashes = computeHashes();
}
/// <summary>
/// Index the region between <c>[ptr, end)</c> to find line starts.
/// <para />
/// The returned list is 1 indexed. Index 0 contains
/// <see cref="int.MinValue"/> to pad the list out.
/// <para />
/// Using a 1 indexed list means that line numbers can be directly accessed
/// from the list, so <c>list.get(1)</c> (aka get line 1) returns
/// <c>ptr</c>.
/// <para />
/// The last element (index <c>map.size()-1</c>) always contains
/// <c>end</c>.
/// </summary>
/// <param name="buf">buffer to scan.</param>
/// <param name="ptr">
/// position within the buffer corresponding to the first byte of line 1.
/// </param>
/// <param name="end">
/// 1 past the end of the content within <c>buf</c>.
/// </param>
/// <returns>a line map indexing the start position of each line.</returns>
internal static IntList lineMap(byte[] buf, int ptr, int end)
{
    // Experimentally derived from multiple source repositories: the
    // average number of bytes per line is 36. It is only a rough guess,
    // used to size the map close to its final length up front.
    IntList lineStarts = new IntList((end - ptr) / 36);

    // Slot 0 is padding so that line N lives at index N.
    lineStarts.fillTo(1, int.MinValue);

    // Record the current position, then let nextLF move on to the
    // position from which the following line is recorded.
    while (ptr < end)
    {
        lineStarts.add(ptr);
        ptr = nextLF(buf, ptr);
    }

    // Terminal entry: one past the end of the scanned content.
    lineStarts.add(end);
    return lineStarts;
}
/// <summary>
/// Compute one hash per line of <c>content</c>, laid out in parallel with
/// the <c>lines</c> map.
/// <para />
/// Index 0 of the result holds 0, mirroring the padding slot at index 0 of
/// <c>lines</c>. Each line number from 1 up to (but excluding) the final
/// entry of <c>lines</c> receives <c>HashLine(content, start, next)</c>,
/// where <c>start</c> and <c>next</c> are consecutive entries of
/// <c>lines</c>. A trailing 0 is appended for the final entry of
/// <c>lines</c>, so both lists end up with the same number of elements.
/// </summary>
/// <returns>the list of per-line hashes, indexed by line number.</returns>
private IntList computeHashes()
{
    // The final entry of the line map is excluded from hashing: it only
    // serves as the upper bound of the last hashed line.
    int lastEntry = lines.size() - 1;

    IntList lineHashes = new IntList(lines.size());
    lineHashes.add(0); // placeholder matching the padding slot at index 0

    int lineNo = 1;
    while (lineNo < lastEntry)
    {
        int lineStart = lines.get(lineNo);
        int lineEnd = lines.get(lineNo + 1);
        lineHashes.add(HashLine(content, lineStart, lineEnd));
        lineNo++;
    }

    lineHashes.add(0); // placeholder matching the final line-map entry
    return lineHashes;
}