/// <summary>
/// Loads the index from the three files that share the base name
/// <paramref name="sa_name"/>: ".structs", ".psi" and ".samples".
/// </summary>
/// <param name="sa_name">Base path; the three extensions are appended to it.</param>
/// <param name="seq_builder">Builder for the Psi sequence. When null, SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (16, 63) is used.</param>
/// <param name="bitmap_builder">When not null, the bitmap read from ".structs" is re-encoded with this builder.</param>
public void Build (string sa_name, SequenceBuilder seq_builder = null, BitmapFromBitStream bitmap_builder = null)
{
    if (seq_builder == null) {
        seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (16, 63);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
        this.newF = RankSelectGenericIO.Load (Input);
        if (bitmap_builder != null) {
            // Copy the loaded bitmap bit by bit so it can be rebuilt with the caller's builder.
            var newF_stream = new BitStream32 ();
            for (int i = 0; i < this.newF.Count; ++i) {
                newF_stream.Write (this.newF.Access (i));
            }
            this.newF = bitmap_builder (new FakeBitmap (newF_stream));
        }
        // charT holds one entry per set bit of newF.
        int len = this.newF.Count1;
        this.charT = new int[len];
        PrimitiveIO<int>.ReadFromFile (Input, len, this.charT);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".psi"))) {
        int seqlen = this.newF.Count;
        var seq = new int[seqlen];
        // Scratch list reused for every symbol; it is cleared before each read.
        var L = new List<int> (this.N / this.Sigma + 1);
        int curr = 0;
        for (int i = 1; i <= this.AlphabetSize; i++) {
            // The run of symbol i ends where the next set bit of newF starts
            // (or at the end of the bitmap for the last symbol).
            int next;
            if (i == this.AlphabetSize) {
                next = this.newF.Count;
            } else {
                next = this.newF.Select1 (i + 1);
            }
            int len = next - curr;
            L.Clear ();
            PrimitiveIO<int>.ReadFromFile (Input, len, L);
            for (int j = 0; j < len; ++j) {
                var x = L [j];
                try {
                    seq [x] = i - 1;
                } catch (Exception) {
                    Console.WriteLine ("== i: {0}, j: {1}, x: {2}, seq-count: {3}, len: {4}", i, j, x, seq.Length, len);
                    // Plain rethrow keeps the original stack trace ("throw e;" would reset it).
                    throw;
                }
            }
            curr = next;
        }
        this.SeqPsi = seq_builder (seq, this.AlphabetSize);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
        this.SA_sample_step = Input.ReadInt16 ();
        this.SA_marked = RankSelectGenericIO.Load (Input);
        var _samples = new ListIFS ();
        _samples.Load (Input);
        var _invsamples = new ListIFS ();
        _invsamples.Load (Input);
        this.SA_samples = _samples;
        this.SA_invsamples = _invsamples;
    }
}
/// <summary>
/// Builds the structure over <paramref name="seq"/> by cutting it into
/// consecutive blocks of <paramref name="sigma"/> items. Each block gets one
/// permutation, and two bitmaps (B and X) record the per-symbol counts.
/// </summary>
/// <param name="seq">Input sequence.</param>
/// <param name="sigma">Alphabet size; also used as the block length.</param>
/// <param name="perm_builder">Builds the permutation stored for every block.</param>
/// <param name="bitmap_builder">Builds both the B and the X bitmaps.</param>
public void Build (IList<int> seq, int sigma, PermutationBuilder perm_builder, BitmapFromBitStream bitmap_builder)
{
    // NOTE: Please check sigma <=> BlockSize in this method
    this.sigma = sigma;
    this.n = seq.Count;
    var blockBits = new BitStream32 ();
    var symbolBits = new BitStream32[sigma];
    var buckets = new List<int>[sigma];
    for (int c = 0; c < sigma; c++) {
        symbolBits [c] = new BitStream32 ();
        buckets [c] = new List<int> ();
    }
    int num_blocks = (int)Math.Ceiling (this.n * 1.0 / this.sigma);
    this.perms = new List<IPermutation> (num_blocks);
    int start = 0;
    while (start < this.n) {
        // A 1 on every per-symbol stream marks the beginning of a block.
        foreach (var stream in symbolBits) {
            stream.Write (true);
        }
        // The last block may be shorter than sigma.
        int size = Math.Min (this.n - start, this.sigma);
        // NOTE(review): presumably refills the buckets with this block's positions per symbol — confirm.
        this.BuildPermInvIndex (seq, start, size, buckets);
        var blockPerm = new List<int> (size);
        for (int c = 0; c < this.sigma; c++) {
            var bucket = buckets [c];
            int occ = bucket.Count;
            // B: a 0 per symbol followed by one 1 per occurrence inside the block.
            blockBits.Write (false);
            if (occ <= 0) {
                continue;
            }
            // X (stream of symbol c): one 0 per occurrence inside the block.
            symbolBits [c].Write (false, occ);
            blockBits.Write (true, occ);
            blockPerm.AddRange (bucket);
        }
        this.perms.Add (perm_builder (blockPerm));
        start += this.sigma;
    }
    // Concatenate all per-symbol streams onto the first one.
    var merged = symbolBits [0];
    for (int c = 1; c < symbolBits.Length; c++) {
        var current = symbolBits [c];
        // This could be done one integer at a time instead of one bit at a time.
        for (int bit = 0; bit < current.CountBits; bit++) {
            merged.Write (current [bit]);
        }
    }
    // Terminators (a final 1 on X, a final 0 on B); per the original note they simplify the code.
    merged.Write (true);
    blockBits.Write (false);
    this.B = bitmap_builder (new FakeBitmap (blockBits));
    this.X = bitmap_builder (new FakeBitmap (merged));
    this.compute_num_blocks ();
}
/// <summary>
/// Returns a permutation builder that produces CyclicPerms_MRRR instances.
/// The permutation is stored with a ListIDiffs list builder and the second
/// list handed to Build uses a ListIFS list builder.
/// </summary>
/// <param name="t">Forwarded to CyclicPerms_MRRR.Build as its second argument.</param>
/// <param name="bsize">Forwarded to ListIBuilders.GetListIDiffs.</param>
/// <param name="marks_builder">Forwarded to ListIBuilders.GetListIDiffs.</param>
/// <param name="encoder">Forwarded to ListIBuilders.GetListIDiffs.</param>
public static PermutationBuilder GetCyclicPermsListIDiffs (int t, short bsize, BitmapFromBitStream marks_builder = null, IIEncoder32 encoder = null)
{
    return perm => {
        var cyclic = new CyclicPerms_MRRR ();
        // Both list builders are created on every invocation of the returned delegate.
        var diffsBuilder = ListIBuilders.GetListIDiffs (bsize, marks_builder, encoder);
        var ifsBuilder = ListIBuilders.GetListIFS ();
        cyclic.Build (perm, t, diffsBuilder, ifsBuilder);
        return cyclic;
    };
}
/// <summary>
/// Builds the graph from a text edge list: one "start end" pair per line,
/// separated by tabs or spaces. Lines starting with '#' and blank lines are skipped.
/// </summary>
/// <param name="filename">Path of the edge-list file.</param>
/// <param name="seqbuilder">Builds SEQ from the list of end nodes.</param>
/// <param name="bitmapbuilder">Builds LENS; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
public void BuildWebGraph (string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
{
    if (bitmapbuilder == null) {
        bitmapbuilder = BitmapBuilders.GetGGMN_wt (12);
    }
    var len_stream = new BitStream32 ();
    var seq = new List<int> ();
    int prev_context = -1;
    var separators = new char[] { '\t', ' ' };
    using (var Input = File.OpenText (filename)) {
        string line;
        int lineno = 0;
        int counterlineno = 0;
        while (true) {
            // Progress report: one entry every 10000 lines, ten entries per console row.
            if (lineno % 10000 == 0) {
                if (counterlineno % 10 == 0) {
                    Console.WriteLine ();
                    Console.Write ("Processing lines: ");
                }
                ++counterlineno;
                Console.Write ("{0}, ", lineno);
            }
            ++lineno;
            line = Input.ReadLine ();
            if (line == null) {
                break;
            }
            // Ordinal comparison: the marker is a plain character, not culture-dependent text.
            if (line.StartsWith ("#", StringComparison.Ordinal)) {
                continue;
            }
            // Dropping empty entries tolerates repeated separators between the two fields.
            var link = line.Split (separators, StringSplitOptions.RemoveEmptyEntries);
            if (link.Length == 0) {
                // Blank line: nothing to parse.
                continue;
            }
            var start_node = int.Parse (link [0]);
            var end_node = int.Parse (link [1]);
            // on webgraph format, starting nodes are already sorted, just advance and count
            if (start_node != prev_context) {
                // One 1 per node id advanced, so skipped ids still get their own marker.
                for (int diffcount = start_node - prev_context; diffcount > 0; --diffcount) {
                    len_stream.Write (true);
                }
                prev_context = start_node;
            }
            len_stream.Write (false);
            seq.Add (end_node);
        }
        // a simple hack simplifying direct-neighbors's retrieval
        len_stream.Write (true);
    }
    this.SEQ = seqbuilder (seq, prev_context + 1);
    this.LENS = bitmapbuilder (new FakeBitmap (len_stream));
}
/// <summary>
/// Returns a sequence builder that produces GolynskiMunroRaoSeq instances.
/// </summary>
/// <param name="cyclic_perm_t">Forwarded to GolynskiMunroRaoSeq.Build as its last argument.</param>
/// <param name="bitmap_builder">Bitmap builder handed to Build; when null, BitmapBuilders.GetGGMN_wt (16) is used.</param>
public static SequenceBuilder GetGolynski (int cyclic_perm_t, BitmapFromBitStream bitmap_builder = null)
{
    var bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt (16);
    return (seq, sigma) => {
        var golynski = new GolynskiMunroRaoSeq ();
        golynski.Build (seq, sigma, bitmaps, cyclic_perm_t);
        return golynski;
    };
}
/// <summary>
/// Builds the plain sequence index: the sequence itself is stored through
/// <paramref name="list_builder"/> and a bitmap X records, per symbol, a 1 at
/// every block start and a 0 for every occurrence of that symbol.
/// </summary>
/// <param name="seq">Input sequence; every item is used as an index in [0, sigma).</param>
/// <param name="sigma">Alphabet size.</param>
/// <param name="B">Block length; values &lt;= 0 select (short)sigma.</param>
/// <param name="list_builder">Builds SEQ; when null, ListIBuilders.GetListIFS () is used.</param>
/// <param name="bitmap_builder">Builds X; when null, BitmapBuilders.GetGGMN_wt (16) is used.</param>
public void Build (IList<int> seq, int sigma, short B = 0, ListIBuilder list_builder = null, BitmapFromBitStream bitmap_builder = null)
{
    if (list_builder == null) {
        list_builder = ListIBuilders.GetListIFS ();
    }
    if (bitmap_builder == null) {
        bitmap_builder = BitmapBuilders.GetGGMN_wt (16);
    }
    if (B <= 0) {
        // NOTE(review): the cast wraps around when sigma > short.MaxValue — confirm callers never pass that.
        B = (short)sigma;
    }
    this.sigma = sigma;
    this.B = B;
    // Allocate every per-symbol stream up front. Allocating lazily at i == 0 left
    // them null for an empty sequence, which made the merge below dereference null.
    var S = new BitStream32[sigma];
    for (int c = 0; c < sigma; ++c) {
        S [c] = new BitStream32 ();
    }
    int n = seq.Count;
    for (int i = 0; i < n; ++i) {
        if (i % this.B == 0) {
            // Block boundary: every symbol stream receives a 1.
            for (int c = 0; c < sigma; ++c) {
                S [c].Write (true);
            }
        }
        var sym = seq [i];
        S [sym].Write (false);
    }
    // Concatenate all symbol streams, in symbol order, onto the first one.
    var ostream = S [0];
    for (int c = 1; c < sigma; ++c) {
        var istream = S [c];
        for (int i = 0; i < istream.CountBits; ++i) {
            ostream.Write (istream [i]);
        }
    }
    this.X = bitmap_builder (new FakeBitmap (ostream));
    this.SEQ = list_builder (seq, sigma);
}
/// <summary>
/// Builds the index with a counting sort: PERM lists the positions of
/// <paramref name="seq"/> grouped by symbol, and LENS encodes how many
/// positions belong to each symbol.
/// </summary>
/// <param name="seq">Input sequence; every item is used as an index in [0, sigma).</param>
/// <param name="sigma">Alphabet size.</param>
/// <param name="perm_builder">Builds PERM from the sorted positions.</param>
/// <param name="bitmap_builder">Builds LENS from the lengths bitmap.</param>
public void Build (IList<int> seq, int sigma, PermutationBuilder perm_builder, BitmapFromBitStream bitmap_builder)
{
    // Histogram shifted by one slot, so that the prefix sums below
    // give the first output position of every symbol.
    var offsets = new int[sigma];
    foreach (var symbol in seq) {
        int slot = symbol + 1;
        if (slot < sigma) {
            offsets [slot]++;
        }
    }
    int running = 0;
    for (int c = 0; c < sigma; c++) {
        running += offsets [c];
        offsets [c] = running;
    }
    // Scatter the positions; afterwards offsets[c] is the end of symbol c's range.
    int total = seq.Count;
    var sorted = new int[total];
    for (int position = 0; position < total; position++) {
        int symbol = seq [position];
        int target = offsets [symbol];
        sorted [target] = position;
        offsets [symbol] = target + 1;
    }
    // Lengths bitmap: for every symbol a 1 followed by one 0 per occurrence.
    var lengthBits = new BitStream32 ();
    int previousEnd = 0;
    for (int c = 0; c < offsets.Length; c++) {
        int end = offsets [c];
        int occurrences = end - previousEnd;
        previousEnd = end;
        lengthBits.Write (true);
        lengthBits.Write (false, occurrences);
    }
    // One extra trailing 1; the original note says it simplifies the source code.
    lengthBits.Write (true);
    this.LENS = bitmap_builder (new FakeBitmap (lengthBits));
    this.PERM = perm_builder (sorted);
}
/// <summary>
/// Builds the database from in-memory items: all bytes are concatenated into
/// DATA and LENS marks where every item starts.
/// </summary>
/// <param name="out_filename">Stored as the Name of the database.</param>
/// <param name="data_list">Items to store; none of them may be empty.</param>
/// <param name="len_builder">Builds LENS; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
/// <exception cref="ArgumentException">An item of <paramref name="data_list"/> is empty.</exception>
public void Build (string out_filename, IList<IList<byte>> data_list, BitmapFromBitStream len_builder = null)
{
    this.Name = out_filename;
    int counter = 0;
    var data_stream = new List<byte> ();
    var lens_stream = new BitStream32 ();
    foreach (var data in data_list) {
        ++counter;
        if (counter % 1000 == 0) {
            Console.WriteLine ("*** Processing docid {0}/{1} (adv: {2:0.000}%)", counter, data_list.Count, counter * 100.0 / data_list.Count);
        }
        // An item is encoded as a 1 followed by (length - 1) zeros, which cannot
        // represent an empty item: it would ask for -1 zeros. Reject it instead.
        if (data.Count == 0) {
            throw new ArgumentException (String.Format ("Data items must not be empty: item {0}", counter));
        }
        lens_stream.Write (true);
        lens_stream.Write (false, data.Count - 1);
        data_stream.AddRange (data);
    }
    // Trailing 1 closing the last item.
    lens_stream.Write (true);
    if (len_builder == null) {
        len_builder = BitmapBuilders.GetGGMN_wt (12);
    }
    this.LENS = len_builder (new FakeBitmap (lens_stream));
    this.DATA = data_stream;
}
/// <summary>
/// Read the database from a listing file (one filename per line).
/// Every listed file is parsed and its bytes are appended to DATA; LENS marks
/// where every document starts. Empty lines of the listing are skipped.
/// </summary>
/// <param name="filename">Path of the listing file; also stored as Name.</param>
/// <param name="len_builder">Builds LENS; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
/// <exception cref="ArgumentException">A listed file parses to zero bytes.</exception>
public void Build (string filename, BitmapFromBitStream len_builder = null)
{
    Console.WriteLine ("****** Reading database from list of files");
    this.Name = filename;
    var NAMES = File.ReadAllLines (filename);
    int counter = 0;
    var data_stream = new List<byte> ();
    var lens_stream = new BitStream32 ();
    foreach (var s in NAMES) {
        // The counter follows listing lines, including the empty ones that are skipped.
        ++counter;
        if (s.Length == 0) {
            continue;
        }
        if (counter % 1000 == 0) {
            Console.WriteLine ("*** Processing docid {0}/{1} (adv: {2:0.000}%): '{3}'", counter, NAMES.Length, counter * 100.0 / NAMES.Length, s);
        }
        var data = (IList<byte>)this.Parse (s, true);
        if (data.Count == 0) {
            throw new ArgumentException (String.Format ("AFP files must not be empty: {0}", s));
        }
        // A document is encoded as a 1 followed by (length - 1) zeros.
        lens_stream.Write (true);
        lens_stream.Write (false, data.Count - 1);
        // AddRange grows the list geometrically. Bumping Capacity by exactly
        // data.Count reallocated and copied the whole buffer for every document.
        data_stream.AddRange (data);
    }
    // Trailing 1 closing the last document.
    lens_stream.Write (true);
    if (len_builder == null) {
        len_builder = BitmapBuilders.GetGGMN_wt (12);
    }
    this.LENS = len_builder (new FakeBitmap (lens_stream));
    this.DATA = data_stream;
}
/// <summary>
/// Builds the index for the sequence: one bitmap per symbol holding the
/// positions where it occurs, a Lens bitmap with the number of occurrences of
/// every symbol, and a permutation built from GetNotIdxPERM ().
/// </summary>
/// <param name="sequence">Input sequence; every item is used as an index in [0, alphabet_size).</param>
/// <param name="alphabet_size">Number of distinct symbols.</param>
/// <param name="t">Forwarded to ListGen_MRRR.Build as its second argument.</param>
/// <param name="rowbuilder">Builds every per-symbol bitmap; when null, BitmapBuilders.GetSArray () is used.</param>
/// <param name="lenbuilder">Builds Lens; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
public void Build (IList<int> sequence, int alphabet_size, int t = 16, BitmapFromList rowbuilder = null, BitmapFromBitStream lenbuilder = null)
{
    rowbuilder = rowbuilder ?? BitmapBuilders.GetSArray ();
    lenbuilder = lenbuilder ?? BitmapBuilders.GetGGMN_wt (12);
    // Posting list of positions for every symbol.
    var postings = new IList<int>[alphabet_size];
    for (int symbol = 0; symbol < alphabet_size; symbol++) {
        postings [symbol] = new List<int> ();
    }
    int position = 0;
    foreach (var symbol in sequence) {
        postings [symbol].Add (position);
        position++;
    }
    this.N = sequence.Count;
    this.InvIndex = new Bitmap[alphabet_size];
    var lengthBits = new BitStream32 ();
    for (int symbol = 0; symbol < alphabet_size; symbol++) {
        // Progress report: a new console row every 10000 symbols, an entry every 1000.
        if (symbol % 10000 == 0) {
            Console.WriteLine ();
            Console.Write ("*** InvIndexXLBSeq {0}/{1}", symbol, alphabet_size);
        } else if (symbol % 1000 == 0) {
            Console.Write (", {0}", symbol);
        }
        var row = postings [symbol];
        this.InvIndex [symbol] = rowbuilder (row);
        // Lens: a 1 per symbol followed by one 0 per occurrence.
        lengthBits.Write (true);
        lengthBits.Write (false, row.Count);
        // Drop the reference so the posting list can be collected early.
        postings [symbol] = null;
    }
    lengthBits.Write (true);
    Console.WriteLine ();
    Console.WriteLine ("done, now saving permutation and the Len bitmap");
    this.Lens = lenbuilder (new FakeBitmap (lengthBits));
    var permutation = new ListGen_MRRR ();
    permutation.Build (this.GetNotIdxPERM (), t, null);
    Console.WriteLine ("done");
    this.Perm = permutation;
}
/// <summary>
/// Returns a sequence builder that produces InvIndexXLBSeq instances.
/// </summary>
/// <param name="t">Forwarded to InvIndexXLBSeq.Build.</param>
/// <param name="row_builder">Per-symbol bitmap builder; when null, BitmapBuilders.GetSArray () is used.</param>
/// <param name="len_builder">Lengths bitmap builder; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
public static SequenceBuilder GetInvIndexXLBSeq (short t = 16, BitmapFromList row_builder = null, BitmapFromBitStream len_builder = null)
{
    var rows = row_builder ?? BitmapBuilders.GetSArray ();
    var lens = len_builder ?? BitmapBuilders.GetGGMN_wt (12);
    return (seq, sigma) => {
        var index = new InvIndexXLBSeq ();
        index.Build (seq, sigma, t, rows, lens);
        return index;
    };
}
/// <summary>
/// Returns a bitmap builder that produces SArray instances from a FakeBitmap.
/// The input is converted with CreateSortedList and its Count is passed as the
/// second argument of SArray.Build.
/// </summary>
/// <param name="H_builder">Forwarded to SArray.Build; may be null.</param>
public static BitmapFromBitStream GetSArray_wt (BitmapFromBitStream H_builder = null)
{
    return fake => {
        var sarray = new SArray ();
        sarray.Build (CreateSortedList (fake), fake.Count, H_builder);
        return sarray;
    };
}
/// <summary>
/// Builds the graph from <paramref name="filename"/>; simply forwards every
/// argument to BuildWebGraph.
/// </summary>
/// <param name="filename">Forwarded to BuildWebGraph.</param>
/// <param name="seqbuilder">Forwarded to BuildWebGraph.</param>
/// <param name="bitmapbuilder">Forwarded to BuildWebGraph; may be null.</param>
public void Build (string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
{
    BuildWebGraph (filename, seqbuilder, bitmapbuilder);
}
/// <summary>
/// Builds the structure from a list of positions: the low bits of every item
/// are appended to L and the remaining high part is written to the H bitmap.
/// </summary>
/// <param name="orderedList">Positions to store; NOTE(review): presumably sorted increasingly — confirm.</param>
/// <param name="n">Stored as N; must not be smaller than the number of items.</param>
/// <param name="numLowerBits">Bits per item handed to the ListIFS behind L; values below 1 become 1.</param>
/// <param name="H_builder">Builds H; when null, BitmapBuilders.GetDArray_wt (16, 32) is used.</param>
/// <exception cref="ArgumentOutOfRangeException">There are more items than <paramref name="n"/>.</exception>
public void Build (IList<long> orderedList, long n, byte numLowerBits, BitmapFromBitStream H_builder)
{
    int itemCount = orderedList.Count;
    this.N = n;
    if (itemCount > this.N) {
        Console.WriteLine ("XXXXX LastItem: {0}", orderedList [orderedList.Count - 1]);
        throw new ArgumentOutOfRangeException (String.Format ("SArray N < M, N: {0}, M: {1}", this.N, itemCount));
    }
    if (numLowerBits < 1) {
        numLowerBits = 1;
    }
    // NOTE(review): numLowerBits / 32 is an integer division, so this size hint is 0 below 32 bits — confirm intent.
    var lowerStream = new BitStream32 ((numLowerBits / 32) * itemCount);
    this.L = new ListIFS (numLowerBits, lowerStream);
    // Creating bitmaps
    // 2^ (log N - log N / M) = 2^ \log N M / N = M.
    // NOTE(review): Math.Log is the natural logarithm; numpart is only used for the size hint below.
    double ceilLogN = Math.Ceiling (Math.Log (this.N));
    double exponent = ceilLogN - this.GetNumLowerBits ();
    int numpart = (int)Math.Ceiling (Math.Pow (2, exponent));
    var upperStream = new BitStream32 (itemCount + (numpart / 32 + 1));
    long mask = this.get_mask ();
    int prevblock = -1;
    for (int i = 0; i < itemCount; i++) {
        long item = orderedList [i];
        this.L.Add ((int)(item & mask));
        int currentblock = (int)(item >> this.GetNumLowerBits ());
        // One 0 for every block advanced since the previous item, then a 1 for the item.
        for (; prevblock < currentblock; prevblock++) {
            upperStream.Write (false);
        }
        upperStream.Write (true);
    }
    // Trailing zeros, plus what the original calls "an additional technical zero".
    upperStream.Write (false, itemCount - prevblock);
    upperStream.Write (false);
    var builder = H_builder ?? BitmapBuilders.GetDArray_wt (16, 32);
    this.H = builder (new FakeBitmap (upperStream));
}
/// <summary>
/// Returns a SeqSinglePerm builder whose permutation comes from
/// PermutationBuilders.GetCyclicPermsListIDiffs.
/// </summary>
/// <param name="t">Forwarded to GetCyclicPermsListIDiffs.</param>
/// <param name="bsize">Forwarded to GetCyclicPermsListIDiffs.</param>
/// <param name="bitmap_builder">Forwarded to GetCyclicPermsListIDiffs only; GetSeqSinglePerm receives null for its own bitmap builder.</param>
/// <param name="encoder">Forwarded to GetCyclicPermsListIDiffs.</param>
public static SequenceBuilder GetSeqSinglePermListIDiffs (short t, short bsize = 16, BitmapFromBitStream bitmap_builder = null, IIEncoder32 encoder = null)
{
    return GetSeqSinglePerm (
        PermutationBuilders.GetCyclicPermsListIDiffs (t, bsize, bitmap_builder, encoder),
        null);
}
/// <summary>
/// Returns a sequence builder that produces SeqSinglePerm instances.
/// </summary>
/// <param name="perm_builder">Permutation builder; when null, PermutationBuilders.GetCyclicPermsListIDiffs (16, 63) is used.</param>
/// <param name="bitmap_builder">Bitmap builder; when null, BitmapBuilders.GetGGMN_wt (8) is used.</param>
public static SequenceBuilder GetSeqSinglePerm (PermutationBuilder perm_builder = null, BitmapFromBitStream bitmap_builder = null)
{
    var perms = perm_builder ?? PermutationBuilders.GetCyclicPermsListIDiffs (16, 63);
    var bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt (8);
    return (seq, sigma) => {
        var single = new SeqSinglePerm ();
        single.Build (seq, sigma, perms, bitmaps);
        return single;
    };
}
/// <summary>
/// Returns a SeqSinglePerm builder whose permutation comes from
/// PermutationBuilders.GetCyclicPermsListIFS.
/// </summary>
/// <param name="t">Forwarded to GetCyclicPermsListIFS.</param>
/// <param name="bitmap_builder">Forwarded to GetSeqSinglePerm; may be null.</param>
public static SequenceBuilder GetSeqSinglePermIFS (short t, BitmapFromBitStream bitmap_builder = null)
{
    return GetSeqSinglePerm (PermutationBuilders.GetCyclicPermsListIFS (t), bitmap_builder);
}
/// <summary>
/// Returns a GetSeqPlain builder that uses ListIBuilders.GetListEqRL () as its list builder.
/// </summary>
/// <param name="B">Forwarded to GetSeqPlain.</param>
/// <param name="bitmap_builder">Forwarded to GetSeqPlain; may be null.</param>
public static SequenceBuilder GetSeqPlainRL (short B = 0, BitmapFromBitStream bitmap_builder = null)
{
    var runLengthLists = ListIBuilders.GetListEqRL ();
    return GetSeqPlain (B, runLengthLists, bitmap_builder);
}
/// <summary>
/// Returns a sequence builder that produces SeqPlain instances, or
/// SeqPlainCopyOnUnravel instances when <paramref name="CopyOnUnravel"/> is true.
/// </summary>
/// <param name="B">Forwarded to Build.</param>
/// <param name="list_builder">Forwarded to Build; may be null.</param>
/// <param name="bitmap_builder">Forwarded to Build; may be null.</param>
/// <param name="CopyOnUnravel">Selects which of the two classes is instantiated.</param>
public static SequenceBuilder GetSeqPlain (short B = 0, ListIBuilder list_builder = null, BitmapFromBitStream bitmap_builder = null, bool CopyOnUnravel = false)
{
    return (seq, sigma) => {
        if (!CopyOnUnravel) {
            var plain = new SeqPlain ();
            plain.Build (seq, sigma, B, list_builder, bitmap_builder);
            return plain;
        }
        var copying = new SeqPlainCopyOnUnravel ();
        copying.Build (seq, sigma, B, list_builder, bitmap_builder);
        return copying;
    };
}
/// <summary>
/// Returns a list-based bitmap builder that produces SArray instances.
/// The list is handed to SArray.Build with 0 as its second argument.
/// </summary>
/// <param name="H_builder">Forwarded to SArray.Build; may be null.</param>
public static BitmapFromList GetSArray (BitmapFromBitStream H_builder = null)
{
    return L => {
        var sarray = new SArray ();
        sarray.Build (L, 0, H_builder);
        return sarray;
    };
}
/// <summary>
/// Builds the structure after choosing the number of lower bits with
/// Log_N_over_M, then delegates to the overload that takes it explicitly.
/// </summary>
/// <param name="orderedList">Positions to store.</param>
/// <param name="n">Universe size; 0 with a non-empty list means "last item + 1".</param>
/// <param name="H_builder">Forwarded to the full Build overload; may be null.</param>
public void Build (IList<long> orderedList, long n = 0, BitmapFromBitStream H_builder = null)
{
    int itemCount = orderedList.Count;
    if (n == 0 && itemCount > 0) {
        n = orderedList [itemCount - 1] + 1;
    }
    byte lowerBits = Log_N_over_M (n, itemCount);
    if (lowerBits == 0) {
        // At least one lower bit is always used.
        lowerBits = 1;
    }
    this.Build (orderedList, n, lowerBits, H_builder);
}
/// <summary>
/// Returns a sequence builder that produces WaveletTree instances.
/// </summary>
/// <param name="bitmap_builder">Assigned to WaveletTree.BitmapBuilder; when null, BitmapBuilders.GetGGMN_wt (16) is used.</param>
/// <param name="get_coder">Creates the encoder from sigma; when null, a BinaryCoding of ceil(log2(sigma)) bits is used.</param>
public static SequenceBuilder GetWT (BitmapFromBitStream bitmap_builder = null, Func<int, IIEncoder32> get_coder = null)
{
    var bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt (16);
    return (seq, sigma) => {
        var tree = new WaveletTree ();
        tree.BitmapBuilder = bitmaps;
        IIEncoder32 coder;
        if (get_coder != null) {
            coder = get_coder (sigma);
        } else {
            // Fixed-width binary code just wide enough for sigma symbols.
            double bits = Math.Ceiling (Math.Log (sigma, 2));
            coder = new BinaryCoding ((int)bits);
        }
        tree.Build (seq, sigma, coder);
        return tree;
    };
}
/// <summary>
/// Builds the structure from a bit stream: collects the positions of all set
/// bits and delegates to the list-based Build, using CountBits as the size.
/// </summary>
/// <param name="bitmap">Source bit stream.</param>
/// <param name="H_builder">Forwarded to the list-based Build.</param>
public void Build (BitStream32 bitmap, BitmapFromBitStream H_builder)
{
    IList<long> setPositions = new List<long> ();
    int position = 0;
    while (position < bitmap.CountBits) {
        if (bitmap [position]) {
            setPositions.Add (position);
        }
        position++;
    }
    this.Build (setPositions, bitmap.CountBits, H_builder);
}
/// <summary>
/// Returns the builder obtained from GetWT when only the bitmap builder is supplied.
/// </summary>
/// <param name="bitmap_builder">Forwarded to GetWT; may be null.</param>
public static SequenceBuilder GetWT_BinaryCoding (BitmapFromBitStream bitmap_builder = null)
{
    var builder = GetWT (bitmap_builder);
    return builder;
}
/// <summary>
/// Returns a sequence builder that produces WaveletTree instances whose
/// encoder is created by <paramref name="get_coder"/>.
/// </summary>
/// <param name="bitmap_builder">Assigned to WaveletTree.BitmapBuilder.</param>
/// <param name="get_coder">Creates the encoder from sigma.</param>
public static SequenceBuilder GetWT (BitmapFromBitStream bitmap_builder, Func<int, IIEncoder32> get_coder)
{
    return (seq, sigma) => {
        var tree = new WaveletTree ();
        tree.BitmapBuilder = bitmap_builder;
        var coder = get_coder (sigma);
        tree.Build (coder, sigma, seq);
        return tree;
    };
}
/// <summary>
/// Returns a sequence builder that produces WaveletTree instances encoded
/// with a BinaryCoding of ceil(log2(sigma)) bits.
/// </summary>
/// <param name="bitmap_builder">Assigned to WaveletTree.BitmapBuilder.</param>
public static SequenceBuilder GetWT_BinaryCoding (BitmapFromBitStream bitmap_builder)
{
    return (seq, sigma) => {
        var tree = new WaveletTree ();
        tree.BitmapBuilder = bitmap_builder;
        // Fixed-width binary code just wide enough for sigma symbols.
        double bits = Math.Ceiling (Math.Log (sigma, 2));
        var coder = new BinaryCoding ((int)bits);
        tree.Build (coder, sigma, seq);
        return tree;
    };
}
/// <summary>
/// Returns a GetSeqXLB builder that uses BitmapBuilders.GetSArray64 as its bitmap builder.
/// </summary>
/// <param name="t">Forwarded to GetSeqXLB.</param>
/// <param name="H_builder">Forwarded to BitmapBuilders.GetSArray64; may be null.</param>
public static SequenceBuilder GetSeqXLB_SArray64 (short t = 16, BitmapFromBitStream H_builder = null)
{
    var sarray64Builder = BitmapBuilders.GetSArray64 (H_builder);
    return GetSeqXLB (t, sarray64Builder);
}
// 64 bit bitmaps
/// <summary>
/// Returns a 64-bit list-based bitmap builder that produces SArray64 instances.
/// </summary>
/// <param name="H_builder">Forwarded to SArray64.Build; may be null.</param>
public static BitmapFromList64 GetSArray64 (BitmapFromBitStream H_builder = null)
{
    return (L, n) => {
        var sarray = new SArray64 ();
        sarray.Build (L, n, H_builder);
        return sarray;
    };
}