/// <summary>
/// Builds the index from three files sharing the base path <paramref name="sa_name"/>:
/// ".structs" (the newF bitmap followed by the charT vector), ".psi" (one run of
/// positions per symbol) and ".samples" (sampling step, marked bitmap, samples and
/// inverse samples).
/// </summary>
/// <param name="sa_name">Base path; the suffixes ".structs", ".psi" and ".samples" are appended.</param>
/// <param name="seq_builder">Builder for SeqPsi; when null, SequenceBuilders.GetSeqXLB_DiffSetRL2_64(16, 63) is used.</param>
/// <param name="bitmap_builder">When not null, newF is copied bit by bit and rebuilt with this builder.</param>
public void Build(string sa_name, SequenceBuilder seq_builder = null, BitmapFromBitStream bitmap_builder = null)
{
    if (seq_builder == null) {
        seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64(16, 63);
    }
    using (var Input = new BinaryReader(File.OpenRead(sa_name + ".structs"))) {
        this.newF = GenericIO<Bitmap>.Load(Input);
        if (bitmap_builder != null) {
            // Re-encode the loaded bitmap with the requested representation.
            var newF_stream = new BitStream32();
            for (int i = 0; i < this.newF.Count; ++i) {
                newF_stream.Write(this.newF.Access(i));
            }
            this.newF = bitmap_builder(new FakeBitmap(newF_stream));
        }
        int len = this.newF.Count1;
        this.charT = new int[len];
        PrimitiveIO<int>.LoadVector(Input, len, this.charT);
    }
    using (var Input = new BinaryReader(File.OpenRead(sa_name + ".psi"))) {
        int seqlen = this.newF.Count;
        var seq = new int[seqlen];
        var L = new List<int>(this.N / this.Sigma + 1);
        int curr = 0;
        for (int i = 1; i <= this.AlphabetSize; i++) {
            // End of the i-th run: the next set bit of newF, or the end of the bitmap for the last symbol.
            int next;
            if (i == this.AlphabetSize) {
                next = this.newF.Count;
            } else {
                next = this.newF.Select1(i + 1);
            }
            int len = next - curr;
            L.Clear();
            PrimitiveIO<int>.LoadVector(Input, len, L);
            for (int j = 0; j < len; ++j) {
                var x = L[j];
                try {
                    seq[x] = i - 1;
                } catch (Exception) {
                    Console.WriteLine("== i: {0}, j: {1}, x: {2}, seq-count: {3}, len: {4}", i, j, x, seq.Length, len);
                    // Plain rethrow keeps the original stack trace ("throw e;" would reset it).
                    throw;
                }
            }
            curr = next;
        }
        this.SeqPsi = seq_builder(seq, this.AlphabetSize);
    }
    using (var Input = new BinaryReader(File.OpenRead(sa_name + ".samples"))) {
        this.SA_sample_step = Input.ReadInt16();
        this.SA_marked = GenericIO<Bitmap>.Load(Input);
        var _samples = new ListIFS();
        _samples.Load(Input);
        var _invsamples = new ListIFS();
        _invsamples.Load(Input);
        this.SA_samples = _samples;
        this.SA_invsamples = _invsamples;
    }
}
/// <summary>
/// Builds the index from the ".structs", ".psi" and ".samples" files that share
/// the base path <paramref name="sa_name"/>.
/// </summary>
/// <param name="sa_name">Base path of the input files.</param>
/// <param name="list_builder">Builder for Psi; when null, ListIBuilders.GetListIDiffs(63) is used.</param>
public void Build(string sa_name, ListIBuilder list_builder = null)
{
    // newF bitmap followed by one charT entry per set bit.
    using (var structsReader = new BinaryReader(File.OpenRead(sa_name + ".structs"))) {
        this.newF = GenericIO<Bitmap>.Load(structsReader);
        int ones = this.newF.Count1;
        this.charT = new int[ones];
        PrimitiveIO<int>.LoadVector(structsReader, ones, this.charT);
    }

    // N + 1 integers, handed to the list builder.
    using (var psiReader = new BinaryReader(File.OpenRead(sa_name + ".psi"))) {
        var psiValues = PrimitiveIO<int>.LoadVector(psiReader, this.N + 1, null);
        var builder = list_builder ?? ListIBuilders.GetListIDiffs(63);
        this.Psi = builder(psiValues, psiValues.Count - 1);
    }

    // Sampling step, marked bitmap, samples and inverse samples, in that order.
    using (var samplesReader = new BinaryReader(File.OpenRead(sa_name + ".samples"))) {
        var samples = new ListIFS();
        var inverseSamples = new ListIFS();
        this.SA_sample_step = samplesReader.ReadInt16();
        this.SA_marked = GenericIO<Bitmap>.Load(samplesReader);
        samples.Load(samplesReader);
        inverseSamples.Load(samplesReader);
        this.SA_samples = samples;
        this.SA_invsamples = inverseSamples;
    }
}
/// <summary>
/// Builds the suffix array of <paramref name="text"/> in memory by running
/// <c>SA_fss</c> and keeping its A, charT and newF outputs.
/// </summary>
/// <param name="text">The text to index; it is also stored in Text.</param>
/// <param name="alphabet_size">Alphabet size forwarded to <c>SA_fss</c>.</param>
public void Build(IList<int> text, int alphabet_size)
{
    this.Text = text;
    var builder = new SA_fss(text, alphabet_size);
    this.A = builder.A;
    this.charT = builder.charT;
    this.newF = builder.newF;
}
/// <summary>
/// Builds the suffix array of <paramref name="text"/> in memory: a
/// <c>SuffixSorter</c> is created and sorted, then its SA, charT and newF
/// outputs are kept.
/// </summary>
/// <param name="text">The text to index; it is also stored in Text.</param>
/// <param name="alphabet_size">Alphabet size forwarded to <c>SuffixSorter</c>.</param>
public void Build(IList<int> text, int alphabet_size)
{
    this.Text = text;
    var sorter = new SuffixSorter(text, alphabet_size);
    sorter.Sort();
    this.SA = sorter.SA;
    this.charT = sorter.charT;
    this.newF = sorter.newF;
}
/// <summary>
/// Asserts that <paramref name="obj"/> is a DArray with the same BaseIndex,
/// IsLargeBlock, PosAbs, SavedPos and B as this instance.
/// </summary>
/// <param name="obj">The bitmap to compare against; must be a non-null DArray.</param>
/// <exception cref="ArgumentException">
/// <paramref name="obj"/> is null or not a DArray, or B differs.
/// </exception>
public override void AssertEquality(Bitmap obj)
{
    var other = obj as DArray;
    if (other == null) {
        // Without this guard a null or wrong-typed argument surfaced as a NullReferenceException.
        throw new ArgumentException("DArray.AssertEquality expects a non-null DArray instance");
    }
    this.BaseIndex.AssertEquality(other.BaseIndex);
    this.IsLargeBlock.AssertEquality(other.IsLargeBlock);
    Assertions.AssertIList<int>(this.PosAbs, other.PosAbs, "DArray.PosAbs");
    Assertions.AssertIList<int>(this.SavedPos, other.SavedPos, "DArray.SavedPos");
    if (this.B != other.B) {
        throw new ArgumentException("DArray inequality on B");
    }
}
/// <summary>
/// Creates a pair that stores the given word and bitmap in the
/// <c>word</c> and <c>invlist</c> members.
/// </summary>
/// <param name="invlist">Bitmap stored in <c>invlist</c>.</param>
/// <param name="word">String stored in <c>word</c>.</param>
public sort_pair(Bitmap invlist, string word)
{
    this.word = word;
    this.invlist = invlist;
}
/// <summary>
/// Loads a previously saved index from the ".idx", ".psi" and ".samples"
/// files that share the base path <paramref name="basename"/>.
/// </summary>
/// <param name="basename">Base path of the saved index files.</param>
public void Load(string basename)
{
    // newF bitmap followed by one charT entry per set bit.
    using (var idxReader = new BinaryReader(File.OpenRead(basename + ".idx"))) {
        this.newF = GenericIO<Bitmap>.Load(idxReader);
        this.charT = new int[this.newF.Count1];
        PrimitiveIO<int>.LoadVector(idxReader, this.charT.Length, this.charT);
    }

    using (var psiReader = new BinaryReader(File.OpenRead(basename + ".psi"))) {
        this.Psi = ListIGenericIO.Load(psiReader);
    }

    // Sampling step, marked bitmap, samples and inverse samples, in that order.
    using (var samplesReader = new BinaryReader(File.OpenRead(basename + ".samples"))) {
        var samples = new ListIFS();
        var inverseSamples = new ListIFS();
        this.SA_sample_step = samplesReader.ReadInt16();
        this.SA_marked = GenericIO<Bitmap>.Load(samplesReader);
        samples.Load(samplesReader);
        inverseSamples.Load(samplesReader);
        this.SA_samples = samples;
        this.SA_invsamples = inverseSamples;
    }
}
/// <summary>
/// Builds the structure from an ordered list of positions. Each item is split
/// into its low bits (appended to L) and its high part (the block number),
/// which is written to the H bit stream as a run of zeros up to the block
/// followed by a one per item.
/// </summary>
/// <param name="orderedList">The items; its Count must not exceed <paramref name="n"/>.</param>
/// <param name="n">Value stored in N.</param>
/// <param name="numLowerBits">Number of low bits kept in L; values below 1 are raised to 1.</param>
/// <param name="H_builder">Builder for H; when null, BitmapBuilders.GetDArray_wt(16,32) is used.</param>
/// <exception cref="ArgumentOutOfRangeException">The list has more items than <paramref name="n"/>.</exception>
public void Build(IList<long> orderedList, long n, byte numLowerBits, BitmapFromBitStream H_builder)
{
    int itemCount = orderedList.Count;
    this.N = n;
    if (itemCount > this.N) {
        Console.WriteLine("XXXXX LastItem: {0}", orderedList[orderedList.Count - 1]);
        throw new ArgumentOutOfRangeException(String.Format("SArray N < M, N: {0}, M: {1}", this.N, itemCount));
    }
    if (numLowerBits < 1) {
        numLowerBits = 1;
    }

    int lowStreamArg = (numLowerBits / 32) * itemCount;
    this.L = new ListIFS(numLowerBits, new BitStream32(lowStreamArg));

    // Sizing argument for the H stream (presumably a capacity hint; see the BitStream32 constructor).
    double exponent = Math.Ceiling(Math.Log(this.N)) - this.GetNumLowerBits();
    int numpart = (int)Math.Ceiling(Math.Pow(2, exponent));
    var highStream = new BitStream32(itemCount + (numpart / 32 + 1));

    long lowMask = this.get_mask();
    int lastBlock = -1;
    for (int idx = 0; idx < itemCount; idx++) {
        long item = orderedList[idx];
        this.L.Add((int)(item & lowMask));
        int itemBlock = (int)(item >> this.GetNumLowerBits());
        // One zero per block boundary crossed since the previous item.
        while (lastBlock < itemBlock) {
            highStream.Write(false);
            lastBlock++;
        }
        highStream.Write(true);
    }

    // Trailing zeros, including an additional technical zero.
    highStream.Write(false, itemCount - lastBlock);
    highStream.Write(false);

    var builder = H_builder ?? BitmapBuilders.GetDArray_wt(16,32);
    this.H = builder(new FakeBitmap(highStream));
}
/// <summary>
/// Reads the structure from <paramref name="input"/>: N as a 64-bit integer,
/// then the H bitmap, then the L list.
/// </summary>
/// <param name="input">Reader positioned at the start of the saved structure.</param>
public override void Load(BinaryReader input)
{
    this.N = input.ReadInt64();
    this.H = GenericIO<Bitmap>.Load(input);

    var lowerBits = new ListIFS();
    lowerBits.Load(input);
    this.L = lowerBits;
}
/// <summary>
/// Asserts that <paramref name="obj"/> is an SArray with the same N, H and L
/// as this instance.
/// </summary>
/// <param name="obj">The bitmap to compare against; must be a non-null SArray.</param>
/// <exception cref="ArgumentException">
/// <paramref name="obj"/> is null or not an SArray, or N differs.
/// </exception>
public override void AssertEquality(Bitmap obj)
{
    var other = obj as SArray;
    if (other == null) {
        // Without this guard a null or wrong-typed argument surfaced as a NullReferenceException.
        throw new ArgumentException("SArray.AssertEquality expects a non-null SArray instance");
    }
    if (this.N != other.N) {
        throw new ArgumentException(String.Format("SArray.N inequality. this.N {0}, other.N: {1}", this.N, other.N));
    }
    this.H.AssertEquality(other.H);
    Assertions.AssertIList<int>(this.L, other.L, "SArray.L");
}
/// <summary>
/// Reads the object from <paramref name="Input"/> in this order: the LENS
/// bitmap, SymbolSize, Q, the byte count, the Data bytes and finally Name.
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved object.</param>
public void Load(BinaryReader Input)
{
    this.LENS = GenericIO<Bitmap>.Load(Input);
    this.SymbolSize = Input.ReadInt32();
    this.Q = Input.ReadInt32();

    // Length-prefixed byte payload.
    var byteCount = Input.ReadInt32();
    this.Data = new byte[byteCount];
    PrimitiveIO<byte>.LoadVector(Input, byteCount, this.Data);

    this.Name = Input.ReadString();
}
/// <summary>
/// Sorts the suffixes of <paramref name="text"/> by inserting them, from the
/// last to the first, into one ordered list per starting symbol. Produces A
/// (entry 0 is the text length, followed by the lists in symbol order), charT
/// and the newF bitmap.
/// </summary>
/// <param name="text">The text; each value is used as an index into the per-symbol lists.</param>
/// <param name="alphabet_size">Number of per-symbol lists to create.</param>
public SA_fss(IList<int> text, int alphabet_size)
{
    this.TXT = text;
    var n = text.Count;

    // One ordered list per symbol, all sharing the suffix comparison.
    var cmp_fun = new Comparison<int>(this.compare_suffixes);
    this.Char_SA = new SkipListRank<int>[alphabet_size];
    for (int sym = 0; sym < alphabet_size; ++sym) {
        this.Char_SA[sym] = new SkipListRank<int>(cmp_fun);
    }

    // Insert suffixes right to left, remembering the node created for each one.
    this.SA_pointers = new SkipList2<SkipListRank<int>.DataRank>.Node[n];
    for (int suffixID = this.TXT.Count - 1; suffixID >= 0; --suffixID) {
        var symbol = this.TXT[suffixID];
        var bucket = this.Char_SA[symbol];
        var node = bucket.Add(suffixID);
        this.SA_pointers[suffixID] = node;
    }

    // Concatenate the lists in symbol order; slot 0 holds n.
    this.A = new int[n + 1];
    this.A[0] = n;
    int next = 1;
    foreach (var bucket in this.Char_SA) {
        foreach (var entry in bucket.SKIPLIST.Traverse()) {
            this.A[next] = entry.Data;
            ++next;
        }
    }
    this.SA_pointers = null;

    // One set bit per symbol that occurs, followed by (count - 1) zeros.
    var stream = new BitStream32();
    this.charT = new List<int>();
    stream.Write(true); // $ symbol
    this.charT.Add(0);
    for (int sym = 0; sym < alphabet_size; ++sym) {
        var occurrences = this.Char_SA[sym].Count;
        if (occurrences > 0) {
            stream.Write(true);
            stream.Write(false, occurrences - 1);
            this.charT.Add(sym + 1);
        }
        this.Char_SA[sym] = null;
    }
    this.Char_SA = null;
    this.newF = BitmapBuilders.GetGGMN_wt(12).Invoke(new FakeBitmap(stream));
}
/// <summary>
/// Builds the index over <paramref name="seq"/> by cutting it into blocks of
/// <paramref name="sigma"/> items. For every block a permutation is built from
/// the per-symbol lists filled by BuildPermInvIndex; the B stream and one X
/// stream per symbol record the list sizes.
/// </summary>
/// <param name="seq">The sequence to index.</param>
/// <param name="sigma">Alphabet size, also used as the block size.</param>
/// <param name="bitmap_builder">Builder used for both the B and X bitmaps.</param>
/// <param name="cyclic_perm_t">Parameter forwarded to PermutationBuilders.GetCyclicPermsListIFS.</param>
public void Build(IList<int> seq, int sigma, BitmapFromBitStream bitmap_builder, int cyclic_perm_t)
{
    // NOTE: Please check sigma <=> BlockSize in this method
    this.sigma = sigma;
    this.n = seq.Count;

    var B_stream = new BitStream32();
    var X_stream = new BitStream32[sigma];
    var lists = new List<int>[sigma];
    for (int sym = 0; sym < sigma; sym++) {
        X_stream[sym] = new BitStream32();
        lists[sym] = new List<int>();
    }

    int num_blocks = (int)Math.Ceiling(this.n * 1.0 / this.sigma);
    this.perms = new CyclicPerms_MRRR[num_blocks];

    int blockID = 0;
    for (int blockStart = 0; blockStart < this.n; blockStart += this.sigma) {
        // Block separator on every per-symbol stream.
        foreach (var symbolStream in X_stream) {
            symbolStream.Write(true);
        }

        // The last block may be shorter than sigma.
        int blockLen = Math.Min(this.n - blockStart, this.sigma);
        this.BuildPermInvIndex(seq, blockStart, blockLen, lists);

        var P = new List<int>(blockLen);
        for (int sym = 0; sym < this.sigma; sym++) {
            var occurrences = lists[sym];
            var occCount = occurrences.Count;
            B_stream.Write(false);
            if (occCount > 0) {
                X_stream[sym].Write(false, occCount);
                B_stream.Write(true, occCount);
                P.AddRange(occurrences);
            }
        }
        this.perms[blockID] = (CyclicPerms_MRRR)PermutationBuilders.GetCyclicPermsListIFS(cyclic_perm_t).Invoke(P);
        ++blockID;
    }

    // Append every other per-symbol stream to the first one.
    var mergedX = X_stream[0];
    for (int sym = 1; sym < X_stream.Length; sym++) {
        var current = X_stream[sym];
        for (int bit = 0; bit < current.CountBits; bit++) {
            // This could be done one whole integer at a time instead of bit by bit.
            mergedX.Write(current[bit]);
        }
    }

    // Terminators: a one closes X and a zero closes B, which simplifies the code that reads them.
    mergedX.Write(true);
    B_stream.Write(false);

    this.B = bitmap_builder(new FakeBitmap(B_stream));
    this.X = bitmap_builder(new FakeBitmap(mergedX));
    this.compute_num_blocks();
}
/// <summary>
/// Reads the index from <paramref name="Input"/>: n, sigma, the number of
/// permutations and each permutation, then the B and X bitmaps. Finishes by
/// calling compute_num_blocks.
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved index.</param>
public void Load(BinaryReader Input)
{
    this.n = Input.ReadInt32();
    this.sigma = Input.ReadInt32();

    var permCount = Input.ReadInt32();
    this.perms = new CyclicPerms_MRRR[permCount];
    for (int idx = 0; idx < permCount; idx++) {
        this.perms[idx] = GenericIO<CyclicPerms_MRRR>.Load(Input);
    }

    this.B = GenericIO<Bitmap>.Load(Input);
    this.X = GenericIO<Bitmap>.Load(Input);
    this.compute_num_blocks();
}
/// <summary>
/// Reads the object from <paramref name="Input"/> in this order: Name, the
/// byte count, the DATA bytes and the LENS bitmap.
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved object.</param>
public void Load(BinaryReader Input)
{
    this.Name = Input.ReadString();

    // Length-prefixed byte payload.
    var byteCount = Input.ReadInt32();
    this.DATA = new List<byte>(byteCount);
    PrimitiveIO<byte>.LoadVector(Input, byteCount, this.DATA);

    this.LENS = GenericIO<Bitmap>.Load(Input);
}
/// <summary>
/// Builds the database from in-memory documents. All documents are
/// concatenated into DATA, and LENS receives, per document, a one followed by
/// (length - 1) zeros, plus a closing one.
/// </summary>
/// <param name="out_filename">Value stored in Name.</param>
/// <param name="data_list">The documents; every entry must be non-null and non-empty.</param>
/// <param name="len_builder">Builder for LENS; when null, BitmapBuilders.GetGGMN_wt(12) is used.</param>
/// <exception cref="ArgumentException">An entry of <paramref name="data_list"/> is null or empty.</exception>
public void Build(string out_filename, IList<IList<byte>> data_list, BitmapFromBitStream len_builder = null)
{
    this.Name = out_filename;
    int counter = 0;
    var data_stream = new List<byte>();
    var lens_stream = new BitStream32();
    foreach (var data in data_list) {
        ++counter;
        if (counter % 1000 == 0) {
            Console.WriteLine("*** Processing docid {0}/{1} (adv: {2:0.000}%)", counter, data_list.Count, counter * 100.0 / data_list.Count);
        }
        // An empty document would add a marker to LENS with no matching byte in DATA;
        // the file-list overload of Build rejects empty documents for the same reason.
        if (data == null || data.Count == 0) {
            throw new ArgumentException(String.Format("AFP entries must not be empty: docid {0}", counter));
        }
        lens_stream.Write(true);
        lens_stream.Write(false, data.Count - 1);
        data_stream.AddRange(data);
    }
    // Closing marker after the last document.
    lens_stream.Write(true);
    if (len_builder == null) {
        len_builder = BitmapBuilders.GetGGMN_wt(12);
    }
    this.LENS = len_builder(new FakeBitmap(lens_stream));
    this.DATA = data_stream;
}
/// <summary>
/// Reads the database from a listing file (one filename per line). Each listed
/// file is parsed with Parse and appended to DATA; LENS receives, per
/// document, a one followed by (length - 1) zeros, plus a closing one.
/// Empty lines in the listing are skipped.
/// </summary>
/// <param name="filename">Path of the listing file; also stored in Name.</param>
/// <param name="len_builder">Builder for LENS; when null, BitmapBuilders.GetGGMN_wt(12) is used.</param>
/// <exception cref="ArgumentException">A listed file parses to an empty document.</exception>
public void Build(string filename, BitmapFromBitStream len_builder = null)
{
    Console.WriteLine("****** Reading database from list of files");
    this.Name = filename;
    var NAMES = File.ReadAllLines(filename);
    int counter = 0;
    var data_stream = new List<byte>();
    var lens_stream = new BitStream32();
    foreach (var s in NAMES) {
        ++counter;
        if (s.Length == 0) {
            continue;
        }
        if (counter % 1000 == 0) {
            Console.WriteLine("*** Processing docid {0}/{1} (adv: {2:0.000}%): '{3}'", counter, NAMES.Length, counter * 100.0 / NAMES.Length, s);
        }
        var data = (IList<byte>)this.Parse(s);
        if (data.Count == 0) {
            throw new ArgumentException(String.Format("AFP files must not be empty: {0}", s));
        }
        lens_stream.Write(true);
        lens_stream.Write(false, data.Count - 1);
        // AddRange lets the list grow geometrically. Bumping Capacity by data.Count
        // per document reallocated and copied the whole buffer every iteration.
        data_stream.AddRange(data);
    }
    // Closing marker after the last document.
    lens_stream.Write(true);
    if (len_builder == null) {
        len_builder = BitmapBuilders.GetGGMN_wt(12);
    }
    this.LENS = len_builder(new FakeBitmap(lens_stream));
    this.DATA = data_stream;
}
/// <summary>
/// Asserts that <paramref name="_other"/> is a DiffSetRL2 with the same N, M,
/// B, Samples, Offsets and Stream as this instance.
/// </summary>
/// <param name="_other">The bitmap to compare against; must be a non-null DiffSetRL2.</param>
/// <exception cref="ArgumentException">
/// <paramref name="_other"/> is null or not a DiffSetRL2, or N, M or B differ.
/// </exception>
public override void AssertEquality(Bitmap _other)
{
    DiffSetRL2 other = _other as DiffSetRL2;
    if (other == null) {
        // Without this guard a null or wrong-typed argument surfaced as a NullReferenceException.
        throw new ArgumentException("DiffSetRL2.AssertEquality expects a non-null DiffSetRL2 instance");
    }
    if (this.N != other.N) {
        throw new ArgumentException("DiffSet N difference");
    }
    if (this.M != other.M) {
        throw new ArgumentException("DiffSet M difference");
    }
    if (this.B != other.B) {
        throw new ArgumentException("DiffSet B difference");
    }
    Assertions.AssertIList<int>(this.Samples, other.Samples, "DiffSet Samples difference");
    Assertions.AssertIList<long>(this.Offsets, other.Offsets, "DiffSet Offsets difference");
    this.Stream.AssertEquality(other.Stream);
}
/// <summary>
/// Equality assertion is not available for this bitmap type.
/// </summary>
/// <param name="other">Ignored.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override void AssertEquality(Bitmap other)
{
    throw new NotSupportedException();
}
/// <summary>
/// Loads a previously saved index from four files sharing the base path
/// <paramref name="basename"/>: ".structs", ".bwt-index", ".structs-samples"
/// and ".samples".
/// </summary>
/// <param name="basename">Base path of the saved index files.</param>
public void Load(string basename)
{
    // newF bitmap followed by one charT entry per set bit.
    using (var structsReader = new BinaryReader(File.OpenRead(basename + ".structs"))) {
        this.newF = GenericIO<Bitmap>.Load(structsReader);
        this.charT = new int[this.newF.Count1];
        PrimitiveIO<int>.LoadVector(structsReader, this.charT.Length, this.charT);
    }

    using (var bwtReader = new BinaryReader(File.OpenRead(basename + ".bwt-index"))) {
        this.seqIndex = GenericIO<Sequence>.Load(bwtReader);
    }

    // Sampling step and the marked bitmap.
    using (var markReader = new BinaryReader(File.OpenRead(basename + ".structs-samples"))) {
        this.SA_sample_step = markReader.ReadInt16();
        this.SA_marked = GenericIO<Bitmap>.Load(markReader);
    }

    // Samples first, inverse samples second.
    using (var samplesReader = new BinaryReader(File.OpenRead(basename + ".samples"))) {
        var samples = new ListIFS();
        var inverseSamples = new ListIFS();
        samples.Load(samplesReader);
        inverseSamples.Load(samplesReader);
        this.SA_samples = samples;
        this.SA_invsamples = inverseSamples;
    }
}
/// <summary>
/// Asserts that <paramref name="_other"/> is an RRR with the same N,
/// BlockSize, Klasses, AbsRank and AbsOffset as this instance.
/// </summary>
/// <param name="_other">The bitmap to compare against; must be a non-null RRR.</param>
/// <exception cref="ArgumentNullException"><paramref name="_other"/> is null or not an RRR.</exception>
/// <exception cref="ArgumentException">N or BlockSize differ.</exception>
public override void AssertEquality(Bitmap _other)
{
    RRR other = _other as RRR;
    if (other == null) {
        // Two-argument form: the single-argument constructor treats its string as the
        // parameter name, so the explanation never reached the exception message.
        throw new ArgumentNullException("_other", "RRR Other should be a RRR object too");
    }
    if (this.N != other.N) {
        // A value mismatch is not a null argument; report it like the BlockSize check below.
        throw new ArgumentException("RRR Inequality on N");
    }
    if (this.BlockSize != other.BlockSize) {
        throw new ArgumentException("RRR Inequality on BlockSize");
    }
    Assertions.AssertIList<int>(this.Klasses, other.Klasses, "RRR Classes");
    Assertions.AssertIList<int>(this.AbsRank, other.AbsRank, "RRR AbsRank");
    Assertions.AssertIList<int>(this.AbsOffset, other.AbsOffset, "RRR AbsOffset");
}
/// <summary>
/// Reads the LENS bitmap and then the SEQ sequence from <paramref name="Input"/>.
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved object.</param>
public void Load(BinaryReader Input)
{
    var lengths = GenericIO<Bitmap>.Load(Input);
    this.LENS = lengths;

    var sequence = GenericIO<Sequence>.Load(Input);
    this.SEQ = sequence;
}
/// <summary>
/// Builds the database from a listing file (one filename per line). Every
/// listed file is loaded with BinQ8HammingSpace.LoadObjectFromFile and its
/// bytes are appended to Data; LENS is built from the cumulative lengths,
/// starting at 0.
/// </summary>
/// <param name="listname">Path of the listing file; also stored in Name.</param>
/// <param name="qsize">Value stored in Q.</param>
/// <param name="symsize">Value stored in SymbolSize.</param>
public void Build(string listname, int qsize, int symsize)
{
    this.Q = qsize;
    this.SymbolSize = symsize;
    this.Name = listname;

    var bytes = new List<byte>();
    var offsets = new List<int>();
    offsets.Add(0);

    int lineNumber = 0;
    int totalLength = 0;
    foreach (var filename in File.ReadAllLines(listname)) {
        lineNumber++;
        Console.WriteLine("**** Loading line-number: {0}, file: {1}", lineNumber, filename);
        var data = BinQ8HammingSpace.LoadObjectFromFile(filename, false);
        foreach (var b in data) {
            bytes.Add(b);
        }
        // Running end offset of the object just appended.
        totalLength += data.Length;
        offsets.Add(totalLength);
    }

    this.LENS = BitmapBuilders.GetSArray().Invoke(offsets);
    this.Data = bytes.ToArray();
}
/// <summary>
/// Reads the PERM permutation and then the LENS bitmap from <paramref name="Input"/>.
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved object.</param>
public void Load(BinaryReader Input)
{
    var permutation = GenericIO<IPermutation>.Load(Input);
    this.PERM = permutation;

    var lengths = GenericIO<Bitmap>.Load(Input);
    this.LENS = lengths;
}
/// <summary>
/// Asserts that <paramref name="obj"/> is a GGMN with the same N, B,
/// BitBlocks and Abs as this instance.
/// </summary>
/// <param name="obj">The bitmap to compare against; must be a non-null GGMN.</param>
/// <exception cref="ArgumentException">
/// <paramref name="obj"/> is null or not a GGMN, or N or B differ.
/// </exception>
public override void AssertEquality(Bitmap obj)
{
    var other = obj as GGMN;
    if (other == null) {
        // Without this guard a null or wrong-typed argument surfaced as a NullReferenceException.
        throw new ArgumentException("GGMN.AssertEquality expects a non-null GGMN instance");
    }
    if (this.N != other.N) {
        throw new ArgumentException("GNBitmap.N inequality");
    }
    if (this.B != other.B) {
        throw new ArgumentException("GNBitmap.B inequality");
    }
    Assertions.AssertIList<uint>(this.BitBlocks, other.BitBlocks, "GNBitmap.Bitmap");
    Assertions.AssertIList<uint>(this.Abs, other.Abs, "GNBitmap.Abs");
}
/// <summary>
/// Builds the index with a counting sort: the positions of
/// <paramref name="seq"/> are grouped by symbol into a permutation, and LENS
/// records each symbol's count as a one followed by that many zeros, plus a
/// closing one.
/// </summary>
/// <param name="seq">The sequence to index; each value is used as an index into the counters.</param>
/// <param name="sigma">Alphabet size (number of counters).</param>
/// <param name="perm_builder">Builder for PERM.</param>
/// <param name="bitmap_builder">Builder for LENS.</param>
public void Build(IList<int> seq, int sigma, PermutationBuilder perm_builder, BitmapFromBitStream bitmap_builder)
{
    // Histogram shifted by one slot, so the prefix sums below give each symbol's first position.
    var counters = new int[sigma];
    foreach (var symbol in seq) {
        int slot = symbol + 1;
        if (slot < sigma) {
            counters[slot]++;
        }
    }
    int running = 0;
    for (int sym = 0; sym < sigma; sym++) {
        running += counters[sym];
        counters[sym] = running;
    }

    // Scatter positions; afterwards counters[sym] is the end of that symbol's range.
    var n = seq.Count;
    var P = new int[n];
    for (int position = 0; position < n; position++) {
        var symbol = seq[position];
        var target = counters[symbol];
        P[target] = position;
        counters[symbol] = target + 1;
    }

    // The lengths bitmap: consecutive range ends give the per-symbol counts.
    var lens = new BitStream32();
    int previousEnd = 0;
    foreach (var rangeEnd in counters) {
        var occurrences = rangeEnd - previousEnd;
        previousEnd = rangeEnd;
        lens.Write(true);
        lens.Write(false, occurrences);
    }
    // An additional one at the end, to simplify the code that reads the bitmap.
    lens.Write(true);

    this.LENS = bitmap_builder(new FakeBitmap(lens));
    this.PERM = perm_builder(P);
}
/// <summary>
/// Builds the index from the ".structs", ".bwt" and ".samples" files that
/// share the base path <paramref name="sa_name"/>.
/// </summary>
/// <param name="sa_name">Base path of the input files.</param>
/// <param name="seq_builder">Builder for seqIndex; when null, SequenceBuilders.GetSeqXLB_DiffSet64() is used.</param>
public void Build(string sa_name, SequenceBuilder seq_builder = null)
{
    // newF bitmap followed by one charT entry per set bit.
    using (var structsReader = new BinaryReader(File.OpenRead(sa_name + ".structs"))) {
        this.newF = GenericIO<Bitmap>.Load(structsReader);
        int ones = (int)this.newF.Count1;
        this.charT = new int[ones];
        PrimitiveIO<int>.LoadVector(structsReader, ones, this.charT);
    }

    var builder = seq_builder ?? SequenceBuilders.GetSeqXLB_DiffSet64();

    // The loaded list is indexed with charT.Length as the alphabet size.
    using (var bwtReader = new BinaryReader(File.OpenRead(sa_name + ".bwt"))) {
        var bwt = new ListIFS();
        bwt.Load(bwtReader);
        this.seqIndex = builder(bwt, this.charT.Length);
    }

    // Sampling step, marked bitmap, samples and inverse samples, in that order.
    using (var samplesReader = new BinaryReader(File.OpenRead(sa_name + ".samples"))) {
        var samples = new ListIFS();
        var inverseSamples = new ListIFS();
        this.SA_sample_step = samplesReader.ReadInt16();
        this.SA_marked = GenericIO<Bitmap>.Load(samplesReader);
        samples.Load(samplesReader);
        inverseSamples.Load(samplesReader);
        this.SA_samples = samples;
        this.SA_invsamples = inverseSamples;
    }
}
/// <summary>
/// Reads the index from <paramref name="Input"/>: N, the vocabulary size and
/// one bitmap per symbol, then the Lens bitmap and the permutation. The
/// permutation is loaded first and then bound to GetNotIdxPERM().
/// </summary>
/// <param name="Input">Reader positioned at the start of the saved index.</param>
public void Load(BinaryReader Input)
{
    this.N = Input.ReadInt32();

    int vocsize = Input.ReadInt32();
    this.InvIndex = new Bitmap[vocsize];
    for (int symbol = 0; symbol < vocsize; symbol++) {
        this.InvIndex[symbol] = GenericIO<Bitmap>.Load(Input);
    }

    this.Lens = GenericIO<Bitmap>.Load(Input);

    // GetNotIdxPERM is called only after InvIndex and Lens have been loaded.
    var permutation = new ListGen_MRRR();
    permutation.Load(Input);
    permutation.SetPERM(this.GetNotIdxPERM());
    this.Perm = permutation;
}
/// <summary>
/// Builds the graph from a text file with one edge per line
/// ("start_node end_node", separated by tabs or spaces). Lines starting with
/// '#' and blank lines are skipped. End nodes are appended to the sequence in
/// file order; the length stream gets a one for every start-node id up to the
/// current one and a zero per edge, plus a closing one.
/// </summary>
/// <param name="filename">Path of the edge list; start nodes are expected to be already sorted.</param>
/// <param name="seqbuilder">Builder for SEQ.</param>
/// <param name="bitmapbuilder">Builder for LENS; when null, BitmapBuilders.GetGGMN_wt(12) is used.</param>
/// <exception cref="FormatException">A data line has fewer than two fields or a field is not an integer.</exception>
public void BuildWebGraph(string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
{
    if (bitmapbuilder == null) {
        bitmapbuilder = BitmapBuilders.GetGGMN_wt(12);
    }
    var len_stream = new BitStream32();
    var seq = new List<int>();
    var separators = new char[] { '\t', ' ' };
    int prev_context = -1;
    // Largest node id seen as either endpoint, used for the alphabet size of SEQ.
    int max_node = -1;
    using (var Input = File.OpenText(filename)) {
        string line;
        int lineno = 0;
        int counterlineno = 0;
        while (true) {
            // Progress report every 10000 lines, ten entries per console row.
            if (lineno % 10000 == 0) {
                if (counterlineno % 10 == 0) {
                    Console.WriteLine();
                    Console.Write("Processing lines: ");
                }
                ++counterlineno;
                Console.Write("{0}, ", lineno);
            }
            ++lineno;
            line = Input.ReadLine();
            if (line == null) {
                break;
            }
            // Blank lines (e.g. a trailing newline) used to crash the parser; skip them like comments.
            if (line.Trim().Length == 0 || line.StartsWith("#", StringComparison.Ordinal)) {
                continue;
            }
            // RemoveEmptyEntries tolerates repeated or leading separators.
            var link = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            if (link.Length < 2) {
                throw new FormatException(String.Format("Malformed edge at line {0}: '{1}'", lineno, line));
            }
            var start_node = int.Parse(link[0]);
            var end_node = int.Parse(link[1]);
            // on webgraph format, starting nodes are already sorted, just advance and count
            if (start_node != prev_context) {
                for (int diffcount = start_node - prev_context; diffcount > 0; --diffcount) {
                    len_stream.Write(true);
                }
                prev_context = start_node;
            }
            len_stream.Write(false);
            seq.Add(end_node);
            max_node = Math.Max(max_node, Math.Max(start_node, end_node));
        }
        // a simple hack simplifying direct-neighbors's retrieval
        len_stream.Write(true);
    }
    // The alphabet must cover every symbol stored in seq. An end node can be larger than the
    // last start node (a node without outgoing links), so prev_context + 1 alone can be too small.
    this.SEQ = seqbuilder(seq, max_node + 1);
    this.LENS = bitmapbuilder(new FakeBitmap(len_stream));
}
/// <summary>
/// Builds the index for the sequence: one bitmap per symbol built from the
/// positions where that symbol occurs, a Lens bitmap with a one followed by
/// one zero per occurrence for each symbol (plus a closing one), and a
/// permutation built over GetNotIdxPERM().
/// </summary>
/// <param name="sequence">The sequence to index; each value is used as an index into the per-symbol lists.</param>
/// <param name="alphabet_size">Number of per-symbol lists.</param>
/// <param name="t">Parameter forwarded to the permutation's Build.</param>
/// <param name="rowbuilder">Builder for the per-symbol bitmaps; when null, BitmapBuilders.GetSArray() is used.</param>
/// <param name="lenbuilder">Builder for Lens; when null, BitmapBuilders.GetGGMN_wt(12) is used.</param>
public void Build(IList<int> sequence, int alphabet_size, int t = 16, BitmapFromList rowbuilder = null, BitmapFromBitStream lenbuilder = null)
{
    rowbuilder = rowbuilder ?? BitmapBuilders.GetSArray();
    lenbuilder = lenbuilder ?? BitmapBuilders.GetGGMN_wt(12);

    // Collect the occurrence positions of every symbol.
    var invindex = new IList<int>[alphabet_size];
    for (int symbol = 0; symbol < alphabet_size; symbol++) {
        invindex[symbol] = new List<int>();
    }
    int position = 0;
    foreach (var symbol in sequence) {
        invindex[symbol].Add(position);
        position++;
    }

    this.N = sequence.Count;
    this.InvIndex = new Bitmap[alphabet_size];
    var lens = new BitStream32();
    for (int symbol = 0; symbol < alphabet_size; symbol++) {
        // Progress: a new console row every 10000 symbols, an entry every 1000.
        if (symbol % 10000 == 0) {
            Console.WriteLine();
            Console.Write("*** InvIndexXLBSeq {0}/{1}", symbol, alphabet_size);
        } else if (symbol % 1000 == 0) {
            Console.Write(", {0}", symbol);
        }
        var occurrences = invindex[symbol];
        this.InvIndex[symbol] = rowbuilder(occurrences);
        lens.Write(true);
        lens.Write(false, occurrences.Count);
        // Drop the reference to the temporary list once its bitmap exists.
        invindex[symbol] = null;
    }
    lens.Write(true);

    Console.WriteLine();
    Console.WriteLine("done, now saving permutation and the Len bitmap");
    this.Lens = lenbuilder(new FakeBitmap(lens));

    // GetNotIdxPERM is called only after N, InvIndex and Lens have been set.
    var permutation = new ListGen_MRRR();
    permutation.Build(this.GetNotIdxPERM(), t, null);
    Console.WriteLine("done");
    this.Perm = permutation;
}