Beispiel #1
0
        /// <summary>
        /// Loads the index from the files <c>sa_name + ".structs"</c>, <c>sa_name + ".psi"</c>
        /// and <c>sa_name + ".samples"</c>.
        /// </summary>
        /// <param name="sa_name">Path prefix shared by the three input files.</param>
        /// <param name="seq_builder">Builder applied to the decoded sequence to produce SeqPsi;
        /// when null, SequenceBuilders.GetSeqXLB_DiffSetRL2_64(16, 63) is used.</param>
        /// <param name="bitmap_builder">When not null, the loaded newF bitmap is copied bit by bit
        /// into a stream and rebuilt through this builder.</param>
        public void Build(string sa_name, SequenceBuilder seq_builder = null, BitmapFromBitStream bitmap_builder = null)
        {
            if (seq_builder == null) {
                seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64(16, 63);
            }
            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
                this.newF = RankSelectGenericIO.Load (Input);
                if (bitmap_builder != null) {
                    // Re-encode newF with the caller supplied bitmap representation.
                    var newF_stream = new BitStream32();
                    for (int i = 0; i < this.newF.Count; ++i) {
                        newF_stream.Write (this.newF.Access(i));
                    }
                    this.newF = bitmap_builder(new FakeBitmap(newF_stream));
                }
                // charT holds one entry per set bit of newF.
                int len = this.newF.Count1;
                this.charT = new int[len];
                PrimitiveIO<int>.ReadFromFile (Input, len, this.charT);
            }
            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".psi"))) {
                int seqlen = this.newF.Count;
                var seq = new int[seqlen];
                var L = new List<int>(this.N/this.Sigma + 1);
                int curr = 0;
                for (int i = 1; i <= this.AlphabetSize; i++) {
                    // [curr, next) is the range delimited by consecutive marks of newF;
                    // the last range ends at the end of the bitmap.
                    int next;
                    if (i == this.AlphabetSize) {
                        next = this.newF.Count;
                    } else {
                        next = this.newF.Select1 (i + 1);
                    }
                    int len = next - curr;
                    L.Clear();
                    PrimitiveIO<int>.ReadFromFile (Input, len, L);
                    for (int j = 0; j < len; ++j) {
                        var x = L[j];
                        try {
                            seq[ x ] = i - 1;
                        } catch (Exception) {
                            Console.WriteLine ("== i: {0}, j: {1}, x: {2}, seq-count: {3}, len: {4}",
                                               i, j, x, seq.Length, len);
                            // "throw;" keeps the original stack trace ("throw e;" would reset it).
                            throw;
                        }
                    }
                    curr = next;
                }
                this.SeqPsi = seq_builder(seq, this.AlphabetSize);
            }

            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
                this.SA_sample_step = Input.ReadInt16 ();
                this.SA_marked = RankSelectGenericIO.Load (Input);
                var _samples = new ListIFS ();
                _samples.Load (Input);
                var _invsamples = new ListIFS ();
                _invsamples.Load (Input);
                this.SA_samples = _samples;
                this.SA_invsamples = _invsamples;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds the structure over <paramref name="seq"/>, processing it in consecutive blocks
        /// of <paramref name="sigma"/> positions. Each block contributes one permutation to
        /// <c>perms</c>; the unary-coded counts go to the bitmaps <c>B</c> and <c>X</c>.
        /// </summary>
        /// <param name="seq">Input sequence.</param>
        /// <param name="sigma">Alphabet size; also used as the block length.</param>
        /// <param name="perm_builder">Builder applied to the permutation of every block.</param>
        /// <param name="bitmap_builder">Builder used for both output bitmaps.</param>
        public void Build(IList<int> seq, int sigma, PermutationBuilder perm_builder, BitmapFromBitStream bitmap_builder)
        {
            // NOTE: Please check sigma <=> BlockSize in this method
            this.sigma = sigma;
            this.n = seq.Count;
            var blockBits = new BitStream32 ();
            var symbolBits = new BitStream32[ sigma ];
            var occurrences = new List<int>[sigma];
            for (int c = 0; c < sigma; c++) {
                symbolBits [c] = new BitStream32 ();
                occurrences [c] = new List<int> ();
            }
            int num_blocks = (int)Math.Ceiling (this.n * 1.0 / this.sigma);
            this.perms = new List<IPermutation> (num_blocks);
            for (int start = 0; start < this.n; start += this.sigma) {
                // every per-symbol stream gets a block separator
                for (int c = 0; c < symbolBits.Length; c++) {
                    symbolBits [c].Write (true);
                }
                // the last block may be shorter than sigma
                int blockLen = Math.Min (this.n - start, this.sigma);
                this.BuildPermInvIndex (seq, start, blockLen, occurrences);
                var blockPerm = new List<int> (blockLen);
                for (int c = 0; c < this.sigma; c++) {
                    var hits = occurrences [c];
                    var count = hits.Count;
                    blockBits.Write (false);
                    if (count <= 0) {
                        continue;
                    }
                    symbolBits [c].Write (false, count);
                    blockBits.Write (true, count);
                    blockPerm.AddRange (hits);
                }
                this.perms.Add (perm_builder (blockPerm));
            }
            // Append the streams of symbols 1..sigma-1 to the stream of symbol 0.
            var merged = symbolBits [0];
            for (int c = 1; c < symbolBits.Length; c++) {
                var current = symbolBits [c];
                // this could be done a whole integer at a time instead of bit by bit
                for (int bit = 0; bit < current.CountBits; bit++) {
                    merged.Write (current [bit]);
                }
            }
            // A terminator at the end of each stream simplifies the code
            // (a 1 on the X stream, a 0 on the B stream).
            merged.Write (true);
            blockBits.Write (false);
            this.B = bitmap_builder (new FakeBitmap (blockBits));
            this.X = bitmap_builder (new FakeBitmap (merged));
            this.compute_num_blocks ();
        }
Beispiel #3
0
        /// <summary>
        /// Returns a permutation builder that creates a <c>CyclicPerms_MRRR</c> whose permutation
        /// list comes from <c>ListIBuilders.GetListIDiffs</c> and whose back-pointer list comes
        /// from <c>ListIBuilders.GetListIFS</c>.
        /// </summary>
        /// <param name="t">Value forwarded as the second argument of <c>CyclicPerms_MRRR.Build</c>.</param>
        /// <param name="bsize">Block size forwarded to <c>GetListIDiffs</c>.</param>
        /// <param name="marks_builder">Optional bitmap builder forwarded to <c>GetListIDiffs</c>.</param>
        /// <param name="encoder">Optional encoder forwarded to <c>GetListIDiffs</c>.</param>
        public static PermutationBuilder GetCyclicPermsListIDiffs(int t, short bsize,
                                                                   BitmapFromBitStream marks_builder = null,
                                                                   IIEncoder32 encoder = null)
        {
            return perm => {
                var cyclic = new CyclicPerms_MRRR ();
                // Both list builders are created anew on every invocation of the returned builder.
                cyclic.Build (perm, t,
                              ListIBuilders.GetListIDiffs (bsize, marks_builder, encoder),
                              ListIBuilders.GetListIFS ());
                return cyclic;
            };
        }
Beispiel #4
0
 /// <summary>
 /// Builds the graph from a text edge list with one "start end" pair per line, the fields
 /// being separated by tabs and/or spaces. Lines starting with '#' are treated as comments
 /// and blank lines are skipped.
 /// </summary>
 /// <param name="filename">Path of the edge-list file.</param>
 /// <param name="seqbuilder">Builder applied to the list of end nodes; the result is stored in SEQ.</param>
 /// <param name="bitmapbuilder">Builder for the bitmap stored in LENS;
 /// defaults to BitmapBuilders.GetGGMN_wt(12).</param>
 /// <exception cref="FormatException">A data line does not hold two integer fields.</exception>
 public void BuildWebGraph(string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
 {
     if (bitmapbuilder == null) {
         bitmapbuilder = BitmapBuilders.GetGGMN_wt (12);
     }
     var len_stream = new BitStream32 ();
     var seq = new List<int> ();
     int prev_context = -1;
     var separators = new char[] { '\t', ' ' };
     using (var Input = File.OpenText (filename)) {
         string line;
         int lineno = 0;
         int counterlineno = 0;
         while (true) {
             // progress report: one number every 10000 lines, ten numbers per output row
             if (lineno % 10000 == 0) {
                 if (counterlineno % 10 == 0) {
                     Console.WriteLine ();
                     Console.Write ("Processing lines: ");
                 }
                 ++counterlineno;
                 Console.Write ("{0}, ", lineno);
             }
             ++lineno;
             line = Input.ReadLine ();
             if (line == null) {
                 break;
             }
             // Ordinal comparison: '#' is a file-format marker, not linguistic text.
             if (line.StartsWith ("#", StringComparison.Ordinal)) {
                 continue;
             }
             // Dropping empty entries tolerates repeated separators and surrounding whitespace.
             var link = line.Split (separators, StringSplitOptions.RemoveEmptyEntries);
             if (link.Length == 0) {
                 // blank line (e.g. a trailing newline at the end of the file)
                 continue;
             }
             if (link.Length < 2) {
                 throw new FormatException (String.Format ("Expected two node ids at line {0} of '{1}'", lineno, filename));
             }
             // The file is machine generated: parse independently of the current culture.
             var start_node = int.Parse (link [0], System.Globalization.CultureInfo.InvariantCulture);
             var end_node = int.Parse (link [1], System.Globalization.CultureInfo.InvariantCulture);
             // on webgraph format, starting nodes are already sorted, just advance and count
             if (start_node != prev_context) {
                 // one mark per node id skipped, so nodes without out-links still get a mark
                 for (int diffcount = start_node - prev_context; diffcount > 0; --diffcount) {
                     len_stream.Write (true);
                 }
                 prev_context = start_node;
             }
             len_stream.Write (false);
             seq.Add (end_node);
         }
         // a simple hack simplifying direct-neighbors' retrieval
         len_stream.Write (true);
     }
     this.SEQ = seqbuilder (seq, prev_context + 1);
     this.LENS = bitmapbuilder (new FakeBitmap (len_stream));
 }
Beispiel #5
0
 //        public static SequenceBuilder GetGolynski (PermutationBuilder perm_builder = null,
 /// <summary>
 /// Returns a sequence builder producing <c>GolynskiMunroRaoSeq</c> instances.
 /// </summary>
 /// <param name="cyclic_perm_t">Value forwarded as the last argument of <c>GolynskiMunroRaoSeq.Build</c>.</param>
 /// <param name="bitmap_builder">Bitmap builder forwarded to <c>Build</c>;
 /// defaults to BitmapBuilders.GetGGMN_wt(16).</param>
 public static SequenceBuilder GetGolynski(int cyclic_perm_t, BitmapFromBitStream bitmap_builder = null)
 {
     BitmapFromBitStream bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt(16);
     return (seq, sigma) => {
         var golynski = new GolynskiMunroRaoSeq ();
         golynski.Build (seq, sigma, bitmaps, cyclic_perm_t);
         return golynski;
     };
 }
Beispiel #6
0
 /// <summary>
 /// Builds the structure: for every symbol a bit stream is written with a 1 at the start of
 /// each block of <c>B</c> positions and a 0 for each occurrence of the symbol; the streams
 /// are concatenated into the bitmap <c>X</c>, and the sequence itself is stored in <c>SEQ</c>
 /// through <paramref name="list_builder"/>.
 /// </summary>
 /// <param name="seq">Input sequence; every value is used as an index below <paramref name="sigma"/>.</param>
 /// <param name="sigma">Alphabet size.</param>
 /// <param name="B">Block length; values &lt;= 0 select <paramref name="sigma"/> (cast to short).</param>
 /// <param name="list_builder">Builder for SEQ; defaults to ListIBuilders.GetListIFS().</param>
 /// <param name="bitmap_builder">Builder for X; defaults to BitmapBuilders.GetGGMN_wt(16).</param>
 public void Build(IList<int> seq, int sigma, short B = 0, ListIBuilder list_builder = null, BitmapFromBitStream bitmap_builder = null)
 {
     if (list_builder == null) {
         list_builder = ListIBuilders.GetListIFS ();
     }
     if (bitmap_builder == null) {
         bitmap_builder = BitmapBuilders.GetGGMN_wt (16);
     }
     if (B <= 0) {
         // NOTE(review): this cast wraps for sigma > short.MaxValue; confirm that callers
         // with large alphabets pass B explicitly.
         B = (short)sigma;
     }
     this.sigma = sigma;
     this.B = B;
     // Allocate every stream up front. They used to be created inside the loop at i == 0,
     // which left all entries null (and crashed below) for an empty sequence.
     var S = new BitStream32[sigma];
     for (int c = 0; c < sigma; ++c) {
         S [c] = new BitStream32 ();
     }
     int n = seq.Count;
     for (int i = 0; i < n; ++i) {
         if (i % this.B == 0) {
             // a new block starts: mark it on every symbol stream
             for (int c = 0; c < sigma; ++c) {
                 S [c].Write (true);
             }
         }
         var sym = seq [i];
         S [sym].Write (false);
     }
     // Concatenate the streams of symbols 1..sigma-1 after the stream of symbol 0.
     var ostream = S [0];
     for (int c = 1; c < sigma; ++c) {
         var istream = S [c];
         for (int i = 0; i < istream.CountBits; ++i) {
             ostream.Write (istream [i]);
         }
     }
     this.X = bitmap_builder (new FakeBitmap (ostream));
     this.SEQ = list_builder(seq, sigma);
 }
Beispiel #7
0
        /// <summary>
        /// Builds the structure with a counting sort: <c>PERM</c> lists the positions of
        /// <paramref name="seq"/> grouped by symbol, and <c>LENS</c> stores, per symbol, a 1
        /// followed by as many 0s as the symbol has occurrences (plus one closing 1).
        /// </summary>
        /// <param name="seq">Input sequence; every value is used as an index below <paramref name="sigma"/>.</param>
        /// <param name="sigma">Alphabet size.</param>
        /// <param name="perm_builder">Builder applied to the sorted position array.</param>
        /// <param name="bitmap_builder">Builder applied to the lengths bit stream.</param>
        public void Build(IList<int> seq, int sigma, PermutationBuilder perm_builder, BitmapFromBitStream bitmap_builder)
        {
            // Count each symbol one slot to the right, so that the running totals below
            // give the first output position of every symbol.
            var offsets = new int[sigma];
            foreach (var symbol in seq) {
                var slot = symbol + 1;
                if (slot < sigma) {
                    offsets [slot]++;
                }
            }
            int running = 0;
            for (int c = 0; c < sigma; c++) {
                running += offsets [c];
                offsets [c] = running;
            }
            // Distribute the positions; afterwards offsets[c] is the end of symbol c's range.
            var n = seq.Count;
            var order = new int[n];
            for (int position = 0; position < n; position++) {
                var symbol = seq [position];
                order [offsets [symbol]++] = position;
            }
            // the bitmap to save the lengths
            var lens = new BitStream32 ();
            int previousEnd = 0;
            for (int c = 0; c < offsets.Length; c++) {
                var end = offsets [c];
                lens.Write (true);
                lens.Write (false, end - previousEnd);
                previousEnd = end;
            }
            // an additional 1 at the end, to simplify source code
            lens.Write (true);

            this.LENS = bitmap_builder (new FakeBitmap (lens));
            this.PERM = perm_builder (order);
        }
Beispiel #8
0
 /// <summary>
 /// Builds the database from in-memory items: all items are concatenated into <c>DATA</c>
 /// and their lengths are stored in <c>LENS</c> as a 1 followed by (length - 1) zeros.
 /// </summary>
 /// <param name="out_filename">Stored as the database name.</param>
 /// <param name="data_list">Items to store; none of them may be empty.</param>
 /// <param name="len_builder">Builder for the lengths bitmap;
 /// defaults to BitmapBuilders.GetGGMN_wt(12).</param>
 /// <exception cref="ArgumentException">An item of <paramref name="data_list"/> is empty.</exception>
 public void Build(string out_filename, IList<IList<byte>> data_list, BitmapFromBitStream len_builder = null)
 {
     this.Name = out_filename;
     int counter = 0;
     var data_stream = new List<byte> ();
     var lens_stream = new BitStream32 ();
     foreach (var data in data_list) {
         ++counter;
         if (counter % 1000 == 0) {
             Console.WriteLine ("*** Processing docid {0}/{1} (adv: {2:0.000}%)",
                                counter, data_list.Count, counter*100.0/data_list.Count);
         }
         // The length encoding below cannot represent an empty item (it would request -1
         // zeros), so reject it the same way the file-listing overload does.
         if (data.Count == 0) {
             throw new ArgumentException (String.Format ("Data items must not be empty: item {0}", counter));
         }
         lens_stream.Write (true);
         lens_stream.Write (false, data.Count-1);
         data_stream.AddRange (data);
     }
     // closing mark after the last item
     lens_stream.Write(true);
     if (len_builder == null) {
         len_builder = BitmapBuilders.GetGGMN_wt (12);
     }
     this.LENS = len_builder (new FakeBitmap (lens_stream));
     this.DATA = data_stream;
 }
Beispiel #9
0
 /// <summary>
 /// Read the database from a listing file (one filename per line). Every listed file is
 /// parsed with <c>Parse</c>, appended to <c>DATA</c>, and its length is stored in
 /// <c>LENS</c> as a 1 followed by (length - 1) zeros. Empty lines of the listing are skipped.
 /// </summary>
 /// <param name="filename">Path of the listing file; also stored as the database name.</param>
 /// <param name="len_builder">Builder for the lengths bitmap;
 /// defaults to BitmapBuilders.GetGGMN_wt(12).</param>
 /// <exception cref="ArgumentException">A listed file parses to an empty item.</exception>
 public void Build(string filename, BitmapFromBitStream len_builder = null)
 {
     Console.WriteLine ("****** Reading database from list of files");
     this.Name = filename;
     var NAMES = File.ReadAllLines (filename);
     int counter = 0;
     var data_stream = new List<byte> ();
     var lens_stream = new BitStream32 ();
     foreach (var s in NAMES) {
         ++counter;
         if (s.Length == 0) {
             continue;
         }
         if (counter % 1000 == 0) {
             Console.WriteLine ("*** Processing docid {0}/{1} (adv: {2:0.000}%): '{3}'",
                                counter, NAMES.Length, counter*100.0/NAMES.Length, s);
         }
         var data = (IList<byte>)this.Parse (s, true);
         if (data.Count == 0) {
             throw new ArgumentException(String.Format("AFP files must not be empty: {0}", s));
         }
         lens_stream.Write (true);
         lens_stream.Write (false, data.Count-1);
         // AddRange grows the buffer geometrically. The former "Capacity += data.Count"
         // reallocated and copied the whole buffer once per document.
         data_stream.AddRange (data);
     }
     // Release the slack left by geometric growth before the buffer is kept in DATA.
     data_stream.TrimExcess ();
     // closing mark after the last item
     lens_stream.Write(true);
     if (len_builder == null) {
         len_builder = BitmapBuilders.GetGGMN_wt (12);
     }
     this.LENS = len_builder (new FakeBitmap (lens_stream));
     this.DATA = data_stream;
 }
Beispiel #10
0
        /// <summary>
        /// Builds the index for the sequence: one bitmap per symbol holding the positions
        /// where it occurs (<c>InvIndex</c>), a bitmap with the unary-coded number of
        /// occurrences per symbol (<c>Lens</c>), and a <c>ListGen_MRRR</c> built over
        /// <c>GetNotIdxPERM()</c> (<c>Perm</c>).
        /// </summary>
        /// <param name="sequence">Input sequence; every value is used as an index below <paramref name="alphabet_size"/>.</param>
        /// <param name="alphabet_size">Number of distinct symbols.</param>
        /// <param name="t">Value forwarded to <c>ListGen_MRRR.Build</c>.</param>
        /// <param name="rowbuilder">Builder for each per-symbol bitmap; defaults to BitmapBuilders.GetSArray().</param>
        /// <param name="lenbuilder">Builder for the lengths bitmap; defaults to BitmapBuilders.GetGGMN_wt(12).</param>
        public void Build(IList<int> sequence, int alphabet_size, int t = 16,
                           BitmapFromList rowbuilder = null, BitmapFromBitStream lenbuilder = null)
        {
            rowbuilder = rowbuilder ?? BitmapBuilders.GetSArray ();
            lenbuilder = lenbuilder ?? BitmapBuilders.GetGGMN_wt (12);

            // Collect, for every symbol, the increasing list of positions where it occurs.
            var occurrences = new IList<int>[alphabet_size];
            for (int sym = 0; sym < alphabet_size; sym++) {
                occurrences [sym] = new List<int> ();
            }
            int position = 0;
            foreach (var symbol in sequence) {
                occurrences [symbol].Add (position);
                position++;
            }

            this.N = sequence.Count;
            this.InvIndex = new Bitmap[alphabet_size];
            var lens = new BitStream32 ();
            for (int sym = 0; sym < alphabet_size; sym++) {
                // progress report: a new row every 10000 symbols, a number every 1000
                if (sym % 10000 == 0) {
                    Console.WriteLine ();
                    Console.Write ("*** InvIndexXLBSeq {0}/{1}", sym, alphabet_size);
                } else if (sym % 1000 == 0) {
                    Console.Write (", {0}", sym);
                }
                var row = occurrences [sym];
                this.InvIndex [sym] = rowbuilder (row);
                lens.Write (true);
                lens.Write (false, row.Count);
                // drop the reference so the plain list can be collected early
                occurrences [sym] = null;
            }
            lens.Write (true);
            Console.WriteLine ();
            Console.WriteLine ("done, now saving permutation and the Len bitmap");
            this.Lens = lenbuilder (new FakeBitmap (lens));
            var perm = new ListGen_MRRR ();
            perm.Build (this.GetNotIdxPERM (), t, null);
            Console.WriteLine ("done");
            this.Perm = perm;
        }
Beispiel #11
0
 /// <summary>
 /// Returns a sequence builder producing <c>InvIndexXLBSeq</c> instances.
 /// </summary>
 /// <param name="t">Value forwarded to <c>InvIndexXLBSeq.Build</c>.</param>
 /// <param name="row_builder">Builder for the per-symbol bitmaps; defaults to BitmapBuilders.GetSArray().</param>
 /// <param name="len_builder">Builder for the lengths bitmap; defaults to BitmapBuilders.GetGGMN_wt(12).</param>
 public static SequenceBuilder GetInvIndexXLBSeq(short t = 16, BitmapFromList row_builder = null, BitmapFromBitStream len_builder = null)
 {
     BitmapFromList rows = row_builder ?? BitmapBuilders.GetSArray ();
     BitmapFromBitStream lens = len_builder ?? BitmapBuilders.GetGGMN_wt (12);
     return (seq, sigma) => {
         var index = new InvIndexXLBSeq();
         index.Build (seq, sigma, t, rows, lens);
         return index;
     };
 }
Beispiel #12
0
 /// <summary>
 /// Returns a bitmap builder that turns a <c>FakeBitmap</c> into an <c>SArray</c>, feeding it
 /// the result of <c>CreateSortedList</c> and the bitmap's <c>Count</c>.
 /// </summary>
 /// <param name="H_builder">Optional bitmap builder forwarded to <c>SArray.Build</c>.</param>
 public static BitmapFromBitStream GetSArray_wt(BitmapFromBitStream H_builder = null)
 {
     return bitmap => {
         var sarray = new SArray ();
         sarray.Build (CreateSortedList (bitmap), bitmap.Count, H_builder);
         return sarray;
     };
 }
Beispiel #13
0
 /// <summary>
 /// Builds the graph from <paramref name="filename"/>; simply forwards every argument to
 /// <c>BuildWebGraph</c>.
 /// </summary>
 /// <param name="filename">Input file, forwarded unchanged.</param>
 /// <param name="seqbuilder">Sequence builder, forwarded unchanged.</param>
 /// <param name="bitmapbuilder">Optional bitmap builder, forwarded unchanged (null included).</param>
 public void Build(string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
 {
     BuildWebGraph (filename, seqbuilder, bitmapbuilder);
 }
Beispiel #14
0
 /// <summary>
 /// Builds the structure from an ordered list of items. For each item, the value masked with
 /// <c>get_mask()</c> is appended to <c>L</c>, and the value shifted right by
 /// <c>GetNumLowerBits()</c> is coded in the stream behind <c>H</c>: a 0 is written each time
 /// the shifted value advances by one and a 1 is written per item.
 /// </summary>
 /// <param name="orderedList">Items to store.</param>
 /// <param name="n">Size of the universe; must not be smaller than the number of items.</param>
 /// <param name="numLowerBits">Bits per entry of <c>L</c>; values below 1 are raised to 1.</param>
 /// <param name="H_builder">Builder for <c>H</c>; when null, BitmapBuilders.GetDArray_wt(16,32) is used.</param>
 /// <exception cref="ArgumentOutOfRangeException">The list has more items than <paramref name="n"/>.</exception>
 public void Build(IList<long> orderedList, long n, byte numLowerBits, BitmapFromBitStream H_builder)
 {
     int M = orderedList.Count;
     this.N = n;
     if (M > this.N) {
         Console.WriteLine ("XXXXX LastItem: {0}", orderedList [orderedList.Count - 1]);
         // Use the (paramName, message) overload: the single-string constructor would
         // interpret the whole text as a parameter name.
         throw new ArgumentOutOfRangeException ("n", String.Format ("SArray N < M, N: {0}, M: {1}", this.N, M));
     }
     if (numLowerBits < 1) {
         numLowerBits = 1;
     }
     // NOTE(review): numLowerBits / 32 is an integer division (0 for numLowerBits < 32);
     // presumably the BitStream32 argument is only a capacity hint -- confirm.
     this.L = new ListIFS (numLowerBits, new BitStream32 ((numLowerBits / 32) * M));
     // Creating bitmaps
     // 2^ (log N - log N / M) = 2^ \log N M / N = M.
     // NOTE(review): Math.Log is the natural logarithm here; numpart is only used to size
     // the H stream below.
     int numpart = (int)Math.Ceiling (Math.Pow (2, (Math.Ceiling (Math.Log (this.N)) - this.GetNumLowerBits ())));
     var H_stream = new BitStream32 (M + (numpart / 32 + 1));
     long mask = this.get_mask ();
     int prevblock = -1;
     for (int i = 0; i < M; i++) {
         this.L.Add ((int)(orderedList [i] & mask));
         int currentblock = (int)(orderedList [i] >> this.GetNumLowerBits ());
         if (prevblock != currentblock) {
             // one 0 per block boundary crossed since the previous item
             while (prevblock < currentblock) {
                 H_stream.Write (false);
                 prevblock++;
             }
         }
         H_stream.Write (true);
     }
     //an additional technical zero
     H_stream.Write (false, M - prevblock);
     H_stream.Write (false);
     if (H_builder == null) {
         H_builder = BitmapBuilders.GetDArray_wt(16,32);
     }
     var fb = new FakeBitmap(H_stream);
     this.H = H_builder(fb);
 }
Beispiel #15
0
 /// <summary>
 /// Returns a <c>GetSeqSinglePerm</c> sequence builder whose permutation builder is
 /// <c>PermutationBuilders.GetCyclicPermsListIDiffs</c>.
 /// </summary>
 /// <param name="t">Forwarded to <c>GetCyclicPermsListIDiffs</c>.</param>
 /// <param name="bsize">Forwarded to <c>GetCyclicPermsListIDiffs</c>.</param>
 /// <param name="bitmap_builder">Forwarded to <c>GetCyclicPermsListIDiffs</c> only.</param>
 /// <param name="encoder">Forwarded to <c>GetCyclicPermsListIDiffs</c>.</param>
 public static SequenceBuilder GetSeqSinglePermListIDiffs(short t, short bsize = 16, BitmapFromBitStream bitmap_builder = null, IIEncoder32 encoder = null)
 {
     // NOTE(review): bitmap_builder configures the permutation only; GetSeqSinglePerm itself
     // receives null and therefore uses its own default bitmap builder -- confirm intended.
     return GetSeqSinglePerm(
         PermutationBuilders.GetCyclicPermsListIDiffs(t, bsize, bitmap_builder, encoder),
         null);
 }
Beispiel #16
0
 /// <summary>
 /// Returns a sequence builder producing <c>SeqSinglePerm</c> instances.
 /// </summary>
 /// <param name="perm_builder">Permutation builder forwarded to <c>SeqSinglePerm.Build</c>;
 /// defaults to PermutationBuilders.GetCyclicPermsListIDiffs(16, 63).</param>
 /// <param name="bitmap_builder">Bitmap builder forwarded to <c>SeqSinglePerm.Build</c>;
 /// defaults to BitmapBuilders.GetGGMN_wt(8).</param>
 public static SequenceBuilder GetSeqSinglePerm(PermutationBuilder perm_builder = null, BitmapFromBitStream bitmap_builder = null)
 {
     PermutationBuilder perms = perm_builder ?? PermutationBuilders.GetCyclicPermsListIDiffs (16, 63);
     BitmapFromBitStream bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt(8);
     return (seq, sigma) => {
         var single = new SeqSinglePerm ();
         single.Build (seq, sigma, perms, bitmaps);
         return single;
     };
 }
Beispiel #17
0
 /// <summary>
 /// Returns a <c>GetSeqSinglePerm</c> sequence builder whose permutation builder is
 /// <c>PermutationBuilders.GetCyclicPermsListIFS(t)</c>.
 /// </summary>
 /// <param name="t">Forwarded to <c>GetCyclicPermsListIFS</c>.</param>
 /// <param name="bitmap_builder">Forwarded to <c>GetSeqSinglePerm</c> (null included).</param>
 public static SequenceBuilder GetSeqSinglePermIFS(short t, BitmapFromBitStream bitmap_builder = null)
 {
     return GetSeqSinglePerm(PermutationBuilders.GetCyclicPermsListIFS(t), bitmap_builder);
 }
Beispiel #18
0
 /// <summary>
 /// Returns a <c>GetSeqPlain</c> sequence builder that uses the list builder
 /// <c>ListIBuilders.GetListEqRL()</c>.
 /// </summary>
 /// <param name="B">Forwarded to <c>GetSeqPlain</c>.</param>
 /// <param name="bitmap_builder">Forwarded to <c>GetSeqPlain</c> (null included).</param>
 public static SequenceBuilder GetSeqPlainRL(short B = 0, BitmapFromBitStream bitmap_builder = null)
 {
     var runLengthLists = ListIBuilders.GetListEqRL();
     return GetSeqPlain(B, runLengthLists, bitmap_builder);
 }
Beispiel #19
0
 /// <summary>
 /// Returns a sequence builder producing <c>SeqPlain</c> instances, or
 /// <c>SeqPlainCopyOnUnravel</c> instances when <paramref name="CopyOnUnravel"/> is true.
 /// </summary>
 /// <param name="B">Forwarded to the instance's <c>Build</c>.</param>
 /// <param name="list_builder">Forwarded to the instance's <c>Build</c> (null included).</param>
 /// <param name="bitmap_builder">Forwarded to the instance's <c>Build</c> (null included).</param>
 /// <param name="CopyOnUnravel">Selects which of the two classes is instantiated.</param>
 public static SequenceBuilder GetSeqPlain(short B = 0, ListIBuilder list_builder = null, BitmapFromBitStream bitmap_builder = null, bool CopyOnUnravel = false)
 {
     return (seq, sigma) => {
         if (!CopyOnUnravel) {
             var plain = new SeqPlain();
             plain.Build(seq, sigma, B, list_builder, bitmap_builder);
             return plain;
         }
         var copying = new SeqPlainCopyOnUnravel();
         copying.Build(seq, sigma, B, list_builder, bitmap_builder);
         return copying;
     };
 }
Beispiel #20
0
 /// <summary>
 /// Returns a list-based bitmap builder producing <c>SArray</c> instances; the list is
 /// passed to <c>SArray.Build</c> together with 0 as second argument.
 /// </summary>
 /// <param name="H_builder">Optional bitmap builder forwarded to <c>SArray.Build</c>.</param>
 public static BitmapFromList GetSArray(BitmapFromBitStream H_builder = null)
 {
     return list => {
         var sarray = new SArray ();
         sarray.Build (list, 0, H_builder);
         return sarray;
     };
 }
Beispiel #21
0
 /// <summary>
 /// Builds the structure, deriving the number of lower bits from <c>Log_N_over_M</c>
 /// (raised to 1 when it yields 0) and delegating to the four-argument <c>Build</c>.
 /// </summary>
 /// <param name="orderedList">Items to store.</param>
 /// <param name="n">Size of the universe; when 0 and the list is not empty, the last item
 /// plus one is used instead.</param>
 /// <param name="H_builder">Optional bitmap builder, forwarded unchanged.</param>
 public void Build(IList<long> orderedList, long n = 0, BitmapFromBitStream H_builder = null)
 {
     int count = orderedList.Count;
     long universe = n;
     if (universe == 0 && count > 0) {
         universe = orderedList[count - 1] + 1;
     }
     byte lowerBits = Log_N_over_M(universe, count);
     if (lowerBits == 0) {
         lowerBits = 1;
     }
     this.Build(orderedList, universe, lowerBits, H_builder);
 }
Beispiel #22
0
        /// <summary>
        /// Returns a sequence builder producing <c>WaveletTree</c> instances.
        /// </summary>
        /// <param name="bitmap_builder">Assigned to the tree's <c>BitmapBuilder</c>;
        /// defaults to BitmapBuilders.GetGGMN_wt(16).</param>
        /// <param name="get_coder">Maps the alphabet size to the encoder to use; when null, a
        /// <c>BinaryCoding</c> with ceil(log2(sigma)) bits is used.</param>
        public static SequenceBuilder GetWT(
            BitmapFromBitStream bitmap_builder = null,
            Func<int, IIEncoder32> get_coder = null
        )
        {
            BitmapFromBitStream bitmaps = bitmap_builder ?? BitmapBuilders.GetGGMN_wt(16);
            return (seq, sigma) => {
                var tree = new WaveletTree { BitmapBuilder = bitmaps };
                IIEncoder32 coder = get_coder != null
                    ? get_coder (sigma)
                    : new BinaryCoding ((int)Math.Ceiling (Math.Log (sigma, 2)));
                tree.Build (seq, sigma, coder);
                return tree;
            };
        }
Beispiel #23
0
 /// <summary>
 /// Builds the structure from a plain bit stream: the positions of its set bits are
 /// collected and passed, with the stream's <c>CountBits</c>, to the list-based <c>Build</c>.
 /// </summary>
 /// <param name="bitmap">Source bit stream.</param>
 /// <param name="H_builder">Bitmap builder, forwarded unchanged.</param>
 public void Build(BitStream32 bitmap, BitmapFromBitStream H_builder)
 {
     IList<long> ones = new List<long> ();
     for (int position = 0; position < bitmap.CountBits; position++) {
         if (!bitmap[position]) {
             continue;
         }
         ones.Add (position);
     }
     this.Build (ones, bitmap.CountBits, H_builder);
 }
Beispiel #24
0
 /// <summary>
 /// Returns the sequence builder obtained from <c>GetWT</c> when only the bitmap builder
 /// is supplied.
 /// </summary>
 /// <param name="bitmap_builder">Forwarded to <c>GetWT</c> (null included).</param>
 public static SequenceBuilder GetWT_BinaryCoding(BitmapFromBitStream bitmap_builder = null)
 {
     SequenceBuilder builder = GetWT(bitmap_builder);
     return builder;
 }
Beispiel #25
0
        /// <summary>
        /// Returns a sequence builder producing <c>WaveletTree</c> instances whose encoder is
        /// obtained from <paramref name="get_coder"/>.
        /// </summary>
        /// <param name="bitmap_builder">Assigned to the tree's <c>BitmapBuilder</c>.</param>
        /// <param name="get_coder">Maps the alphabet size to the encoder to use.</param>
        public static SequenceBuilder GetWT(
            BitmapFromBitStream bitmap_builder,
            Func<int, IIEncoder32> get_coder
        )
        {
            return (seq, sigma) => {
                var tree = new WaveletTree { BitmapBuilder = bitmap_builder };
                tree.Build (get_coder (sigma), sigma, seq);
                return tree;
            };
        }
Beispiel #26
0
 /// <summary>
 /// Returns a sequence builder producing <c>WaveletTree</c> instances encoded with a
 /// <c>BinaryCoding</c> of ceil(log2(sigma)) bits.
 /// </summary>
 /// <param name="bitmap_builder">Assigned to the tree's <c>BitmapBuilder</c>.</param>
 public static SequenceBuilder GetWT_BinaryCoding(BitmapFromBitStream bitmap_builder)
 {
     return (seq, sigma) => {
         var tree = new WaveletTree { BitmapBuilder = bitmap_builder };
         var coder = new BinaryCoding ((int)Math.Ceiling (Math.Log (sigma, 2)));
         tree.Build (coder, sigma, seq);
         return tree;
     };
 }
Beispiel #27
0
 /// <summary>
 /// Returns a <c>GetSeqXLB</c> sequence builder whose bitmaps are built with
 /// <c>BitmapBuilders.GetSArray64</c>.
 /// </summary>
 /// <param name="t">Forwarded to <c>GetSeqXLB</c>.</param>
 /// <param name="H_builder">Forwarded to <c>GetSArray64</c> (null included).</param>
 public static SequenceBuilder GetSeqXLB_SArray64(short t = 16, BitmapFromBitStream H_builder = null)
 {
     var rowBuilder = BitmapBuilders.GetSArray64 (H_builder);
     return GetSeqXLB (t, rowBuilder);
 }
Beispiel #28
0
 // 64 bit bitmaps
 /// <summary>
 /// Returns a 64-bit list-based bitmap builder producing <c>SArray64</c> instances.
 /// </summary>
 /// <param name="H_builder">Optional bitmap builder forwarded to <c>SArray64.Build</c>.</param>
 public static BitmapFromList64 GetSArray64(BitmapFromBitStream H_builder = null)
 {
     return (list, n) => {
         var sarray = new SArray64 ();
         sarray.Build (list, n, H_builder);
         return sarray;
     };
 }