Ejemplo n.º 1
0
 /// <summary>
 /// Writes "seq" to "Output", preceded by a one-byte identifier of its type.
 /// </summary>
 /// <remarks>
 /// The identifier is the position of the runtime type of "seq" inside Catalog.
 /// The value 255 is reserved as the "not found" marker, so only catalog positions
 /// 0..254 can be written.
 /// </remarks>
 /// <param name="Output">Destination writer.</param>
 /// <param name="seq">Sequence to store; its exact runtime type must be listed in Catalog.</param>
 /// <exception cref="ArgumentException">The type of "seq" has no usable entry in Catalog.</exception>
 public static void Save(BinaryWriter Output, IRankSelectSeq seq)
 {
     const byte NotFound = 255;
     var type = seq.GetType ();
     byte idType = NotFound;
     // Use an int counter capped below the sentinel: a byte counter wraps from 255 back to 0,
     // so the search would never terminate once the catalog holds 256 or more entries.
     int limit = Math.Min (Catalog.Count, NotFound);
     for (int i = 0; i < limit; i++) {
         if (type == Catalog [i]) {
             idType = (byte)i;
             break;
         }
     }
     if (idType == NotFound) {
         var s = String.Format ("Type {0} is not a recognized indexed sequence, please add it to " +
             "RankSelectSeqGenericIO.Catalog", type);
         throw new ArgumentException (s);
     }
     Output.Write (idType);
     seq.Save (Output);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Decodes "seq" into a plain array holding one symbol per position.
 /// </summary>
 /// <param name="seq">Sequence to decode.</param>
 /// <param name="use_access_based_copy">
 /// When true, every position is read with Access. Otherwise each symbol in [0, Sigma)
 /// is unraveled and written at every position returned by Select1.
 /// </param>
 /// <returns>A new array with seq.Count entries.</returns>
 public static int[] ToIntArray(IRankSelectSeq seq, bool use_access_based_copy)
 {
     var output = new int[seq.Count];
     if (!use_access_based_copy) {
         // Symbol-by-symbol copy: ask where each symbol occurs and fill those slots.
         for (int symbol = 0; symbol < seq.Sigma; ++symbol) {
             var positions = seq.Unravel (symbol);
             var occurrences = positions.Count1;
             for (int rank = 1; rank <= occurrences; ++rank) {
                 var where = positions.Select1 (rank);
                 output [where] = symbol;
             }
         }
         return output;
     }
     // Position-by-position copy.
     for (int pos = 0; pos < seq.Count; ++pos) {
         output [pos] = seq.Access (pos);
     }
     return output;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Reads this object from "Input": first Seq, then RecSep, then a
 /// length-prefixed list of strings that becomes Voc.
 /// </summary>
 /// <param name="Input">Reader positioned at the start of the stored data.</param>
 public void Load(BinaryReader Input)
 {
     this.Seq = RankSelectSeqGenericIO.Load (Input);
     this.RecSep = Input.ReadInt32 ();
     int vocSize = Input.ReadInt32 ();
     this.Voc = new string[vocSize];
     int slot = 0;
     while (slot < vocSize) {
         this.Voc [slot] = Input.ReadString ();
         ++slot;
     }
 }
Ejemplo n.º 4
0
Archivo: LSC.cs Proyecto: vfaby/natix
        /// <summary>
        /// Builds the index over "db": draws "sampleSize" distinct random values into H (sorted),
        /// hashes every item with ComputeHash and indexes the resulting sequence of hashes.
        /// </summary>
        /// <param name="db">Database to index; stored in DB.</param>
        /// <param name="sampleSize">Number of distinct values drawn into H; at most 65535.</param>
        /// <param name="seq_builder">Builder of the final sequence index; when null, SequenceBuilders.GetSeqXLB_SArray64 (16) is used.</param>
        /// <param name="get_item">Optional accessor returning the object to hash for a docid; when null, DB [docid] is hashed.</param>
        /// <exception cref="ArgumentOutOfRangeException">"sampleSize" is larger than the number of distinct values that can be drawn.</exception>
        public virtual void Build(MetricDB db, int sampleSize,
                                   SequenceBuilder seq_builder = null, Func<int,object> get_item = null)
        {
            // Values are drawn from [0, ushort.MaxValue), so only ushort.MaxValue distinct ones exist;
            // asking for more would make the sampling loop below spin forever.
            if (sampleSize > ushort.MaxValue) {
                throw new ArgumentOutOfRangeException ("sampleSize", sampleSize,
                    "sampleSize cannot exceed " + ushort.MaxValue);
            }
            this.DB = db;
            if (seq_builder == null) {
                seq_builder = SequenceBuilders.GetSeqXLB_SArray64 (16);
            }
            this.H = new ushort[sampleSize];
            Random rand = new Random ();
            {
                // Rejection sampling: keep drawing until sampleSize distinct values were collected.
                HashSet<int> _coordinates = new HashSet<int> ();
                int i = 0;
                while (_coordinates.Count < sampleSize) {
                    var p = (ushort)(rand.Next () % ushort.MaxValue);
                    if (_coordinates.Add (p)) {
                        this.H [i] = p;
                        ++i;
                    }
                }
                Array.Sort (this.H);
            }
            int len = this.DB.Count;
            int pc = len / 100 + 1; // progress is reported roughly once per percent
            int numbits = sampleSize > 32 ? 32 : sampleSize;
            var seq = new ListIFS (numbits);
            for (int docid = 0; docid < len; docid++) {
                if (docid % pc == 0) {
                    Console.WriteLine ("Advance: {0:0.00}%, docid: {1}, total: {2}", docid * 100.0 / len, docid, len);
                }
                int hash;
                if (get_item == null) {
                    hash = this.ComputeHash (this.DB [docid]);
                } else {
                    hash = this.ComputeHash (get_item (docid));
                }
                seq.Add (hash);
            }
            Console.WriteLine ("*** Creating index of sequences");
            // NOTE(review): for int operands the shift count is masked to 5 bits, so with numbits == 32
            // the expression below evaluates to 1, and with numbits == 31 it is negative.
            // Confirm what seq_builder expects as alphabet size when sampleSize >= 31.
            this.Seq = seq_builder (seq, 1 << numbits);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a column over "seq" and "voc"; RecSep receives the value
 /// produced by GetWordId for "recsep".
 /// </summary>
 /// <param name="seq">Value stored in Seq.</param>
 /// <param name="voc">Value stored in Voc.</param>
 /// <param name="recsep">Word passed to GetWordId to obtain RecSep.</param>
 public Column(IRankSelectSeq seq, IList<string> voc, string recsep)
 {
     Seq = seq;
     Voc = voc;
     // Seq and Voc are assigned before the call, as GetWordId is an instance method.
     GetWordId (recsep, out RecSep);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the index from the files "sa_name".structs, "sa_name".psi and "sa_name".samples.
        /// </summary>
        /// <param name="sa_name">Common path prefix of the three input files.</param>
        /// <param name="seq_builder">Builder of SeqPsi; when null, SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (16, 63) is used.</param>
        /// <param name="bitmap_builder">When not null, newF is re-encoded with this builder right after being loaded.</param>
        public void Build(string sa_name, SequenceBuilder seq_builder = null, BitmapFromBitStream bitmap_builder = null)
        {
            if (seq_builder == null) {
                seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64(16, 63);
            }
            // .structs: the newF bitmap followed by one int per set bit (charT)
            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
                this.newF = RankSelectGenericIO.Load (Input);
                if (bitmap_builder != null) {
                    // Copy newF bit by bit so the requested builder can re-encode it
                    var newF_stream = new BitStream32();
                    for (int i = 0; i < this.newF.Count; ++i) {
                        newF_stream.Write (this.newF.Access(i));
                    }
                    this.newF = bitmap_builder(new FakeBitmap(newF_stream));
                }
                int len = this.newF.Count1;
                this.charT = new int[len];
                PrimitiveIO<int>.ReadFromFile (Input, len, this.charT);
            }
            // .psi: for the i-th symbol, (next - curr) ints are read; each one is a position
            // of seq that receives the value i - 1
            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".psi"))) {
                int seqlen = this.newF.Count;
                var seq = new int[seqlen];
                var L = new List<int>(this.N/this.Sigma + 1);
                int curr = 0;
                for (int i = 1; i <= this.AlphabetSize; i++) {
                    int next;
                    if (i == this.AlphabetSize) {
                        // the last symbol extends up to the end of newF
                        next = this.newF.Count;
                    } else {
                        next = this.newF.Select1 (i + 1);
                    }
                    int len = next - curr;
                    L.Clear();
                    PrimitiveIO<int>.ReadFromFile (Input, len, L);
                    for (int j = 0; j < len; ++j) {
                        var x = L[j];
                        try {
                            seq[ x ] = i - 1;
                        } catch (Exception) {
                            Console.WriteLine ("== i: {0}, j: {1}, x: {2}, seq-count: {3}, len: {4}",
                                               i, j, x, seq.Length, len);
                            // rethrow without resetting the original stack trace
                            throw;
                        }
                    }
                    curr = next;
                }
                this.SeqPsi = seq_builder(seq, this.AlphabetSize);
            }

            // .samples: sample step, marked bitmap, samples and inverse samples
            using (var Input = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
                this.SA_sample_step = Input.ReadInt16 ();
                this.SA_marked = RankSelectGenericIO.Load (Input);
                var _samples = new ListIFS ();
                _samples.Load (Input);
                var _invsamples = new ListIFS ();
                _invsamples.Load (Input);
                this.SA_samples = _samples;
                this.SA_invsamples = _invsamples;
            }
        }
Ejemplo n.º 7
0
Archivo: LSC.cs Proyecto: vfaby/natix
 /// <summary>
 /// Restores the index from "Input": the base class state first, then the
 /// length-prefixed H array and finally Seq.
 /// </summary>
 /// <param name="Input">Reader positioned at the start of the stored index.</param>
 public override void Load(BinaryReader Input)
 {
     base.Load (Input);
     int sampleCount = Input.ReadInt32 ();
     this.H = new ushort[sampleCount];
     PrimitiveIO<ushort>.ReadFromFile (Input, this.H.Length, this.H);
     this.Seq = RankSelectSeqGenericIO.Load (Input);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Builds this index from an existing "lc": COV and DB are shared, CENTERS is copied,
 /// and SEQ is rebuilt from the raw sequence of "lc" using "seq_builder".
 /// </summary>
 /// <param name="lc">Source index.</param>
 /// <param name="seq_builder">
 /// Builder of SEQ; when null, SequenceBuilders.GetIISeq over
 /// BitmapBuilders.GetPlainSortedList () is used.
 /// </param>
 public virtual void Build(LC_RNN lc, SequenceBuilder seq_builder = null)
 {
     this.COV = lc.COV;
     this.DB = lc.DB;
     this.CENTERS = new List<int> (lc.CENTERS);
     var raw = lc.SEQ.GetRawSeq ();
     SequenceBuilder builder = seq_builder ?? SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ());
     this.SEQ = builder (raw, lc.SEQ.Sigma);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Loads the index from the files "basename".idx, "basename".psi and "basename".samples.
 /// </summary>
 /// <param name="basename">Common path prefix of the three files.</param>
 public void Load(string basename)
 {
     // .idx: the newF bitmap followed by one int per set bit (charT)
     using (var reader = new BinaryReader (File.OpenRead (basename + ".idx"))) {
         this.newF = RankSelectGenericIO.Load (reader);
         int symbols = this.newF.Count1;
         this.charT = new int[symbols];
         PrimitiveIO<int>.ReadFromFile (reader, this.charT.Length, this.charT);
     }
     // .psi: the indexed sequence
     using (var reader = new BinaryReader (File.OpenRead (basename + ".psi"))) {
         this.SeqPsi = RankSelectSeqGenericIO.Load (reader);
     }
     // .samples: sample step, marked bitmap, samples and inverse samples
     using (var reader = new BinaryReader (File.OpenRead (basename + ".samples"))) {
         this.SA_sample_step = reader.ReadInt16 ();
         this.SA_marked = RankSelectGenericIO.Load (reader);
         var direct = new ListIFS ();
         direct.Load (reader);
         var inverse = new ListIFS ();
         inverse.Load (reader);
         this.SA_samples = direct;
         this.SA_invsamples = inverse;
     }
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Fills "seq_lc" with one entry per database item and builds SEQ from it.
 /// Items flagged in "IsCenter" receive the value CENTERS.Count; every other item receives
 /// the center reported by BuildSearchNN, whose COV entry is raised when the item is farther away.
 /// </summary>
 /// <param name="IsCenter">Flags, indexed by docid, telling which items are centers.</param>
 /// <param name="seq_lc">Output array, indexed by docid.</param>
 /// <param name="seq_builder">
 /// Builder of SEQ; when null, SequenceBuilders.GetIISeq over
 /// BitmapBuilders.GetPlainSortedList () is used.
 /// </param>
 public virtual void BuildInternal(BitStream32 IsCenter, int[] seq_lc, SequenceBuilder seq_builder)
 {
     int total = this.DB.Count;
     int reportEvery = total / 100 + 1;
     for (int docid = 0; docid < total; docid++) {
         if (docid % reportEvery == 0) {
             Console.WriteLine ("docid {0} of {1}, advance {2:0.00}%, timestamp: {3}", docid, total, docid * 100.0 / total, DateTime.Now);
         }
         if (!IsCenter [docid]) {
             int nearest;
             double distance;
             this.BuildSearchNN (this.DB [docid], out nearest, out distance);
             seq_lc [docid] = nearest;
             // keep the largest distance seen for each center
             if (this.COV [nearest] < distance) {
                 this.COV [nearest] = (float)distance;
             }
         } else {
             // centers share one extra symbol, placed after all center ids
             seq_lc [docid] = this.CENTERS.Count;
         }
     }
     SequenceBuilder builder = seq_builder ?? SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ());
     this.SEQ = builder (seq_lc, this.CENTERS.Count + 1);
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Restores the index from "Input": the base class state, the length-prefixed COV array and SEQ.
 /// CENTERS is not read from the stream; it is rebuilt from the occurrences of the
 /// last symbol (Sigma - 1) of SEQ.
 /// </summary>
 /// <param name="Input">Reader positioned at the start of the stored index.</param>
 public override void Load(BinaryReader Input)
 {
     base.Load (Input);
     int m = Input.ReadInt32();
     // No placeholder array is allocated for CENTERS: it is assigned once, below,
     // before anything reads it.
     this.COV = new float[m];
     PrimitiveIO<float>.ReadFromFile(Input, m, this.COV);
     this.SEQ = RankSelectSeqGenericIO.Load(Input);
     var L = new SortedListRSCache(this.SEQ.Unravel(this.SEQ.Sigma - 1));
     this.CENTERS = new List<int>(L);
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Builds the index from the files "sa_name".structs, "sa_name".bwt and "sa_name".samples.
 /// </summary>
 /// <param name="sa_name">Common path prefix of the three input files.</param>
 public void Build(string sa_name)
 {
     // .structs: the newF bitmap followed by one int per set bit (charT)
     using (var reader = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
         this.newF = RankSelectGenericIO.Load (reader);
         int symbols = this.newF.Count1;
         this.charT = new int[symbols];
         PrimitiveIO<int>.ReadFromFile (reader, symbols, this.charT);
     }
     // .bwt: the list that SeqBuilder turns into seqIndex
     using (var reader = new BinaryReader (File.OpenRead (sa_name + ".bwt"))) {
         var bwt = new ListIFS ();
         bwt.Load (reader);
         this.seqIndex = this.SeqBuilder (bwt, this.charT.Length);
     }
     // .samples: sample step, marked bitmap, samples and inverse samples
     using (var reader = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
         this.SA_sample_step = reader.ReadInt16 ();
         this.SA_marked = RankSelectGenericIO.Load (reader);
         var direct = new ListIFS ();
         direct.Load (reader);
         var inverse = new ListIFS ();
         inverse.Load (reader);
         this.SA_samples = direct;
         this.SA_invsamples = inverse;
     }
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Loads the index from four files sharing the prefix "basename":
 /// .structs, .bwt-index, .structs-samples and .samples.
 /// </summary>
 /// <param name="basename">Common path prefix of the four files.</param>
 public void Load(string basename)
 {
     // .structs: the newF bitmap followed by one int per set bit (charT)
     using (var reader = new BinaryReader (File.OpenRead (basename + ".structs"))) {
         this.newF = RankSelectGenericIO.Load (reader);
         int symbols = this.newF.Count1;
         this.charT = new int[symbols];
         PrimitiveIO<int>.ReadFromFile (reader, this.charT.Length, this.charT);
     }
     // .bwt-index: the indexed sequence
     using (var reader = new BinaryReader (File.OpenRead (basename + ".bwt-index"))) {
         this.seqIndex = RankSelectSeqGenericIO.Load (reader);
     }
     // .structs-samples: sample step and marked bitmap
     using (var reader = new BinaryReader (File.OpenRead (basename + ".structs-samples"))) {
         this.SA_sample_step = reader.ReadInt16 ();
         this.SA_marked = RankSelectGenericIO.Load (reader);
     }
     // .samples: samples and inverse samples
     using (var reader = new BinaryReader (File.OpenRead (basename + ".samples"))) {
         var direct = new ListIFS ();
         direct.Load (reader);
         var inverse = new ListIFS ();
         inverse.Load (reader);
         this.SA_samples = direct;
         this.SA_invsamples = inverse;
     }
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Builds the graph from a text file with one link per line: the start node and the end node,
 /// separated by tabs or spaces. Lines starting with '#' and blank lines are ignored.
 /// </summary>
 /// <remarks>
 /// SEQ receives every end node in file order. LENS receives a bitmap with one set bit per start
 /// node id advanced and one cleared bit per link, followed by a final set bit.
 /// Start nodes are expected to appear already sorted.
 /// </remarks>
 /// <param name="filename">Path of the text file to read.</param>
 /// <param name="seqbuilder">Builder of SEQ.</param>
 /// <param name="bitmapbuilder">Builder of LENS; when null, BitmapBuilders.GetGGMN_wt (12) is used.</param>
 /// <exception cref="FormatException">A node id is not a valid integer.</exception>
 public void BuildWebGraph(string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
 {
     if (bitmapbuilder == null) {
         bitmapbuilder = BitmapBuilders.GetGGMN_wt (12);
     }
     var separators = new char[] { '\t', ' ' };
     // The file is machine-readable data: parse numbers independently of the current culture.
     var invariant = System.Globalization.CultureInfo.InvariantCulture;
     var len_stream = new BitStream32 ();
     var seq = new List<int> ();
     int prev_context = -1;
     using (var Input = File.OpenText (filename)) {
         string line;
         int lineno = 0;
         int counterlineno = 0;
         while (true) {
             // progress report: one entry every 10000 lines, ten entries per output row
             if (lineno % 10000 == 0) {
                 if (counterlineno % 10 == 0) {
                     Console.WriteLine ();
                     Console.Write ("Processing lines: ");
                 }
                 ++counterlineno;
                 Console.Write ("{0}, ", lineno);
             }
             ++lineno;
             line = Input.ReadLine ();
             if (line == null) {
                 break;
             }
             // ordinal comparison: the comment marker is a literal character, not linguistic text
             if (line.StartsWith ("#", StringComparison.Ordinal)) {
                 continue;
             }
             // dropping empty entries tolerates repeated or leading separators
             var link = line.Split (separators, StringSplitOptions.RemoveEmptyEntries);
             if (link.Length == 0) {
                 // blank line (e.g. a trailing newline at the end of the file)
                 continue;
             }
             var start_node = int.Parse (link [0], invariant);
             var end_node = int.Parse (link [1], invariant);
             // on webgraph format, starting nodes are already sorted, just advance and count
             if (start_node != prev_context) {
                 for (int diffcount = start_node - prev_context; diffcount > 0; --diffcount) {
                     len_stream.Write (true);
                 }
                 prev_context = start_node;
             }
             len_stream.Write (false);
             seq.Add (end_node);
         }
         // a simple hack simplifying  direct-neighbors's retrieval
         len_stream.Write (true);
     }
     // NOTE(review): the alphabet size is derived from the last start node only; an end node larger
     // than every start node would fall outside [0, prev_context] -- confirm the input rules this out.
     this.SEQ = seqbuilder (seq, prev_context + 1);
     this.LENS = bitmapbuilder (new FakeBitmap (len_stream));
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Reads LENS and then SEQ, in that order, from "Input".
 /// </summary>
 /// <param name="Input">Reader positioned at the start of the stored data.</param>
 public void Load(BinaryReader Input)
 {
     LENS = RankSelectGenericIO.Load (Input);
     SEQ = RankSelectSeqGenericIO.Load (Input);
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Creates the unraveled view of symbol "_symbol" over the sequence "_seqindex".
 /// </summary>
 /// <param name="_seqindex">Sequence that backs this view.</param>
 /// <param name="_symbol">Symbol this view refers to.</param>
 public UnraveledSymbol(IRankSelectSeq _seqindex, int _symbol)
 {
     seqindex = _seqindex;
     symbol = _symbol;
 }