/// <summary>
/// Saves "seq" to the "Output": first a single byte holding the position of the
/// runtime type of "seq" inside Catalog, then the payload written by seq.Save.
/// </summary>
/// <param name="Output">Writer receiving the type id and the sequence data.</param>
/// <param name="seq">Sequence to be saved; its runtime type must be listed in Catalog.</param>
/// <exception cref="ArgumentException">
/// The runtime type of "seq" is not found among the usable entries of Catalog.
/// </exception>
public static void Save(BinaryWriter Output, IRankSelectSeq seq)
{
    // 255 is reserved as the "not found" marker, hence only the ids 0..254 are usable.
    const int NotFound = 255;
    var type = seq.GetType ();
    int idType = NotFound;
    // An int counter is used on purpose: a byte counter wraps around at 256, so the
    // search would never terminate for a catalog holding more than 255 entries.
    int limit = Math.Min (Catalog.Count, NotFound);
    for (int i = 0; i < limit; i++) {
        if (type == Catalog [i]) {
            idType = i;
            break;
        }
    }
    if (idType == NotFound) {
        var s = String.Format ("Type {0} is not a recognized indexed sequence, please add it to " +
            "RankSelectSeqGenericIO.Catalog", type);
        throw new ArgumentException (s);
    }
    Output.Write ((byte)idType);
    seq.Save (Output);
}
/// <summary>
/// Materializes "seq" as a plain array holding seq.Count integers.
/// </summary>
/// <param name="seq">The sequence to be copied.</param>
/// <param name="use_access_based_copy">
/// When true every position is fetched with seq.Access. When false the array is filled
/// symbol by symbol: for each symbol below seq.Sigma, the positions reported by
/// Select1 on seq.Unravel(symbol) receive that symbol.
/// </param>
/// <returns>A freshly allocated array with the content of the sequence.</returns>
public static int[] ToIntArray(IRankSelectSeq seq, bool use_access_based_copy)
{
    var output = new int[seq.Count];
    if (!use_access_based_copy) {
        for (int symbol = 0; symbol < seq.Sigma; ++symbol) {
            var occurrences = seq.Unravel (symbol);
            var total = occurrences.Count1;
            // Select1 is queried with ranks starting at one.
            int rank = 1;
            while (rank <= total) {
                var position = occurrences.Select1 (rank);
                output [position] = symbol;
                ++rank;
            }
        }
        return output;
    }
    for (int position = 0; position < seq.Count; ++position) {
        output [position] = seq.Access (position);
    }
    return output;
}
/// <summary>
/// Restores the object from "Input". The stream is consumed in this order: the sequence
/// (through RankSelectSeqGenericIO.Load), the 32-bit RecSep value, the 32-bit number of
/// vocabulary entries, and finally one string per vocabulary entry.
/// </summary>
/// <param name="Input">Reader positioned at the beginning of the stored data.</param>
public void Load(BinaryReader Input)
{
    this.Seq = RankSelectSeqGenericIO.Load (Input);
    this.RecSep = Input.ReadInt32 ();
    var words = new string[Input.ReadInt32 ()];
    // The array is published before being filled, entries are read in place afterwards.
    this.Voc = words;
    int next = 0;
    while (next < words.Length) {
        words [next] = Input.ReadString ();
        ++next;
    }
}
/// <summary>
/// Builds the index over "db": draws "sampleSize" distinct random coordinates into H
/// (kept sorted), computes one hash per database item with ComputeHash, and indexes the
/// resulting list of hashes with "seq_builder".
/// </summary>
/// <param name="db">The database to be indexed; it is stored in DB.</param>
/// <param name="sampleSize">Number of distinct coordinates to draw, at most ushort.MaxValue.</param>
/// <param name="seq_builder">
/// Builder for the sequence index; SequenceBuilders.GetSeqXLB_SArray64 (16) is used when null.
/// </param>
/// <param name="get_item">
/// Optional accessor producing the object to be hashed for a given docid; when null the
/// item is taken directly from DB.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// "sampleSize" is larger than the number of distinct coordinates that can be drawn.
/// </exception>
public virtual void Build(MetricDB db, int sampleSize, SequenceBuilder seq_builder = null, Func<int,object> get_item = null)
{
    // The sampling below draws values in the range 0 .. ushort.MaxValue - 1, so asking for
    // more distinct values than that would keep the loop spinning forever.
    if (sampleSize > ushort.MaxValue) {
        var msg = String.Format ("sampleSize must be at most {0}", ushort.MaxValue);
        throw new ArgumentOutOfRangeException ("sampleSize", sampleSize, msg);
    }
    this.DB = db;
    if (seq_builder == null) {
        seq_builder = SequenceBuilders.GetSeqXLB_SArray64 (16);
    }
    this.H = new ushort[sampleSize];
    Random rand = new Random ();
    {
        // Rejection sampling: a coordinate is accepted only the first time it shows up.
        HashSet<int> _coordinates = new HashSet<int> ();
        int i = 0;
        while (_coordinates.Count < sampleSize) {
            var p = (ushort)(rand.Next () % ushort.MaxValue);
            if (_coordinates.Add (p)) {
                this.H [i] = p;
                ++i;
            }
        }
        Array.Sort (this.H);
    }
    int len = this.DB.Count;
    // Progress is reported roughly once per percent of the database.
    int pc = len / 100 + 1;
    int numbits = sampleSize > 32 ? 32 : sampleSize;
    var seq = new ListIFS (numbits);
    for (int docid = 0; docid < len; docid++) {
        if (docid % pc == 0) {
            Console.WriteLine ("Advance: {0:0.00}%, docid: {1}, total: {2}", docid * 100.0 / len, docid, len);
        }
        int hash;
        if (get_item == null) {
            hash = this.ComputeHash (this.DB [docid]);
        } else {
            hash = this.ComputeHash (get_item (docid));
        }
        seq.Add (hash);
    }
    Console.WriteLine ("*** Creating index of sequences");
    // NOTE(review): when numbits is 32 the expression 1 << numbits evaluates to 1, because
    // int shifts only use the low five bits of the shift count. Confirm whether numbits
    // should be capped below 32 or the alphabet size computed differently.
    this.Seq = seq_builder (seq, 1 << numbits);
}
/// <summary>
/// Creates a column from an already built sequence and its vocabulary.
/// </summary>
/// <param name="seq">The indexed sequence, stored in Seq.</param>
/// <param name="voc">The vocabulary, stored in Voc.</param>
/// <param name="recsep">
/// Record separator word; GetWordId is asked for its id and the answer is written
/// directly into RecSep.
/// </param>
public Column(IRankSelectSeq seq, IList<string> voc, string recsep)
{
    Voc = voc;
    Seq = seq;
    // Both members are assigned before the lookup runs, as GetWordId is an instance method.
    GetWordId (recsep, out RecSep);
}
/// <summary>
/// Builds the index from the files "sa_name.structs", "sa_name.psi" and "sa_name.samples".
/// </summary>
/// <param name="sa_name">Common prefix (path without extension) of the three input files.</param>
/// <param name="seq_builder">
/// Builder for the psi sequence; SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (16, 63) is used when null.
/// </param>
/// <param name="bitmap_builder">
/// Optional builder used to re-encode the newF bitmap; when null the bitmap is kept as loaded.
/// </param>
public void Build(string sa_name, SequenceBuilder seq_builder = null, BitmapFromBitStream bitmap_builder = null)
{
    if (seq_builder == null) {
        seq_builder = SequenceBuilders.GetSeqXLB_DiffSetRL2_64 (16, 63);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
        this.newF = RankSelectGenericIO.Load (Input);
        if (bitmap_builder != null) {
            // Copy the loaded bitmap bit by bit and rebuild it with the requested encoding.
            var newF_stream = new BitStream32 ();
            for (int i = 0; i < this.newF.Count; ++i) {
                newF_stream.Write (this.newF.Access (i));
            }
            this.newF = bitmap_builder (new FakeBitmap (newF_stream));
        }
        // charT holds one entry per enabled bit of newF.
        int len = this.newF.Count1;
        this.charT = new int[len];
        PrimitiveIO<int>.ReadFromFile (Input, len, this.charT);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".psi"))) {
        int seqlen = this.newF.Count;
        var seq = new int[seqlen];
        var L = new List<int> (this.N / this.Sigma + 1);
        int curr = 0;
        for (int i = 1; i <= this.AlphabetSize; i++) {
            // The block of the i-th symbol ends where the (i+1)-th enabled bit of newF
            // is located, or at the end of the bitmap for the last symbol.
            int next;
            if (i == this.AlphabetSize) {
                next = this.newF.Count;
            } else {
                next = this.newF.Select1 (i + 1);
            }
            int len = next - curr;
            L.Clear ();
            PrimitiveIO<int>.ReadFromFile (Input, len, L);
            for (int j = 0; j < len; ++j) {
                var x = L [j];
                try {
                    seq [x] = i - 1;
                } catch (Exception) {
                    Console.WriteLine ("== i: {0}, j: {1}, x: {2}, seq-count: {3}, len: {4}",
                        i, j, x, seq.Length, len);
                    // Plain rethrow keeps the original stack trace of the failure.
                    throw;
                }
            }
            curr = next;
        }
        this.SeqPsi = seq_builder (seq, this.AlphabetSize);
    }
    using (var Input = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
        this.SA_sample_step = Input.ReadInt16 ();
        this.SA_marked = RankSelectGenericIO.Load (Input);
        var _samples = new ListIFS ();
        _samples.Load (Input);
        var _invsamples = new ListIFS ();
        _invsamples.Load (Input);
        this.SA_samples = _samples;
        this.SA_invsamples = _invsamples;
    }
}
/// <summary>
/// Restores the object from "Input": the base class data comes first, then a 32-bit
/// count, that many ushort values for H, and finally the sequence index.
/// </summary>
/// <param name="Input">Reader positioned at the beginning of the stored data.</param>
public override void Load(BinaryReader Input)
{
    base.Load (Input);
    this.H = new ushort[Input.ReadInt32 ()];
    // The array was sized with the stored count, so its length is the number of items to read.
    PrimitiveIO<ushort>.ReadFromFile (Input, this.H.Length, this.H);
    this.Seq = RankSelectSeqGenericIO.Load (Input);
}
/// <summary>
/// Builds this index out of an existing LC_RNN: COV and DB are shared with "lc",
/// CENTERS is copied into a new list, and the raw sequence of "lc" is indexed again
/// with "seq_builder".
/// </summary>
/// <param name="lc">The source index.</param>
/// <param name="seq_builder">
/// Builder for SEQ; when null,
/// SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ()) is used.
/// </param>
public virtual void Build(LC_RNN lc, SequenceBuilder seq_builder = null)
{
    COV = lc.COV;
    DB = lc.DB;
    CENTERS = new List<int> (lc.CENTERS);
    var raw = lc.SEQ.GetRawSeq ();
    var builder = seq_builder ?? SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ());
    SEQ = builder (raw, lc.SEQ.Sigma);
}
/// <summary>
/// Loads a previously saved index from the files "basename.idx", "basename.psi" and
/// "basename.samples".
/// </summary>
/// <param name="basename">Common prefix (path without extension) of the three files.</param>
public void Load(string basename)
{
    // basename.idx: the newF bitmap followed by one charT entry per enabled bit.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".idx"))) {
        this.newF = RankSelectGenericIO.Load (reader);
        int count = this.newF.Count1;
        this.charT = new int[count];
        PrimitiveIO<int>.ReadFromFile (reader, count, this.charT);
    }
    // basename.psi: the indexed psi sequence.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".psi"))) {
        this.SeqPsi = RankSelectSeqGenericIO.Load (reader);
    }
    // basename.samples: step, marked bitmap, samples and inverse samples, in that order.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".samples"))) {
        this.SA_sample_step = reader.ReadInt16 ();
        this.SA_marked = RankSelectGenericIO.Load (reader);
        var direct = new ListIFS ();
        direct.Load (reader);
        var inverse = new ListIFS ();
        inverse.Load (reader);
        this.SA_samples = direct;
        this.SA_invsamples = inverse;
    }
}
/// <summary>
/// Fills "seq_lc" with one symbol per database item and builds SEQ from it. An item
/// flagged in "IsCenter" receives the symbol CENTERS.Count; any other item receives the
/// center reported by BuildSearchNN, and COV of that center grows to the reported
/// distance when the distance is larger.
/// </summary>
/// <param name="IsCenter">Bit stream indexed by docid; an enabled bit flags a center.</param>
/// <param name="seq_lc">Output array indexed by docid.</param>
/// <param name="seq_builder">
/// Builder for SEQ; when null,
/// SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ()) is used.
/// </param>
public virtual void BuildInternal(BitStream32 IsCenter, int[] seq_lc, SequenceBuilder seq_builder)
{
    int total = this.DB.Count;
    // Progress is reported roughly once per percent of the database.
    int step = total / 100 + 1;
    for (int docid = 0; docid < total; docid++) {
        if (docid % step == 0) {
            Console.WriteLine ("docid {0} of {1}, advance {2:0.00}%, timestamp: {3}",
                docid, total, docid * 100.0 / total, DateTime.Now);
        }
        if (!IsCenter [docid]) {
            int center;
            double dist;
            this.BuildSearchNN (this.DB [docid], out center, out dist);
            seq_lc [docid] = center;
            if (dist > this.COV [center]) {
                this.COV [center] = (float)dist;
            }
        } else {
            seq_lc [docid] = this.CENTERS.Count;
        }
    }
    var builder = seq_builder;
    if (builder == null) {
        builder = SequenceBuilders.GetIISeq (BitmapBuilders.GetPlainSortedList ());
    }
    this.SEQ = builder (seq_lc, this.CENTERS.Count + 1);
}
/// <summary>
/// Restores the object from "Input": the base class data, a 32-bit count, that many
/// float values for COV, and the sequence index. CENTERS is not read from the stream,
/// it is rebuilt from the sequence.
/// </summary>
/// <param name="Input">Reader positioned at the beginning of the stored data.</param>
public override void Load(BinaryReader Input)
{
    base.Load (Input);
    int m = Input.ReadInt32 ();
    this.COV = new float[m];
    PrimitiveIO<float>.ReadFromFile (Input, m, this.COV);
    this.SEQ = RankSelectSeqGenericIO.Load (Input);
    // CENTERS is recovered from the occurrences of the last symbol (Sigma - 1), hence
    // no temporary array is allocated for it before this point.
    var L = new SortedListRSCache (this.SEQ.Unravel (this.SEQ.Sigma - 1));
    this.CENTERS = new List<int> (L);
}
/// <summary>
/// Builds the index from the files "sa_name.structs", "sa_name.bwt" and "sa_name.samples".
/// </summary>
/// <param name="sa_name">Common prefix (path without extension) of the three input files.</param>
public void Build(string sa_name)
{
    // sa_name.structs: the newF bitmap followed by one charT entry per enabled bit.
    using (var reader = new BinaryReader (File.OpenRead (sa_name + ".structs"))) {
        this.newF = RankSelectGenericIO.Load (reader);
        int count = this.newF.Count1;
        this.charT = new int[count];
        PrimitiveIO<int>.ReadFromFile (reader, count, this.charT);
    }
    // sa_name.bwt: a ListIFS that is indexed by SeqBuilder using charT.Length as second argument.
    using (var reader = new BinaryReader (File.OpenRead (sa_name + ".bwt"))) {
        var bwt = new ListIFS ();
        bwt.Load (reader);
        this.seqIndex = this.SeqBuilder (bwt, this.charT.Length);
    }
    // sa_name.samples: step, marked bitmap, samples and inverse samples, in that order.
    using (var reader = new BinaryReader (File.OpenRead (sa_name + ".samples"))) {
        this.SA_sample_step = reader.ReadInt16 ();
        this.SA_marked = RankSelectGenericIO.Load (reader);
        var direct = new ListIFS ();
        direct.Load (reader);
        var inverse = new ListIFS ();
        inverse.Load (reader);
        this.SA_samples = direct;
        this.SA_invsamples = inverse;
    }
}
/// <summary>
/// Loads a previously saved index from the files "basename.structs",
/// "basename.bwt-index", "basename.structs-samples" and "basename.samples".
/// </summary>
/// <param name="basename">Common prefix (path without extension) of the four files.</param>
public void Load(string basename)
{
    // basename.structs: the newF bitmap followed by one charT entry per enabled bit.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".structs"))) {
        this.newF = RankSelectGenericIO.Load (reader);
        int count = this.newF.Count1;
        this.charT = new int[count];
        PrimitiveIO<int>.ReadFromFile (reader, count, this.charT);
    }
    // basename.bwt-index: the sequence index, stored together with its type id.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".bwt-index"))) {
        this.seqIndex = RankSelectSeqGenericIO.Load (reader);
    }
    // basename.structs-samples: sampling step and marked bitmap.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".structs-samples"))) {
        this.SA_sample_step = reader.ReadInt16 ();
        this.SA_marked = RankSelectGenericIO.Load (reader);
    }
    // basename.samples: samples followed by inverse samples.
    using (var reader = new BinaryReader (File.OpenRead (basename + ".samples"))) {
        var direct = new ListIFS ();
        direct.Load (reader);
        var inverse = new ListIFS ();
        inverse.Load (reader);
        this.SA_samples = direct;
        this.SA_invsamples = inverse;
    }
}
/// <summary>
/// Builds the graph from a text file with one link per line: two integers (start node
/// and end node) separated by a tab or a space. Lines starting with "#" are skipped.
/// End nodes are appended to the sequence given to "seqbuilder"; the adjacency list
/// lengths are encoded in a bit stream (an enabled bit per start node advanced, a
/// disabled bit per link) given to "bitmapbuilder".
/// </summary>
/// <param name="filename">Path of the input text file.</param>
/// <param name="seqbuilder">Builder for SEQ.</param>
/// <param name="bitmapbuilder">
/// Builder for LENS; BitmapBuilders.GetGGMN_wt (12) is used when null.
/// </param>
public void BuildWebGraph(string filename, SequenceBuilder seqbuilder, BitmapFromBitStream bitmapbuilder = null)
{
    if (bitmapbuilder == null) {
        bitmapbuilder = BitmapBuilders.GetGGMN_wt (12);
    }
    var len_stream = new BitStream32 ();
    var seq = new List<int> ();
    int prev_context = -1;
    // Largest end node stored in seq; -1 while no link has been read.
    int max_end_node = -1;
    using (var Input = File.OpenText (filename)) {
        string line;
        int lineno = 0;
        int counterlineno = 0;
        while (true) {
            {
                // Progress report: one entry every 10000 lines, ten entries per output row.
                if (lineno % 10000 == 0) {
                    if (counterlineno % 10 == 0) {
                        Console.WriteLine ();
                        Console.Write ("Processing lines: ");
                    }
                    ++counterlineno;
                    Console.Write ("{0}, ", lineno);
                }
                ++lineno;
            }
            line = Input.ReadLine ();
            if (line == null) {
                break;
            }
            // The comment marker is a plain character, no culture rules are wanted here.
            if (line.StartsWith ("#", StringComparison.Ordinal)) {
                continue;
            }
            var link = line.Split ('\t', ' ');
            var start_node = int.Parse (link [0]);
            var end_node = int.Parse (link [1]);
            // on webgraph format, starting nodes are already sorted, just advance and count
            if (start_node != prev_context) {
                for (int diffcount = start_node - prev_context; diffcount > 0; --diffcount) {
                    len_stream.Write (true);
                }
                prev_context = start_node;
            }
            len_stream.Write (false);
            seq.Add (end_node);
            if (end_node > max_end_node) {
                max_end_node = end_node;
            }
        }
        // a simple hack simplifying direct-neighbors's retrieval
        len_stream.Write (true);
    }
    // The sequence stores end nodes, so its alphabet must cover the largest of them too;
    // the last start node alone is too small whenever a link points to a higher node id.
    this.SEQ = seqbuilder (seq, Math.Max (prev_context, max_end_node) + 1);
    this.LENS = bitmapbuilder (new FakeBitmap (len_stream));
}
/// <summary>
/// Restores the object from "Input". The stream holds the LENS bitmap first and the
/// SEQ sequence right after it, each one stored through its generic loader.
/// </summary>
/// <param name="Input">Reader positioned at the beginning of the stored data.</param>
public void Load(BinaryReader Input)
{
    // Reading order matters, both structures come from the same stream.
    LENS = RankSelectGenericIO.Load (Input);
    SEQ = RankSelectSeqGenericIO.Load (Input);
}
/// <summary>
/// Creates an unraveled symbol, remembering both the symbol and the sequence index
/// that it refers to.
/// </summary>
/// <param name="_seqindex">The sequence index, stored as given.</param>
/// <param name="_symbol">The symbol, stored as given.</param>
public UnraveledSymbol(IRankSelectSeq _seqindex, int _symbol)
{
    symbol = _symbol;
    seqindex = _seqindex;
}