/// <summary>
/// Creates a KnrFP from the first <paramref name="new_n"/> fingerprints of
/// <paramref name="inputDB"/>, optionally truncating each one to its first
/// <paramref name="new_K"/> entries.
/// </summary>
/// <param name="inputDB">Source fingerprints. Its IdxRefs is shared with the new instance, not copied.</param>
/// <param name="new_n">Number of leading fingerprints to take from <paramref name="inputDB"/>.</param>
/// <param name="new_K">
/// Number of entries kept per fingerprint. When zero or negative the source rows are
/// reused as-is (the same array instances are added) and K is inherited from <paramref name="inputDB"/>.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="new_K"/> is larger than the length of the first source fingerprint.
/// </exception>
public KnrFP(KnrFP inputDB, int new_n, int new_K = -1)
{
    var source = inputDB.Fingerprints.seqs;
    bool truncate = new_K > 0;

    // A non-positive new_K means "keep rows unchanged"; record the real row width
    // instead of the sentinel so that consumers reading K (e.g. i / K, i % K) keep working.
    this.K = truncate ? new_K : inputDB.K;
    this.Fingerprints = new StringSpace<int> ();
    this.Fingerprints.seqs.Capacity = new_n;
    this.IdxRefs = inputDB.IdxRefs;

    if (!truncate) {
        for (int i = 0; i < new_n; ++i) {
            this.Fingerprints.Add (source [i]);
        }
        return;
    }

    // Only the first row is inspected; NOTE(review): this presumes all rows share one length.
    if (new_K > source [0].Length) {
        // Two-argument overload: the one-argument form would store this text as ParamName.
        throw new ArgumentOutOfRangeException ("new_K", "new_K > old_K need a complete re-construction of the transformation");
    }

    for (int i = 0; i < new_n; ++i) {
        var prefix = new int[new_K];
        Array.Copy (source [i], prefix, new_K);
        this.Fingerprints.Add (prefix);
    }
}
/// <summary>
/// Builds this index from already computed fingerprints: stores the database,
/// the reference index and K taken from <paramref name="knrfp"/>, and creates SEQ
/// from the row-major concatenation of all fingerprints.
/// </summary>
/// <param name="db">Database being indexed; stored in DB.</param>
/// <param name="knrfp">Fingerprints to index; supplies IdxRefs, K and the rows.</param>
/// <param name="maxcand">Value stored in MAXCAND.</param>
/// <param name="seq_builder">
/// Builder used to create SEQ; when null, SequenceBuilders.GetSeqXLB_SArray64 (16) is used.
/// </param>
public void Build(MetricDB db, KnrFP knrfp, int maxcand=1024, SequenceBuilder seq_builder=null)
{
    this.DB = db;
    this.R = knrfp.IdxRefs;
    this.K = knrfp.K;
    this.MAXCAND = maxcand;

    var rows = knrfp.Fingerprints.seqs;

    // Position p of the flattened sequence is entry (p % K) of fingerprint (p / K).
    // The values are produced through the generator function instead of being
    // copied into a K * n array beforehand.
    var flattened = new ListGen<int> ((int p) => rows [p / K] [p % K], this.DB.Count * this.K);

    SequenceBuilder builder = seq_builder ?? SequenceBuilders.GetSeqXLB_SArray64 (16);

    Console.WriteLine ("xxxxx Build L: {0}, R: {1}, db-count: {2}, db: {3}, K: {4}", flattened.Count, this.R.DB.Count, db.Count, db, K);

    // Second argument is the number of references; presumably the alphabet size of the sequence.
    this.SEQ = builder (flattened, this.R.DB.Count);
}
/// <summary>
/// Convenience overload: computes the fingerprints of <paramref name="db"/> against
/// <paramref name="refs"/> with a fresh KnrFP and then builds this index from them.
/// </summary>
/// <param name="db">Database being indexed.</param>
/// <param name="refs">Index over the references, handed to KnrFP.Build.</param>
/// <param name="K">Third argument of KnrFP.Build.</param>
/// <param name="maxcand">Forwarded unchanged to the fingerprint-based Build overload.</param>
/// <param name="seq_builder">Forwarded unchanged to the fingerprint-based Build overload.</param>
public void Build(MetricDB db, Index refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder=null)
{
    var fingerprints = new KnrFP ();
    fingerprints.Build (db, refs, K);

    this.Build (db, fingerprints, maxcand, seq_builder);
}
/// <summary>
/// Builds the hash table of this index: samples references from <paramref name="db"/>,
/// indexes them with a SAT_Distal, computes a fingerprint of
/// <paramref name="symbolsPerHash"/> entries for every object and groups object ids
/// by the hash that EncodeKnr produces for that fingerprint. Finishes by printing
/// the bucket-size mean and standard deviation to the console.
/// </summary>
/// <param name="db">Database being indexed; stored in DB.</param>
/// <param name="symbolsPerHash">Fingerprint length per object; very small values, i.e., 2, 3, 4.</param>
/// <param name="neighborhoodExpansion">Stored in NeighborhoodExpansion; expected to be >= symbolsPerHash.</param>
/// <param name="rand">
/// Random source used to sample the references and to build the index over them.
/// </param>
public void Build(MetricDB db, int symbolsPerHash, int neighborhoodExpansion, Random rand)
{
    this.DB = db;
    int n = db.Count;
    this.SymbolsPerHash = symbolsPerHash; // very small values, i.e., 2, 3, 4
    this.NeighborhoodExpansion = neighborhoodExpansion; // neighborhoodExpansion >= symbolsPerHash

    // Number of references is n^(1 / symbolsPerHash), truncated to an integer.
    var numrefs = (int)(Math.Pow (n, 1.0 / this.SymbolsPerHash));
    this.CountSymbolBits = countBits (numrefs);

    var refs = new SampleSpace ("", db, numrefs, rand);
    var sat = new SAT_Distal ();
    // Use the caller's generator so a seeded build is reproducible; a fresh one
    // is created only when none was supplied.
    sat.Build (refs, rand ?? new Random ());
    this.R = sat;

    var G = new KnrFP ();
    G.Build (db, this.R, this.SymbolsPerHash);

    // Scratch buffer reused for every object.
    var knrcopy = new int[this.SymbolsPerHash];
    this.hashTable = new Dictionary<long, List<int>> ();
    for (int objID = 0; objID < n; ++objID) {
        // Direct cast: an unexpected element type fails here instead of as a
        // NullReferenceException on the next line.
        var knr = (int[])G [objID];
        knr.CopyTo (knrcopy, 0);
        var hash = this.EncodeKnr (knrcopy); // EncodeKnr destroys the reference order
        List<int> bucket;
        if (!this.hashTable.TryGetValue (hash, out bucket)) {
            bucket = new List<int> ();
            this.hashTable.Add (hash, bucket);
        }
        bucket.Add (objID);
    }

    double avg_len = 0;
    double avg_len_sq = 0;
    foreach (var list in this.hashTable.Values) {
        // Square in double: Count * Count as int overflows for buckets longer than 46340.
        double len = list.Count;
        avg_len += len;
        avg_len_sq += len * len;
    }
    avg_len /= this.hashTable.Count;
    avg_len_sq /= this.hashTable.Count;

    // Rounding can push the variance marginally below zero, which would print NaN.
    double variance = Math.Max (0.0, avg_len_sq - avg_len * avg_len);
    Console.WriteLine ("=== created hash table with {0} keys, items: {1}, popcount mean: {2}, popcount stddev: {3}", this.hashTable.Count, n, avg_len, Math.Sqrt (variance));
}