/// <summary>
/// Initializes this index from another <see cref="KnrSeqSearch"/> by assigning its
/// DB, K, MAXCAND, R and SEQ members to this instance. The members are assigned
/// directly; no copies of the referenced objects are made here.
/// </summary>
/// <param name="other">Index whose members are taken over.</param>
public void Build(KnrSeqSearch other)
{
    DB = other.DB;
    K = other.K;
    MAXCAND = other.MAXCAND;
    R = other.R;
    SEQ = other.SEQ;
}
/// <summary>
/// Builds the inverted index from an existing <see cref="KnrSeqSearch"/>.
/// For every reference <c>i</c> in <c>R.DB</c>, the positions reported by
/// <c>knr.SEQ.Unravel(i)</c> are divided by <c>K</c> and stored as one
/// <c>int[]</c> entry of <c>INVINDEX</c>.
/// </summary>
/// <param name="knr">Source index providing DB, K, R and SEQ.</param>
/// <param name="min_occ">Value stored in <c>MINOCC</c>.</param>
public void Build(KnrSeqSearch knr, int min_occ)
{
    this.DB = knr.DB;
    this.K = knr.K;
    this.MINOCC = min_occ;
    // MAXCAND is not copied from knr here.
    this.R = knr.R;

    int numRefs = this.R.DB.Count;
    this.INVINDEX = new List<int[]>(numRefs);
    for (int refId = 0; refId < numRefs; ++refId)
    {
        var occurrences = knr.SEQ.Unravel(refId);
        var total = occurrences.Count1;
        var postings = new int[total];
        // Select1 is queried with 1-based ranks; each position in the sequence
        // is divided by K before being stored.
        for (int rank = 0; rank < total; ++rank)
        {
            postings[rank] = occurrences.Select1(rank + 1) / this.K;
        }
        this.INVINDEX.Add(postings);
    }
}
/// <summary>
/// Builds the inverted index from an existing <see cref="KnrSeqSearch"/>.
/// For every reference <c>i</c> in <c>R.DB</c>, the positions reported by
/// <c>knr.SEQ.Unravel(i)</c> are divided by <c>K</c>, and the resulting list is
/// turned into one <c>INVINDEX</c> entry by the bitmap builder.
/// </summary>
/// <param name="knr">Source index providing DB, K, R and SEQ.</param>
/// <param name="min_occ">Value stored in <c>MINOCC</c>.</param>
/// <param name="bitmap_builder">
/// Builder applied to each posting list; when null,
/// <c>BitmapBuilders.GetDiffSetRL2(63, new EliasDelta())</c> is used.
/// </param>
public void Build(KnrSeqSearch knr, int min_occ, BitmapFromList bitmap_builder = null)
{
    this.DB = knr.DB;
    this.K = knr.K;
    this.MINOCC = min_occ;
    // MAXCAND is not copied from knr here.
    this.R = knr.R;

    int numRefs = this.R.DB.Count;
    BitmapFromList makeBitmap = bitmap_builder ?? BitmapBuilders.GetDiffSetRL2(63, new EliasDelta());
    this.INVINDEX = new IRankSelect[numRefs];

    // One scratch list is reused for every reference and handed to the builder.
    var postings = new List<int>();
    for (int refId = 0; refId < numRefs; ++refId)
    {
        postings.Clear();
        var occurrences = knr.SEQ.Unravel(refId);
        var total = occurrences.Count1;
        // Select1 is queried with 1-based ranks.
        for (int rank = 1; rank <= total; ++rank)
        {
            postings.Add(occurrences.Select1(rank) / this.K);
        }
        this.INVINDEX[refId] = makeBitmap(postings);
    }
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchLCSv3"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchLCSv3(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchJaccard"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchJaccard(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Runs the query file against a battery of indexes built over <paramref name="db"/>
/// (sequential scan, List-of-Clusters variants, permutation-based indexes and
/// KnrSeqSearch variants) and finally passes every result file name, followed by
/// <c>"--horizontal"</c>, to <c>Commands.Check</c>.
/// </summary>
/// <param name="nick">Suffix used in every index file name.</param>
/// <param name="db">Database to index and search.</param>
/// <param name="queries">Query source name; also embedded in result file names.</param>
/// <param name="num_centers">Value forwarded to the <c>TestLC</c> calls.</param>
/// <param name="num_perms">Value forwarded to the <c>TestPI</c> calls.</param>
/// <param name="num_refs">Number of references sampled for the KnrSeqSearch index.</param>
public void Test(string nick, MetricDB db, string queries, int num_centers, int num_perms, int num_refs)
{
    var queryStream = new QueryStream(queries);
    var resultNames = new List<string>();

    // Exhaustive search: the sequential index is rebuilt and saved on every run,
    // but the search itself is skipped when its result file already exists.
    {
        var sequential = new Sequential();
        sequential.Build(db);
        string seqName = "Index.Sequential." + nick;
        IndexGenericIO.Save(seqName, sequential);
        string seqResult = "Res." + seqName + "." + queries;
        if (!File.Exists(seqResult))
        {
            Commands.Search(sequential, queryStream.Iterate(), new ShellSearchOptions(queries, seqName, seqResult));
        }
        resultNames.Add(seqResult);
    }

    // The List of Clusters and its variants.
    resultNames.Add(TestLC("Index.LC_RNN." + nick, db, num_centers, new LC_RNN(), queries, queryStream));
    resultNames.Add(TestLC("Index.LC." + nick, db, num_centers, new LC(), queries, queryStream));
    resultNames.Add(TestLC("Index.LC_IRNN." + nick, db, num_centers, new LC_IRNN(), queries, queryStream));
    resultNames.Add(TestLC("Index.LC_PRNN." + nick, db, num_centers, new LC_PRNN(), queries, queryStream));
    resultNames.Add(TestLC("Index.LC_ParallelBuild." + nick, db, num_centers, new LC_ParallelBuild(), queries, queryStream));

    // Permutation-based indexes: plain permutations, brief index, two-bit variant.
    resultNames.Add(TestPI("Index.Perms." + nick, db, num_perms, new Perms(), queries, queryStream));
    resultNames.Add(TestPI("Index.BinPerms." + nick, db, num_perms, new BinPerms(), queries, queryStream));
    resultNames.Add(TestPI("Index.BinPermsTwoBits." + nick, db, num_perms, new BinPermsTwoBit(), queries, queryStream));

    // KNR
    {
        string knrName = "Index.KnrSeqSearch." + nick;
        KnrSeqSearch knrIndex;
        if (!File.Exists(knrName))
        {
            Console.WriteLine("** Starting construction of '{0}'", knrName);
            var fresh = new KnrSeqSearch();
            var refIds = RandomSets.GetRandomSubSet(num_refs, db.Count);
            var refSpace = new SampleSpace("", db, refIds);
            // The references themselves are indexed with an LC.
            var refIndex = new LC();
            refIndex.Build(refSpace, refSpace.Count / 10);
            fresh.Build(db, refIndex, 7);
            IndexGenericIO.Save(knrName, fresh);
            knrIndex = fresh;
        }
        else
        {
            knrIndex = (KnrSeqSearch)IndexGenericIO.Load(knrName);
        }
        knrIndex.MAXCAND = 1024;
        TestKNR(knrIndex, knrName, queries, num_refs, resultNames, index => index);

        Console.WriteLine("==== Working on a permuted space");
        string sortedName = knrName + ".proximity-sorted";
        KnrSeqSearch sortedIndex;
        if (File.Exists(sortedName))
        {
            sortedIndex = (KnrSeqSearch)IndexGenericIO.Load(sortedName);
        }
        else
        {
            sortedIndex = knrIndex.GetSortedByPrefix();
            sortedIndex.MAXCAND = 1024;
            IndexGenericIO.Save(sortedName, sortedIndex);
        }
        TestKNR(sortedIndex, sortedName, queries, num_refs, resultNames, index => new PermutedIndex(index));
    }

    resultNames.Add("--horizontal");
    Commands.Check(resultNames);
}
/// <summary>
/// Runs the query file against <paramref name="idx"/> and against several ranking
/// variants wrapped around it (Spearman Footrule, Spearman Rho, Jaccard, RelMatches),
/// then against a CNAPP index built from it. Each search is skipped when its result
/// file already exists; every result file name is appended to <paramref name="reslist"/>.
/// </summary>
/// <param name="idx">Base index.</param>
/// <param name="idxname">Index name, embedded in result file names.</param>
/// <param name="queries">Query source name, embedded in result file names.</param>
/// <param name="num_refs">Not used by this method.</param>
/// <param name="reslist">Receives the result file names in execution order.</param>
/// <param name="map">Applied to every index right before it is searched.</param>
public void TestKNR(KnrSeqSearch idx, string idxname, string queries, int num_refs, IList<string> reslist, Func<Index,Index> map)
{
    var queryStream = new QueryStream(queries);
    string prefix = "Res." + idxname + "." + queries;

    // PP-Index: the base index itself. Its search options are created before the
    // existence check, unlike the variants below.
    string ppResult = prefix + ".PPIndex";
    var ppOptions = new ShellSearchOptions(queries, idxname, ppResult);
    if (!File.Exists(ppResult))
    {
        Commands.Search(map(idx), queryStream.Iterate(), ppOptions);
    }
    reslist.Add(ppResult);

    // Ranking variants: result-name suffix plus a factory, so that a variant is
    // only constructed when its result file is missing.
    var variants = new KeyValuePair<string, Func<Index>>[]
    {
        new KeyValuePair<string, Func<Index>>(".SF", () => new KnrSeqSearchFootrule(idx)),
        new KeyValuePair<string, Func<Index>>(".SR", () => new KnrSeqSearchSpearmanRho(idx)),
        new KeyValuePair<string, Func<Index>>(".Jaccard", () => new KnrSeqSearchJaccard(idx)),
        new KeyValuePair<string, Func<Index>>(".RelMatches", () => new KnrSeqSearchRelMatches(idx)),
    };
    foreach (var variant in variants)
    {
        string variantResult = prefix + variant.Key;
        if (!File.Exists(variantResult))
        {
            var options = new ShellSearchOptions(queries, idxname, variantResult);
            Commands.Search(map(variant.Value()), queryStream.Iterate(), options);
        }
        reslist.Add(variantResult);
    }

    // CNAPP
    reslist.Add(_Test("Index.CNAPP." + idxname, idx.DB, () => {
        var cnapp = new CNAPP();
        // cnapp.Build(idx, idx.K-2);
        cnapp.Build(idx, 1);
        return map(cnapp);
    }, queries));
}
/// <summary>
/// Creates a <see cref="NAPP"/> index from an existing <see cref="KnrSeqSearch"/>
/// by calling <c>Build(knr, 1)</c>.
/// </summary>
/// <param name="knr">Index the NAPP structure is built from.</param>
public NAPP(KnrSeqSearch knr)
    : base()
{
    // Second Build argument; presumably the minimum-occurrences setting — confirm against Build.
    const int buildArgument = 1;
    Build(knr, buildArgument);
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchCosine"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchCosine(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchRelMatches"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchRelMatches(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Creates a <see cref="KnrSeqSearch"/> initialized from another instance by
/// calling <c>Build(other)</c>.
/// </summary>
/// <param name="other">Index passed to <c>Build</c>.</param>
public KnrSeqSearch(KnrSeqSearch other)
    : base()
{
    Build(other);
}
/// <summary>
/// Returns a new <see cref="KnrSeqSearch"/> whose objects are arranged in the
/// lexicographic order of their stored KNR sequences. The new index wraps the
/// original database in a <c>SampleSpace</c> driven by the sorted permutation and
/// re-encodes the sorted sequences; K, MAXCAND and R are taken from this instance.
/// </summary>
/// <param name="seq_builder">
/// Builder for the encoded sequence; defaults to
/// <c>SequenceBuilders.GetSeqXLB_DiffSet64(24, 63)</c> when null.
/// </param>
/// <param name="list_builder">
/// Builder for the permutation list; defaults to <c>ListIBuilders.GetListIFS()</c> when null.
/// </param>
/// <returns>The newly created, prefix-sorted index.</returns>
public KnrSeqSearch GetSortedByPrefix(SequenceBuilder seq_builder = null, ListIBuilder list_builder = null)
{
    int count = this.DB.Count;
    var knrLists = new int[count][];
    var order = new int[count];
    for (int objId = 0; objId < count; ++objId)
    {
        knrLists[objId] = this.GetStoredKnr(objId);
        order[objId] = objId;
    }

    // TODO: speed this up with another sorting method.
    // Sorts the KNR sequences lexicographically and applies the same moves to 'order'.
    Sorting.Sort<int[],int>(knrLists, order, (left, right) => StringSpace<int>.LexicographicCompare(left, right));

    // Lazy flat view over the sorted sequences: position p maps to item (p % K) of sequence (p / K).
    var flattened = new ListGen<int>((int pos) => knrLists[pos / this.K][pos % this.K], count * this.K);

    ListIBuilder buildList = list_builder ?? ListIBuilders.GetListIFS();
    SequenceBuilder buildSeq = seq_builder ?? SequenceBuilders.GetSeqXLB_DiffSet64(24, 63);

    return new KnrSeqSearch
    {
        DB = new SampleSpace("", this.DB, buildList(order, count - 1)),
        K = this.K,
        MAXCAND = this.MAXCAND,
        R = this.R,
        SEQ = buildSeq(flattened, this.R.DB.Count)
    };
}
/// <summary>
/// Builds this index using an approximate index over the references: a
/// <c>SampleSpace</c> of <paramref name="num_refs"/> objects is indexed with an inner
/// <see cref="KnrSeqSearch"/>, which is wrapped in a <see cref="KnrSeqSearchFootrule"/>
/// and passed as the reference index to <c>Build</c>.
/// </summary>
/// <param name="db">Database to index.</param>
/// <param name="rand">Random source handed to the inner index's <c>Build</c>.</param>
/// <param name="num_refs">Size argument of the reference <c>SampleSpace</c>.</param>
/// <param name="K">K argument passed to both the inner and the outer build.</param>
/// <param name="maxcand">Maximum-candidates argument of the outer build.</param>
/// <param name="seq_builder">Sequence builder forwarded to the outer build; may be null.</param>
public void BuildApprox(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder = null)
{
    var refSpace = new SampleSpace("", db, num_refs);

    // Inner index over the references (fixed arguments 1024 and int.MaxValue).
    var refIndex = new KnrSeqSearch();
    refIndex.Build(refSpace, rand, 1024, K, int.MaxValue);

    var refSearcher = new KnrSeqSearchFootrule(refIndex);
    this.Build(db, refSearcher, K, maxcand, seq_builder);
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchFootrule"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchFootrule(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Builds (or loads from disk) a <see cref="KnrSeqSearch"/> index and runs the
/// configured queries against it and against several ranking variants wrapped
/// around it (cosine, footrule, Jaccard+LCS, LCSv3 and NAPP). A search is skipped
/// when its result file already exists.
/// </summary>
/// <param name="setup">Experiment configuration (database path, QARG, KNR_KSEARCH, ...).</param>
/// <param name="nick">Directory-like prefix used for the index file name and result names.</param>
/// <param name="numrefs">Number of references sampled from the database.</param>
/// <param name="k">
/// K used to build the index. When 0, K is estimated with
/// <c>KnrEstimateParameters.EstimateKnrEnsuringSharedNeighborhoods</c> and the result
/// names carry an <c>estimated-K=…</c> marker. The index file name is computed
/// before the estimation, so it contains the literal 0 in that case.
/// </param>
/// <param name="maxcand_ratio">Fraction of the database size used as MAXCAND.</param>
/// <returns>All result file names, whether freshly computed or already present.</returns>
/// <exception cref="InvalidOperationException">
/// The existing index file could not be loaded as a <see cref="KnrSeqSearch"/>.
/// </exception>
public static List<string> ExecuteKNRSEQ(IndexArgumentSetup setup, string nick, int numrefs, int k, double maxcand_ratio)
{
    var idxname = String.Format("{0}/Index.knrseq-{1}-{2}", nick, numrefs, k);
    MetricDB db = SpaceGenericIO.Load(setup.BINARY_DATABASE);
    KnrSeqSearch idx;
    var suffix = "";
    var resnamelist = new List<string>();

    if (!File.Exists(idxname))
    {
        Console.WriteLine("*** creating index {0}", idxname);
        // Snapshot clock and distance counter to report the construction cost.
        var startTicks = DateTime.Now.Ticks;
        var startDistances = db.NumberDistances;
        var built = new KnrSeqSearch();
        var refsDB = new SampleSpace("", db, numrefs);
        var refsIDX = new EPTable();
        refsIDX.Build(refsDB, 4, (_db, _rand) => new EPListOptimizedA(_db, 4, _rand));
        if (k == 0)
        {
            k = KnrEstimateParameters.EstimateKnrEnsuringSharedNeighborhoods(db, refsIDX, (int)Math.Abs(setup.QARG));
            suffix = String.Format("estimated-K={0}.", k);
        }
        built.Build(db, refsIDX, k, int.MaxValue);
        SaveConstructionTime(idxname, DateTime.Now.Ticks - startTicks, db.NumberDistances - startDistances);
        IndexGenericIO.Save(idxname, built);
        idx = built;
    }
    else
    {
        Console.WriteLine("*** loading index {0}", idxname);
        // Validate the type once, instead of repeating unchecked 'as' casts that
        // would surface later as an opaque NullReferenceException.
        idx = IndexGenericIO.Load(idxname) as KnrSeqSearch;
        if (idx == null)
        {
            throw new InvalidOperationException(
                String.Format("Index file '{0}' could not be loaded as a KnrSeqSearch", idxname));
        }
        if (k == 0)
        {
            suffix = String.Format("estimated-K={0}.", idx.K);
        }
    }

    // Candidate budget shared by every variant below.
    int maxcand = (int)(idx.DB.Count * maxcand_ratio);
    string resname;

    // PPIndex: the base index itself (its MAXCAND is overwritten in place).
    resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.PPI", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists(resname))
    {
        idx.MAXCAND = maxcand;
        PerformSearch(resname, idx, idxname, setup);
    }

    // KnrSeqSearchCosine
    resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.COS", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists(resname))
    {
        var knr = new KnrSeqSearchCosine(idx);
        knr.MAXCAND = maxcand;
        PerformSearch(resname, knr, idxname, setup);
    }

    // KnrSeqSearchFootrule
    resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.FOOTRULE", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists(resname))
    {
        var knr = new KnrSeqSearchFootrule(idx);
        knr.MAXCAND = maxcand;
        PerformSearch(resname, knr, idxname, setup);
    }

    // KnrSeqSearchJaccLCS
    resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.JACCLCS", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists(resname))
    {
        var knr = new KnrSeqSearchJaccLCS(idx);
        knr.MAXCAND = maxcand;
        PerformSearch(resname, knr, idxname, setup);
    }

    // KnrSeqSearchLCSv3
    resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.LCSv3", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists(resname))
    {
        var knr = new KnrSeqSearchLCSv3(idx);
        knr.MAXCAND = maxcand;
        PerformSearch(resname, knr, idxname, setup);
    }

    // NAPP
    // NOTE(review): 'ksearch' only changes the result file name; nothing visible here
    // applies it to the NAPP index or to the search. Confirm whether the NAPP index
    // should be configured with it before searching.
    foreach (var ksearch in setup.KNR_KSEARCH)
    {
        resname = GetResultName(nick, idxname, setup, String.Format(suffix + "maxcand={0}.NAPP.ksearch={1}", maxcand_ratio, ksearch));
        resnamelist.Add(resname);
        if (!File.Exists(resname))
        {
            // Build the NAPP index only when a search will actually run, instead of
            // once per ksearch value regardless of existing results.
            var knr = new NAPP(idx);
            knr.MAXCAND = maxcand;
            PerformSearch(resname, knr, idxname, setup);
        }
    }

    return resnamelist;
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchSpearmanRho"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchSpearmanRho(KnrSeqSearch knr)
    : base(knr)
{
}
/// <summary>
/// Creates a <see cref="KnrSeqSearchJaccLCS"/> on top of an existing index;
/// all initialization is delegated to the base-class constructor.
/// </summary>
/// <param name="knr">Index forwarded to the base constructor.</param>
public KnrSeqSearchJaccLCS(KnrSeqSearch knr)
    : base(knr)
{
}