/// <summary>
/// Loads the first class (LSH) record whose LopSH column matches the given class id.
/// </summary>
/// <param name="ID_Lop">Class id to look up. Concatenated into the query; acceptable only because it is an int.</param>
/// <returns>The mapped LSH for the first matching row, or null when no row matches.</returns>
public LSH get1LSH(int ID_Lop)
{
    // NOTE(review): the query reads from table SV but the method maps an LSH — confirm the table name.
    string query = " select * from SV where LopSH = " + ID_Lop;
    DataTable table = DBHelper.Instance.GetRecords(query);
    // Guard the empty result set: the original indexed Rows[0] unconditionally and
    // threw IndexOutOfRangeException whenever no row matched.
    if (table == null || table.Rows.Count == 0)
    {
        return null;
    }
    return set1LSH(table.Rows[0]);
}
/// <summary>Maps a single DataRow onto an LSH entity (ID_Lop, NameLop).</summary>
/// <param name="i">Row containing the "ID_Lop" and "NameLop" columns.</param>
/// <returns>A freshly populated LSH.</returns>
public LSH GetLSH(DataRow i)
{
    var entity = new LSH
    {
        ID_Lop = Convert.ToInt32(i["ID_Lop"].ToString()),
        NameLop = i["NameLop"].ToString()
    };
    return entity;
}
/// <summary>Builds an LSH entity from one result row (ID_Lop, NameLop).</summary>
/// <param name="i">Row containing the "ID_Lop" and "NameLop" columns.</param>
/// <returns>A freshly populated LSH.</returns>
private LSH set1LSH(DataRow i)
{
    var mapped = new LSH
    {
        ID_Lop = Convert.ToInt32(i["ID_Lop"].ToString()),
        NameLop = i["NameLop"].ToString()
    };
    return mapped;
}
/// <summary>Converts a DataRow into an LSH entity (ID_Lop, NameLop).</summary>
/// <param name="i">Row containing the "ID_Lop" and "NameLop" columns.</param>
/// <returns>A freshly populated LSH.</returns>
public LSH GetLSH(DataRow i)
{
    LSH lsh = new LSH();
    lsh.ID_Lop = Convert.ToInt32(i["ID_Lop"]);
    lsh.NameLop = i["NameLop"].ToString();
    return lsh;
}
/// <summary>
/// Runs LSH over the minhash signatures of a set of listings, links each listing to its
/// nearest neighbour when the neighbour's price is within ±20%, and records the pair in
/// <paramref name="duplicates"/> (both directions).
/// </summary>
/// <param name="listings">Listings to compare; each must carry a minhash_description signature.</param>
/// <param name="job_id">Job whose progress bar is advanced while scanning.</param>
/// <param name="duplicates">Output map: listing id -> id of its likely duplicate.</param>
private static void CalculateLshForListingSet(List <Listing> listings, string job_id, Dictionary <long, long> duplicates)
{
    // One similarity bucket per 100 listings, rounded up.
    var numSimilarityBuckets = (int)Math.Ceiling(listings.Count / 100M);

    // First make 2 dimensional array (docs by min-hashes)
    var matrix = new int[listings.Count, minHashCount];
    for (int listing = 0; listing < listings.Count; listing++)
    {
        for (int hash = 0; hash < listings[listing].minhash_description.Count; hash++)
        {
            matrix[listing, hash] = (int)listings[listing].minhash_description[hash];
        }
    }

    // Now set LSH
    var lsh = new LSH(matrix, numSimilarityBuckets);
    lsh.Calc();

    // Set closes duplicate on each listing
    // NOTE(review): duplicatesFound is checked below but never written to, so the
    // ContainsKey guard is always false — dead code or a missing Add; verify intent.
    var duplicatesFound = new Dictionary <long, long>();
    var singleItemProgress = ProgressManager.CalculateLoopIncrement(listings.Count(), 0.2M);
    for (int listing = 0; listing < listings.Count; listing++)
    {
        ProgressManager.IncrementJobPercentBy(job_id, singleItemProgress);
        var nearest = lsh.GetNearest(listing);
        if (!nearest.Any())
        {
            continue;
        }
        var thisListing = listings[listing];
        var nearestListing = listings[nearest[0]];
        // NOTE(review): decimal division throws DivideByZeroException when
        // thisListing.buy_now_price is 0 — confirm prices are always non-zero upstream.
        var priceRatio = nearestListing.buy_now_price / thisListing.buy_now_price;
        // Only treat as duplicate when prices are within ±20% of each other.
        if (priceRatio < 0.8M || priceRatio > 1.2M)
        {
            continue;
        }
        if (duplicatesFound.ContainsKey(nearestListing.id))
        {
            continue;
        }
        listings[listing].likely_duplicate_id_by_description = nearestListing.id;
        // NOTE(review): this passes the list of neighbour *indices* as the second set,
        // not the neighbour's minhash row — looks like a bug; presumably intended
        // ArrayHelpers.GetRow<int>(matrix, nearest[0]).ToList(). Confirm before changing.
        listings[listing].similarity_description = Jaccard.Calc(ArrayHelpers.GetRow <int>(matrix, listing).ToList(), nearest);
        // Record the pair in both directions.
        duplicates[nearestListing.id] = thisListing.id;
        duplicates[thisListing.id] = nearestListing.id;
    }
}
/// <summary>Reads every row from the LSH table and maps each one to an LSH entity.</summary>
/// <returns>All LSH records found in the database.</returns>
public List <LSH> GetAllLSH()
{
    var result = new List <LSH>();
    DataTable data = DBHelper.Instance.ExecuteQuery("SELECT * FROM LSH");
    foreach (DataRow row in data.Rows)
    {
        result.Add(new LSH(row));
    }
    return result;
}
/// <summary>Fetches all rows from the LSH table and maps them to LSH entities.</summary>
/// <returns>All LSH records found in the database.</returns>
public List <LSH> getAllLSH_DAL()
{
    string query = "select * from LSH";
    var result = new List <LSH>();
    foreach (DataRow row in DBHelper.Instance.getRecords(query).Rows)
    {
        var item = new LSH
        {
            ID_Lop = Convert.ToInt32(row["ID_Lop"]),
            NameLop = row["NameLop"].ToString()
        };
        result.Add(item);
    }
    return result;
}
/// <summary>
/// Demo entry point: generates a corpus of fake documents, runs an LSH word search,
/// and computes the equivalent linear-scan result for comparison.
/// </summary>
static void Main(string[] args)
{
    // Build 10,000 documents with random lorem-ipsum sentences.
    var documents = new Faker <TextDocument>()
        .RuleFor(x => x.Text, faker => faker.Lorem.Sentence())
        .Generate(10000);

    var lsh = new LSH(documents, 50000);
    var lshSearch = lsh.Search("cum");

    // Baseline: exact word match over every document.
    var linearSearch = documents
        .Where(x => x.Text.Split(" ").Contains("cum"))
        .ToList();

    // Keep the console open so results can be inspected in the debugger.
    Console.ReadLine();
}
/// <summary>Maps every cached DTLSH row to an LSH entity.</summary>
/// <returns>All LSH records held by the CSDL singleton.</returns>
public List <LSH> GetAllLSH()
{
    var result = new List <LSH>();
    foreach (DataRow row in CSDL.Instance.DTLSH.Rows)
    {
        LSH entity = new LSH();
        entity.ID_Lop = Convert.ToInt32(row["ID_Lop"].ToString());
        entity.NameLop = row["NameLop"].ToString();
        result.Add(entity);
    }
    return result;
}
/// <summary>
/// Builds the sentence-similarity graph for a document:
/// 1) each sentence becomes a node; 2) sentences are shingled and min-hashed;
/// 3) LSH buckets similar signatures; 4) the graph is stored as a neighbourhood
/// list (NB) plus a cumulative offset list (OFF).
/// </summary>
/// <param name="docs">Sentences to connect; the index in this list is the node id.</param>
public void GenerateSSGraph(List <string> docs)
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    List <string> docsOrg = new List <string>(docs);
    for (int i = 0; i < docs.Count; i++)
    {
        senteceNames[i] = docs[i];
    }

    int r = docs.Count; // number of sentences = minhash rows
    int n = 40;         // minhash signature length
    int rows = 2;       // rows per LSH band

    // Shingle vector per sentence.
    int[][] minHashes = new int[r][];
    for (int i = 0; i < r; i++)
    {
        minHashes[i] = GetShingleVec(docs[i]).ToArray();
    }

    // Minhash signature matrix (r x n).
    MinHash mh = new MinHash(r, n);
    int[,] minhashes = new int[r, n];
    for (int i = 0; i < r; i++)
    {
        List <int> doc = minHashes[i].ToList();
        List <uint> hvs = mh.GetMinHash(doc).ToList();
        for (int j = 0; j < hvs.Count; j++)
        {
            minhashes[i, j] = (int)hvs[j];
        }
    }

    OFF.Add(0);
    int conCount = 0;
    LSH lsh = new LSH(minhashes, rows);
    lsh.Calc();

    int idx = 0;
    // BUGFIX: the loop previously ran while k < minhashes.GetUpperBound(0), which is
    // r - 1, so the last sentence never got its neighbourhood recorded. GetLength(0)
    // iterates every row.
    for (int k = 0; k < minhashes.GetLength(0); k++)
    {
        List <int> nearest = lsh.GetNearest(k);
        if (!nodes.Contains(k))
        {
            nodes.Add(k);
        }
        foreach (int i in nearest)
        {
            if (!nodes.Contains(i))
            {
                nodes.Add(i);
            }
            // NOTE(review): this compares the neighbour id against a global running
            // counter rather than the current node k — presumably meant to skip
            // self-loops (i == k). Preserved as-is; confirm intent before changing.
            if (i == idx)
            {
                continue;
            }
            NB.Add(i);
            conCount++;
            ++idx;
        }
        OFF.Add(conCount); // cumulative edge count after processing node k
    }

    sw.Stop();
    Console.WriteLine(sw.ElapsedMilliseconds / (double)1000);
}
/// <summary>
/// Same pipeline as GenerateSSGraph, applied to comparative summarization where
/// sentences come from several documents: edges between sentences of the same
/// source group (per <paramref name="offsets"/>) are signed +1, cross-group
/// edges -0.5, recorded in SIGN alongside NB/OFF.
/// </summary>
/// <param name="docs">Sentences from all documents; the index in this list is the node id.</param>
/// <param name="offsets">Group boundaries used by Helpers.AreFromSameGroup.</param>
public void GenerateSSGraphForComparativeSum(List <string> docs, List <int> offsets)
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    List <string> docsOrg = new List <string>(docs);
    for (int i = 0; i < docs.Count; i++)
    {
        senteceNames[i] = docs[i];
    }

    int r = docs.Count; // number of sentences = minhash rows
    int n = 100;        // minhash signature length
    int rows = 5;       // rows per LSH band

    // Shingle vector per sentence.
    int[][] minHashes = new int[r][];
    for (int i = 0; i < r; i++)
    {
        minHashes[i] = GetShingleVec(docs[i]).ToArray();
    }

    // Minhash signature matrix (r x n).
    MinHash mh = new MinHash(r, n);
    int[,] minhashes = new int[r, n];
    for (int i = 0; i < r; i++)
    {
        List <int> doc = minHashes[i].ToList();
        List <uint> hvs = mh.GetMinHash(doc).ToList();
        for (int j = 0; j < hvs.Count; j++)
        {
            minhashes[i, j] = (int)hvs[j];
        }
    }

    OFF.Add(0);
    int conCount = 0;
    LSH lsh = new LSH(minhashes, rows);
    lsh.Calc();

    int idx = 0;
    // BUGFIX: the loop previously ran while k < minhashes.GetUpperBound(0), which is
    // r - 1, so the last sentence never got its neighbourhood recorded. GetLength(0)
    // iterates every row.
    for (int k = 0; k < minhashes.GetLength(0); k++)
    {
        List <int> nearest = lsh.GetNearest(k);
        if (!nodes.Contains(k))
        {
            nodes.Add(k);
        }
        foreach (int i in nearest)
        {
            if (!nodes.Contains(i))
            {
                nodes.Add(i);
            }
            // NOTE(review): this compares the neighbour id against a global running
            // counter rather than the current node k — presumably meant to skip
            // self-loops (i == k). Preserved as-is; confirm intent before changing.
            if (i == idx)
            {
                continue;
            }
            NB.Add(i);
            // Same-group edges reinforce similarity; cross-group edges penalize it.
            if (Helpers.AreFromSameGroup(k, i, offsets))
            {
                SIGN.Add(1);
            }
            else
            {
                SIGN.Add(-0.5f);
            }
            conCount++;
            ++idx;
        }
        OFF.Add(conCount); // cumulative edge count after processing node k
    }

    sw.Stop();
    Console.WriteLine(sw.ElapsedMilliseconds / (double)1000);
}
/// <summary>
/// Returns up to <paramref name="k"/> user ids most similar to <paramref name="id_user"/>,
/// computed via MinHash/LSH over the sets of dishes each user has ordered.
/// </summary>
/// <param name="k">Number of neighbours requested; unfilled trailing slots stay 0.</param>
/// <param name="id_user">User whose neighbours are wanted.</param>
/// <returns>Array of length k with neighbour user ids in descending similarity order.</returns>
private static int[] Calculeaza_vecini_LSH(int k, int id_user)
{
    // user id -> list of ordered dish ids, pulled from the database.
    Dictionary<int, List<int>> toatePrep = DatabaseFunctions.preparateComandateDupaUtilizator();

    Dictionary<int, HashSet<int>> signatures = new Dictionary<int, HashSet<int>>();
    HashSet<int> single_signatures = new HashSet<int>();
    foreach (KeyValuePair<int, List<int>> entry in toatePrep)
    {
        signatures.Add(entry.Key, entry.Value.ToHashSet<int>());
        single_signatures.UnionWith(entry.Value);
    }

    int numSets = signatures.Count;
    int numHashFunctions = single_signatures.Count;

    MinHash<int> minHash = new MinHash<int>(numHashFunctions);
    int[][] minHashValues = minHash.initializeHashBuckets(numSets, numHashFunctions);

    int index = 0, index_cautat = 0;
    List<HashSet<int>> list_signatures = new List<HashSet<int>>();
    // Remember which user id sits at each minhash row, so the row-index -> user-id
    // lookup below is O(1) instead of re-walking the dictionary per result (was O(n^2)).
    List<int> userIdsByIndex = new List<int>();
    foreach (var entry in signatures)
    {
        minHash.computeMinHashForSet(entry.Value, index, minHashValues, single_signatures);
        if (entry.Key == id_user)
        {
            index_cautat = index; // row of the target user
        }
        index++;
        list_signatures.Add(entry.Value);
        userIdsByIndex.Add(entry.Key);
    }

    LSH<int> lsh = new LSH<int>(minHashValues, list_signatures);
    Dictionary<int, double> closeSimilarItems = lsh.closestSimilarItems(index_cautat, minHash);

    // Map row indices back to user ids, keeping their similarity scores.
    Dictionary<int, double> results = new Dictionary<int, double>();
    foreach (var closeItem in closeSimilarItems)
    {
        results.Add(userIdsByIndex[closeItem.Key], closeItem.Value);
    }

    // Pick the top-k user ids by similarity; slots beyond results.Count stay 0.
    int[] vecini = new int[k];
    if (results.Count > 0)
    {
        for (int ind = 0; ind < k; ind++)
        {
            vecini[ind] = results.MaxBy(x => x.Value).Key;
            results.Remove(vecini[ind]);
            if (results.Count == 0)
            {
                break;
            }
        }
    }
    return vecini;
}