//Drops every review that links to more than one product, together with the links pointing at it.
//Returns a new ReviewProductLinks holding:
//  - the reviews that have at most one linked product, and
//  - the products that still have at least one review match afterwards.
//Note: the products inside the passed-in reviewProductLinks are modified in place
//(ambiguous reviews are removed from their reviewMatches).
private ReviewProductLinks RemoveInvalidLinks(ref ReviewProductLinks reviewProductLinks)
{
    ReviewProductLinks cleanedLinks = new ReviewProductLinks();

    foreach (var candidateReview in reviewProductLinks.reviewList)
    {
        //unambiguous review: keep it as it is
        if (candidateReview.linkedProducts.Count <= 1)
        {
            cleanedLinks.reviewList.Add(candidateReview);
            continue;
        }

        //ambiguous review: detach it from every product it was linked to (matched by product id)
        foreach (var linkedProduct in candidateReview.linkedProducts)
        {
            foreach (var knownProduct in reviewProductLinks.productList)
            {
                if (knownProduct.Id != linkedProduct.Id)
                {
                    continue;
                }

                knownProduct.reviewMatches.Remove(candidateReview);
            }
        }
    }

    //only products which still have reviews attached are worth keeping
    foreach (var knownProduct in reviewProductLinks.productList)
    {
        if (knownProduct.reviewMatches.Count > 0)
        {
            cleanedLinks.productList.Add(knownProduct);
        }
    }

    return cleanedLinks;
}
//The work each thread should do: link the products delegated to this thread with the reviews.
//data: must be a ThreadingData<T>; its productList, reviewList and id are read here.
//The matches are collected in ThreadingData.threadProcessedData[id].
//ThreadingData.semaphore is decremented exactly once when the work ends - also when the work
//fails with an exception, so that code polling the counter for zero is not left waiting forever.
public void ThreadfunctionProduct<T>(object data) where T : Product
{
    try
    {
        //cast once instead of once per field access
        ThreadingData<T> threadData = (ThreadingData<T>)data;

        DistinctProductList<T> productList = threadData.productList;
        DistinctReviewList<Review> reviewList = threadData.reviewList;
        ReviewProductLinks processedReviewProductLinks = ThreadingData.threadProcessedData[threadData.id];

        foreach (var product in productList)
        {
            //execute linking processing
            product.MatchReviewAndProductTokens(reviewList, productList.stopWord, ref processedReviewProductLinks);
        }
    }
    finally
    {
        //always signal completion; the exception (if any) still propagates to the caller
        Interlocked.Decrement(ref ThreadingData.semaphore);
    }
}
//Links this product to reviews by substring matching on the review title.
//reviewList:         reviews to search; only the prunGroups whose key occurs in prunNumbers are considered.
//stopWords:          tokens that are stripped from both the product strings and the review titles.
//reviewProductLinks: receives this product and every matched review (each added at most once).
//Every candidate review is evaluated only once, even when it is found through several prune numbers.
//Otherwise the same review would be added to reviewMatches/linkedProducts repeatedly and would
//afterwards look like a review that links to multiple products.
public override void MatchReviewAndProductSubstring(DistinctReviewList<Review> reviewList, Dictionary<string, bool> stopWords, ref ReviewProductLinks reviewProductLinks)
{
    List<string> productTokens = SplitStringToTokens(Model.ToLower() + " " + CpuSeries.ToLower());
    productTokens = RemoveRestrictedTokens(productTokens, stopWords);

    //collect candidate reviews in encounter order, skipping reviews that were already collected
    List<Review> matchingReviews = new List<Review>();
    HashSet<Review> collectedReviews = new HashSet<Review>();

    foreach (int searchNumber in prunNumbers)
    {
        if (!reviewList.prunGroups.ContainsKey(searchNumber))
        {
            continue;
        }

        foreach (var candidate in reviewList.prunGroups[searchNumber])
        {
            //HashSet.Add returns false when the review is already known
            if (collectedReviews.Add(candidate))
            {
                matchingReviews.Add(candidate);
            }
        }
    }

    foreach (var review in matchingReviews)
    {
        List<string> reviewTitleWithoutStopWordTokens = RemoveRestrictedTokens(SplitStringToTokens(review.Title.ToLower()), stopWords);

        //the tokens are glued together without separators before the substring comparison
        string reviewTitleWithoutStopWords = string.Concat(reviewTitleWithoutStopWordTokens);

        if (!MatchReviewTitleWithProductStringsSubstring(reviewTitleWithoutStopWords, productTokens))
        {
            continue;
        }

        reviewMatches.Add(review);        //add review to list of reviews that link to this product
        review.linkedProducts.Add(this);  //add this product to the review's list of products it links to

        if (!reviewProductLinks.productList.Contains(this))
        {
            reviewProductLinks.productList.Add(this);
        }

        if (!reviewProductLinks.reviewList.Contains(review))
        {
            reviewProductLinks.reviewList.Add(review);
        }
    }
}
//Links this product to reviews by comparing tokens of the product strings with the review title.
//reviewList:         reviews to search; only the prunGroups whose key occurs in prunNumbers are considered.
//stopWords:          tokens that are stripped from the product strings and handed on to the title comparison.
//reviewProductLinks: receives this product and every matched review (each added at most once).
//NOTE(review): ThreadfunctionProduct appears to call this from worker threads. If Review instances
//are shared between those threads, review.linkedProducts.Add below is unsynchronized - confirm.
public override void MatchReviewAndProductTokens(DistinctReviewList<Review> reviewList, Dictionary<string, bool> stopWords, ref ReviewProductLinks reviewProductLinks)
{
    string loweredProductText = Model.ToLower() + " " + CpuSeries.ToLower();
    List<string> allProductTokens = SplitStringToTokens(loweredProductText);
    List<string> productTokens = RemoveRestrictedTokens(allProductTokens, stopWords);

    //gather the reviews of every prune group this product belongs to
    List<Review> candidateReviews = new List<Review>();
    foreach (int pruneKey in prunNumbers)
    {
        if (!reviewList.prunGroups.ContainsKey(pruneKey))
        {
            continue;
        }

        candidateReviews.AddRange(reviewList.prunGroups[pruneKey]);
    }

    //a review may be found through several prune numbers; look at each one only once
    foreach (var candidate in candidateReviews.Distinct())
    {
        string loweredTitle = candidate.Title.ToLower();
        bool titleMatchesProduct = CompareReviewTitleWithProductStringsToken(loweredTitle, productTokens, stopWords);
        if (!titleMatchesProduct)
        {
            continue;
        }

        //register the link in both directions
        reviewMatches.Add(candidate);
        candidate.linkedProducts.Add(this);

        if (!reviewProductLinks.productList.Contains(this))
        {
            reviewProductLinks.productList.Add(this);
        }

        if (!reviewProductLinks.reviewList.Contains(candidate))
        {
            reviewProductLinks.reviewList.Add(candidate);
        }
    }
}
//Click handler running the whole analysis:
//  1. read CPU/GPU products and their critic/user reviews from the crawler database,
//  2. link products and reviews on worker threads (StartThreads) and wait for them to finish,
//  3. merge the per-thread results and drop reviews which link to multiple products,
//  4. calculate the product scores,
//  5. write the products and their reviews to the analyser database.
//Both database connections are closed again even when a step in between throws.
private void GetDataTest_bt_Click(object sender, RoutedEventArgs e)
{
    DistinctProductList<CPU> cpuList;                                          //list of all cpu products, after merging
    DistinctProductList<GPU> gpuList;
    DistinctReviewList<Review> reviewListCpu = new DistinctReviewList<Review>(); //list of all cpu reviews
    DistinctReviewList<Review> reviewListGpu = new DistinctReviewList<Review>();
    ReviewProductLinks reviewProductLinks = new ReviewProductLinks();          //contains the products and reviews which have been linked
    int productsPerThread = 10;                                                //determines the amount of products each thread task should process
    DBConnect dbConnection = new DBConnect();                                  //create a database connection handler

    #region Add data from crawlerDB
    dbConnection.DbInitialize(true);
    dbConnection.connection.Open();
    try
    {
        gpuList = dbConnection.GetGpuData();
        cpuList = dbConnection.GetCpuData();

        reviewListCpu.AddDistinctList(dbConnection.GetCriticReviewData("CPU").ToReview());
        reviewListCpu.AddDistinctList(dbConnection.GetUserReviewData("CPU").ToReview());
        reviewListGpu.AddDistinctList(dbConnection.GetCriticReviewData("GPU").ToReview());
        reviewListGpu.AddDistinctList(dbConnection.GetUserReviewData("GPU").ToReview());
    }
    finally
    {
        //do not leave the crawler connection open when reading fails
        dbConnection.connection.Close();
    }
    #endregion

    StartThreads(productsPerThread, cpuList, reviewListCpu); //execute threaded processing for CPUs
    StartThreads(productsPerThread, gpuList, reviewListGpu);

    while (ThreadingData.semaphore != 0) //wait until all threads are done
    {
        Thread.Sleep(300); //no need for main thread to work while waiting
    }

    foreach (var threadReviewProductLink in ThreadingData.threadProcessedData) //collect each thread's processed data
    {
        reviewProductLinks.productList.AddRange(threadReviewProductLink.productList);
        reviewProductLinks.reviewList.AddRange(threadReviewProductLink.reviewList);
    }

    //contains linked products and reviews, reviews linking to multiple products removed
    ReviewProductLinks actualReviewProductLinks = RemoveInvalidLinks(ref reviewProductLinks);

    #region Debug region
    /* ||===================================================||
     * ||!! Warning! You are now entering the debug area. !!||
     * ||---------------------------------------------------||
     * || Nothing here is true and everything might be wrong ||
     * ||             Proceed at your own risk              ||
     * ||===================================================||*/

    //Dictionary<string, bool> helloo = new Dictionary<string, bool>();

    /*gpuList.testPruning = gpuList.testPruning.OrderByDescending(a => a[1]).ToList();
     * cpuList.testPruning = cpuList.testPruning.OrderByDescending(a => a[1]).ToList();
     * reviewListCpu.testPruning = reviewListCpu.testPruning.OrderByDescending(a => a[1]).ToList();
     * reviewListGpu.testPruning = reviewListGpu.testPruning.OrderByDescending(a => a[1]).ToList();*/

    /*foreach (var product in actualReviewProductLinks.productList)
     * {
     *     foreach (var review in product.reviewMatches)
     *     {
     *         if (!helloo.ContainsKey(review.Title))
     *         {
     *             Debug.WriteLine("");
     *             Debug.WriteLine(product.Id + " " + product);
     *             Debug.WriteLine(review.Id + " " + review.Title);
     *             helloo.Add(review.Title, true);
     *         }
     *     }
     * }*/

    //Debugging.Debugging.DebugReviewDuplicates(chassisList, cpuList, gpuList, hardDriveList, motherboardList, psuList, ramList);
    //Debugging.Debugging.GetUnlinkedReviews(reviewListGpu, cpuList, gpuList);
    //Debugging.Debugging.NumberOfReviewForEachProduct(cpuList);
    #endregion

    //Score.Score.AssessProductListScores(cpuList);
    //var scoredProducts = Score.Score.AssessProductListScores(actualReviewProductLinks.productList);

    //SCORE
    foreach (var product in actualReviewProductLinks.productList)
    {
        product.score.CalculateProductScore(product);
    }

    //Open analyserDB
    dbConnection.DbInitialize(false);
    dbConnection.connection.Open();
    try
    {
        foreach (Product product in actualReviewProductLinks.productList)
        {
            //a superScore of -1 means the product itself is not written
            if (product.score.superScore != -1)
            {
                product.WriteToDB(dbConnection.connection);
            }

            //NOTE(review): the reviews are written even when their product was skipped above - confirm this is intended
            foreach (Review review in product.reviewMatches)
            {
                review.WriteToDB(dbConnection.connection);
            }
        }

        /*
         * //Write to database
         * cpuList[1].WriteToDB(dbConnection.connection);
         * cpuList[1].reviewMatches[0].WriteToDB(dbConnection.connection);
         * gpuList[1].WriteToDB(dbConnection.connection);
         * gpuList[1].reviewMatches[0].WriteToDB(dbConnection.connection);
         */
    }
    finally
    {
        //do not leave the analyser connection open when writing fails
        dbConnection.connection.Close();
    }
}