/// <summary>
/// Token-based linking: compares the title of every candidate review against the
/// tokens built from this product's model and CPU series.
/// </summary>
/// <param name="reviewList">Reviews to search; only the prune groups keyed by this product's <c>prunNumbers</c> are examined.</param>
/// <param name="stopWords">Restricted tokens, removed from the product tokens and forwarded to the title comparison.</param>
/// <param name="reviewProductLinks">Accumulator that receives this product and each matched review (each at most once).</param>
public override void MatchReviewAndProductTokens(DistinctReviewList <Review> reviewList, Dictionary <string, bool> stopWords, ref ReviewProductLinks reviewProductLinks)
{
    // Build the product's search tokens from its lower-cased model and CPU series.
    string loweredModel = Model.ToLower();
    string loweredSeries = CpuSeries.ToLower();
    List<string> searchTokens = RemoveRestrictedTokens(
        SplitStringToTokens(loweredModel + " " + loweredSeries),
        stopWords);

    // Gather candidate reviews from every prune group this product belongs to.
    List<Review> candidates = new List<Review>();
    foreach (int pruneKey in prunNumbers)
    {
        if (!reviewList.prunGroups.ContainsKey(pruneKey))
        {
            continue;
        }

        candidates.AddRange(reviewList.prunGroups[pruneKey]);
    }

    // A review may sit in several prune groups, so visit each one only once.
    foreach (Review candidate in candidates.Distinct())
    {
        bool titleMatches = CompareReviewTitleWithProductStringsToken(candidate.Title.ToLower(), searchTokens, stopWords);
        if (!titleMatches)
        {
            continue;
        }

        // Record the link in both directions: product -> review and review -> product.
        reviewMatches.Add(candidate);
        candidate.linkedProducts.Add(this);

        if (!reviewProductLinks.productList.Contains(this))
        {
            reviewProductLinks.productList.Add(this);
        }

        if (!reviewProductLinks.reviewList.Contains(candidate))
        {
            reviewProductLinks.reviewList.Add(candidate);
        }
    }
}
/// <summary>
/// Reads all non-critic reviews of one product category from the <c>Review</c> table.
/// </summary>
/// <param name="category">Value matched against the <c>productType</c> column (callers pass e.g. "CPU" or "GPU").</param>
/// <returns>A list containing one <see cref="UserReview"/> per returned row.</returns>
/// <remarks>
/// Columns are read by ordinal, so the mapping depends on the column order of the
/// <c>Review</c> table returned by <c>SELECT *</c>.
/// </remarks>
public DistinctReviewList <UserReview> GetUserReviewData(string category)
{
    DistinctReviewList<UserReview> result = new DistinctReviewList<UserReview>();

    // The category is bound as a parameter instead of being concatenated into the SQL text,
    // so quotes or other special characters in it cannot alter the query.
    const string query = "SELECT * FROM Review WHERE isCriticReview<>1 AND productType = @category";

    using (MySqlCommand command = new MySqlCommand(query, connection))
    {
        command.Parameters.AddWithValue("@category", category);

        // The using block closes the reader even if a cast or read below throws.
        using (MySqlDataReader reader = command.ExecuteReader())
        {
            while (reader.Read())
            {
                object[] tempResult = new object[reader.FieldCount];
                reader.GetValues(tempResult);

                UserReview row = new UserReview(
                    (int)tempResult[0],
                    reader.GetBoolean(10),
                    (float)tempResult[4],
                    (float)tempResult[14],
                    reader.GetDateTime(1),
                    (string)tempResult[13],
                    (string)tempResult[12],
                    (string)tempResult[11],
                    reader.GetBoolean(9),
                    (string)tempResult[3], //content
                    (string)tempResult[6]  //author
                );

                // Reception counts are optional; they are copied only when both columns hold a value.
                if (!reader.IsDBNull(7) && !reader.IsDBNull(8))
                {
                    row.positiveReception = (int)tempResult[7];
                    row.negativeReception = (int)tempResult[8];
                }

                result.Add(row);
            }
        }
    }

    return result;
}
/// <summary>
/// Work item run by each pool thread: links the products delegated to this thread with the reviews.
/// </summary>
/// <typeparam name="T">Product type being linked.</typeparam>
/// <param name="data">A <c>ThreadingData&lt;T&gt;</c> carrying the product batch, the review list and the result-container id.</param>
public void ThreadfunctionProduct <T>(object data) where T : Product
{
    ThreadingData<T> job = (ThreadingData<T>)data;

    DistinctProductList<T> assignedProducts = job.productList;
    DistinctReviewList<Review> reviews = job.reviewList;

    // Results go into the container that the job's id selects in the shared list.
    ReviewProductLinks threadLinks = ThreadingData.threadProcessedData[job.id];

    foreach (var product in assignedProducts)
    {
        // execute linking processing
        product.MatchReviewAndProductTokens(reviews, assignedProducts.stopWord, ref threadLinks);
    }

    // Signal completion by atomically lowering the shared running-thread counter.
    Interlocked.Decrement(ref ThreadingData.semaphore);
}
/// <summary>
/// Substring-based linking: a review is linked to this product when its stop-word-free,
/// concatenated title passes <c>MatchReviewTitleWithProductStringsSubstring</c> for the product tokens.
/// </summary>
/// <param name="reviewList">Reviews to search; only the prune groups keyed by this product's <c>prunNumbers</c> are examined.</param>
/// <param name="stopWords">Restricted tokens removed from both the product tokens and the review title.</param>
/// <param name="reviewProductLinks">Accumulator that receives this product and each matched review (each at most once).</param>
public override void MatchReviewAndProductSubstring(DistinctReviewList <Review> reviewList, Dictionary <string, bool> stopWords, ref ReviewProductLinks reviewProductLinks)
{
    // Product tokens come from the lower-cased model, graphics processor and manufacturer.
    List<string> productTokens = RemoveRestrictedTokens(
        SplitStringToTokens(Model.ToLower() + " " + GraphicsProcessor.ToLower() + " " + Manufacturer.ToLower()),
        stopWords);

    // Collect candidate reviews from every prune group of this product. A review can appear
    // in several groups; it is kept only once so it is not linked (and later counted or
    // written) multiple times for the same product. Order of first appearance is preserved.
    List<Review> matchingReviews = new List<Review>();
    HashSet<Review> seenReviews = new HashSet<Review>();
    foreach (int searchNumber in prunNumbers)
    {
        if (!reviewList.prunGroups.ContainsKey(searchNumber))
        {
            continue;
        }

        foreach (Review candidate in reviewList.prunGroups[searchNumber])
        {
            if (seenReviews.Add(candidate))
            {
                matchingReviews.Add(candidate);
            }
        }
    }

    foreach (Review review in matchingReviews)
    {
        // The title tokens are glued together without separators before the substring match.
        List<string> titleTokens = RemoveRestrictedTokens(SplitStringToTokens(review.Title.ToLower()), stopWords);
        string reviewTitleWithoutStopWords = string.Join("", titleTokens.ToArray());

        if (!MatchReviewTitleWithProductStringsSubstring(reviewTitleWithoutStopWords, productTokens))
        {
            continue;
        }

        reviewMatches.Add(review);          //add review to list of reviews that link to this product
        review.linkedProducts.Add(this);    //add this product to the review's list of products it links to

        if (!reviewProductLinks.productList.Contains(this))
        {
            reviewProductLinks.productList.Add(this);
        }

        if (!reviewProductLinks.reviewList.Contains(review))
        {
            reviewProductLinks.reviewList.Add(review);
        }
    }
}
/// <summary>
/// Overridable hook for token-based review/product linking. This base implementation
/// has an empty body and therefore links nothing.
/// </summary>
/// <param name="reviewList">Reviews available for linking (unused here).</param>
/// <param name="stopWords">Restricted tokens (unused here).</param>
/// <param name="reviewProductLinks">Accumulator for linked products and reviews (left untouched here).</param>
public virtual void MatchReviewAndProductTokens(DistinctReviewList <Review> reviewList, Dictionary <string, bool> stopWords, ref ReviewProductLinks reviewProductLinks) { }
/// <summary>
/// Click handler that runs the whole analysis: loads CPU/GPU products and their reviews
/// from the crawler database, links them on the thread pool, removes invalid links,
/// scores the linked products and writes products and reviews to the analyser database.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
/// <remarks>
/// The handler blocks its calling thread (polling with <c>Thread.Sleep</c>) until all
/// queued work items have finished.
/// </remarks>
private void GetDataTest_bt_Click(object sender, RoutedEventArgs e)
{
    DistinctProductList<CPU> cpuList; //list of all cpu products, after merging
    DistinctProductList<GPU> gpuList;
    DistinctReviewList<Review> reviewListCpu = new DistinctReviewList<Review>(); //list of all cpu reviews
    DistinctReviewList<Review> reviewListGpu = new DistinctReviewList<Review>();
    ReviewProductLinks reviewProductLinks = new ReviewProductLinks(); //contains the products and reviews which have been linked
    int productsPerThread = 10; //determines the amount of products each thread task should process
    DBConnect dbConnection = new DBConnect(); //create a database connection handler

    // The per-thread result containers live in static state; drop whatever a previous
    // click left behind so old results are not collected (and written) again.
    ThreadingData.threadProcessedData.Clear();

    #region Add data from crawlerDB
    dbConnection.DbInitialize(true);
    dbConnection.connection.Open();
    try
    {
        gpuList = dbConnection.GetGpuData();
        cpuList = dbConnection.GetCpuData();
        reviewListCpu.AddDistinctList(dbConnection.GetCriticReviewData("CPU").ToReview());
        reviewListCpu.AddDistinctList(dbConnection.GetUserReviewData("CPU").ToReview());
        reviewListGpu.AddDistinctList(dbConnection.GetCriticReviewData("GPU").ToReview());
        reviewListGpu.AddDistinctList(dbConnection.GetUserReviewData("GPU").ToReview());
    }
    finally
    {
        // Close the connection even when one of the loads throws.
        dbConnection.connection.Close();
    }
    #endregion

    StartThreads(productsPerThread, cpuList, reviewListCpu); //execute threaded processing for CPUs
    StartThreads(productsPerThread, gpuList, reviewListGpu);

    while (ThreadingData.semaphore != 0) //wait until all threads are done
    {
        Thread.Sleep(300); //no need for main thread to work while waiting
    }

    foreach (var threadReviewProductLink in ThreadingData.threadProcessedData) //collect each thread's processed data
    {
        reviewProductLinks.productList.AddRange(threadReviewProductLink.productList);
        reviewProductLinks.reviewList.AddRange(threadReviewProductLink.reviewList);
    }

    //remove invalid links (reviews which link to multiple products)
    ReviewProductLinks actualReviewProductLinks = RemoveInvalidLinks(ref reviewProductLinks);

    #region Debug region
    /* ||=====================================================||
     * ||!! Warning! You are now entering the debug area.   !!||
     * ||-----------------------------------------------------||
     * || Nothing here is guaranteed to be true and          ||
     * || everything might be wrong.                         ||
     * || Proceed at your own risk.                          ||
     * ||=====================================================||*/

    //Dictionary<string, bool> helloo = new Dictionary<string, bool>();

    /*gpuList.testPruning = gpuList.testPruning.OrderByDescending(a => a[1]).ToList();
     * cpuList.testPruning = cpuList.testPruning.OrderByDescending(a => a[1]).ToList();
     * reviewListCpu.testPruning = reviewListCpu.testPruning.OrderByDescending(a => a[1]).ToList();
     * reviewListGpu.testPruning = reviewListGpu.testPruning.OrderByDescending(a => a[1]).ToList();*/

    /*foreach (var product in actualReviewProductLinks.productList)
     * {
     *     foreach (var review in product.reviewMatches)
     *     {
     *         if (!helloo.ContainsKey(review.Title))
     *         {
     *             Debug.WriteLine("");
     *             Debug.WriteLine(product.Id + " " + product);
     *             Debug.WriteLine(review.Id + " " + review.Title);
     *             helloo.Add(review.Title, true);
     *         }
     *     }
     * }*/

    //Debugging.Debugging.DebugReviewDuplicates(chassisList, cpuList, gpuList, hardDriveList, motherboardList, psuList, ramList);
    //Debugging.Debugging.GetUnlinkedReviews(reviewListGpu, cpuList, gpuList);
    //Debugging.Debugging.NumberOfReviewForEachProduct(cpuList);
    #endregion

    //Score.Score.AssessProductListScores(cpuList);
    //var scoredProducts = Score.Score.AssessProductListScores(actualReviewProductLinks.productList);

    //SCORE
    foreach (var product in actualReviewProductLinks.productList)
    {
        product.score.CalculateProductScore(product);
    }

    //Open analyserDB
    dbConnection.DbInitialize(false);
    dbConnection.connection.Open();
    try
    {
        foreach (Product product in actualReviewProductLinks.productList)
        {
            // A superScore of -1 means the product itself is not written.
            if (product.score.superScore != -1)
            {
                product.WriteToDB(dbConnection.connection);
            }

            // NOTE(review): reviews are written even when their product was skipped above - confirm this is intended.
            foreach (Review review in product.reviewMatches)
            {
                review.WriteToDB(dbConnection.connection);
            }
        }

        /*
         * //Write to database
         * cpuList[1].WriteToDB(dbConnection.connection);
         * cpuList[1].reviewMatches[0].WriteToDB(dbConnection.connection);
         * gpuList[1].WriteToDB(dbConnection.connection);
         * gpuList[1].reviewMatches[0].WriteToDB(dbConnection.connection);
         */
    }
    finally
    {
        // Close the connection even when a write throws.
        dbConnection.connection.Close();
    }
}
/// <summary>
/// Splits <paramref name="productList"/> into batches and queues one thread-pool work item
/// per batch, each with its own result container in <c>ThreadingData.threadProcessedData</c>.
/// </summary>
/// <typeparam name="T">Product type being linked.</typeparam>
/// <param name="productsPerThread">Maximum number of products per work item; must be positive.</param>
/// <param name="productList">Products to distribute over the work items.</param>
/// <param name="reviewList">Reviews every work item links against.</param>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="productsPerThread"/> is zero or negative.</exception>
public void StartThreads <T>(int productsPerThread, DistinctProductList <T> productList, DistinctReviewList <Review> reviewList) where T : Product
{
    // A batch size of 0 would never advance the loop below.
    if (productsPerThread <= 0)
    {
        throw new System.ArgumentOutOfRangeException("productsPerThread", "The number of products per thread must be positive.");
    }

    for (int i = 0; i < productList.Count; i += productsPerThread)
    {
        // The last batch holds whatever is left when fewer than productsPerThread products remain.
        int remaining = productList.Count - i;
        int batchSize = remaining > productsPerThread ? productsPerThread : remaining;

        // The worker looks its result container up by index (presumably the first constructor
        // argument is stored as ThreadingData<T>.id). Use the index of the container added
        // here rather than the running-thread counter: the counter drops whenever a worker
        // finishes, which could hand the same index to two workers.
        int containerIndex = ThreadingData.threadProcessedData.Count;
        ThreadingData.threadProcessedData.Add(new ReviewProductLinks()); //this specific thread's container for processed data

        // Count the work item before it can run, so its own decrement never precedes the increment.
        Interlocked.Increment(ref ThreadingData.semaphore); //interlocked ensures atomic increment of semaphore
        ThreadPool.QueueUserWorkItem(ThreadfunctionProduct<T>, new ThreadingData<T>(containerIndex, productList.GetRange(i, batchSize), reviewList));
    }
}