/// <summary>
/// Creates a cleansing helper whose stop-word list is loaded from the
/// file named by <c>commonAndStopWordFile</c>, and caches the shared
/// bag-of-words, word list and user list accessors.
/// </summary>
public Cleansing()
{
    // Stop words are loaded first, exactly as in the path-taking overload.
    CommonAndstopWords = LoadStopWords(commonAndStopWordFile);

    // Cache whatever the three static accessors currently return.
    _BagOfWords = BagOfWords.GetBagOfWords;
    _wordsUserList = WordsUserList.GetWordList;
    _usersWordList = UsersWordList.GetUserList;
}
/// <summary>
/// Creates a cleansing helper whose stop-word list is loaded from the
/// given file, and caches the shared bag-of-words, word list and user
/// list accessors.
/// </summary>
/// <param name="filePath">Path passed unchanged to <c>LoadStopWords</c>.</param>
public Cleansing(string filePath)
{
    // Stop words come from the caller-supplied file instead of the default one.
    CommonAndstopWords = LoadStopWords(filePath);

    // Cache whatever the three static accessors currently return.
    _BagOfWords = BagOfWords.GetBagOfWords;
    _wordsUserList = WordsUserList.GetWordList;
    _usersWordList = UsersWordList.GetUserList;
}
/// <summary>
/// Writes a CSV file with one row per word in <paramref name="wordList"/>:
/// the word, its total tweet count and its total user count.
/// The file is created (overwriting any existing file) under
/// <c>commonPath + "\" + _Util.CreateFolderPath()</c>; the folder is created
/// when it does not exist. Progress is published through
/// <c>_Util.totalIterations</c>, <c>_Util.functionNameForProgressMsg</c> and
/// <c>_Util.completedIterations</c> under <c>_Util.Locker</c>.
/// </summary>
/// <param name="wordList">Source of the words and their per-word info.</param>
/// <param name="fileName">Name of the CSV file to create inside the output folder.</param>
/// <returns><c>true</c> when the file was written; <c>false</c> when any exception occurred.</returns>
public static bool WriteWordTweetUserDistribution(WordsUserList wordList, string fileName)
{
    try
    {
        string path = commonPath + "\\" + _Util.CreateFolderPath();
        if (!Directory.Exists(path))
        {
            Directory.CreateDirectory(path);
        }

        // 'using' guarantees the file handle is released even when writing
        // fails part-way; previously the stream was closed only on success.
        using (var fileStream = new FileStream(path + "\\" + fileName, FileMode.Create, FileAccess.Write))
        using (var streamWriter = new StreamWriter(fileStream, Encoding.UTF8))
        {
            streamWriter.WriteLine("Word,Total Tweets, Total Users");

            lock (_Util.Locker)
            {
                _Util.totalIterations = wordList.Count;
                _Util.functionNameForProgressMsg = "WriteWordTweetUserDistribution";
            }

            long count = 0;
            foreach (string word in wordList.GetKeys())
            {
                count++;
                WordInfo wordInfo = wordList.GetWordInfo(word);
                streamWriter.WriteLine(word + "," + wordInfo.TotalTweets.ToString() + "," + wordInfo.TotalUsers.ToString());

                // Publish progress after every row.
                lock (_Util.Locker)
                {
                    _Util.completedIterations = count;
                }
            }
        }

        return true;
    }
    catch (Exception ex)
    {
        // Callers only get a success flag, so keep the best-effort contract
        // but leave a trace of the failure instead of dropping it silently.
        System.Diagnostics.Debug.WriteLine("WriteWordTweetUserDistribution failed: " + ex);
        return false;
    }
}
/// <summary>
/// Runs the whole experiment: extracts the data set, optionally writes the
/// distribution CSVs, optionally de-duplicates / reverses / saves the cleaned
/// data, builds the time-series network, computes window differences and
/// degree centrality, writes the result CSVs and finally reports the outcome
/// in a message box. Progress is published by setting <c>p.Activity</c> and
/// <c>p.Message</c> under <c>_Util.Locker</c> before each step; the numeric
/// codes are defined elsewhere.
/// </summary>
/// <remarks>
/// TODO (from the original author): change all the conditions from check
/// boxes to the configuration object.
/// NOTE(review): <c>success</c> is overwritten by every write step, so the
/// final message reflects only the last write that ran — confirm whether an
/// aggregated result is wanted.
/// </remarks>
private void Process()
{
    try
    {
        Extractor dataExtractor = new Extractor(_config);

        lock (_Util.Locker) { p.Activity = 7; p.Message = 0; }
        dataExtractor.Extract();

        UsersWordList userList = UsersWordList.GetUserList;
        WordsUserList wordList = WordsUserList.GetWordList;
        bool success = false;

        // Temporary condition from the original author: lets the DHG writer
        // be tested repeatedly without rewriting the distribution files.
        if (!chkDHGWriter.Checked)
        {
            lock (_Util.Locker) { p.Activity = 5; p.Message = 2; }
            success = _Util.WriteWordDistributionToCSV(BagOfWords.GetBagOfWords._TweetWords, "WordDistribution.csv");

            lock (_Util.Locker) { p.Activity = 5; p.Message = 6; }
            success = _Util.WriteUserTweetDistribution(userList, "UserTweetDistribution.csv");

            lock (_Util.Locker) { p.Activity = 5; p.Message = 7; }
            success = _Util.WriteWordTweetUserDistribution(wordList, "WordTweetUserDistribution.csv");
        }

        lock (_Util.Locker) { p.Activity = 2; p.Message = 2; }

        // The cleaning steps run only for a dirty data set; skipping them on
        // already-cleaned data saves a lot of time.
        if (!chkCleanedData.Checked)
        {
            string str = txtDataSetFilePath.Text.Trim();
            int lastIndex = str.LastIndexOf(@"\");

            // Everything after the last backslash (the whole string when
            // there is none, because lastIndex is then -1).
            string dataSetFileName = str.Substring(lastIndex + 1, str.Length - lastIndex - 1);

            if (chkDuplicate.Checked)
            {
                dataExtractor.RemoveDuplicateTweetsUsingIndexFile(txtDuplicateTweetIndexFile.Text);
            }
            else
            {
                dataExtractor.RemoveDuplicateTweet();

                lock (_Util.Locker) { p.Activity = 5; p.Message = 4; }
                success = dataExtractor.CreateCSVForRemovedTweets("RemovedTweets.csv", dataSetFileName);
            }

            lock (_Util.Locker) { p.Activity = 4; p.Message = 1; }
            if (chkReverse.Checked)
            {
                dataExtractor.CleanedData.Reverse();
            }

            lock (_Util.Locker) { p.Activity = 4; p.Message = 2; }
            success = _Util.WriteToFile(dataExtractor.CleanedData, false, dataSetFileName);
        }

        // The results of the next two calls were never acted upon in the
        // original code either; they are invoked for their side effects.
        lock (_Util.Locker) { p.Activity = 1; p.Message = 3; }
        dataExtractor.CreateTimeSeriesNetwork();

        lock (_Util.Locker) { p.Activity = 5; p.Message = 10; }
        dataExtractor.CreateNetworkNodeFrequencyDistribution("Node_distribution.csv");

        lock (_Util.Locker) { p.Activity = 1; p.Message = 0; }
        dataExtractor.CalculateWindowsDifferences();

        lock (_Util.Locker) { p.Activity = 1; p.Message = 4; }
        dataExtractor.CalculateDegreeCentralityInDifferenceGraph();

        // Same temporary DHG-writer condition as above.
        if (!chkDHGWriter.Checked)
        {
            lock (_Util.Locker) { p.Activity = 5; p.Message = 3; }
            success = dataExtractor.CreateCentralityScoreCSV("Degree_Centrality_Scores.csv");

            lock (_Util.Locker) { p.Activity = 5; p.Message = 8; }
            success = dataExtractor.CreateSlidingWindowsFeaturesCSV("Sliding_Windows_Features.csv");

            lock (_Util.Locker) { p.Activity = 5; p.Message = 5; }
            success = dataExtractor.CreateUserInfoInEachSlidingWindowCSV("UserInfoInEachSlidingWindow.csv");
        }

        if (_config.IsDHGWriterOn)
        {
            lock (_Util.Locker) { p.Activity = 5; p.Message = 9; }
            success = dataExtractor.CreateGraphVisualizationCSV(_heartBeatGraphs);
        }

        lock (_Util.Locker) { p.Activity = 6; p.Message = 0; }

        if (!success)
        {
            MessageBox.Show("Unable to write file for degree centralities");
        }
        else
        {
            MessageBox.Show("Successfully wrote file for degree centralities");
        }
    }
    catch (Exception ex)
    {
        // InnerException is null for most exceptions; dereferencing it
        // unconditionally made this handler itself throw and hide the
        // original error.
        string innerText = ex.InnerException != null ? ex.InnerException.ToString() : "(none)";
        MessageBox.Show("Exception: " + ex.ToString() + "\n" + "Inner Exception: " + innerText + "\n" + "Trace: " + ex.StackTrace);
    }
}