示例#1
0
 /// <summary>
 /// Creates a <c>Cleansing</c> instance whose stop-word collection is the result of
 /// calling <c>LoadStopWords</c> with the <c>commonAndStopWordFile</c> member.
 /// </summary>
 /// <remarks>
 /// Also stores the current values of <c>BagOfWords.GetBagOfWords</c>,
 /// <c>WordsUserList.GetWordList</c> and <c>UsersWordList.GetUserList</c>.
 /// NOTE(review): these look like shared singleton accessors — confirm against their declarations.
 /// </remarks>
 public Cleansing()
 {
     // Stop words come from the class-level default source.
     CommonAndstopWords = LoadStopWords(commonAndStopWordFile);
     _BagOfWords        = BagOfWords.GetBagOfWords;
     _wordsUserList     = WordsUserList.GetWordList;
     _usersWordList     = UsersWordList.GetUserList;
 }
示例#2
0
 /// <summary>
 /// Creates a <c>Cleansing</c> instance whose stop-word collection is the result of
 /// calling <c>LoadStopWords</c> with the supplied <paramref name="filePath"/>.
 /// </summary>
 /// <param name="filePath">Value handed to <c>LoadStopWords</c>; presumably the path of the stop-word file — confirm.</param>
 /// <remarks>
 /// Also stores the current values of <c>BagOfWords.GetBagOfWords</c>,
 /// <c>WordsUserList.GetWordList</c> and <c>UsersWordList.GetUserList</c>.
 /// NOTE(review): these look like shared singleton accessors — confirm against their declarations.
 /// </remarks>
 public Cleansing(string filePath)
 {
     // Stop words come from the caller-supplied source.
     CommonAndstopWords = LoadStopWords(filePath);
     _BagOfWords        = BagOfWords.GetBagOfWords;
     _wordsUserList     = WordsUserList.GetWordList;
     _usersWordList     = UsersWordList.GetUserList;
 }
        /// <summary>
        /// Writes a CSV file with one row per word in <paramref name="wordList"/>, containing the
        /// word, its <c>TotalTweets</c> and its <c>TotalUsers</c>.
        /// </summary>
        /// <param name="wordList">Source of the words (via <c>GetKeys</c>) and their <c>WordInfo</c> (via <c>GetWordInfo</c>).</param>
        /// <param name="fileName">Name of the file to create inside the output folder; an existing file is overwritten.</param>
        /// <returns><c>true</c> when the file was written completely; <c>false</c> when any exception occurred.</returns>
        /// <remarks>
        /// The output folder is <c>commonPath + "\\" + _Util.CreateFolderPath()</c> and is created when missing.
        /// While writing, <c>_Util.totalIterations</c>, <c>_Util.functionNameForProgressMsg</c> and
        /// <c>_Util.completedIterations</c> are updated under <c>_Util.Locker</c>.
        /// Words are written verbatim; no CSV quoting or escaping is applied.
        /// </remarks>
        public static bool WriteWordTweetUserDistribution(WordsUserList wordList, string fileName)
        {
            try
            {
                string path = commonPath + "\\" + _Util.CreateFolderPath();

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(path);

                // 'using' guarantees the writer and the underlying stream are flushed and released
                // even when an exception is thrown half-way through the loop.
                using (var fileStream = new FileStream(path + "\\" + fileName, FileMode.Create, FileAccess.Write))
                using (var streamWriter = new StreamWriter(fileStream, Encoding.UTF8))
                {
                    streamWriter.WriteLine("Word,Total Tweets, Total Users");

                    lock (_Util.Locker)
                    {
                        _Util.totalIterations            = wordList.Count;
                        _Util.functionNameForProgressMsg = "WriteWordTweetUserDistribution";
                    }

                    long count = 0;

                    foreach (string word in wordList.GetKeys())
                    {
                        count++;

                        WordInfo wordInfo = wordList.GetWordInfo(word);
                        streamWriter.WriteLine(word + "," + wordInfo.TotalTweets.ToString() + "," + wordInfo.TotalUsers.ToString());

                        // Publish progress after every row.
                        lock (_Util.Locker)
                        {
                            _Util.completedIterations = count;
                        }
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Callers only look at the boolean result; keep that contract but leave a trace of
                // the failure in the debug output instead of discarding it silently.
                System.Diagnostics.Debug.WriteLine(ex.ToString());
                return false;
            }
        }
示例#4
0
        /// <summary>
        /// Runs the complete analysis pipeline: extracts the data set, optionally writes the
        /// distribution CSV files, optionally de-duplicates and rewrites the data set, builds the
        /// time-series network, computes window differences and degree centrality, and writes the
        /// resulting CSV files.
        /// </summary>
        /// <remarks>
        /// Before each stage <c>p.Activity</c> and <c>p.Message</c> are set under <c>_Util.Locker</c>;
        /// the numeric codes are defined outside this method.
        /// When the pipeline finishes, a message box reports the result of the last write that was
        /// attempted. Any exception is caught and shown in a message box.
        /// </remarks>
        private void Process()
        {
            // TODO: change all the conditions from check boxes to the configuration object.

            try
            {
                Extractor dataExtractor = new Extractor(_config);

                lock (_Util.Locker)
                {
                    p.Activity = 7;
                    p.Message  = 0;
                }

                dataExtractor.Extract();

                // Fetched after Extract(), exactly as in the original ordering.
                UsersWordList userList = UsersWordList.GetUserList;
                WordsUserList wordList = WordsUserList.GetWordList;

                bool success = false;

                // temporary condition, just to check if DHG writer is working fine, because I need to execute code again n again
                if (!chkDHGWriter.Checked)
                {
                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 2;
                    }

                    success = _Util.WriteWordDistributionToCSV(BagOfWords.GetBagOfWords._TweetWords, "WordDistribution.csv");

                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 6;
                    }

                    success = _Util.WriteUserTweetDistribution(userList, "UserTweetDistribution.csv");

                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 7;
                    }

                    success = _Util.WriteWordTweetUserDistribution(wordList, "WordTweetUserDistribution.csv");
                }

                lock (_Util.Locker)
                {
                    p.Activity = 2;
                    p.Message  = 2;
                }

                // The following code executes only if the experiment is run on a dirty data set;
                // skipping it otherwise saves a lot of time.
                if (!chkCleanedData.Checked)
                {
                    string str       = txtDataSetFilePath.Text.Trim();
                    int    lastIndex = str.LastIndexOf(@"\");

                    // Everything after the last backslash (the whole string when there is none).
                    string dataSetFileName = str.Substring(lastIndex + 1);

                    if (chkDuplicate.Checked)
                    {
                        dataExtractor.RemoveDuplicateTweetsUsingIndexFile(txtDuplicateTweetIndexFile.Text);
                    }
                    else
                    {
                        dataExtractor.RemoveDuplicateTweet();

                        lock (_Util.Locker)
                        {
                            p.Activity = 5;
                            p.Message  = 4;
                        }

                        success = dataExtractor.CreateCSVForRemovedTweets("RemovedTweets.csv", dataSetFileName);
                    }

                    lock (_Util.Locker)
                    {
                        p.Activity = 4;
                        p.Message  = 1;
                    }

                    if (chkReverse.Checked)
                    {
                        dataExtractor.CleanedData.Reverse();
                    }

                    lock (_Util.Locker)
                    {
                        p.Activity = 4;
                        p.Message  = 2;
                    }

                    success = _Util.WriteToFile(dataExtractor.CleanedData, false, dataSetFileName);
                } // end of clean data condition, i.e. !chkCleanedData

                lock (_Util.Locker)
                {
                    p.Activity = 1;
                    p.Message  = 3;
                }

                // Return value intentionally ignored, as in the original.
                dataExtractor.CreateTimeSeriesNetwork();

                lock (_Util.Locker)
                {
                    p.Activity = 5;
                    p.Message  = 10;
                }

                // Return value intentionally ignored, as in the original.
                dataExtractor.CreateNetworkNodeFrequencyDistribution("Node_distribution.csv");

                lock (_Util.Locker)
                {
                    p.Activity = 1;
                    p.Message  = 0;
                }

                dataExtractor.CalculateWindowsDifferences();

                lock (_Util.Locker)
                {
                    p.Activity = 1;
                    p.Message  = 4;
                }

                dataExtractor.CalculateDegreeCentralityInDifferenceGraph();

                // temporary condition, just to check if DHG writer is working fine, because I need to execute code again n again
                if (!chkDHGWriter.Checked)
                {
                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 3;
                    }

                    success = dataExtractor.CreateCentralityScoreCSV("Degree_Centrality_Scores.csv");

                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 8;
                    }

                    success = dataExtractor.CreateSlidingWindowsFeaturesCSV("Sliding_Windows_Features.csv");

                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 5;
                    }

                    success = dataExtractor.CreateUserInfoInEachSlidingWindowCSV("UserInfoInEachSlidingWindow.csv");
                }

                if (_config.IsDHGWriterOn)
                {
                    lock (_Util.Locker)
                    {
                        p.Activity = 5;
                        p.Message  = 9;
                    }

                    success = dataExtractor.CreateGraphVisualizationCSV(_heartBeatGraphs);
                }

                lock (_Util.Locker)
                {
                    p.Activity = 6;
                    p.Message  = 0;
                }

                // NOTE(review): 'success' holds only the result of the LAST write attempted above
                // (and stays false when every optional write was skipped), although the text
                // mentions degree centralities specifically. Behavior kept as-is; confirm intent.
                if (!success)
                {
                    MessageBox.Show("Unable to write file for degree centralities");
                }
                else
                {
                    MessageBox.Show("Successfully wrote file for degree centralities");
                }
            }
            catch (Exception ex)
            {
                // InnerException is null for most exceptions; dereferencing it unconditionally
                // would throw from inside this handler and hide the real error.
                string innerText = ex.InnerException != null ? ex.InnerException.ToString() : "(none)";

                MessageBox.Show("Exception: " + ex.ToString() + "\n" + "Inner Exception: " + innerText + "\n" + "Trace: " + ex.StackTrace);
            }
        }