예제 #1
0
        /// <summary>
        /// Routed-event handler that runs the Google Play crawl. It initializes the
        /// working files, then processes every search word starting at index
        /// <c>GooglePlaySearchWordsCount</c>: fetches the search result, records the
        /// found URLs, records the progress counters and hands the HTML to the analyzer.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void AnalyzeGooglePlay(object sender, RoutedEventArgs e)
        {
            GooglePlayHTMLCrawler crawler = new GooglePlayHTMLCrawler();

            await InitializeGooglePlayFilesReadersAndWriters(crawler);

            StopwatchGooglePlay.Start();

            try
            {
                // Start at the stored counter so that already-counted search words are skipped.
                for (int i = GooglePlaySearchWordsCount; i < SearchWords.Count; i++)
                {
                    // The counter is incremented in step with i and shown in the UI.
                    GooglePlaySearchWordsTextBlock.Text = (++GooglePlaySearchWordsCount).ToString();
                    Tuple <List <string>, List <string> > tuple = await crawler.FindSearchResultForOneSearchWord(SearchWords[i]);

                    List <string> htmlStrings = tuple.Item1;
                    List <string> urlsStrings = tuple.Item2;

                    // Write found URLs to file.
                    foreach (string url in urlsStrings)
                    {
                        GooglePlayFoundURLsWriter.WriteLine(url);
                    }

                    // Write the counters to the passed-search-words file. Each record is
                    // preceded by a line break, and it is written before this word's HTML
                    // is analyzed.
                    GooglePlayPassedSearchWordsWriter.WriteLine();
                    GooglePlayPassedSearchWordsWriter.Write(GooglePlaySearchWordsCount + " " +
                                                            GooglePlayDownloadedCount + " " +
                                                            GooglePlayAnalyzedCount + " " +
                                                            GooglePlayErrorsCount
                                                            );

                    // NOTE(review): the result (if any) of this call is not awaited or
                    // inspected here - confirm the method is synchronous.
                    AnalyzeHTMLStringsGooglePlay(htmlStrings);
                }
            }
            finally
            {
                // Stop timing even when a crawl, write or analysis step throws, so the
                // stopwatch does not keep running after a failed run.
                StopwatchGooglePlay.Stop();
            }
        }
예제 #2
0
        //methods:
        /// <summary>
        /// Opens (creating when missing) the four Google Play working files, assigns the
        /// writer and reader fields, and - for the files that already existed - loads the
        /// previously found URLs into the crawler and restores the progress counters.
        /// </summary>
        /// <param name="crawler">
        /// Crawler that receives the content of "GooglePlayFoundURLs.txt" and
        /// "GooglePlayPassedSearchWords.txt" when those files already existed.
        /// </param>
        /// <returns>A task that completes when all streams are open and state is loaded.</returns>
        private async Task InitializeGooglePlayFilesReadersAndWriters(GooglePlayHTMLCrawler crawler)
        {
            // Item1 = the file, Item2 = true when the file existed before this call.
            Tuple<StorageFile, bool> logExcel          = await GetOrCreateGooglePlayFile("LogExcel.txt");
            Tuple<StorageFile, bool> logHumans         = await GetOrCreateGooglePlayFile("LogHumans.txt");
            Tuple<StorageFile, bool> foundURLsFile     = await GetOrCreateGooglePlayFile("GooglePlayFoundURLs.txt");
            Tuple<StorageFile, bool> passedSearchWords = await GetOrCreateGooglePlayFile("GooglePlayPassedSearchWords.txt");

            bool foundURLs              = foundURLsFile.Item2;
            bool foundPassedSearchWords = passedSearchWords.Item2;

            // Write streams, positioned at end-of-file so that a resumed run appends to
            // the existing content instead of overwriting it from offset 0.
            Stream logExcelStream          = await OpenGooglePlayAppendStream(logExcel.Item1);
            Stream logHumansStream         = await OpenGooglePlayAppendStream(logHumans.Item1);
            Stream foundURLsStream         = await OpenGooglePlayAppendStream(foundURLsFile.Item1);
            Stream passedSearchWordsStream = await OpenGooglePlayAppendStream(passedSearchWords.Item1);

            // Read streams for the two files whose previous content is loaded below.
            Stream foundURLsReadStream         = await foundURLsFile.Item1.OpenStreamForReadAsync();
            Stream passedSearchWordsReadStream = await passedSearchWords.Item1.OpenStreamForReadAsync();

            // Writers flush after every write, so progress reaches the file immediately.
            GooglePlayLogExcelWriter          = new StreamWriter(logExcelStream)          { AutoFlush = true };
            GooglePlayLogHumanWriter          = new StreamWriter(logHumansStream)         { AutoFlush = true };
            GooglePlayFoundURLsWriter         = new StreamWriter(foundURLsStream)         { AutoFlush = true };
            GooglePlayPassedSearchWordsWriter = new StreamWriter(passedSearchWordsStream) { AutoFlush = true };

            GooglePlayFoundURLsReader         = new StreamReader(foundURLsReadStream);
            GooglePlayPassedSearchWordsReader = new StreamReader(passedSearchWordsReadStream);

            if (foundURLs)
            {
                await crawler.LoadURLmap(GooglePlayFoundURLsReader);
            }

            if (foundPassedSearchWords)
            {
                // Besides skipping, this call returns the four stored counters.
                Tuple <int, int, int, int> counts = await crawler.SkipPassedSearchWords(GooglePlayPassedSearchWordsReader);

                GooglePlaySearchWordsCount = counts.Item1;
                GooglePlayDownloadedCount  = counts.Item2;
                GooglePlayAnalyzedCount    = counts.Item3;
                GooglePlayErrorsCount      = counts.Item4;
            }
        }

        // Returns the named file from GooglePlayWorkingFolder together with a flag that is
        // true when the file already existed; creates the file when it is missing.
        private async Task<Tuple<StorageFile, bool>> GetOrCreateGooglePlayFile(string fileName)
        {
            try
            {
                StorageFile existing = await GooglePlayWorkingFolder.GetFileAsync(fileName);

                return Tuple.Create(existing, true);
            }
            catch (FileNotFoundException)
            {
                // Only "file is missing" means "create it"; any other failure
                // (access denied, invalid name, ...) propagates to the caller.
            }

            StorageFile created = await GooglePlayWorkingFolder.CreateFileAsync(fileName);

            return Tuple.Create(created, false);
        }

        // Opens the file for writing and moves the position to end-of-file, because the
        // write stream starts at offset 0 and does not truncate existing content.
        private static async Task<Stream> OpenGooglePlayAppendStream(StorageFile file)
        {
            Stream stream = await file.OpenStreamForWriteAsync();

            stream.Seek(0, SeekOrigin.End);

            return stream;
        }