Beispiel #1
0
        /// <summary>
        /// Downloads the zip archive of each repository whose index lies in the requested range.
        /// </summary>
        /// <remarks>
        /// The range comes from <c>GetIndex</c>: element 0 is the start index, element 1 the end index (exclusive).
        /// A negative start index aborts the run; a non-positive end index means "up to the end of the list".
        /// An end index beyond the list is clamped to the list size instead of failing mid-run.
        /// </remarks>
        /// <param name="inputStrFirst">First raw argument, passed to <c>GetIndex</c>.</param>
        /// <param name="InputStrSecond">Second raw argument, passed to <c>GetIndex</c>.</param>
        public static void DownloadGitCodeZip(string inputStrFirst, string InputStrSecond)
        {
            int[] indexData  = GetIndex(inputStrFirst, InputStrSecond);
            int   startIndex = indexData[0];
            int   endIndex   = indexData[1];

            if (startIndex < 0)
            {
                return;
            }

            // UTC timestamps keep the elapsed-time figure immune to local clock shifts such as DST.
            DateTime      startTime = DateTime.UtcNow;
            List <string> repositoryNameList;
            List <string> downloadURLList = DBUtils.GetDownloadURLList(out repositoryNameList);
            int           timeCost        = (int)(DateTime.UtcNow - startTime).TotalSeconds;

            Console.WriteLine("Get Name List Time cost: " + timeCost + "s");

            // The two lists are indexed in parallel below, so they must have the same length.
            if (downloadURLList.Count != repositoryNameList.Count)
            {
                Console.WriteLine("downloadURLList.Count: \"" + downloadURLList.Count + "\" is not equal to repositoryNameList.Count: \"" + repositoryNameList.Count + "\", return!!!");
                return;
            }
            Console.WriteLine("Data Line Count: " + downloadURLList.Count);

            // Clamp the upper bound so an oversized end index cannot run past the end of the lists.
            endIndex = endIndex > 0 ? Math.Min(endIndex, downloadURLList.Count) : downloadURLList.Count;

            // Files are requested one at a time, in index order.
            for (int i = startIndex; i < endIndex; i++)
            {
                // Path separators in the repository name are replaced so the name is usable as a single file name.
                string zipName  = repositoryNameList[i].Replace("/", "_").Replace("\\", "_") + ".zip";
                string fileName = CrawlerClass.HttpDownloadFile(downloadURLList[i], Configuration.DownloadZipDir, false, false, zipName);
                Console.WriteLine("Run " + i + " line fileName: " + fileName + " will be downloaded");
            }
            Console.WriteLine("Download Git Code Zip All End*****************************");
        }
Beispiel #2
0
 /// <summary>
 /// Callback that receives its work item as an untyped object: requests the download described by a
 /// <c>GitCodeZipInfoClass</c> into <c>Configuration.DownloadZipDir</c> and logs the repository name.
 /// </summary>
 /// <param name="gitCodeZipInfoObj">Work item; it is cast to <c>GitCodeZipInfoClass</c>, so any other type throws.</param>
 private static void DownloadFileTaskMethod(Object gitCodeZipInfoObj)
 {
     // Cast once and reuse the typed reference.
     GitCodeZipInfoClass zipInfo = (GitCodeZipInfoClass)gitCodeZipInfoObj;

     CrawlerClass.HttpDownloadFile(
         zipInfo.downloadURL,
         Configuration.DownloadZipDir,
         false,
         true,
         zipInfo.repositoryName);

     Console.WriteLine(zipInfo.repositoryName + " will be downloaded");
 }
Beispiel #3
0
        /// <summary>
        /// Reads repository entries from <c>Configuration.URLFile</c>, crawls every repository that is not stored yet,
        /// resolves its zip download URL, downloads and extracts the zip when that URL is new, and stores the
        /// resulting records in the database in batches of <c>Configuration.DBInsertCountEveryTime</c>.
        /// </summary>
        /// <remarks>
        /// Each input line is split on tabs into: repository path, impression count, click count.
        /// Lines missing one of those fields, or whose counts are not valid integers, are logged and skipped
        /// so that a single bad line does not abort the run and lose the pending batch.
        /// </remarks>
        /// <param name="inputStrFirst">Not used by this method.</param>
        /// <param name="InputStrSecond">Not used by this method.</param>
        public static void CrawlAndStoreGitData(string inputStrFirst, string InputStrSecond)
        {
            List <string>         inputList  = FileUtils.ReadFileLine(Configuration.URLFile);
            List <DBGitDataModel> insertData = new List <DBGitDataModel>();
            Dictionary <string, DBGitDataModel> existDownLoadURLData;
            HashSet <string> existPKData = DBUtils.GetExistZipData(out existDownLoadURLData);

            for (int i = 0; i < inputList.Count; i++)
            {
                string[] eles           = inputList[i].Split(new char[] { '\t' });
                string   repositoryPath = eles[0];
                Console.WriteLine(repositoryPath);

                // Repositories already present in the database are not processed again.
                if (existPKData.Contains(repositoryPath))
                {
                    continue;
                }

                // Validate the remaining columns before doing any network work.
                int impressionCount;
                int clickCount;
                if (eles.Length < 3
                    || !int.TryParse(eles[1], out impressionCount)
                    || !int.TryParse(eles[2], out clickCount))
                {
                    Logger.WriteLog("error: malformed input line " + (i + 1) + ": \"" + inputList[i] + "\"");
                    continue;
                }

                HttpStatusCode statusCode;
                Dictionary <string, string> header;
                string htmlContent;
                try
                {
                    htmlContent = CrawlerClass.Crawl(repositoryPath, out statusCode, out header);
                }
                catch (Exception e)
                {
                    // A failed crawl only skips this repository; the rest of the list is still processed.
                    Logger.WriteLog("error: crawl \"" + repositoryPath + "\" " + e.Message);
                    continue;
                }

                string downloadRelativeURL = HtmlResolve.GetGitDownloadURL(htmlContent);
                if (string.IsNullOrEmpty(downloadRelativeURL))
                {
                    continue;
                }

                string downloadURL = Configuration.RootURL + downloadRelativeURL;
                string fileName;
                string dirName;
                DBGitDataModel knownModel;
                if (existDownLoadURLData.TryGetValue(downloadURL, out knownModel))
                {
                    // Same download URL already stored: reuse its file and directory instead of downloading again.
                    fileName = knownModel.fileName;
                    dirName  = knownModel.dirName;
                }
                else
                {
                    fileName = CrawlerClass.HttpDownloadFile(downloadURL, Configuration.DownloadZipDir);
                    dirName  = FileUtils.ZipExtractToDirectory(Path.Combine(Configuration.DownloadZipDir, fileName), Configuration.ZipExtractDir);

                    // An empty directory name is treated as "nothing to store" for this entry.
                    if (dirName == "")
                    {
                        continue;
                    }
                }

                insertData.Add(new DBGitDataModel(repositoryPath, downloadURL, impressionCount, clickCount, fileName, dirName));

                // Flush a full batch and start a fresh list for the next one.
                if (insertData.Count == Configuration.DBInsertCountEveryTime)
                {
                    DBUtils.StoreDataToDBGitDataPart(insertData);
                    insertData = new List <DBGitDataModel>();
                    Console.WriteLine("Store Data To DBGitData Part End!!!");
                }
            }

            // Store whatever is left over after the last full batch.
            if (insertData.Count > 0)
            {
                DBUtils.StoreDataToDBGitDataPart(insertData);
            }
        }