/// <summary>
/// Manual driver: runs the Flipkart scrapper against one hard-coded product page and then
/// hands the scraped image to <c>ImageController</c> for saving and for the remote copy
/// (server "localhost", port "8080").
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string productUrl = "http://www.flipkart.com/samsung-40h5100-40-inches-57900-tv/p/itmdu7ycguexxhzm?pid=TVSDU5QTVF9BX57Z";

    CFlipkartScrapper scrapper = new CFlipkartScrapper();

    // ExtractData takes the description by ref, so the local must be typed as the base class.
    ItemDescription scrapedItem = new MicrowaveItemDescription();

    scrapper.SetHTMLDocument(productUrl);
    scrapper.ExtractData(ref scrapedItem);

    // Remote folder is derived from the item's label, the file name from its id plus the
    // extension reported for the scraped image.
    string remoteFolder = "DealSheelImages//" + scrapedItem.getLabel();
    string imageFileName = scrapedItem.id + ImageController.GetFileTypeFromImage(scrapedItem.image);

    ImageController.GetImageFromUrlAndSave(scrapedItem.image, imageFileName);
    ImageController.SaveimageToRemoteLocation(remoteFolder, imageFileName, "localhost", "8080");
}
/// <summary>
/// Scrapes the model number of every product in <c>m_lProduct</c> whose id is not already a
/// key of <c>m_dictProduct</c>, and appends one CSV line per scraped product to
/// <c>m_fileName</c> (model name, brand, url, product id).
/// </summary>
/// <remarks>
/// A failure on a single product is traced and the loop continues with the next product.
/// <c>m_threadCount</c> is decremented exactly once when the method exits, including when it
/// exits through an exception, so that the counter cannot be left permanently too high.
/// NOTE(review): the decrement itself is a plain <c>--</c>; if several threads share the
/// counter, confirm how it is declared and whether an atomic decrement is needed.
/// </remarks>
public void ScrapModelNumberAndWriteToCSV()
{
    try
    {
        //scrap the model number
        CFlipkartScrapper objscrapper = new CFlipkartScrapper();
        foreach (ProductStructure objpr in m_lProduct)
        {
            //Get the product id and skip it if it is already there in the csv
            if (m_dictProduct.ContainsKey(objpr.productId))
            {
                continue;
            }

            try
            {
                ItemDescription objItem = new ItemDescription();
                objscrapper.SetHTMLDocument(objpr.productUrl);
                objscrapper.ExtractData(ref objItem);

                // Prefer the model id; fall back to the model name when no id was scraped.
                if (objItem.Model_ID != null)
                {
                    objpr.Model_Name = objItem.Model_ID;
                }
                else
                {
                    objpr.Model_Name = objItem.Model_Name;
                }

                //Write to csv file
                string line = objpr.Model_Name + Constants.CSV_DELIMITER
                              + objpr.productBrand + Constants.CSV_DELIMITER
                              + objpr.productUrl + Constants.CSV_DELIMITER
                              + objpr.productId;
                Logger.WriteToCSVFile(line, m_fileName, System.IO.Path.GetDirectoryName(m_fileName));
            }
            catch (Exception ex)
            {
                // Best effort per product: keep going, but leave a trace instead of
                // silently dropping the failure.
                System.Diagnostics.Trace.TraceError(
                    "Failed to scrap model number for product '{0}' ({1}): {2}",
                    objpr.productId, objpr.productUrl, ex);
            }
        }
    }
    finally
    {
        // Always release this worker's slot, even if the loop above could not run.
        m_threadCount--;
    }
}
/// <summary>
/// Saves one Excel row to the database. When no item description exists for the brand and
/// model number, the description is scraped from Flipkart, a new item node is created and the
/// product image is saved and copied to the remote machine on a separate thread. Otherwise the
/// existing item for the store is created or has its price/offer updated.
/// </summary>
/// <param name="objExcelStruct">Row data. Its <c>Completed</c> and <c>Comment</c> members are
/// overwritten with the outcome; <c>FlipKart</c> is filled in when it was empty and a link is
/// found in the extracted-link CSV files.</param>
/// <param name="objStore">Store the item is attached to; null is reported as an error.</param>
/// <param name="objSubCategory">Sub-category used to pick the item description type and the
/// link CSV; null is reported as an error.</param>
/// <param name="objBrand">Brand used to look up the item description; null is reported as an error.</param>
/// <param name="server">Passed through to <c>SaveImageToRemoteMachineThread</c>.</param>
/// <param name="port">Passed through to <c>SaveImageToRemoteMachineThread</c>.</param>
/// <param name="row">Optional grid row; when supplied, its "Completed" and "Comment" cells are
/// set from the result.</param>
/// <returns>The <paramref name="objExcelStruct"/> that was passed in, with the outcome recorded.</returns>
/// <remarks>
/// Exceptions raised while processing are caught here: <c>Completed</c> becomes
/// <c>Constants.COMPLETED_ERROR</c> and <c>Comment</c> carries the exception message.
/// </remarks>
public static ExcelStructure SaveToDB(ExcelStructure objExcelStruct, Store objStore, SubCategory objSubCategory, Brand objBrand, string server, string port, GridViewRowInfo row = null)
{
    try
    {
        // These exceptions never leave the method; their message ends up in Comment.
        if (objStore == null)
        {
            throw new InvalidOperationException("Please select a store .....");
        }
        if (objSubCategory == null)
        {
            throw new InvalidOperationException("Subcateogry do not match or is empty");
        }
        if (objBrand == null)
        {
            throw new InvalidOperationException("Please add brand for this subcategory ir brand is null or not matching");
        }

        ItemDescription objItemDescr = DBGetInterface.GetItemDescription(objBrand, objExcelStruct.ModelNumber);
        if (objItemDescr == null)
        {
            //Call flipkart scrapper
            CFlipkartScrapper objscrapper = new CFlipkartScrapper();
            ItemDescription objItemdesc = ItemFactory.GetItem(objSubCategory);

            if (String.IsNullOrEmpty(objExcelStruct.FlipKart))
            {
                //check if the item link is already scrapped, find in csv
                string scrappedLink = FindScrappedFlipkartLink(objExcelStruct, objSubCategory);
                if (scrappedLink != null)
                {
                    objExcelStruct.FlipKart = scrappedLink;
                }
            }

            // Without a product URL there is nothing to scrape; stop before anything is
            // written to the DB so the row is reported with a clear reason.
            if (String.IsNullOrEmpty(objExcelStruct.FlipKart))
            {
                throw new InvalidOperationException("No Flipkart link is available for model " + objExcelStruct.ModelNumber);
            }

            objscrapper.SetHTMLDocument(objExcelStruct.FlipKart);
            objscrapper.ExtractData(ref objItemdesc);

            // Keep the scraped image reference aside and clear it on the description before
            // the node is created; the image is handled separately below.
            string image = objItemdesc.image;
            objItemdesc.image = null;

            //Create item, create item description.
            CreateItemNodeFromExcelRow(objExcelStruct, objStore, objBrand, objItemdesc);

            //Copy image to local machine and then copy it to tomcat machine
            //build path
            string imageDir = Utilities.GetImageDir(objItemdesc.getLabel());
            Utilities.CreateFolder(imageDir);
            string imagePath = Utilities.GetImagePath(imageDir, objItemdesc.id);
            ImageController.GetImageFromUrlAndSave(image, imagePath);

            //Remote copy runs on a separate thread as image copying is a tedious process
            Thread SaveImageThread = new Thread(() => SaveImageToRemoteMachineThread(imageDir, Utilities.AppendImageType(objItemdesc.id), server, port));
            SaveImageThread.Start();

            objExcelStruct.Completed = Constants.COMPLETED_DONE;
            objExcelStruct.Comment = String.Empty;
        }
        else
        {
            objExcelStruct.Comment = Constants.ALREADY_IN_DB;

            //Just update the price and offer of the item
            Item objItem = DBGetInterface.GetItem(objStore, objItemDescr);
            if (objItem == null)
            {
                //Description exists but this store has no item for it yet: add this node
                CreateItemNodeFromExcelRow(objExcelStruct, objStore, objBrand, objItemDescr);
                objExcelStruct.Completed = Constants.COMPLETED_ADDED;
            }
            else
            {
                //Check for any updates
                bool bUpdate = false;

                uint excelPrice = Convert.ToUInt32(objExcelStruct.Price);
                if (objItem.Price != excelPrice)
                {
                    objItem.Price = excelPrice;
                    bUpdate = true;
                }

                // Treat a missing offer as empty on BOTH sides: the stored offer can be null
                // (this method itself stores a null Offer), and calling Equals on it would throw.
                string offer = objExcelStruct.Offer ?? String.Empty;
                string storedOffer = objItem.OfferDescription ?? String.Empty;
                if (!storedOffer.Equals(offer, StringComparison.InvariantCultureIgnoreCase))
                {
                    objItem.OfferDescription = objExcelStruct.Offer;
                    bUpdate = true;
                }

                if (bUpdate)
                {
                    objItem.lastUpdated = Utilities.GetDateTimeInUnixTimeStamp();
                    DBUpdateInterface.UpdateNode<Item>(objItem);
                    objExcelStruct.Completed = Constants.COMPLETED_UPDATED;
                }
                else
                {
                    objExcelStruct.Completed = Constants.COMPLETED_NOUPDATES;
                }
            }
        }
    }
    catch (Exception ex)
    {
        objExcelStruct.Completed = Constants.COMPLETED_ERROR;
        objExcelStruct.Comment = ex.Message;
    }

    if (row != null)
    {
        row.Cells["Completed"].Value = objExcelStruct.Completed;
        row.Cells["Comment"].Value = objExcelStruct.Comment;
    }

    return objExcelStruct;
}

/// <summary>
/// Best-effort search of the extracted-link CSV files for the row's model number; returns the
/// stored link (third column) or null when nothing is found or the lookup fails.
/// </summary>
private static string FindScrappedFlipkartLink(ExcelStructure objExcelStruct, SubCategory objSubCategory)
{
    string strFilePath = m_ResourcesFolder + "//" + m_CategoryMappingTxt;
    if (!System.IO.File.Exists(strFilePath))
    {
        return null;
    }

    try
    {
        string subCategoryId = objSubCategory.SubCategoryID.ToString();

        using (System.IO.StreamReader reader = new System.IO.StreamReader(strFilePath))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Mapping line: "<sub-category id> <csv file name>"; skip lines without both parts.
                string[] cat = line.Split(' ');
                if (cat.Length < 2 || !cat[0].Equals(subCategoryId, StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                //search model number in that subcategory file
                string file = Constants.LINK_EXTRACTED_FOLDER + "//" + cat[1] + ".csv";
                List<List<string>> lmap = Utilities.GetCSVSheet(file);
                if (lmap == null)
                {
                    continue;
                }

                foreach (List<string> product in lmap)
                {
                    // Column 0 is compared with the model number, column 2 is returned as the
                    // link; rows too short to hold both are skipped.
                    if (product == null || product.Count < 3)
                    {
                        continue;
                    }

                    if (String.Equals(product[0], objExcelStruct.ModelNumber, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return product[2];
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // The lookup is only a convenience; leave a trace and let the caller decide what
        // to do without a link.
        System.Diagnostics.Trace.TraceWarning(
            "Could not look up an already scrapped Flipkart link via '{0}': {1}",
            strFilePath, ex);
    }

    return null;
}

/// <summary>
/// Builds an <c>Item</c> from the row's price and offer, wraps it with the given brand, store
/// and description, and creates the item node through <c>DBAddinterface.CreateItemNode</c>.
/// </summary>
private static void CreateItemNodeFromExcelRow(ExcelStructure objExcelStruct, Store objStore, Brand objBrand, ItemDescription objItemDescription)
{
    Item objItem = new Item();
    objItem.Price = Convert.ToUInt32(objExcelStruct.Price);
    objItem.OfferDescription = objExcelStruct.Offer;

    ItemWrapper objWrap = new ItemWrapper();
    objWrap.objBrand = objBrand;
    objWrap.objStore = objStore;
    objWrap.objItemDescription = objItemDescription;
    objWrap.objItem = objItem;

    DBAddinterface.CreateItemNode(objWrap);
}