Example #1
        /// <summary>
        /// Loads the CSV row at the given index and adds it to the record buffer, along with product data scraped from the internet (if enabled)
        /// </summary>
        /// <param name="theRowIndex">Index of the row in the CSV</param>
        private void LoadRow(int theRowIndex)
        {
            int count = 0;

            using (var reader = new StreamReader(InputPath))
                using (var csv = new CsvReader(reader))
                {
                    try
                    {
                        csv.Configuration.RegisterClassMap<InputMap>();

                        // Get all the data
                        IEnumerable<CSVRow> records = csv.GetRecords<CSVRow>();

                        // Iterate through the data and lookup the UPCs
                        foreach (var rec in records)
                        {
                            // Once we are at the starting point, start scraping
                            if (count == theRowIndex)
                            {
                                Console.WriteLine("Scraping for index: " + theRowIndex.ToString() + ": " + rec.full_upc);

                                ScraperOutput output = new ScraperOutput();
                                output.desc = "";
                                output.upc  = rec.full_upc;

                                // Normal scraping behavior if enabled
                                if (!isOffline)
                                {
                                    output = ScrapeWeb(rec.full_upc, rec);
                                }

                                Console.WriteLine("ADD " + rec.full_upc.ToString() + " " + recordBuffer.Count);
                                recordBuffer.Add(new Tuple<CSVRow, ScraperOutput>(rec, output));
                            }

                            count++;
                        }
                    }
                    catch (HeaderValidationException e)
                    {
                        Console.WriteLine("ERROR: " + e.Message);
                        errorState = new Tuple<bool, string>(true, e.Message);
                    }

                    totalRecordsCount = count;
                }
        }
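
LoadRow registers a CsvHelper class map named InputMap that is not shown in these examples. A minimal sketch of what such a map might look like, assuming CSVRow exposes these members as public properties and that the input file uses the header names below (both are assumptions, not details taken from the original project):

        using CsvHelper.Configuration;

        // Hypothetical mapping from CSV headers to CSVRow members; the real
        // header names and full member list are not visible in these examples.
        public sealed class InputMap : ClassMap<CSVRow>
        {
            public InputMap()
            {
                Map(m => m.full_upc).Name("UPC");          // assumed header name
                Map(m => m.brand2).Name("Brand");          // assumed header name
                Map(m => m.desc2).Name("Description");     // assumed header name
                Map(m => m.sign).Name("Sign Description"); // assumed header name
            }
        }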
Example #2
        /// <summary>
        /// Goes back one index; currently unused
        /// </summary>
        private void GetLastEntry()
        {
            Tuple<CSVRow, ScraperOutput> entry = webWorker.GetLastRow();

            CSVRow        row    = entry.Item1;
            ScraperOutput output = entry.Item2;

            txtBrand.Text    = row.brand2;
            txtDesc.Text     = row.desc2;
            txtSignDesc.Text = row.sign;
            txtUPC.Text      = row.full_upc;

            txtNewBrand.Text       = row.brand;
            txtNewDescription.Text = row.desc;
            txtNotes.Text          = "oops";

            // Crucial to make sure we update this
            webWorker.rowIndex--;
        }
Example #3
        private ScraperOutput HandleWebError(string upc, bool switchOffline = false, string msg = "")
        {
            ScraperOutput output = new ScraperOutput();

            if (switchOffline)
            {
                output.desc = "Internet connection failed, switching to Offline mode";
            }
            else
            {
                output.desc = "Web Error: " + msg;
            }

            output.upc = upc;

            isOffline = switchOffline;

            return output;
        }
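
ScraperOutput is used throughout these examples but never defined in them. A minimal stand-in that matches the two members the methods here actually touch; the real class may well carry more:

        // Minimal stand-in for ScraperOutput, limited to the members used in
        // these examples (the lowercase names match the usage above).
        public class ScraperOutput
        {
            public string upc  { get; set; }
            public string desc { get; set; }
        }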
Example #4
        /// <summary>
        /// Loads the next item in the Record Buffer into the user application
        /// </summary>
        private void LoadNextEntry()
        {
            // If the web worker ran into an error, stop the progress
            if (webWorker.errorState.Item1)
            {
                LogText("[ERROR] " + webWorker.errorState.Item2);
                return;
            }

            // Catch the end condition
            if (webWorker.recordBuffer.Count == 0)
            {
                LogText("No more records left to populate!");

                lblcsvIndex.Text       = "All finished! Check the output file location.";
                txtBrand.Text          = "";
                txtDesc.Text           = "";
                txtSignDesc.Text       = "";
                txtUPC.Text            = "";
                txtNewBrand.Text       = "";
                txtNewDescription.Text = "";
                txtNotes.Text          = "";

                return;
            }

            // Get the next entry in the buffer
            Tuple<CSVRow, ScraperOutput> entry = webWorker.recordBuffer[0];

            CSVRow        row    = entry.Item1;
            ScraperOutput output = entry.Item2;

            // Makes sure that we convert all scientific notation back to real numbers
            string upc = decimal.Parse(row.full_upc, System.Globalization.NumberStyles.Any).ToString();

            // Make sure we didn't lose any leading zeros
            while (upc.Length < 14)
            {
                upc = '0' + upc;
            }

            string currentUPC = upc;
            string lastManu, currentManu;

            string lastNoLeading    = lastUPC.TrimStart('0');
            string currentNoLeading = currentUPC.TrimStart('0');

            // How many digits to check for uniqueness
            int checkLength = 7;

            // Get the manufacturer codes
            if (lastNoLeading.Length > checkLength && currentNoLeading.Length > checkLength)
            {
                lastManu    = lastNoLeading.Substring(0, checkLength);
                currentManu = currentNoLeading.Substring(0, checkLength);
            }
            else
            {
                // These just have to be non-equal, doesn't matter the value
                lastManu    = "1";
                currentManu = "-1";
            }

            // If the manufacturer codes are the same OR there are brand names and they are the same, keep the current brand name
            if (lastManu == currentManu || (lastBrand.ToLower() == row.brand2.ToLower() && row.brand2 != ""))
            {
                //row.brand = txtNewBrand.Text;
            }

            lastBrand = row.brand2;
            lastUPC   = upc;

            txtBrand.Text    = row.brand2;
            txtDesc.Text     = row.desc2;
            txtSignDesc.Text = row.sign != " " ? row.sign : row.pos;
            txtUPC.Text      = upc;

            // Fill the new textboxes with the best data of the bunch
            txtNewBrand.Text       = (row.brand.Length != 0) ? row.brand : row.brand2;
            txtNewDescription.Text = (row.desc.Length != 0) ? row.desc : output.desc;
            txtNotes.Text          = row.notes;

            // Run the formatting logic
            txtNewBrand.Text       = toTitlecase(txtNewBrand.Text);
            txtNewDescription.Text = DescriptionFormat(txtNewDescription.Text);

            if (txtNewDescription.Text.ToLower() == "null")
            {
                txtNewDescription.Text = "";
            }

            btnNext.Enabled = true;
            btnSkip.Enabled = true;

            if (chkGoogleUPC.Checked)
            {
                System.Diagnostics.Process.Start("https://www.google.com/search?q=" + txtUPC.Text);
            }

            // Update the label letting the user know where they are
            lblcsvIndex.Text = "Item #" + (webWorker.GetCurrentRowIndex()).ToString() + " of " + webWorker.totalRecordsCount.ToString() + " - Row #" + (webWorker.GetCurrentRowIndex() + 2).ToString();
        }
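
The UPC clean-up near the top of LoadNextEntry (parsing away Excel-style scientific notation, then restoring leading zeros) could be pulled into a small helper. A sketch, assuming the raw value always parses as a decimal; PadLeft does the same job as the original while loop without rebuilding the string one character at a time:

        using System.Globalization;

        // Sketch of the normalization done inline in LoadNextEntry: undo any
        // scientific notation via decimal.Parse, then left-pad back to the
        // 14 characters the original loop targets.
        private static string NormalizeUpc(string rawUpc)
        {
            string upc = decimal.Parse(rawUpc, NumberStyles.Any, CultureInfo.InvariantCulture)
                                .ToString(CultureInfo.InvariantCulture);

            return upc.PadLeft(14, '0');
        }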
Example #5
        /// <summary>
        /// Scrapes the internet for product data for a given UPC and its matching CSV row
        /// </summary>
        /// <param name="upc">UPC to search for</param>
        /// <param name="record">CSV record from the input file</param>
        /// <returns>A ScraperOutput containing the scraped product description and the UPC</returns>
        private ScraperOutput ScrapeWeb(string upc, CSVRow record)
        {
            ScraperOutput output;

            // Go to the website for that UPC
            string urlAddress = "https://www.barcodelookup.com/" + upc;

            try
            {
                HttpWebRequest  request  = (HttpWebRequest)WebRequest.Create(urlAddress);
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                if (response.StatusCode == HttpStatusCode.OK)
                {
                    // Read the response body, using the character set the server
                    // advertised (fall back to the StreamReader default if none)
                    Stream       receiveStream = response.GetResponseStream();
                    StreamReader readStream    = null;

                    if (response.CharacterSet == null)
                    {
                        readStream = new StreamReader(receiveStream);
                    }
                    else
                    {
                        readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet));
                    }


                    string data = readStream.ReadToEnd();
                    //Console.WriteLine("Reading data (length #" + data.Length.ToString() + " characters)");

                    // The product name is in the page's meta tag
                    string startStr   = "<meta name=\"description\" content=\"Barcode Lookup provides info on EAN ";
                    string endStr     = "\">";
                    int    startIndex = data.IndexOf(startStr);

                    // Check for invalid data
                    int failIndex    = data.IndexOf("<meta name=\"description\" content=\"This barcode doesn't exist in our database. Please search for another barcode in the search box");
                    int badCodeIndex = data.IndexOf("\"This barcode number is not valid");

                    // Catch the scenarios where the page does not contain a usable product description for this UPC
                    if (failIndex != -1 || badCodeIndex != -1 || startIndex == -1)
                    {
                        //Console.WriteLine("Could not find UPC");

                        output      = new ScraperOutput();
                        output.desc = "null";
                        output.upc  = upc;
                        return output;
                    }

                    // If all has worked, grab the ending index
                    int endIndex = data.IndexOf(endStr, startIndex);

                    // Ignore the meta tag
                    startIndex += startStr.Length;
                    int grabLength = endIndex - startIndex;

                    // Grab the website's product name which consists of the EAN barcode and the name
                    string eanAndName = data.Substring(startIndex, grabLength);

                    // Apply my logic to format the title string into something user friendly
                    string name = formatProductDescription(eanAndName, record);

                    // Format and return the output
                    output      = new ScraperOutput();
                    output.desc = name;
                    output.upc  = upc;

                    return output;
                }
                else
                {
                    // If we are connected to the internet but the query fails, we try again next time
                    string err = "Web error code: " + response.StatusCode.ToString();

                    Console.WriteLine(err);

                    return HandleWebError(upc, false, err);
                }
            }
            catch (Exception e)
            {
                // If we are unable to connect to the internet, we go offline
                Console.WriteLine("Error code: " + e.Message);
                return HandleWebError(upc, true);
            }
        }
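
ScrapeWeb uses the legacy HttpWebRequest API. Roughly the same page fetch with the newer HttpClient would look like the sketch below; it is an alternative, not what the original code does, and the meta-tag parsing and offline fallback from ScrapeWeb would still have to sit around it:

        using System.Net.Http;
        using System.Threading.Tasks;

        // Sketch: fetching the barcodelookup.com page with HttpClient instead of
        // HttpWebRequest. A single shared client is reused across calls.
        private static readonly HttpClient httpClient = new HttpClient();

        private static async Task<string> FetchBarcodePageAsync(string upc)
        {
            string url = "https://www.barcodelookup.com/" + upc;

            using (HttpResponseMessage response = await httpClient.GetAsync(url))
            {
                // Throws HttpRequestException on a non-success status code,
                // mirroring the StatusCode check in ScrapeWeb.
                response.EnsureSuccessStatusCode();

                return await response.Content.ReadAsStringAsync();
            }
        }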