Ejemplo n.º 1
0
        /// <summary>
        /// Appends a record to the pending output list and then writes the whole list
        /// to the file at OutputPath.
        /// </summary>
        /// <param name="r">Instance of the CSVRow class with new data</param>
        public void AddNewRecord(CSVRow r)
        {
            newRecords.Add(r);

            // The using blocks dispose the CsvWriter first and the StreamWriter second,
            // so everything is flushed and closed in the right order. Closing the
            // StreamWriter by hand before the CsvWriter is disposed risks the CsvWriter
            // flushing into a writer that is already closed.
            using (var writer = new StreamWriter(OutputPath))
            using (var csv = new CsvWriter(writer))
            {
                csv.WriteRecords(newRecords);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Steps back one row: shows the web worker's last row in the form and
        /// decrements the worker's row index. Currently unused.
        /// </summary>
        private void GetLastEntry()
        {
            CSVRow previous = webWorker.GetLastRow().Item1;

            // Existing values read from the input row
            txtBrand.Text    = previous.brand2;
            txtDesc.Text     = previous.desc2;
            txtSignDesc.Text = previous.sign;
            txtUPC.Text      = previous.full_upc;

            // Editable values
            txtNewBrand.Text       = previous.brand;
            txtNewDescription.Text = previous.desc;
            txtNotes.Text          = "oops";

            // The worker's index must follow the form, otherwise the two drift apart
            webWorker.rowIndex--;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Formats a raw scraped string into an item description
        /// </summary>
        /// <param name="product">Raw string: a 13-character EAN barcode, " - ", the product name and one trailing character (normally a period)</param>
        /// <param name="record">CSV row whose brand and size are removed from the name</param>
        /// <returns>The cleaned-up description, or an empty string when <paramref name="product"/> is too short to contain a name</returns>
        private string formatProductDescription(string product, CSVRow record)
        {
            const int eanLength       = 13; // 13 characters for the EAN barcode
            const int separatorLength = 3;  // 3 characters for "space hyphen space"
            const int nameOffset      = eanLength + separatorLength;

            // Without at least the prefix plus the trailing character there is no name to
            // extract; returning early avoids an out-of-range Substring call
            if (product == null || product.Length <= nameOffset)
            {
                return("");
            }

            // Skip the prefix and drop the last character (the period at the end of the name)
            product = product.Substring(nameOffset, product.Length - nameOffset - 1);

            // Brand and size are literal text rather than patterns, so they are escaped:
            // characters such as '.', '(' or '+' would otherwise be read as regex syntax

            // Remove the brand name from the scraped name
            if (!string.IsNullOrEmpty(record.brand))
            {
                product = Regex.Replace(product, Regex.Escape(record.brand), "", RegexOptions.IgnoreCase);
            }

            // Remove the size if it is an exact match (usually isn't)
            if (!string.IsNullOrEmpty(record.size))
            {
                product = Regex.Replace(product, Regex.Escape(record.size), "", RegexOptions.IgnoreCase);
            }

            // Remove commas
            product = product.Replace(",", "");

            return(product);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Queues the new data to be written and left shifts the buffer.
        /// Does nothing except log a message when no output file is given or when the
        /// output file cannot be opened exclusively.
        /// </summary>
        private void SaveCurrentEntry()
        {
            string outputFile = txtOutputFile.Text;

            // An empty path cannot be probed or written to; stop before the path APIs throw
            if (string.IsNullOrWhiteSpace(outputFile))
            {
                LogText("No output file specified");
                return;
            }

            // GetDirectoryName gives null or an empty string for a bare file name or a root
            // path. CreateDirectory rejects such a value, and there is nothing to create.
            string path = Path.GetDirectoryName(outputFile);

            if (!string.IsNullOrEmpty(path))
            {
                Directory.CreateDirectory(path);
            }

            // If the file is open in Excel, the program will error cryptically so this catches that
            try
            {
                using (FileStream fs = File.Open(outputFile, FileMode.OpenOrCreate,
                                                 FileAccess.ReadWrite, FileShare.None))
                {
                    // Probe only: the handle is released as soon as the block ends
                }
            }
            catch (IOException ex)
            {
                LogText("Output File in use by another program. \nFree it and try again");
                Console.WriteLine(ex.Message);
                return;
            }

            string newDesc  = txtNewDescription.Text;
            string newBrand = txtNewBrand.Text;

            // A literal "null" (any casing) counts as no description
            bool descIsBlank = newDesc == "" ||
                               string.Equals(newDesc, "null", StringComparison.OrdinalIgnoreCase);

            // The brand is also left blank when it is unchanged from the original brand
            bool brandIsBlank = newBrand == "" ||
                                string.Equals(newBrand, "null", StringComparison.OrdinalIgnoreCase) ||
                                newBrand == txtBrand.Text;

            // Create a new CSVRow object with our newly formatted information
            CSVRow r = new CSVRow();

            r.desc     = descIsBlank ? "" : newDesc.Trim();
            r.brand    = brandIsBlank ? "" : newBrand.Trim();
            r.notes    = txtNotes.Text;
            r.full_upc = txtUPC.Text;

            // Prompts the web worker to proceed
            webWorker.AddNewRecord(r);
            webWorker.ShiftBuffer();

            LogText("Wrote entry to output file");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Shows the record at the front of the web worker's record buffer in the form,
        /// or clears the form when the buffer is empty
        /// </summary>
        private void LoadNextEntry()
        {
            // A web worker in its error state stops all progress; report the message and leave
            if (webWorker.errorState.Item1 == true)
            {
                LogText("[ERROR] " + webWorker.errorState.Item2);
                return;
            }

            // Empty buffer: tell the user we are done and blank out every field
            if (webWorker.recordBuffer.Count == 0)
            {
                LogText("No more records left to populate!");

                lblcsvIndex.Text       = "All finished! Check the output file location.";
                txtBrand.Text          = "";
                txtDesc.Text           = "";
                txtSignDesc.Text       = "";
                txtUPC.Text            = "";
                txtNewBrand.Text       = "";
                txtNewDescription.Text = "";
                txtNotes.Text          = "";

                return;
            }

            // The front of the buffer is the entry to display
            Tuple <CSVRow, ScraperOutput> head = webWorker.recordBuffer[0];

            CSVRow        current = head.Item1;
            ScraperOutput scraped = head.Item2;

            // Parsing as a decimal turns scientific notation back into plain digits;
            // left-padding to 14 characters restores any leading zeros that were lost
            string upc = decimal.Parse(current.full_upc, System.Globalization.NumberStyles.Any)
                         .ToString()
                         .PadLeft(14, '0');

            // How many digits to check for uniqueness
            const int manufacturerDigits = 7;

            string previousDigits = lastUPC.TrimStart('0');
            string currentDigits  = upc.TrimStart('0');

            // Placeholder codes: these just have to be non-equal, the values do not matter
            string previousManufacturer = "1";
            string currentManufacturer  = "-1";

            // Use the real manufacturer codes when both UPCs are long enough to have one
            if (previousDigits.Length > manufacturerDigits && currentDigits.Length > manufacturerDigits)
            {
                previousManufacturer = previousDigits.Substring(0, manufacturerDigits);
                currentManufacturer  = currentDigits.Substring(0, manufacturerDigits);
            }

            // Same manufacturer code, OR matching non-empty brand names: keep the current
            // brand name. The body is currently disabled, so this branch does nothing.
            if (previousManufacturer == currentManufacturer || (lastBrand.ToLower() == current.brand2.ToLower() && current.brand2 != ""))
            {
                //row.brand = txtNewBrand.Text;
            }

            // Remember this entry for the comparison on the next call
            lastBrand = current.brand2;
            lastUPC   = upc;

            txtBrand.Text = current.brand2;
            txtDesc.Text  = current.desc2;

            // A single space in the sign column means "use the pos value instead"
            if (current.sign != " ")
            {
                txtSignDesc.Text = current.sign;
            }
            else
            {
                txtSignDesc.Text = current.pos;
            }

            txtUPC.Text = upc;

            // Prefer the row's own new values; fall back to the old brand / scraped description
            txtNewBrand.Text       = (current.brand.Length == 0) ? current.brand2 : current.brand;
            txtNewDescription.Text = (current.desc.Length == 0) ? scraped.desc : current.desc;
            txtNotes.Text          = current.notes;

            // Run the formatting logic
            txtNewBrand.Text       = toTitlecase(txtNewBrand.Text);
            txtNewDescription.Text = DescriptionFormat(txtNewDescription.Text);

            // A literal "null" description is shown as blank
            if (txtNewDescription.Text.ToLower() == "null")
            {
                txtNewDescription.Text = "";
            }

            btnNext.Enabled = true;
            btnSkip.Enabled = true;

            // Optionally open a Google search for the UPC
            if (chkGoogleUPC.Checked)
            {
                System.Diagnostics.Process.Start("https://www.google.com/search?q=" + txtUPC.Text);
            }

            // Update the label letting the user know where they are
            string itemNumber = (webWorker.GetCurrentRowIndex()).ToString();
            string totalItems = webWorker.totalRecordsCount.ToString();
            string rowNumber  = (webWorker.GetCurrentRowIndex() + 2).ToString();

            lblcsvIndex.Text = "Item #" + itemNumber + " of " + totalItems + " - Row #" + rowNumber;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Scrapes the internet for product data for a given UPC and its matching CSV row
        /// </summary>
        /// <param name="upc">UPC to search for</param>
        /// <param name="record">CSV record from the input file</param>
        /// <returns>
        /// A ScraperOutput holding the UPC and the formatted product description. The
        /// description is the literal "null" when the page reports no usable product.
        /// When the request fails, the result of HandleWebError is returned instead.
        /// </returns>
        private ScraperOutput ScrapeWeb(string upc, CSVRow record)
        {
            // Go to the website for that UPC
            string urlAddress = "https://www.barcodelookup.com/" + upc;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlAddress);

                // The response, its stream and the reader all hold the underlying connection;
                // disposing them releases it for later requests
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        // If we are connected to the internet but the query fails, we try again next time
                        string err = "Web error code: " + response.StatusCode.ToString();

                        Console.WriteLine(err);

                        return(HandleWebError(upc, false, err));
                    }

                    string data;

                    using (Stream receiveStream = response.GetResponseStream())
                    {
                        // Use the declared character set when there is one, otherwise the reader's default
                        Encoding encoding = (response.CharacterSet == null)
                                            ? null
                                            : Encoding.GetEncoding(response.CharacterSet);

                        using (StreamReader readStream = (encoding == null)
                                                         ? new StreamReader(receiveStream)
                                                         : new StreamReader(receiveStream, encoding))
                        {
                            data = readStream.ReadToEnd();
                        }
                    }

                    // The product name is in the page's meta tag
                    string startStr   = "<meta name=\"description\" content=\"Barcode Lookup provides info on EAN ";
                    string endStr     = "\">";
                    int    startIndex = data.IndexOf(startStr, StringComparison.Ordinal);

                    // Check for invalid data
                    int failIndex    = data.IndexOf("<meta name=\"description\" content=\"This barcode doesn't exist in our database. Please search for another barcode in the search box", StringComparison.Ordinal);
                    int badCodeIndex = data.IndexOf("\"This barcode number is not valid", StringComparison.Ordinal);

                    // Only look for the end of the tag when its start was found
                    int endIndex = -1;

                    if (failIndex == -1 && badCodeIndex == -1 && startIndex != -1)
                    {
                        // Ignore the meta tag itself
                        startIndex += startStr.Length;
                        endIndex    = data.IndexOf(endStr, startIndex, StringComparison.Ordinal);
                    }

                    ScraperOutput output = new ScraperOutput();
                    output.upc = upc;

                    // Catch the scenarios where the website fails to provide a clear description,
                    // including a description tag that is never closed
                    if (endIndex == -1)
                    {
                        output.desc = "null";
                        return(output);
                    }

                    // Grab the website's product name which consists of the EAN barcode and the name
                    string eanAndName = data.Substring(startIndex, endIndex - startIndex);

                    // Format the title string into something user friendly
                    output.desc = formatProductDescription(eanAndName, record);

                    return(output);
                }
            }
            catch (Exception e)
            {
                // If we are unable to connect to the internet, we go offline
                Console.WriteLine("Error code: " + e.Message);
                return(HandleWebError(upc, true));
            }
        }