コード例 #1
0
        // Thread responsible for extracting all of the city links for a given website
        public void ExtractingCities()
        {
            string read;
            AtTheEnd = "";

            // Some weblinks contains dots (.) and .NET simply remove it from URLs. The following code was included just to preserv the dot on links
            MethodInfo getSyntax = typeof(UriParser).GetMethod("GetSyntax", System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.NonPublic);
            FieldInfo flagsField = typeof(UriParser).GetField("m_Flags", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
            if (getSyntax != null && flagsField != null)
            {
                foreach (string scheme in new[] { "http", "https" })
                {
                    UriParser parser = (UriParser)getSyntax.Invoke(null, new object[] { scheme });
                    if (parser != null)
                    {
                        int flagsValue = (int)flagsField.GetValue(parser);
                        // Clear the CanonicalizeAsFilePath attribute
                        if ((flagsValue & 0x1000000) != 0)
                            flagsField.SetValue(parser, flagsValue & ~0x1000000);
                    }
                }
            }

            read = DownloadData(this.oneWebsite.data[0]);

            StreamWriter writer;
            SqlConnection myConnection;

            string temp = this.oneWebsite.data[0];
            temp = temp.Replace(".", "_");
            temp = temp.Replace("/", "_");
            temp = temp.Replace(":", "");
            temp = temp.Replace("?", "");
            temp = temp.Replace("=", "");
            writer = File.CreateText(@"C:\Users\MediaConnect\Documents\" + temp + "_out.txt");

            if (read == "ERROR: Exception reading from webpage")
            {
                writer.WriteLine("ERROR: Exception reading from webpage" + this.oneWebsite.data[0]);
                writer.Close();
                return;
            }

            string str;
            string[] parts;
            List<string> listOfCities;
            dealslist listOfEvaluatedDeals = new dealslist();
            List<Tags> listOfDeals = new List<Tags>();

             //           writer.Write(read);
             //           writer.Close();

            Console.WriteLine(@"C:\Users\MediaConnect\Documents\" + this.oneWebsite.data[0] + " file opened!");
            Console.WriteLine("===========================");

            // Check if the Website is valid
            str = this.oneWebsite.data[1];
            if (!WebsiteValid(str, read))
            {
                Console.WriteLine("ERROR: Invalid initial website: " + this.oneWebsite.data[0]);
                writer.WriteLine("ERROR: Invalid initial website: " + this.oneWebsite.data[0]);
                writer.Close();
                return;
            }

            str = this.oneWebsite.data[2];
            temp = this.SingleDataExtraction(str, read);
            if ((temp == "{:-(") || (temp == ""))
            {
                Console.WriteLine("ERROR: Couldn't find cities in website " + this.oneWebsite.data[0]);
                writer.WriteLine("ERROR: Couldn't find cities in website " + this.oneWebsite.data[0]);
                writer.Close();
                return;
            }
            string sourceLocations = read;
            parts = temp.Split('\n');
            listOfCities = new List<string>();
            for (int i=0; i<parts.Length; i++)
            {
                if ((parts[i] != "") && (!listOfCities.Contains(parts[i])))
                {
                    listOfCities.Add(parts[i]);
                }
            }

            writer.WriteLine("Website | xx | ListOfCities | yy | DealID | DealLinkURL | Category | Company | CompanysURL | Image | Description | Latitude | Longitude | CompleteAddress | StreetName | City | PostalCode | Country | Map | CompanysPhone | RegularPrice | OurPrice | Save | Discount | PayOutAmount | PayOutLink | SecondsTotal | SecondsElapsed | RemainingTime | ExpiryTime | MaxNumberOfVouchers | MinNumberOfVouchers | DealSoldOut | DealEnded | DealValid | PaidVoucherCount | Highlights | BuyDetails | DealText | Reviews | RelatedDeals (same company)");
            List<string> TryLater = new List<string>();
            foreach (string item in listOfCities)
            {
                List<string> SideDeals = new List<string>();
                List<string> EvaluatedSideDeals = new List<string>();
                string part_URL = item;
                string URL = "";
                int tries = 3;
                Boolean FirstTime = true;
                do
                {
                    string DealID = "";
                    int i;
                    Tags DealData = new Tags();
                    DateTimeOffset extractedTime;

                    if (SideDeals.Count() > 0)
                    {
                        part_URL = SideDeals.ElementAt(0);
                        EvaluatedSideDeals.Add(part_URL);
                        SideDeals.RemoveAt(0);
                    }

              //                       URL = "http://www.teambuy.ca/toronto/28135483/";
                    URL = baseAddress.Replace("$", part_URL);
                    // opening Website
                    read = DownloadData(URL);
                    extractedTime = DateTimeOffset.Now;

                    if (read == "ERROR: Exception reading from webpage")
                    {
                        TryLater.Add(part_URL);
                        //                       continue;
                    }
                    else
                    {
                        // checking if website is valid or if it has a deal
                        str = this.oneWebsite.data[1];
                        if (!WebsiteValid(str, read))
                        {
                            Console.WriteLine("WARNING: Invalid website: " + URL);
                            writer.WriteLine("WARNING: Invalid website: " + URL);
                            AtTheEnd = AtTheEnd + "WARNING: Invalid website: " + URL + "\n";
                            //                            continue;
                        }
                        else
                        {
                            //  extract the side deals
                            string relatedDeals = "";
                            str = this.oneWebsite.data[3];
                            temp = this.SingleDataExtraction(str, read);
                            str = this.oneWebsite.data[40]; //related deals are handled as sidedeals
                            relatedDeals = this.SingleDataExtraction(str, read);
                            if ((relatedDeals != "{:-(") && (relatedDeals != ""))
                                if (temp != "{:-(")
                                    temp = temp + relatedDeals;
                                else
                                    temp = relatedDeals;
                            if (temp != "{:-(")
                            {
                                List<string> tempSideDeals;
                                parts = temp.Split('\n');
                                tempSideDeals = new List<string>(parts);
                                foreach (string s in tempSideDeals)
                                {
                                    if ((s != "") && (s != part_URL) && (!SideDeals.Contains(s)))
                                    {
                                        if (!EvaluatedSideDeals.Contains(s))
                                        {
                                            // checking if this sidedeal was an evaluated deal. To do that, it must be possible to get DealID from Sidedeal's link
                                            str = this.oneWebsite.data[41];
                                            if (str == "")
                                            {
                                                SideDeals.Add(s);
                                            }
                                            else
                                            {
                                                int c = 0;
                                                int read_pos = 0;
                                                keywords search = new keywords();
                                                keywords end = new keywords();
                                                string tempID = "";
                                                if (str[0] == '?')
                                                {
                                                    c += 1;
                                                    search = GetSearchString(str, ref c, DealData, s);
                                                    while ((c < str.Length) && ((str[c] == ' ') || (str[c] == ';')))
                                                    {
                                                        c += 1;
                                                    }
                                                }
                                                if ((c < str.Length) && (str[c] == '@'))
                                                {
                                                        c += 1;
                                                        end = GetEndString(str, ref c);
                                                }
                                                int end_pos;
                                                while (search.GetTimes() > 0)
                                                {
                                                    search.SetTimes(search.GetTimes() - 1);
                                                    read_pos = s.IndexOf(search.GetKeyword(), read_pos);
                                                    if (read_pos != -1)
                                                        read_pos += search.GetKeyword().Length;
                                                    else
                                                        break;
                                                }
                                                if (read_pos != -1)
                                                {
                                                    if (end.GetKeyword() == "")
                                                        end_pos = s.Length;
                                                    else
                                                    {
                                                        end_pos = read_pos;
                                                        end_pos = s.IndexOf(end.GetKeyword(), end_pos);
                                                    }
                                                    if (end_pos != -1)
                                                    {
                                                        tempID = s.Substring(read_pos, end_pos - read_pos);
                                                    }
                                                }
                                                // reusing c
                                                c = listOfEvaluatedDeals.DealEvaluated(tempID);
                                                if (c != -1)
                                                {
                                                    if (!listOfEvaluatedDeals.GetDealDetails(c).GetListCities().Contains(item))
                                                    {
                                                        listOfEvaluatedDeals.AddCity(c, item);
                                                    }
                                                }
                                                else
                                                    SideDeals.Add(s);
                                            }
                                        }
                                    }
                                }
                            }

                            if (FirstTime)
                            {
                                FirstTime = false;
                            }
                            else
                            {

                                // get the dealID
                                str = this.oneWebsite.data[4];
                                DealID = this.SingleDataExtraction(str, read);
                                if (DealID == "{:-(")
                                {
                                    Console.WriteLine("WARNING: Couldn't find the DealID in website " + URL);
                                    writer.WriteLine("WARNING: Couldn't find the DealID in website " + URL);
                                    AtTheEnd = AtTheEnd + "WARNING: Couldn't find the DealID in website " + URL + "\n";
                                    //                                continue;
                                }
                                else
                                {
                                    string alternativeID = this.SingleDataExtraction(this.oneWebsite.data[42], read);
                                    if (alternativeID == "")
                                        i = listOfEvaluatedDeals.DealEvaluated(DealID);
                                    else
                                        i = listOfEvaluatedDeals.DealEvaluated(alternativeID);

                                    // check if the deal was evaluated before. If not, store in Deals list
                                    if (i != -1)
                                    {
                                        if (!listOfEvaluatedDeals.GetDealDetails(i).GetListCities().Contains(item))
                                        {
                                            listOfEvaluatedDeals.AddCity(i, item);
                                        }
                                        //                                    continue;
                                    }
                                    else
                                    {

                                        listOfEvaluatedDeals.SetDealID(DealID, alternativeID, item);
                                        DealData.data[0] = this.oneWebsite.data[0];
                                        DealData.data[4] = DealID;

                                        // DealData.data[1] will contain the extracted time. Index 1 is associated to invalid page. If page is valid, the extracted time will be stored
                                        DealData.data[1] = extractedTime.ToString();

                                        Console.WriteLine(baseAddress.Replace("$", "") + " - " + item + " \tDealID - " + DealID);

                                        // Get the data / write to file
                                        for (int j = 5; j < 50; j++)
                                        {
            //                                            Console.Write(j + " ");
                                            if ((j == 31))
                                            {
                                                Console.Write("");
                                            }
                                            if (j == 40)
                                                j = 43;
                                            if (DealData.data[j] == "")
                                            {
                                                int read_pos = 0;
                                                str = this.oneWebsite.data[j];
                                                RecursList.Add(j);
                                                temp = this.SingleDataExtraction(str, read, DealData, ref read_pos);
                                                RecursList.Remove(j);
                                                // Data not expected: in case the extracted data is not the expected one, keep searching
                                                if (j == 8)
                                                {
                                                    int has = temp.IndexOf("youtube.com");
                                                    if (has == -1)
                                                        has = temp.IndexOf("wikipedia");
                                                    if (has != -1)
                                                    {
                                                        str = ReduceInstruc(str);
                                                    }
                                                    while ((has != -1) && (read_pos != -1))
                                                    {
                                                        RecursList.Add(j);
                                                        temp = this.SingleDataExtraction(str, read, DealData, ref read_pos);
                                                        RecursList.Remove(j);
                                                        has = temp.IndexOf("youtube.com");
                                                        if (has == -1)
                                                            has = temp.IndexOf("wikipedia");
                                                    }
                                                }
                                                if ((j < 36) || (j > 39))
                                                {
                                                    temp = temp.Replace("\n", ";");
                                                    temp = temp.Replace("\t", " ");
                                                }
                                                temp = temp.Replace((char)8206, ' ');
                                                while (temp.IndexOf("  ") != -1)
                                                    temp = temp.Replace("  ", " ");
                                                while (temp.IndexOf(" ,") != -1)
                                                    temp = temp.Replace(" ,", ",");
                                                while (temp.IndexOf(",,") != -1)
                                                    temp = temp.Replace(",,", ",");
                                                while (temp.IndexOf(" ;") != -1)
                                                    temp = temp.Replace(" ;", ";");
                                                if (j != 13)
                                                {
                                                    while (temp.IndexOf(";;") != -1)
                                                        temp = temp.Replace(";;", ";");
                                                }
                                                DealData.data[j] = temp;
                                            }
                                        }
                                        if ((DealData.data[17] == "") || (DealData.data[17] == "{:-("))
                                        {
                                            str = this.oneWebsite.data[17];
                                            RecursList.Add(17);
                                            str = this.SingleDataExtraction(str, sourceLocations, DealData);
                                            RecursList.Remove(17);
                                            DealData.data[17] = str;
                                        }
                                        listOfDeals.Add(DealData);
                                    }
                                }
                            }
                        }
                    }
                    if ((SideDeals.Count == 0) && (TryLater.Count != 0))
                    {
                        if (tries > 0)
                        {
                            while (TryLater.Count > 0)
                            {
                                SideDeals.Add(TryLater.ElementAt(0));
                                TryLater.RemoveAt(0);
                            }
                            tries -= 1;
                        }
                        else
                        {
                            foreach (string TryItem in TryLater)
                            {
                                Console.WriteLine("ERROR: Giving up link: " + TryItem);
                                writer.WriteLine("ERROR: Giving up link: " + TryItem);
                                AtTheEnd = AtTheEnd + "ERROR: Giving up link: " + TryItem + "\n";
                            }
                            TryLater = new List<string>();
                        }
                    }
                } while (SideDeals.Count() > 0);
            }
            Console.WriteLine("\n\nNow listing cities with the same deal:");
            writer.WriteLine("\n\n\n\nNow listing cities with the same deal:");
            for (int i = 0; i < listOfEvaluatedDeals.CountDeals(); i++)
            {
                deals Dealdetails = listOfEvaluatedDeals.GetDealDetails(i);
                Tags dealData = new Tags();
                string ID = Dealdetails.GetDealID();
                Console.Write("\n" + ID + " - ");
                writer.Write("\n" + ID + " - ");

                foreach (Tags dd in listOfDeals)
                    if (dd.data[4] == ID)
                    {
                        dealData = dd;
                        break;
                    }
                foreach (string item in Dealdetails.GetListCities())
                {
                    dealData.data[2] = dealData.data[2] + item + "; ";
                    Console.Write(item + "  ");
                    writer.Write(item + "  ");
                }
            }
            Console.WriteLine("\n\nTotal of deals: " + listOfEvaluatedDeals.CountDeals());
            Console.WriteLine("Total of cities: " + listOfCities.Count);
            Console.WriteLine();
            writer.WriteLine("\n\n\n\nTotal of deals: " + listOfEvaluatedDeals.CountDeals());
            writer.WriteLine("Total of cities: " + listOfCities.Count + "\n\n\n");

            myConnection = new SqlConnection("server=MEDIACONNECT-PC\\MCAPPS; Trusted_Connection=yes; database=Deals; connection timeout=15");
            try
            {
                myConnection.Open();
            }
            catch (Exception e)
            {
                myConnection = new SqlConnection("server=FIVEFINGERFINDS\\MEDIACONNECT; Trusted_Connection=yes; database=Deals; connection timeout=15");
                try
                {
                    myConnection.Open();
                }
                catch (Exception error)
                {
                    Console.WriteLine(e.ToString());
                }
            }

               //         int cont = 0;

            // Store the data into SQL Database. Clean and handle the data, if needed
            foreach (Tags dd in listOfDeals)
            {
            //                cont += 1;
            //                if (cont == 198)
            //                    Console.WriteLine("");
            //                Console.WriteLine(cont + " " + listOfDeals.Count);
                string line = "";

            // Data Handling

                for (int i = 1; i < 50; i++)
                {
                    int b = dd.data[i].IndexOf("||");
                    if (b != -1)
                        dd.data[i] = dd.data[i].Remove(b);
                    if (dd.data[i] == "{:-(")
                        dd.data[i] = "";
                    RemoveSpaces(ref dd.data[i], true);
                    if(dd.data[i]==";")
                        dd.data[i] = "";
                }

                if (dd.data[5].ToLower() == "http://")
                    dd.data[5] = "";

                if (dd.data[8].ToLower() == "http://")
                    dd.data[8] = "";

                if (dd.data[8].ToLower() == "%22http:")
                    dd.data[8] = "";

                if (dd.data[9].ToLower() == "http://")
                    dd.data[9] = "";

                if (dd.data[18].ToLower() == "http://")
                    dd.data[18] = "";

                dd.data[8] = dd.data[8].Replace("www.", ""); // let all companie's URL with the same format, i.e., without "www." and the last "/"
                if ((dd.data[8].Length>=1) && (dd.data[8][dd.data[8].Length-1] == '/'))
                    dd.data[8] = dd.data[8].Substring(0, dd.data[8].Length-1);
                if (dd.data[8].IndexOf("youtube.com") != -1)
                    dd.data[8] = "";

                if ((dd.data[15].Length >= 5) && (dd.data[15].Substring(0, 5).ToLower() == "http:"))
                    dd.data[15] = "";
                if ((dd.data[15].Length >= 4) && (dd.data[15].Substring(0, 4).ToLower() == "www."))
                    dd.data[15] = "";

                if ((dd.data[18].Length == 30) && (dd.data[18] == "http://maps.google.com/maps?q="))
                    dd.data[18] = "";

                if (dd.data[17].ToLower() == "usa")
                    dd.data[17] = "United States";

            //                if (dd.data[0] == "http://www.teambuy.ca/toronto")
            //                {
                    if (dd.data[14] != "")
                    {
                        dd.data[14] = dd.data[14].Replace("(map)", "");
                        RemoveSpaces(ref dd.data[14], true);
                    }
            //                    if ((dd.data[18] != "") && (dd.data[13] == "") && (dd.data[14] == ""))
            //                    {
            //                        dd.data[18] = "";
            //                    }
            //                }

                Boolean phone = false;
                for (int i = 13; i <= 43; i++)
                {
                    if (i == 16)
                        i = 43;
                    string aux1 = dd.data[i].ToLower();
                    string aux = aux1;
                    if (aux != "")
                    {
                        //                   if (!((i == 13) && (dd.data[0] == "http://www.dealfind.com/toronto")))
                        //                   {
                        aux = ExtractPhone(aux, dd, ref phone);
                        //                   }
                        aux = RemoveWebLinks(aux);
                        if (aux == "include photo")
                            dd.data[19] = dd.data[i] + ", " + dd.data[19];
                        aux = aux.Replace("domocilio conocido", "");
                        aux = aux.Replace("call to order", "");
                        aux = aux.Replace("to place your order", "");
                        aux = aux.Replace("once purchased", "");
                        aux = aux.Replace("mailed to your door", "");
                        aux = aux.Replace("see website for directions", "");
                        aux = aux.Replace("click website link to", "");
                        aux = aux.Replace("to redeem voucher,", "");
                        aux = aux.Replace("to redeem voucher", "");
                        aux = aux.Replace("to redeem your voucher,", "");
                        aux = aux.Replace("to redeem your voucher", "");
                        aux = aux.Replace("to book your appointment", "");
                        aux = aux.Replace("include photo", "");
                        aux = aux.Replace("mailing address and contact number", "");
                        aux = aux.Replace("please visit:", "");
                        aux = aux.Replace("please visit", "");
                        aux = aux.Replace("they come to you", "");
                        aux = aux.Replace("for reservations", "");
                        aux = aux.Replace("redeem online by clicking the \"redemption\" link on your voucher", "");
                        aux = aux.Replace("Redeem online by clicking \"Redemption\" link on your voucher", "");
                        aux = aux.Replace("online redemption:", "");
                        aux = aux.Replace("online redemption", "");
                        aux = aux.Replace("web redemption:", "");
                        aux = aux.Replace("web redemption", "");
                        aux = aux.Replace("or redeem", "");
                        aux = aux.Replace("redeem", "");
                        aux = aux.Replace("online at", "");
                        aux = aux.Replace("online:", "");
                        aux = aux.Replace("online", "");
                        aux = aux.Replace("or by phone:", "");
                        aux = aux.Replace("or by phone", "");
                        aux = aux.Replace("by phone:", "");
                        aux = aux.Replace("by phone", "");
                        aux = aux.Replace("mobile service", "");
                        aux = aux.Replace("mobile service:", "");
                        aux = aux.Replace("mobile", "");
                        aux = aux.Replace("call/email", "");
                        aux = aux.Replace("or by email:", "");
                        aux = aux.Replace("or by e-mail:", "");
                        aux = aux.Replace("or by email", "");
                        aux = aux.Replace("or by e-mail", "");
                        aux = aux.Replace("by emailing:", "");
                        aux = aux.Replace("by emailing", "");
                        aux = aux.Replace("by email:", "");
                        aux = aux.Replace("by email", "");
                        aux = aux.Replace("by e-mail:", "");
                        aux = aux.Replace("by e-mail", "");
                        aux = aux.Replace("for inquiries,", "");
                        aux = aux.Replace("for inquiries", "");
                        aux = aux.Replace("please call:", "");
                        aux = aux.Replace("please call", "");
                        aux = aux.Replace("please", "");
                        aux = aux.Replace("call:", "");
                        aux = aux.Replace("call ", " ");
                        aux = aux.Replace("call\n", "\n");
                        aux = aux.Replace("or email:", "");
                        aux = aux.Replace("or email", "");
                        aux = aux.Replace("email:", "");
                        aux = aux.Replace("email", "");
                        aux = aux.Replace("or e-mail:", "");
                        aux = aux.Replace("or e-mail", "");
                        aux = aux.Replace("e-mail:", "");
                        aux = aux.Replace("e-mail", "");
                        aux = aux.Replace("multiple locations", "");
                        aux = aux.Replace("valid at", "");
                        aux = aux.Replace("view locations", "");
                        aux = aux.Replace("mail out", "");
                        if (aux1 != aux)
                        {
                            RemoveSpaces(ref aux, true);
                            if (aux == "or") aux = "";
                            dd.data[i] = aux;
                        }
                    }
                }

            //                if (dd.data[0] == "http://www.dealticker.com/toronto_en_1categ.html")
            //                {
                    transferEmails(ref dd.data[15], ref dd.data[19]);
                    if ((dd.data[15] != "") && ((dd.data[15][0] == '(') || ((dd.data[15][0] >= '0') && (dd.data[15][0] <= '9'))))
                    {  //??? try using Regex to find telephones in all of the columns
                        // Contacts are in the wrong place. Moving them from City to Contact
                        if (dd.data[19].IndexOf(dd.data[15]) == -1)
                        {
                            dd.data[19] = dd.data[19] + dd.data[15] + "; ";
                        }
                        dd.data[15] = "";
                        // streetName must be null??
                        //             dd.data[14] = "";
                    }
            //                }

                // Put emails in the right column
                if (dd.data[13] != "")
                {
                    transferEmails(ref dd.data[13], ref dd.data[19]);
                }
                if (dd.data[14] != "")
                {
                    transferEmails(ref dd.data[14], ref dd.data[19]);
                }

             //               if (dd.data[0] == "http://www.dealfind.com/toronto")
             //               {
                  if ((dd.data[13] == "") && (dd.data[14] == "") && (dd.data[16] == ""))
                    {
                        if ((dd.data[15] != "") && (dd.data[19] == ""))
                        {
                            // Contacts are in the wrong place. Moving them from City to Contact
                            if (dd.data[19] == "")
                                dd.data[19] = dd.data[15];
                            else if (dd.data[19].IndexOf(dd.data[15]) == -1)
                                dd.data[19] = dd.data[19] + "; " + dd.data[15];
                            dd.data[15] = "";
                        }
                        if (dd.data[43] != "")
                        {
                            // Contacts are in the wrong place. Moving them from Province to Contact
            //                            if (dd.data[19] == "")
            //                                dd.data[19] = dd.data[43];
            //                            else if (dd.data[19].IndexOf(dd.data[43]) == -1)
            //                                dd.data[19] = dd.data[19] + ", " + dd.data[43];
            //                            dd.data[43] = "";
            //                            dd.data[17] = "";

                            transferEmails(ref dd.data[43], ref dd.data[19]);
                            if ((dd.data[43] != "") && ((dd.data[43][0] == '(') || ((dd.data[43][0] >= '0') && (dd.data[43][0] <= '9'))))
                            {  //??? try using Regex to find telephones in all of the columns
                                // Contacts are in the wrong place. Moving them from City to Contact
                                if (dd.data[19].IndexOf(dd.data[43]) == -1)
                                {
                                    dd.data[19] = dd.data[19] + dd.data[43] + "; ";
                                }
                                dd.data[43] = "";
                            }

                        }
                    }
            //                }
            /*                    if (dd.data[19] != "")
                    {
                        int i = dd.data[19].LastIndexOf(" or");
                        while (i != -1)
                        {
                            int b = i;
                            i += 3;
                            while ((i < dd.data[19].Length) && ((dd.data[19][i] == ' ') || (dd.data[19][i] == '\n') ||
                                   (dd.data[19][i] == '\t') ||  (dd.data[19][i] == ',') || (dd.data[19][i] == ';')))
                                i += 1;
                            if (i >= dd.data[19].Length)
                            {
                                dd.data[19].Remove(b, 3);
                                RemoveSpaces(ref dd.data[19], true);
                            }
                            else
                                break;
                            i = dd.data[19].LastIndexOf(" or");
                        }
                    }*/

                    if (dd.data[15] == dd.data[43])
                    {
                        int pos = dd.data[15].IndexOf(",");
                        if (pos != -1)
                        {
                            dd.data[15] = dd.data[15].Remove(pos);
                            dd.data[43] = dd.data[43].Remove(0, pos);
                            RemoveSpaces(ref dd.data[15], true);
                            RemoveSpaces(ref dd.data[43], true);
                        }
                    }
            //  if Latitude contains both Lat and Longitude data, Longitude field is empty
                if ((dd.data[11] != "") && (dd.data[12] == ""))
                {
                    SeparateLatLong(ref dd.data[11], ref dd.data[12]);
                }

            // remove latitude and longitude if it points to nowhere
                if ((dd.data[11].Length>=3) && (dd.data[11].Substring(0,3) == "56.") && (dd.data[12].Length>=5) && (dd.data[12].Substring(0,5) == "-106."))
                {
                    dd.data[11] = "";
                    dd.data[12] = "";
                    if ((dd.data[18] != "") && (dd.data[18].IndexOf("56.") != -1))
                        dd.data[18] = "";
                }
                if ((dd.data[11].Length >= 3) && (dd.data[11].Substring(0, 3) == "51.") && (dd.data[12].Length >= 5) && (dd.data[12].Substring(0, 4) == "-85."))
                {
                    dd.data[11] = "";
                    dd.data[12] = "";
                    if ((dd.data[18] != "") && (dd.data[18].IndexOf("51.") != -1))
                        dd.data[18] = "";
                }

                // If googlemaps link has online, it is invalid, so remove it. Also remove Lat/Long
                if (dd.data[18] != "")
                {
                    string aux = dd.data[18].ToLower();
                    if ((aux.IndexOf("=online+") != -1) || (aux.IndexOf("+online+") != -1) ||
                        (aux.IndexOf("=mobile+") != -1) || (aux.IndexOf("+mobile+") != -1) ||
                        (aux.IndexOf("=mail+out+") != -1) || (aux.IndexOf("+mail+out+") != -1) ||
                        (aux.IndexOf("=mailed+to+your+door+") != -1) || (aux.IndexOf("+mailed+to+your+door+") != -1) ||
                        (aux.IndexOf("=they+come+to+you+") != -1) || (aux.IndexOf("+they+come+to+you+") != -1))
                    {
                        dd.data[18] = "";
                        dd.data[11] = "";
                        dd.data[12] = "";
                    }
                }

                RoundLatLong(ref dd.data[11], ref dd.data[12], ref AtTheEnd);

                // if there is no googlemaps link, we create the URL
                if (dd.data[18] == "")
                {
                    if ((dd.data[11] != "") && (dd.data[12] != ""))
                    {
                        dd.data[18] = "http://maps.google.com/maps?q=" + dd.data[11] + ", " + dd.data[12];
                    }
                }

                // If it is an online deal (i.e., there is no address, postal code, remove province, country and city. The advertised cities can be caught from OtherData.ListOfCities table
                if ((dd.data[13]=="") && (dd.data[14]=="") && (dd.data[16]==""))
                {
                    dd.data[15] = "";
                    dd.data[17] = "";
                    dd.data[43] = "";
                }

                PriceHandling(dd);
                dd.data[34] = isDealValid(dd.data[32], dd.data[33], dd.data[34]);
                GetExpiryTime(dd);
                VouchersHandling(dd);
            // end of Data Handling

                SqlCommand myCommandDeal = new SqlCommand("INSERT INTO DealsList (Website, DealID, DealLinkURL, Category, Image, Description, DealerID, RegularPrice, OurPrice, Saved, Discount, PayOutAmount, PayOutLink, ExpiryTime, MaxNumberVouchers, MinNumberVouchers, PaidVoucherCount, DealExtractedTime, Highlights, BuyDetails, DealText, Reviews) Values (@Website, @DealID, @DealLinkURL, @Category, @Image, @Description, @DealerID, @RegularPrice, @OurPrice, @Saved, @Discount, @PayOutAmount, @PayOutLink, @ExpiryTime, @MaxNumberOfVouchers, @MinNumberOfVouchers, @PaidVoucherCount, @DealExtractedTime, @Highlights, @BuyDetails, @DealText, @Reviews)", myConnection);
                SqlCommand myCommandOtherData = new SqlCommand("INSERT INTO OtherData (Website, DealID, ListOfCities, SideDeals, RegularPrice, OurPrice, Saved, Discount, SecondsTotal, SecondsElapsed, RemainingTime, ExpiryTime, DealSoldOut, DealEnded, DealValid, RelatedDeals) Values (@Website, @DealID, @ListOfCities, @SideDeals, @RegularPrice, @OurPrice, @Saved, @Discount, @SecondsTotal, @SecondsElapsed, @RemainingTime, @ExpiryTime, @DealSoldOut, @DealEnded, @DealValid, @RelatedDeals)", myConnection);

                SqlParameter p41 = new SqlParameter();
                p41.ParameterName = "@DealerID";
                p41.Value = getDealerID(dd.data, myConnection, writer, ref AtTheEnd);
                if ((p41.Value.ToString() == "") || (p41.Value.ToString() == "{:-("))
                    p41.Value = DBNull.Value;
                myCommandDeal.Parameters.Add(p41);

                SqlParameter p1 = new SqlParameter();
                p1.ParameterName = "@Website";
                if ((dd.data[0] == "") || (dd.data[0] == "{:-("))
                    p1.Value = DBNull.Value;
                else
                    p1.Value = dd.data[0];
                myCommandDeal.Parameters.Add(p1);

                SqlParameter p2 = new SqlParameter();
                p2.ParameterName = "@DealID";
                if ((dd.data[4] == "") || (dd.data[4] == "{:-("))
                    p2.Value = DBNull.Value;
                else
                    p2.Value = dd.data[4];
                myCommandDeal.Parameters.Add(p2);

                SqlParameter p3 = new SqlParameter();
                p3.ParameterName = "@DealLinkURL";
                if ((dd.data[5] == "") || (dd.data[5] == "{:-("))
                    p3.Value = DBNull.Value;
                else
                    p3.Value = dd.data[5];
                myCommandDeal.Parameters.Add(p3);

                SqlParameter p4 = new SqlParameter();
                p4.ParameterName = "@Category";
                if ((dd.data[6] == "") || (dd.data[6] == "{:-("))
                    p4.Value = DBNull.Value;
                else
                   p4.Value = dd.data[6];
                myCommandDeal.Parameters.Add(p4);

                SqlParameter p7 = new SqlParameter();
                p7.ParameterName = "@Image";
                if ((dd.data[9] == "") || (dd.data[9] == "{:-("))
                    p7.Value = DBNull.Value;
                else
                    p7.Value = dd.data[9];
                myCommandDeal.Parameters.Add(p7);

                SqlParameter p8 = new SqlParameter();
                p8.ParameterName = "@Description";
                if ((dd.data[10] == "") || (dd.data[10] == "{:-("))
                    p8.Value = DBNull.Value;
                else
                    p8.Value = dd.data[10];
                myCommandDeal.Parameters.Add(p8);

                SqlParameter p18 = new SqlParameter();
                p18.ParameterName = "@RegularPrice";
                if ((dd.data[20] == "") || (dd.data[20] == "{:-("))
                    p18.Value = DBNull.Value;
                else
                    p18.Value = decimal.Parse(dd.data[20]);
                myCommandDeal.Parameters.Add(p18);

                SqlParameter p19 = new SqlParameter();
                p19.ParameterName = "@OurPrice";
                if ((dd.data[21] == "") || (dd.data[21] == "{:-("))
                    p19.Value = DBNull.Value;
                else
                    p19.Value = decimal.Parse(dd.data[21]);
                myCommandDeal.Parameters.Add(p19);

                SqlParameter p20 = new SqlParameter();
                p20.ParameterName = "@Saved";
                if ((dd.data[22] == "") || (dd.data[22] == "{:-("))
                    p20.Value = DBNull.Value;
                else
                    p20.Value = decimal.Parse(dd.data[22]);
                myCommandDeal.Parameters.Add(p20);

                SqlParameter p21 = new SqlParameter();
                p21.ParameterName = "@Discount";
                if ((dd.data[23] == "") || (dd.data[23] == "{:-("))
                    p21.Value = DBNull.Value;
                else
                    p21.Value = decimal.Parse(dd.data[23]);
                myCommandDeal.Parameters.Add(p21);

                SqlParameter p22 = new SqlParameter();
                p22.ParameterName = "@PayOutAmount";
                if ((dd.data[24] == "") || (dd.data[24] == "{:-("))
                    p22.Value = DBNull.Value;
                else
                    p22.Value = decimal.Parse(dd.data[24]);
                myCommandDeal.Parameters.Add(p22);

                SqlParameter p23 = new SqlParameter();
                p23.ParameterName = "@PayOutLink";
                if ((dd.data[25] == "") || (dd.data[25] == "{:-("))
                    p23.Value = DBNull.Value;
                else
                    p23.Value = dd.data[25];
                myCommandDeal.Parameters.Add(p23);

                SqlParameter p27 = new SqlParameter();
                p27.ParameterName = "@ExpiryTime";
                if ((dd.data[29] == "") || (dd.data[29] == "{:-("))
                    p27.Value = DBNull.Value;
                else
                    p27.Value = DateTimeOffset.Parse(dd.data[29]);
                myCommandDeal.Parameters.Add(p27);

                SqlParameter p28 = new SqlParameter();
                p28.ParameterName = "@MaxNumberOfVouchers";
                if ((dd.data[30] == "") || (dd.data[30] == "{:-("))
                    p28.Value = DBNull.Value;
                else
                    p28.Value = Convert.ToInt32(dd.data[30]);
                myCommandDeal.Parameters.Add(p28);

                SqlParameter p29 = new SqlParameter();
                p29.ParameterName = "@MinNumberOfVouchers";
                if ((dd.data[31] == "") || (dd.data[31] == "{:-("))
                    p29.Value = DBNull.Value;
                else
                    p29.Value = Convert.ToInt32(dd.data[31]);
                myCommandDeal.Parameters.Add(p29);

                SqlParameter p31 = new SqlParameter();
                p31.ParameterName = "@PaidVoucherCount";
                if ((dd.data[35] == "") || (dd.data[35] == "{:-("))
                    p31.Value = DBNull.Value;
                else
                    p31.Value = Convert.ToInt32(dd.data[35]);
                myCommandDeal.Parameters.Add(p31);

                SqlParameter p32 = new SqlParameter();
                p32.ParameterName = "@Highlights";
                if ((dd.data[36] == "") || (dd.data[36] == "{:-("))
                    p32.Value = DBNull.Value;
                else
                    p32.Value = dd.data[36];
                myCommandDeal.Parameters.Add(p32);

                SqlParameter p33 = new SqlParameter();
                p33.ParameterName = "@BuyDetails";
                if ((dd.data[37] == "") || (dd.data[37] == "{:-("))
                    p33.Value = DBNull.Value;
                else
                    p33.Value = dd.data[37];
                myCommandDeal.Parameters.Add(p33);

                SqlParameter p34 = new SqlParameter();
                p34.ParameterName = "@DealText";
                if ((dd.data[38] == "") || (dd.data[38] == "{:-("))
                    p34.Value = DBNull.Value;
                else
                    p34.Value = dd.data[38];
                myCommandDeal.Parameters.Add(p34);

                SqlParameter p35 = new SqlParameter();
                p35.ParameterName = "@Reviews";
                if ((dd.data[39] == "") || (dd.data[39] == "{:-("))
                    p35.Value = DBNull.Value;
                else
                    p35.Value = dd.data[39];
                myCommandDeal.Parameters.Add(p35);

                SqlParameter p42 = new SqlParameter();
                p42.ParameterName = "@DealExtractedTime";
                p42.Value = DateTimeOffset.Parse(dd.data[1]);
                myCommandDeal.Parameters.Add(p42);

            //                SqlParameter p41 = new SqlParameter();
            //                p41.ParameterName = "@DealerID";
            //                string DealerID = dd.data[7];
            //                if (DealerID.Length > 15)
            //                    DealerID = DealerID.Substring(0, 15);
            //                p41.Value = DealerID;
            //                myCommandDeal.Parameters.Add(p41);

                SqlParameter p1a = new SqlParameter();
                p1a.ParameterName = "@Website";
                if ((dd.data[0] == "") || (dd.data[0] == "{:-("))
                    p1a.Value = DBNull.Value;
                else
                    p1a.Value = dd.data[0];
                myCommandOtherData.Parameters.Add(p1a);

                SqlParameter p2a = new SqlParameter();
                p2a.ParameterName = "@DealID";
                if ((dd.data[4] == "") || (dd.data[4] == "{:-("))
                    p2a.Value = DBNull.Value;
                else
                    p2a.Value = dd.data[4];
                myCommandOtherData.Parameters.Add(p2a);

                SqlParameter p18a = new SqlParameter();
                p18a.ParameterName = "@RegularPrice";
                if ((dd.data[20] == "") || (dd.data[20] == "{:-("))
                    p18a.Value = DBNull.Value;
                else
                    p18a.Value = decimal.Parse(dd.data[20]);
                myCommandOtherData.Parameters.Add(p18a);

                SqlParameter p19a = new SqlParameter();
                p19a.ParameterName = "@OurPrice";
                if ((dd.data[21] == "") || (dd.data[21] == "{:-("))
                    p19a.Value = DBNull.Value;
                else
                    p19a.Value = decimal.Parse(dd.data[21]);
                myCommandOtherData.Parameters.Add(p19a);

                SqlParameter p20a = new SqlParameter();
                p20a.ParameterName = "@Saved";
                if ((dd.data[22] == "") || (dd.data[22] == "{:-("))
                    p20a.Value = DBNull.Value;
                else
                    p20a.Value = decimal.Parse(dd.data[22]);
                myCommandOtherData.Parameters.Add(p20a);

                SqlParameter p21a = new SqlParameter();
                p21a.ParameterName = "@Discount";
                if ((dd.data[23] == "") || (dd.data[23] == "{:-("))
                    p21a.Value = DBNull.Value;
                else
                    p21a.Value = decimal.Parse(dd.data[23]);
                myCommandOtherData.Parameters.Add(p21a);

                SqlParameter p24 = new SqlParameter();
                p24.ParameterName = "@SecondsTotal";
                if ((dd.data[26] == "") || (dd.data[26] == "{:-("))
                    p24.Value = DBNull.Value;
                else
                    p24.Value = dd.data[26];
                myCommandOtherData.Parameters.Add(p24);

                SqlParameter p25 = new SqlParameter();
                p25.ParameterName = "@SecondsElapsed";
                if ((dd.data[27] == "") || (dd.data[27] == "{:-("))
                    p25.Value = DBNull.Value;
                else
                    p25.Value = dd.data[27];
                myCommandOtherData.Parameters.Add(p25);

                SqlParameter p26 = new SqlParameter();
                p26.ParameterName = "@RemainingTime";
                if ((dd.data[28] == "") || (dd.data[28] == "{:-("))
                    p26.Value = DBNull.Value;
                else
                    p26.Value = dd.data[28];
                myCommandOtherData.Parameters.Add(p26);

                SqlParameter p27a = new SqlParameter();
                p27a.ParameterName = "@ExpiryTime";
                if ((dd.data[29] == "") || (dd.data[29] == "{:-("))
                    p27a.Value = DBNull.Value;
                else
                    p27a.Value = DateTimeOffset.Parse(dd.data[29]);
                myCommandOtherData.Parameters.Add(p27a);

                SqlParameter p30 = new SqlParameter();
                p30.ParameterName = "@DealValid";
                if ((dd.data[34] == "") || (dd.data[34] == "{:-("))
                    p30.Value = DBNull.Value;
                else
                    p30.Value = dd.data[34];
                myCommandOtherData.Parameters.Add(p30);

                SqlParameter p36 = new SqlParameter();
                p36.ParameterName = "@ListOfCities";
                if ((dd.data[2] == "") || (dd.data[2] == "{:-("))
                    p36.Value = DBNull.Value;
                else
                    p36.Value = dd.data[2];
                myCommandOtherData.Parameters.Add(p36);

                SqlParameter p37 = new SqlParameter();
                p37.ParameterName = "@SideDeals";
                if ((dd.data[3] == "") || (dd.data[3] == "{:-("))
                    p37.Value = DBNull.Value;
                else
                    p37.Value = dd.data[3];
                myCommandOtherData.Parameters.Add(p37);

                SqlParameter p38 = new SqlParameter();
                p38.ParameterName = "@DealSoldOut";
                if ((dd.data[32] == "") || (dd.data[32] == "{:-("))
                    p38.Value = DBNull.Value;
                else
                    p38.Value = dd.data[32];
                myCommandOtherData.Parameters.Add(p38);

                SqlParameter p39 = new SqlParameter();
                p39.ParameterName = "@DealEnded";
                if ((dd.data[33] == "") || (dd.data[33] == "{:-("))
                    p39.Value = DBNull.Value;
                else
                    p39.Value = dd.data[33];
                myCommandOtherData.Parameters.Add(p39);

                SqlParameter p40 = new SqlParameter();
                p40.ParameterName = "@RelatedDeals";
                if ((dd.data[40] == "") || (dd.data[40] == "{:-("))
                    p40.Value = DBNull.Value;
                else
                    p40.Value = dd.data[40];
                myCommandOtherData.Parameters.Add(p40);

                myCommandDeal.ExecuteNonQuery();
                myCommandOtherData.ExecuteNonQuery();

                for (int j = 0; j < dd.data.Length; j++)
                {
                    line = line + dd.data[j];
                    if (j < dd.data.Length)
                        line = line + "|";
                    else
                        line = line + "\n\n";
                }
                writer.WriteLine(line);
            }

            try
            {
                myConnection.Close();
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }

            writer.WriteLine("\n\n" + AtTheEnd);
            writer.Close();
        }
コード例 #2
0
 /// <summary>
 /// Parses one Search keyword (?) specification from <paramref name="str"/>, starting at index <paramref name="c1"/>.
 /// </summary>
 /// <remarks>
 /// Accepted layout, with optional spaces between the parts: an optional '&lt;' direction marker,
 /// an optional repeat count (first digit 1-9, following digits 0-9), then either a variable
 /// reference ('$' followed by a one- or two-digit index) or a double-quoted literal in which
 /// \" stands for a quote character. When no repeat count is given the times value is set to 1.
 /// On any format error a message is written to the console and appended to AtTheEnd, and the
 /// returned keyword has its times value set to -1.
 /// </remarks>
 /// <param name="str">Text holding the keyword specification.</param>
 /// <param name="c1">Index to start parsing at; after a successful parse it points just past the parsed keyword.</param>
 /// <param name="DealData">Values extracted so far; an empty entry referenced through '$' is extracted on demand and stored back.</param>
 /// <param name="read">Passed through to SingleDataExtraction when a referenced entry has to be extracted.</param>
 /// <returns>The parsed keyword, or a keyword whose times value is -1 when parsing failed.</returns>
 public keywords GetSearchString(string str, ref int c1, Tags DealData, string read)
 {
     keywords k = new keywords();
     // Nothing left to parse. (>= rather than >: str[str.Length] is already out of range.)
     if (c1 >= str.Length)
     {
         Console.WriteLine("Mistake at the end of Search keyword (?).");
         AtTheEnd += "ERROR: Mistake at the end of Search keyword (?).";
         k.SetTimes(-1);
         return (k);
     }
     // Every probe of str[c1] below is bounds-checked so that a truncated specification
     // ends up in the format-error branch instead of throwing IndexOutOfRangeException.
     while ((c1 < str.Length) && (str[c1] == ' '))
     {
         c1 += 1;
     }
     if ((c1 < str.Length) && (str[c1] == '<'))
     {
         k.SetDirection('<');
         c1 += 1;
         while ((c1 < str.Length) && (str[c1] == ' '))
         {
             c1 += 1;
         }
     }
     // Optional repeat count. The first digit must be 1-9; later digits may be 0-9 so that
     // counts such as 10 or 20 are accepted.
     if ((c1 < str.Length) && (str[c1] >= '1') && (str[c1] <= '9'))
     {
         do
         {
             k.SetTimes(10 * k.GetTimes() + (str[c1] - '0'));
             c1 += 1;
         } while ((c1 < str.Length) && (str[c1] >= '0') && (str[c1] <= '9'));
         while ((c1 < str.Length) && (str[c1] == ' '))
         {
             c1 += 1;
         }
     }
     if ((c1 < str.Length) && (str[c1] == '$'))
     {
         // Variable reference: '$' followed by a one- or two-digit index.
         c1 += 1;
         int c2 = c1 + 1;
         bool firstIsDigit = (c1 < str.Length) && (str[c1] >= '0') && (str[c1] <= '9');
         bool secondIsDigit = (c2 < str.Length) && (str[c2] >= '0') && (str[c2] <= '9');
         int num;
         if (firstIsDigit && secondIsDigit)
         {
             num = 10 * (str[c1] - '0') + (str[c2] - '0');
             c1 += 1;
         }
         else if (firstIsDigit)
         {
             num = str[c1] - '0';
         }
         else
         {
             Console.WriteLine("Mistake at variable ($).");
             AtTheEnd += "ERROR: Mistake at variable ($).";
             k.SetTimes(-1);
             return (k);
         }
         if (DealData.data[num] == "")
         {
             // The referenced value has not been extracted yet: extract it now using the
             // website's definition for that index. RecursList holds the indexes currently
             // being resolved, so a definition that refers back to itself is reported
             // instead of recursing without end.
             string definition = this.oneWebsite.data[num];
             if (!RecursList.Contains(num))
             {
                 RecursList.Add(num);
                 definition = this.SingleDataExtraction(definition, read, DealData);
                 RecursList.Remove(num);
                 DealData.data[num] = definition;
             }
             else
             {
                 Console.WriteLine("ERROR: Recursivity definition!" + definition);
                 AtTheEnd += "ERROR: Recursivity definition!" + definition;
                 k.SetTimes(-1);
                 return (k);
             }
         }
         k.SetKeyword(DealData.data[num]);
         k.SetIndex(num);
         if (k.GetTimes() == 0) k.SetTimes(1);
         k.SetType('?');
         c1 += 1;
     }
     else if ((c1 < str.Length) && (str[c1] == '"'))
     {
         // Quoted literal: find the closing quote, skipping quotes preceded by a backslash.
         int c2 = c1;
         c1 += 1;
         do
         {
             c2 = str.IndexOf('"', c2 + 1);
             if (c2 == -1)
             {
                 Console.WriteLine("Missing \" in Search keyword. Can't go on.");
                 AtTheEnd += "ERROR: Missing \" in Search keyword. Can't go on.";
                 k.SetTimes(-1);
                 return (k);
             }
         } while (str[c2 - 1] == '\\');
         k.SetKeyword(str.Substring(c1, c2 - c1));
         k.SetKeyword(k.GetKeyword().Replace("\\\"", "\""));
         if (k.GetTimes() == 0) k.SetTimes(1);
         k.SetType('?');
         c1 = c2 + 1;
     }
     else
     {
         Console.WriteLine("Error in Search tag (?) format. Probably missing \" at " + c1 + " character.");
         AtTheEnd += "ERROR: Error in Search tag (?) format. Probably missing \" at " + c1 + " character.";
         k.SetTimes(-1);
     }
     return (k);
 }
コード例 #3
0
 /// <summary>
 /// Parses one End keyword (@) specification from <paramref name="str"/>, starting at index <paramref name="c1"/>.
 /// </summary>
 /// <remarks>
 /// Accepted layout, with optional spaces between the parts: an optional repeat count
 /// (first digit 1-9, following digits 0-9) and a double-quoted literal in which \" stands
 /// for a quote character. A '&lt;' direction marker is rejected. When no repeat count is
 /// given the times value is set to 1. On any format error a message is written to the
 /// console and appended to AtTheEnd, and the returned keyword has its times value set to -1.
 /// </remarks>
 /// <param name="str">Text holding the keyword specification.</param>
 /// <param name="c1">Index to start parsing at; after a successful parse it points just past the closing quote.</param>
 /// <returns>The parsed keyword, or a keyword whose times value is -1 when parsing failed.</returns>
 public keywords GetEndString(string str, ref int c1)
 {
     keywords k = new keywords();
     // Nothing left to parse. (>= rather than >: str[str.Length] is already out of range.)
     if (c1 >= str.Length)
     {
         Console.WriteLine("Mistake at the end of End keyword (@).");
         AtTheEnd += "ERROR: Mistake at the end of End keyword (@).";
         k.SetTimes(-1);
         return (k);
     }
     // Every probe of str[c1] below is bounds-checked so that a truncated specification
     // ends up in the format-error branch instead of throwing IndexOutOfRangeException.
     while ((c1 < str.Length) && (str[c1] == ' '))
     {
         c1 += 1;
     }
     if ((c1 < str.Length) && (str[c1] == '<'))
     {
         Console.WriteLine("ERROR: End delimiter can't search back.");
         AtTheEnd += "ERROR: End delimiter can't search back.";
         k.SetTimes(-1);
         return (k);
     }
     // Optional repeat count. The first digit must be 1-9; later digits may be 0-9 so that
     // counts such as 10 or 20 are accepted.
     if ((c1 < str.Length) && (str[c1] >= '1') && (str[c1] <= '9'))
     {
         do
         {
             // Subtract '0' to accumulate the digit's numeric value rather than its character code.
             k.SetTimes(10 * k.GetTimes() + (str[c1] - '0'));
             c1 += 1;
         } while ((c1 < str.Length) && (str[c1] >= '0') && (str[c1] <= '9'));
         while ((c1 < str.Length) && (str[c1] == ' '))
         {
             c1 += 1;
         }
     }
     if ((c1 < str.Length) && (str[c1] == '"'))
     {
         // Quoted literal: find the closing quote, skipping quotes preceded by a backslash.
         int c2 = c1;
         c1 += 1;
         do
         {
             c2 = str.IndexOf('"', c2 + 1);
             if (c2 == -1)
             {
                 Console.WriteLine("Missing \" in End keyword. Can't go on.");
                 AtTheEnd += "ERROR: Missing \" in End keyword. Can't go on.";
                 k.SetTimes(-1);
                 return (k);
             }
         } while (str[c2 - 1] == '\\');
         k.SetKeyword(str.Substring(c1, c2 - c1));
         k.SetKeyword(k.GetKeyword().Replace("\\\"", "\""));
         if (k.GetTimes() == 0) k.SetTimes(1);
         k.SetType('@');
         c1 = c2 + 1;
     }
     else
     {
         Console.WriteLine("Error in End tag (@) format. Probably missing \" at " + c1 + " character.");
         AtTheEnd += "ERROR: Error in End tag (@) format. Probably missing \" at " + c1 + " character.";
         k.SetTimes(-1);
     }
     return (k);
 }