Exemplo n.º 1
0
        /// <summary>
        /// Loads the word list named by the "WordsFile" app setting into the shared
        /// <c>trie</c>/<c>result</c> fields, truncates [dbo].[tbSplit], runs
        /// <c>SplitNames</c> over consecutive chunks of tbNewDomains and finally executes
        /// the [dbo].[CreateDomainsTableKeys] stored procedure. Elapsed times are written
        /// to the console after the split phase and after the stored procedure.
        /// </summary>
        private static void EngWords()
        {
            // Stopwatch is monotonic; DateTime.Now can jump (DST, clock sync) mid-run.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            var file  = ConfigurationManager.AppSettings["WordsFile"];

            trie = new Trie(File.ReadAllLines(file));
            var t = new List <string>();

            result      = new Words(t);
            result.Tree = trie;

            // Chunk size; SplitNames selects Id ranges using the same value (10000).
            var p = 10000;

            using (var db = new DomainsEntities())
            {
                db.Database.CommandTimeout = 0;
                var com = "TRUNCATE TABLE [dbo].[tbSplit]";
                db.Database.ExecuteSqlCommand(com);

                // NOTE(review): the chunk count comes from the row count while SplitNames
                // selects by Id range; if Ids have gaps or start far from 0, rows whose
                // Id is >= (m + 1) * p would never be visited - confirm against the data.
                var m = db.tbNewDomains.Count() / p;

                // Chunks 0..m inclusive; stop early if a chunk reports failure.
                for (var chunk = 0; chunk <= m; chunk++)
                {
                    if (!SplitNames(chunk))
                    {
                        break;
                    }
                }

                var t2 = Math.Round((decimal)timer.Elapsed.TotalSeconds).ToString();
                Console.WriteLine("Names split is executed in {0} sec", t2);

                // prepare keywords params
                com = "EXEC [dbo].[CreateDomainsTableKeys]";
                db.Database.ExecuteSqlCommand(com);

                // Reported time is cumulative since the start of the method.
                t2 = Math.Round((decimal)timer.Elapsed.TotalSeconds).ToString();
                Console.WriteLine("Keywords searches are filled {0} sec", t2);
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Executes the [dbo].[CreateDomainsTable] stored procedure, reads the single
 /// integer it returns and prints it as the record count together with the
 /// elapsed time in whole seconds.
 /// </summary>
 private static void CreateMasterTable()
 {
     using (var context = new DomainsEntities())
     {
         DateTime startedAt = DateTime.Now;

         // A command timeout of 0 lets the stored procedure run without a time limit.
         context.Database.CommandTimeout = 0;

         int recordCount = context.Database
                                  .SqlQuery <Int32>("EXEC [dbo].[CreateDomainsTable]")
                                  .Single();

         TimeSpan elapsed        = DateTime.Now - startedAt;
         string   elapsedSeconds = Math.Round((decimal)elapsed.TotalSeconds).ToString();

         Console.WriteLine("New table ({0} records) is normalized in {1} sec", recordCount.ToString(), elapsedSeconds);
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Processes one chunk of tbNewDomains: every row whose Id lies in
        /// [chunkNum * 10000, (chunkNum + 1) * 10000) is split with the shared
        /// <c>result</c> object and the resulting tbSplit rows are bulk-inserted.
        /// </summary>
        /// <param name="chunkNum">Zero-based chunk index.</param>
        /// <returns>
        /// Always <c>true</c>. A failed insert is reported on standard error and the
        /// caller is allowed to continue with the next chunk.
        /// </returns>
        private static bool SplitNames(int chunkNum)
        {
            using (var db = new DomainsEntities())
            {
                db.Configuration.AutoDetectChangesEnabled = false;
                db.Configuration.ValidateOnSaveEnabled    = false;
                var p = 10000;

                var timer = System.Diagnostics.Stopwatch.StartNew();
                var list  = db.tbNewDomains.Where(i => i.Id >= chunkNum * p && i.Id < (chunkNum + 1) * p);

                var dlist = new List <tbSplit>();
                foreach (var d in list)
                {
                    // FindAndSplit must run first: the calls below read its outcome
                    // from the shared result object.
                    result.FindAndSplit(d.Name);
                    var dn = new tbSplit();
                    dn.DomID     = d.Id;
                    dn.NameShown = result.FindBestIncSeparators();
                    dn.NameWords = result.SplitKeywords(dn.NameShown);
                    dn.WordCount = result.BestItemCount;
                    dlist.Add(dn);
                }

                try
                {
                    // Nothing to write for an Id range that holds no rows.
                    if (dlist.Count > 0)
                    {
                        db.BulkInsert(dlist);
                        db.SaveChanges();
                    }
                }
                catch (Exception ex)
                {
                    // Previously swallowed silently; surface the failure but keep the
                    // best-effort behaviour of moving on to the next chunk.
                    Console.Error.WriteLine("{0}-th chunk could not be saved: {1}", (chunkNum + 1).ToString(), ex);
                }

                var t1 = Math.Round((decimal)timer.Elapsed.TotalSeconds).ToString();
                Console.WriteLine("{0}-th chunk is processed ({1})", (chunkNum + 1).ToString(), t1);
            }
            return(true);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Downloads one result page, reads the rows of the table with class 'base1'
        /// and queues every row for insertion via <c>WriteToDB</c> until a row whose
        /// second cell (parsed as an integer) is below <paramref name="minPR"/>.
        /// Queued rows are saved with a single <c>SaveChanges</c> call per page.
        /// </summary>
        /// <param name="db">Context that receives the new rows.</param>
        /// <param name="urlStr">Format string; placeholder {0} receives the row offset.</param>
        /// <param name="n">One-based page number; the offset is (n - 1) * 25.</param>
        /// <param name="minPR">Rows with a value below this stop the processing.</param>
        /// <returns>
        /// <c>true</c> when the caller should request the next page; <c>false</c> when a
        /// row below <paramref name="minPR"/> was met, the page held no rows, or an
        /// error occurred (the error message is written to the console).
        /// </returns>
        public static bool GetPage(DomainsEntities db, string urlStr, int n, int minPR)
        {
            try
            {
                var timer = System.Diagnostics.Stopwatch.StartNew();
                var test  = true;
                var p     = Convert.ToString((n - 1) * 25);
                var url   = string.Format(urlStr, p);

                string page;
                // WebClient is IDisposable; release it as soon as the download is done.
                using (var webClient = new WebClient())
                {
                    page = webClient.DownloadString(url);
                }

                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(page);

                var tableNode = doc.DocumentNode.SelectSingleNode("//table[@class='base1']"); // responsive
                if (tableNode == null)
                {
                    // Report the real cause instead of a bare NullReferenceException.
                    Console.Write("Table 'base1' was not found in " + url + '\n');
                    return(false);
                }

                List <List <string> > table = tableNode
                                              .Descendants("tr")
                                              .Skip(1) // first row is skipped (presumably the header)
                                              .Where(tr => tr.Elements("td").Count() > 1)
                                              .Select(tr => tr.Elements("td")
                                                              .Select(td => td.FirstChild == null ? string.Empty : td.FirstChild.InnerText.Trim())
                                                              .ToList())
                                              .ToList();

                if (table.Count == 0)
                {
                    // A page without rows would otherwise return true and make the
                    // caller request further pages indefinitely.
                    test = false;
                }

                foreach (var item in table)
                {
                    if (int.Parse(item[1]) < minPR)
                    {
                        test = false;
                        break;
                    }

                    // Domain names are not linguistic text: lower-case them culture-independently.
                    WriteToDB(db, item[0].ToLowerInvariant(), item[1], item[2], item[4], item[6]);
                }

                db.SaveChanges();
                var tm = Math.Floor(timer.Elapsed.TotalMilliseconds).ToString();
                Console.WriteLine("{0} is processed in {1} ms", url, tm);
                return(test);
            }
            catch (Exception ex)
            {
                Console.Write(ex.Message + '\n');
                return(false);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Adds a tbGooglePR row for <paramref name="dom"/> to the context unless the
        /// table already holds a row with that domain. The row is only added to the
        /// set; saving is left to the caller.
        /// </summary>
        /// <param name="db">Context that receives the new row.</param>
        /// <param name="dom">Domain name used as the lookup key.</param>
        /// <param name="prStr">Stored in GooglePR when it parses as an integer.</param>
        /// <param name="blStr">Stored in BackLinks when it parses as a non-zero integer.</param>
        /// <param name="yStr">Stored in Year (and Archive set to true) when it parses as an integer.</param>
        /// <param name="dmoz">Dmoz is set to true when this equals "Yes".</param>
        private static void WriteToDB(DomainsEntities db, string dom, string prStr, string blStr, string yStr, string dmoz)
        {
            // Existence check only; no need to materialize the matching entity.
            // NOTE(review): this queries the database, so a duplicate queued earlier in
            // the same unsaved batch is presumably not detected - confirm whether
            // duplicates can occur within one page.
            if (db.tbGooglePRs.Any(i => i.Domain == dom))
            {
                return;
            }

            try
            {
                var tb = new tbGooglePR();
                tb.Domain = dom;

                int temp;

                if (int.TryParse(prStr, out temp))
                {
                    tb.GooglePR = temp;
                }

                if (int.TryParse(yStr, out temp))
                {
                    tb.Year    = temp;
                    tb.Archive = true;
                }

                if (int.TryParse(blStr, out temp) && temp != 0)
                {
                    tb.BackLinks = temp;
                }

                if (dmoz == "Yes")
                {
                    tb.Dmoz = true;
                }

                db.tbGooglePRs.Add(tb);
            }
            catch (Exception ex)
            {
                // Previously swallowed silently; keep going but leave a trace.
                Console.Error.WriteLine("Domain '{0}' could not be queued: {1}", dom, ex.Message);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Truncates [dbo].[tbGooglePR], then reads URL format strings line by line
        /// from <c>gprSource</c> and, for each of them, calls
        /// <c>GooglePR.GetPage</c> with increasing page numbers until it returns
        /// <c>false</c>. A random pause of up to "AverageDelay" seconds follows every
        /// page. A summary is printed at the end.
        /// </summary>
        private static void ImportGPR()
        {
            var    timer = System.Diagnostics.Stopwatch.StartNew();
            string urlStr;

            var delay = 1000 * int.Parse(ConfigurationManager.AppSettings["AverageDelay"]);
            var p     = int.Parse(ConfigurationManager.AppSettings["MinPR"]);
            var m     = 0;

            // One generator for the whole run instead of a new instance per page.
            // NOTE(review): Next(delay) is uniform in [0, delay), so the mean pause is
            // half of the configured "AverageDelay" - confirm this is intended.
            var r = new Random();

            using (var db = new DomainsEntities())
            {
                var com = "TRUNCATE TABLE [dbo].[tbGooglePR]";
                db.Database.ExecuteSqlCommand(com);
                using (StreamReader reader = new StreamReader(gprSource))
                {
                    while ((urlStr = reader.ReadLine()) != null)
                    {
                        // Blank lines are not sources; skip them without counting.
                        if (string.IsNullOrWhiteSpace(urlStr))
                        {
                            continue;
                        }

                        // Both values must restart for every source. The flag used to be
                        // shared across sources, so once the first source finished every
                        // following one was skipped while still being counted.
                        var n    = 1;
                        var test = true;
                        while (test)
                        {
                            test = GooglePR.GetPage(db, urlStr, n, p);
                            int sec = r.Next(delay);
                            Thread.Sleep(sec);
                            n++;
                        }
                        m++;
                    }
                }
                var cnt = db.tbGooglePRs.Count();
                var tm1 = Math.Floor(timer.Elapsed.TotalSeconds).ToString();
                Console.WriteLine("Completed all in {0} sec ({1} sources, {2} records)", tm1, Convert.ToString(m), cnt.ToString());
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads a comma-delimited file from <c>_outputFolder</c> and bulk-copies its
        /// records into the tbDomainsFromSrc table in batches of <c>_batchSize</c>.
        /// The first record of the file is skipped. Source and status names are
        /// replaced by their ids, and Name, Tld and Length columns are derived from
        /// the domain.
        /// </summary>
        /// <param name="fileName">File name inside <c>_outputFolder</c>; with ".csv" removed it is stored in SrcFile.</param>
        /// <param name="withTruncate">When true, <c>_truncateLiveTableCommandText</c> is executed before loading.</param>
        /// <returns>Number of records read from the file, including the skipped first record.</returns>
        public int LoadCsvDataIntoSqlServer(string fileName, bool withTruncate)
        {
            using (var db = new DomainsEntities())
            {
                // Name -> Id lookups used to translate the source and status columns.
                var srcs = db.tbSrcs.ToDictionary(g => g.Name, g => g.Id);
                var sts  = db.tbStatus.ToDictionary(g => g.Name, g => g.Id);

                var createdCount = 0;
                var fn           = string.Format("{0}\\{1}", _outputFolder, fileName);
                using (var textFieldParser = new TextFieldParser(fn))
                {
                    textFieldParser.TextFieldType             = FieldType.Delimited;
                    textFieldParser.Delimiters                = new[] { "," };
                    textFieldParser.HasFieldsEnclosedInQuotes = true;

                    var dataTable = new DataTable("tbDomainsFromSrc");

                    // Add the columns in the temp table. The order must match the
                    // order of the values in the array passed to Rows.Add below.
                    dataTable.Columns.Add("Domain");
                    dataTable.Columns.Add("Price");
                    dataTable.Columns.Add("ExpDate");
                    dataTable.Columns.Add("Source", typeof(Int32));
                    dataTable.Columns.Add("IdInSource");
                    dataTable.Columns.Add("Age");
                    dataTable.Columns.Add("Status", typeof(Int32));
                    dataTable.Columns.Add("SrcFile");
                    // calculated columns
                    dataTable.Columns.Add("Name");
                    dataTable.Columns.Add("Tld");
                    dataTable.Columns.Add("Length");

                    using (var sqlConnection = new SqlConnection(_connectionString))
                    {
                        sqlConnection.Open();

                        if (withTruncate)
                        {
                            // Truncate the live table
                            using (var sqlCommand = new SqlCommand(_truncateLiveTableCommandText, sqlConnection))
                            {
                                sqlCommand.ExecuteNonQuery();
                            }
                        }

                        // SqlBulkCopy is IDisposable; dispose it even when a row fails.
                        using (var sqlBulkCopy = new SqlBulkCopy(sqlConnection)
                        {
                            DestinationTableName = "tbDomainsFromSrc"
                        })
                        {
                            // Setup the column mappings, anything omitted is skipped
                            sqlBulkCopy.ColumnMappings.Add("Domain", "Domain");
                            sqlBulkCopy.ColumnMappings.Add("Price", "Price");
                            sqlBulkCopy.ColumnMappings.Add("ExpDate", "ExpDate");
                            sqlBulkCopy.ColumnMappings.Add("Source", "Source");
                            sqlBulkCopy.ColumnMappings.Add("IdInSource", "IdInSource");
                            sqlBulkCopy.ColumnMappings.Add("Age", "Age");
                            sqlBulkCopy.ColumnMappings.Add("SrcFile", "SrcFile");
                            sqlBulkCopy.ColumnMappings.Add("Status", "Status");
                            // calculated columns
                            sqlBulkCopy.ColumnMappings.Add("Name", "Name");
                            sqlBulkCopy.ColumnMappings.Add("Tld", "Tld");
                            sqlBulkCopy.ColumnMappings.Add("Length", "Length");

                            // Loop through the CSV, collect records in the DataTable and
                            // send it to the live table every _batchSize records read.
                            while (!textFieldParser.EndOfData)
                            {
                                string[] temp = textFieldParser.ReadFields();

                                // The first record is read but never loaded.
                                if (createdCount > 0)
                                {
                                    // Domain names are not linguistic text: lower-case
                                    // them culture-independently.
                                    temp[0] = temp[0].ToLowerInvariant();
                                    if (temp.Length > 7)
                                    {
                                        temp = MergeItemsInArray(temp, 1);// for price with comma
                                    }

                                    // add src
                                    temp = temp.AddItemToArray(fileName.Replace(".csv", ""));
                                    // prepare date
                                    temp[2] = GetDateValue(temp[2]);

                                    // src & status: names are replaced by their ids
                                    temp[3] = Convert.ToString(srcs[temp[3]]);
                                    temp[6] = Convert.ToString(sts[temp[6]]);

                                    // 3 calc fields: text before the first dot, text after
                                    // the first dot, and the length of the former
                                    var name = temp[0].Split('.')[0];
                                    var tld  = temp[0].Substring(temp[0].IndexOf('.') + 1);
                                    temp = temp.AddItemToArray(name);
                                    temp = temp.AddItemToArray(tld);
                                    temp = temp.AddItemToArray(name.Length.ToString());

                                    dataTable.Rows.Add(temp);
                                }
                                createdCount++;

                                // createdCount is at least 1 here, so only the modulo matters.
                                // NOTE(review): InsertDataTable presumably clears dataTable
                                // after writing; otherwise rows would be sent twice - confirm.
                                if (createdCount % _batchSize == 0)
                                {
                                    InsertDataTable(sqlBulkCopy, sqlConnection, dataTable);
                                }
                            }

                            // Don't forget to send the last, partially filled batch
                            InsertDataTable(sqlBulkCopy, sqlConnection, dataTable);
                        }

                        sqlConnection.Close();
                        return(createdCount);
                    }
                }
            }
        }