Пример #1
0
        /// <summary>
        /// Recursively crawls the given tree nodes. Every node that is not yet reported as loaded
        /// has its page requested: a non-empty response is parsed into child nodes, which are
        /// persisted and crawled in turn; an empty response makes the node be treated as a
        /// document list whose documents are downloaded.
        /// </summary>
        /// <param name="nodes">Nodes to process; <c>null</c> or an empty list is a no-op.</param>
        /// <remarks>Processing is abandoned as soon as the <c>stop</c> flag is observed set.</remarks>
        private void GetNodes(List<Leaf> nodes)
        {
            loger.Error("GetNodes");
            if (nodes == null || nodes.Count == 0)
            {
                return;
            }

            foreach (var node in nodes)
            {
                if (stop)
                {
                    return;
                }

                if (DAL.TreeManager.IsLoaded(node.site_id, node.parent_site_id))
                {
                    log(node.site_id + " - done");
                    continue;
                }

                string error;
                string html = WebPage.LoadPage(String.Format(urlPattern, node.site_id), Encoding.UTF8, out error);
                if (!String.IsNullOrWhiteSpace(error))
                {
                    log("Node:" + error);
                    continue;
                }

                if (!String.IsNullOrWhiteSpace(html))
                {
                    if (!LoadChildNodes(node, html))
                    {
                        return;
                    }
                    GetNodes(node.Nodes);
                }
                else if (!DownloadNodeDocuments(node))
                {
                    // An empty page means the node holds documents rather than child nodes.
                    return;
                }
            }
        }

        /// <summary>
        /// Parses the '$'-separated records of a node page, attaches each valid record to
        /// <paramref name="node"/> as a child and makes sure the child has a database id.
        /// </summary>
        /// <param name="node">Parent node that receives the parsed children.</param>
        /// <param name="html">Raw page content: '$'-separated records with ':'-separated fields.</param>
        /// <returns><c>false</c> when a stop was requested and the caller must bail out; otherwise <c>true</c>.</returns>
        private bool LoadChildNodes(Leaf node, string html)
        {
            foreach (var item in html.Split('$'))
            {
                if (String.IsNullOrWhiteSpace(item))
                {
                    continue;
                }
                if (stop)
                {
                    return false;
                }

                string[] fields = item.Split(':');
                long     parsedId;
                // The name is read from the fourth field, so shorter records are skipped instead of
                // raising IndexOutOfRangeException; records with a non-numeric id are skipped as before.
                if (fields.Length < 4 || !long.TryParse(fields[0], out parsedId))
                {
                    continue;
                }

                var child = new Leaf()
                {
                    site_id        = fields[0],
                    Name           = System.Web.HttpUtility.HtmlDecode(fields[3]),
                    parent_site_id = node.site_id,
                    parent_id      = node.id
                };
                node.Nodes.Add(child);

                var existingId = DAL.TreeManager.GetId(child.site_id, child.parent_site_id, (long)child.parent_id);
                if (existingId != -1)
                {
                    child.id = existingId;
                    continue;
                }

                // -1 is the "not found" result here: insert the row and take over the id it ends up with.
                using (var context = new dataEntities())
                {
                    var row = new data()
                    {
                        id             = -1,
                        site_id        = child.site_id,
                        name           = child.Name,
                        parent_id      = child.parent_id,
                        parent_site_id = child.parent_site_id,
                        position       = node.Nodes.Count,
                        is_doc         = false,
                        is_loaded      = false
                    };
                    context.data.AddObject(row);
                    context.SaveChanges();
                    child.id = row.id;
                }
            }

            return true;
        }

        /// <summary>
        /// Pages through the search results of a document node, executes a <c>DownloadTask</c>
        /// for every document link found and finally flags the node as loaded.
        /// </summary>
        /// <param name="node">Node whose documents are downloaded.</param>
        /// <returns><c>false</c> when a stop was requested and the caller must bail out; otherwise <c>true</c>.</returns>
        private bool DownloadNodeDocuments(Leaf node)
        {
            if (stop)
            {
                return false;
            }

            const string searchUrl = "http://online.zakon.kz/Search.aspx";
            const int    maxPages  = 30;
            int          docPosition = 1;

            for (int page = 1; page <= maxPages; page++)
            {
                string error;
                string html = WebPage.LoadPage(searchUrl, CreateNVC22(node.site_id, page.ToString()), Encoding.UTF8, out error);
                if (!String.IsNullOrWhiteSpace(error))
                {
                    // A failed page load must not end with the node being flagged as loaded,
                    // otherwise its remaining documents would never be retried.
                    log("Node:" + error);
                    return true;
                }

                // Regex.Match rejects null input; a missing page is handled like an empty one.
                Match m = Regex.Match(html ?? String.Empty, @"<a[\s]href='http://online\.zakon\.kz/Document/\?doc_id=([\d]*?)'.*?>(.*?)<", RegexOptions.Singleline);
                if (!m.Success)
                {
                    // No document links on this page: treated as the end of the result list.
                    break;
                }

                while (m.Success && m.Groups.Count > 1)
                {
                    if (stop)
                    {
                        return false;
                    }

                    var task = new DownloadTask(m.Groups[1].ToString(), m.Groups[2].ToString(), node.site_id, node.id, docPosition, log);
                    task.Execute();
                    // NOTE(review): forced collection kept from the original; presumably it keeps memory
                    // down between documents - confirm it is still needed before removing.
                    GC.Collect();
                    docPosition++;
                    m = m.NextMatch();
                }
            }

            // All related files were processed, so flag the node itself as loaded.
            using (var context = new dataEntities())
            {
                var row = context.data.Where(d => d.id == node.id).FirstOrDefault();
                if (row != null)
                {
                    row.is_loaded = true;
                    context.SaveChanges();
                }
            }

            return true;
        }
Пример #2
0
        /// <summary>
        /// Downloads the document file, extracts its RTF and plain-text content and stores it as a
        /// new <c>data</c> row flagged as a loaded document. Nothing is downloaded when
        /// <c>DAL.TreeManager.GetId</c> already returns a positive id for this document.
        /// </summary>
        public void Execute()
        {
            var d_indb = DAL.TreeManager.GetId(doc_id, parent_site_id, parent_id);
            if (d_indb > 0)
            {
                log("doc_id =" + doc_id + " redy");
                return;
            }

            Stopwatch sw = Stopwatch.StartNew();
            WebPage.DownloadFile(url, file_name, callback);
            sw.Stop();
            // Timings are kept in whole seconds (integer division of the elapsed milliseconds).
            var loadSeconds = sw.ElapsedMilliseconds / 1000;

            // FileInfo.Length throws FileNotFoundException when the download produced no file,
            // so a missing file is handled the same way as an empty one.
            var  fileInfo = new FileInfo(file_name);
            long fileSize = fileInfo.Exists ? fileInfo.Length : 0;
            if (fileSize <= 0)
            {
                log("Leaf: doc file length is 0");
                return;
            }

            string data_rtf, data_text, doc_info;

            sw.Restart();
            GetText(Path.GetFullPath(file_name), out data_rtf, out data_text, out doc_info);
            sw.Stop();
            var textSeconds = sw.ElapsedMilliseconds / 1000;

            sw.Restart();
            // The context is only needed for the insert, so it is not held open during text extraction.
            using (var context = new dataEntities())
            {
                var d = new data()
                {
                    id             = -1,
                    site_id        = this.doc_id,
                    name           = doc_name,
                    parent_id      = parent_id,
                    parent_site_id = parent_site_id,
                    position       = position,
                    is_doc         = true,
                    is_loaded      = true,
                    data_rtf       = data_rtf,
                    data_text      = data_text
                };
                context.data.AddObject(d);
                context.SaveChanges();
            }
            sw.Stop();
            var saveSeconds = sw.ElapsedMilliseconds / 1000;

            // Timing details are only appended for documents that took more than 3 seconds in total.
            var totalSeconds = loadSeconds + textSeconds + saveSeconds;
            var details      = String.Empty;
            if (totalSeconds > 3)
            {
                details += "all:" + totalSeconds + "\t\tload:" + loadSeconds + "\t\ttext:" + textSeconds + "\t\tLENGTH:" + fileSize + "(" + doc_info + ")";
            }
            log("doc_id =" + doc_id + "\t" + details);
        }
Пример #3
0
 /// <summary>
 /// Deprecated way of adding a new object to the "data" entity set; forwards to the base
 /// <c>AddObject</c> call. Consider adding through the associated ObjectSet&lt;T&gt; property instead.
 /// </summary>
 /// <param name="data">The entity to add to the "data" entity set.</param>
 public void AddTodata(data data)
 {
     // Name of the entity set the object is registered with.
     const string entitySetName = "data";

     base.AddObject(entitySetName, data);
 }