コード例 #1
0
        /// <summary>
        /// Builds a <see cref="BIDVObject"/> from the crawled page described by
        /// <paramref name="propertyBag"/>, queues it on the data context unless
        /// <c>IsDuplicate</c> reports its URL as already known, and then saves pending changes.
        /// </summary>
        /// <param name="crawler">
        /// The crawler. Not used by this method.
        /// </param>
        /// <param name="propertyBag">
        /// The property bag holding the crawled page's URL, title, text and response metadata.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when saving changes fails. The message is that of the underlying failure,
        /// which is also available through <see cref="Exception.InnerException"/>.
        /// </exception>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            // NOTE(review): cultureDisplayValue is only consumed by the disabled assignment
            // at the end of the lock block; the lookup is kept so behaviour is unchanged.
            CultureInfo contentCulture = (CultureInfo)propertyBag["LanguageCulture"].Value;
            string cultureDisplayValue = "N/A";
            if (!contentCulture.IsNull())
            {
                cultureDisplayValue = contentCulture.DisplayName;
            }

            TextExtraction t = new TextExtraction();

            // NOTE(review): locking on 'this' is kept because other code may synchronise on
            // the same instance; consider a private lock object once that is confirmed.
            lock (this)
            {
                BIDVObject item = new BIDVObject();
                item.OriginalUrl = propertyBag.Step.Uri.ToString();

                if (!IsDuplicate(item.OriginalUrl))
                {
                    item.Title = propertyBag.Title;
                    item.StatusDescription = propertyBag.StatusDescription;
                    item.ResponseUri = propertyBag.ResponseUri.ToString();
                    item.Text = propertyBag.Text;
                    item.Depth = propertyBag.Step.Depth;
                    item.LastModified = propertyBag.LastModified;
                    item.OriginalReferrerUrl = propertyBag.OriginalReferrerUrl.ToString();
                    item.Server = propertyBag.Server;
                    item.ContentEncoding = propertyBag.ContentEncoding;
                    item.ContentType = propertyBag.ContentType;

                    // The summary is the page text between the two marker phrases, with line
                    // breaks replaced by spaces before extraction.
                    string description = t.GetBetween2Words("Chi tiết văn bản", "Xem toàn màn hình", item.Text.Replace("\r", "  ").Replace("\n", "  "));
                    item.Summary = t.RemoveWhiteSpace(description);

                    // Extract the issue date from the summary and split it once into its parts.
                    // NOTE(review): day/month/year are not used afterwards, but parsing throws
                    // on a malformed date and so prevents the item from being queued; confirm
                    // whether that validation is intentional before removing it.
                    string strNgayPhatHanh = t.GetBetween2Words("Ngày phát hành", "Số đi", item.Summary);
                    strNgayPhatHanh = strNgayPhatHanh.Replace(' ', '/').Remove(0, ("Ngày phát hành").Length);
                    string[] dateParts = strNgayPhatHanh.Split('/');
                    int day = int.Parse(dateParts[1]);
                    int month = int.Parse(dateParts[2]);
                    int year = int.Parse(dateParts[3]);

                    // Drop the full page text before persisting; the summary has already been taken from it.
                    item.Text = null;
                    item.IsToEmail = false;

                    db.AddToBIDVObjects(item);

                    //item.Length = propertyBag.Text.IsNull() ? 0 : propertyBag.Text.Length;
                    //item.CultureDisplayValue = cultureDisplayValue;
                }
            }

            try
            {
                db.SaveChanges();
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so its type, stack trace and
                // provider-specific details are not lost when rethrowing.
                throw new Exception(ex.Message, ex);
            }
        }
コード例 #2
0
ファイル: DumperStep.cs プロジェクト: w3bprof/ttscrawler
        /// <summary>
        /// Handles a crawled page. The document-extraction logic of this step is currently
        /// disabled, so the method only reads the page culture, creates an unused
        /// <see cref="BIDVObject"/> under the instance lock and saves pending changes on the
        /// data context, writing any save error to the console.
        /// </summary>
        /// <param name="crawler">
        /// The crawler. Not used by this method.
        /// </param>
        /// <param name="propertyBag">
        /// The property bag; only its "LanguageCulture" entry is read.
        /// </param>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            CultureInfo pageCulture = (CultureInfo)propertyBag["LanguageCulture"].Value;
            string cultureDisplayValue = pageCulture.IsNull() ? "N/A" : pageCulture.DisplayName;

            TextExtraction extractor = new TextExtraction();

            lock (this)
            {
                BIDVObject record = new BIDVObject();

                // Disabled logic, summarised: for URLs starting with
                // "http://bidvportal.vn/eDocman" the step stored the title and the text between
                // "Chi tiết văn bản" and "Nội dung văn bản", read the issue date found between
                // "Ngày phát hành" and "Số đi" (spaces replaced by '/', parsed as day/month/year)
                // and the subject between "Trích yếu" and "Độ khẩn", and contained an
                // (itself disabled) add of the item when that date was today's date.
            }

            try
            {
                db.SaveChanges();
            }
            catch (Exception saveError)
            {
                // Failures are reported on the console only; nothing is rethrown.
                Console.WriteLine("=====================================================");
                Console.WriteLine(saveError.Message);
            }
        }
コード例 #3
0
 /// <summary>
 /// Factory method: creates a new BIDVObject with its Id property set.
 /// </summary>
 /// <param name="id">Initial value of the Id property.</param>
 /// <returns>The newly created BIDVObject.</returns>
 public static BIDVObject CreateBIDVObject(global::System.Int32 id)
 {
     return new BIDVObject { Id = id };
 }
コード例 #4
0
 /// <summary>
 /// Deprecated method for adding a new object to the BIDVObjects EntitySet. Consider using the .Add method of the associated ObjectSet&lt;T&gt; property instead.
 /// </summary>
 /// <param name="bIDVObject">The object to add to the BIDVObjects entity set.</param>
 public void AddToBIDVObjects(BIDVObject bIDVObject)
 {
     // Name of the entity set the object is added to.
     const string entitySetName = "BIDVObjects";
     base.AddObject(entitySetName, bIDVObject);
 }