/// <summary>
/// Records the page described by <paramref name="propertyBag"/> as a new
/// <c>BIDVObject</c> through <c>db</c>, unless <c>IsDuplicate</c> reports that its
/// URL is already known, and then saves the pending changes.
/// </summary>
/// <remarks>
/// The summary is the text found between the markers "Chi tiết văn bản" and
/// "Xem toàn màn hình" (line breaks flattened to spaces). The full page text is
/// cleared before the object is stored.
/// </remarks>
/// <param name="crawler">
/// The crawler. Not used by this method.
/// </param>
/// <param name="propertyBag">
/// The property bag holding the downloaded page's URL, title, text and response metadata.
/// </param>
/// <exception cref="Exception">
/// Thrown when <c>db.SaveChanges()</c> fails. The message is the original failure's
/// message and the original exception is available as <c>InnerException</c>.
/// </exception>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    TextExtraction extractor = new TextExtraction();

    // NOTE(review): lock (this) lets outside code contend on the same monitor; a private
    // lock object would be safer, but it needs a field declared outside this method.
    lock (this)
    {
        BIDVObject item = new BIDVObject();
        item.OriginalUrl = propertyBag.Step.Uri.ToString();

        if (!IsDuplicate(item.OriginalUrl))
        {
            item.Title = propertyBag.Title;
            item.StatusDescription = propertyBag.StatusDescription;
            item.ResponseUri = propertyBag.ResponseUri.ToString();
            item.Text = propertyBag.Text;
            item.Depth = propertyBag.Step.Depth;
            item.LastModified = propertyBag.LastModified;
            item.OriginalReferrerUrl = propertyBag.OriginalReferrerUrl.ToString();
            item.Server = propertyBag.Server;
            item.ContentEncoding = propertyBag.ContentEncoding;
            item.ContentType = propertyBag.ContentType;

            // Flatten line breaks so the marker search works on a single line of text.
            string flattenedText = item.Text.Replace("\r", " ").Replace("\n", " ");
            string description = extractor.GetBetween2Words("Chi tiết văn bản", "Xem toàn màn hình", flattenedText);
            item.Summary = extractor.RemoveWhiteSpace(description);

            // Spaces become '/' first; the label keeps its length, so Remove() still strips
            // exactly the "Ngày phát hành" prefix and leaves the '/'-separated remainder.
            string issueDate = extractor.GetBetween2Words("Ngày phát hành", "Số đi", item.Summary);
            issueDate = issueDate.Replace(' ', '/').Remove(0, "Ngày phát hành".Length);

            // Split once instead of once per component. The parsed values are not used
            // afterwards; the only visible effect is that a missing or non-numeric date
            // throws before the object is added.
            // NOTE(review): presumably left over from an earlier date filter - confirm
            // whether this implicit validation is still wanted.
            string[] separators = { "/" };
            string[] dateParts = issueDate.Split(separators, StringSplitOptions.None);
            int day = int.Parse(dateParts[1]);
            int month = int.Parse(dateParts[2]);
            int year = int.Parse(dateParts[3]);

            // The full page text is not persisted; only the extracted summary is kept.
            item.Text = null;
            item.IsToEmail = false;

            db.AddToBIDVObjects(item);
        }

        // Saving happens inside the lock so it cannot overlap with another thread's
        // IsDuplicate / AddToBIDVObjects call on the same db instance.
        // NOTE(review): assumes db is a shared Entity Framework ObjectContext, which is
        // not thread-safe - confirm.
        try
        {
            db.SaveChanges();
        }
        catch (Exception ex)
        {
            // Same exception type and message as before, but the original failure
            // (type and stack trace) is preserved as the inner exception.
            throw new Exception(ex.Message, ex);
        }
    }
}
/// <summary>
/// Reads the "LanguageCulture" entry of <paramref name="propertyBag"/>, creates a
/// <c>BIDVObject</c> that is not stored anywhere, and then calls <c>db.SaveChanges()</c>.
/// </summary>
/// <remarks>
/// The logic that used to populate and queue the object is disabled and kept below as
/// a comment. A failure in <c>db.SaveChanges()</c> is written to the console and is not
/// rethrown.
/// </remarks>
/// <param name="crawler">
/// The crawler. Not used by this method.
/// </param>
/// <param name="propertyBag">
/// The property bag of the downloaded page.
/// </param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    CultureInfo culture = (CultureInfo)propertyBag["LanguageCulture"].Value;

    // Display name of the page culture, or "N/A" when none is available.
    // Currently only referenced by the disabled code below.
    string cultureName = culture.IsNull() ? "N/A" : culture.DisplayName;

    TextExtraction extractor = new TextExtraction();

    lock (this)
    {
        BIDVObject record = new BIDVObject();

        // Disabled extraction logic, kept for reference:
        //record.Id = Guid.NewGuid();
        //record.Url = propertyBag.Step.Uri.ToString();
        //if (record.Url.StartsWith("http://bidvportal.vn/eDocman"))
        //{
        //    record.Title = propertyBag.Title;
        //    string strTarget = extractor.GetMinimumString(propertyBag.Text, "Chi tiết văn bản", "Nội dung văn bản");
        //    record.Text = strTarget;
        //    string strNgayPhatHanh = extractor.GetMinimumString(strTarget, "Ngày phát hành", "Số đi");
        //    record.NgayPhatHanh = strNgayPhatHanh.Replace(' ', '/');
        //    string strSubject = extractor.GetMinimumString(strTarget, "Trích yếu", "Độ khẩn");
        //    //record.Subject = strSubject;
        //    //record.ContentEncoding = propertyBag.ContentEncoding;
        //    //record.ContentType = propertyBag.ContentType;
        //    //record.Length = propertyBag.Text.IsNull() ? 0 : propertyBag.Text.Length;
        //    record.Depth = propertyBag.Step.Depth;
        //    //record.CultureDisplayValue = cultureName;
        //    string[] strSplit = { "/" };
        //    int day = int.Parse(record.NgayPhatHanh.Split(strSplit, StringSplitOptions.None)[0]);
        //    int month = int.Parse(record.NgayPhatHanh.Split(strSplit, StringSplitOptions.None)[1]);
        //    int year = int.Parse(record.NgayPhatHanh.Split(strSplit, StringSplitOptions.None)[2]);
        //    if ((DateTime.Now.Year == year) && (DateTime.Now.Month == month) && (DateTime.Now.Day == day))
        //    {
        //        //db.AddToItems(record);
        //    }
        //}
    }

    try
    {
        db.SaveChanges();
    }
    catch (Exception failure)
    {
        // Report the failure on the console and carry on; nothing is rethrown.
        Console.WriteLine("=====================================================");
        Console.WriteLine(failure.Message);
    }
}
/// <summary>
/// Factory method: builds a new <c>BIDVObject</c> whose <c>Id</c> is set to the given value.
/// </summary>
/// <param name="id">Value assigned to the new object's Id property.</param>
/// <returns>The newly created object.</returns>
public static BIDVObject CreateBIDVObject(global::System.Int32 id)
{
    return new BIDVObject { Id = id };
}
/// <summary>
/// Deprecated way of adding a new object to the BIDVObjects entity set; prefer the
/// .Add method of the associated ObjectSet&lt;T&gt; property.
/// </summary>
/// <param name="bIDVObject">The object to add to the BIDVObjects entity set.</param>
public void AddToBIDVObjects(BIDVObject bIDVObject)
{
    // Name of the entity set the object is added to.
    const string entitySetName = "BIDVObjects";

    base.AddObject(entitySetName, bIDVObject);
}