/// <summary>
/// Crawls the site entered in <c>txtSite</c>, writes the crawled URLs to
/// <c>sitemap.txt</c> and the bad URLs to <c>badurls.txt</c> inside the
/// <c>CrawlMapper</c> folder under local application data, then opens that folder.
/// </summary>
/// <remarks>
/// The Go button is re-enabled even when the crawl or the file output throws,
/// so a failed run does not leave the UI stuck; the exception still propagates.
/// </remarks>
public void RunCrawler()
{
    try
    {
        Crawler crawler = new Crawler();
        crawler.SetStatus = new Crawler.StatusOutput(AddOutput);

        // Seed the crawl with the single root URL typed by the user.
        URLInfo info = new URLInfo();
        info.URL = txtSite.Text;
        info.RootURL = txtSite.Text;
        info.LastCrawled = DateTime.MinValue;

        List<URLInfo> urlsToCrawl = new List<URLInfo>();
        urlsToCrawl.Add(info);

        List<URLInfo> badUrls;
        List<URLInfo> urls = crawler.Crawl(urlsToCrawl, out badUrls, Int32.MaxValue);
        AddOutput(urls.Count + " URLs found. " + badUrls.Count + " bad URLs.\r\n");

        // System.IO types are fully qualified so they cannot clash with UI types named Path.
        string outputFolder = System.IO.Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
            "CrawlMapper");

        // StreamWriter does not create missing directories; this is a no-op when the folder exists.
        System.IO.Directory.CreateDirectory(outputFolder);

        WriteUrlList(System.IO.Path.Combine(outputFolder, "sitemap.txt"), urls);
        WriteUrlList(System.IO.Path.Combine(outputFolder, "badurls.txt"), badUrls);

        SetStatus("Crawl completed. Check sitemap.txt in the app data folder.\r\n");
        Process.Start(outputFolder);
    }
    finally
    {
        EnableGoButton();
    }
}

/// <summary>
/// Writes the URL of every entry to <paramref name="path"/>, one per line,
/// overwriting any existing file. The writer is disposed even if a write fails.
/// </summary>
private static void WriteUrlList(string path, IEnumerable<URLInfo> urls)
{
    using (TextWriter writer = new StreamWriter(path))
    {
        foreach (URLInfo entry in urls)
        {
            writer.WriteLine(entry.URL);
        }
    }
}
/// <summary>
/// On the first (non-postback) request, reads the "ID" query-string value and,
/// unless it equals <see cref="int.MinValue"/>, checks the "ReadURLRewriter"
/// admin power and fills the form fields from the stored URL entry.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    if (this.Page.IsPostBack)
    {
        return;
    }

    // NOTE(review): int.MinValue looks like RequestHelper's "no value" sentinel - confirm.
    int urlId = RequestHelper.GetQueryString<int>("ID");
    if (urlId == int.MinValue)
    {
        return;
    }

    base.CheckAdminPower("ReadURLRewriter", PowerCheckType.Single);

    URLInfo rewrite = URLClass.ReadURL(urlId);
    this.RealPath.Text = rewrite.RealPath;
    this.VitualPath.Text = rewrite.VitualPath;

    // The flag is shown as the text "1" (in effect) or "0" (not in effect).
    int effectFlag = rewrite.IsEffect ? 1 : 0;
    this.IsEffect.Text = effectFlag.ToString();
}
/// <summary>
/// Resolves a short code to its long URL and redirects to it. When the code is
/// unknown, has no long URL, or the lookup throws, redirects to the URL stored
/// in the "redirecturl" application setting instead.
/// </summary>
/// <param name="shortCode">Short code identifying the stored URL.</param>
/// <returns>A redirect result to the long URL or to the configured fallback.</returns>
public ActionResult RedirectToURL(string shortCode)
{
    try
    {
        ProcessURLController processURL = new ProcessURLController();
        URLInfo info = processURL.GetURLInfo(shortCode, true);

        if (info != null && !string.IsNullOrEmpty(info.LongURL))
        {
            return Redirect(info.LongURL);
        }
    }
    catch (Exception ex)
    {
        // Lookup failures are logged and then handled like an unknown short code.
        ServiceLocator.ErrorLogger("NEW ERROR LINE : URLShortner/RedirectToURL | " + DateTime.Now + " | Error: " + ex.ToString());
    }

    // Single fallback path shared by "not found" and "lookup failed".
    return Redirect(ConfigurationManager.AppSettings["redirecturl"]);
}
/// <summary>
/// Handles a click on a favorite by requesting a browser tab for the favorite's URL.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">The favorite that was clicked; its <c>URL</c> is the tab target.</param>
private void tab_OnFavClick(object sender, URLInfo e)
{
    var target = e.URL;
    this.Get(TabTypes.Browser, target);
}
/// <summary>
/// Builds the tracking request URL for the given action from the supplied event
/// values and SDK parameters.
/// </summary>
/// <param name="action">Action name; "conversion" additionally appends the event name or id.</param>
/// <param name="eventName">Event name, or a numeric event id (sent as <c>site_event_id</c>).</param>
/// <param name="revenue">Revenue amount; appended only when greater than zero.</param>
/// <param name="currency">Currency code; appended only when not null.</param>
/// <param name="refId">Advertiser reference id; appended only when not null.</param>
/// <param name="eventItems">Event items serialized as JSON; appended only when not null.</param>
/// <param name="parameters">SDK parameters supplying device, user and event values.</param>
/// <returns>A <c>URLInfo</c> holding the constructed URL with a retry count of zero.</returns>
internal static URLInfo BuildUrl(string action, string eventName, double revenue, string currency, string refId, List<MATEventItem> eventItems, Parameters parameters)
{
    // Numbers are sent to a server, so they must not depend on the device locale
    // (e.g. "1,5" instead of "1.5" under comma-decimal cultures).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    StringBuilder url = new StringBuilder("https://");
    url.Append(Uri.EscapeUriString(parameters.advertiserId)).Append(".");
    if (parameters.DebugMode)
    {
        url.Append("debug.");
    }
    url.Append(DOMAIN).Append("/serve?sdk=").Append(SDK_TYPE).Append("&ver=").Append(SDK_VERSION);
    url.Append("&advertiser_id=").Append(Uri.EscapeUriString(parameters.advertiserId));
    url.Append("&mat_id=").Append(Uri.EscapeUriString(parameters.MatId));
    url.Append("&action=").Append(Uri.EscapeUriString(action));
    url.Append("&package_name=").Append(Uri.EscapeUriString(parameters.PackageName));
    url.Append("&transaction_id=").Append(Guid.NewGuid().ToString().ToUpper());

    // Append event name/ID for events: a purely numeric name is sent as an id.
    if (action.Equals("conversion"))
    {
        long value;
        if (long.TryParse(eventName, out value))
        {
            url.Append("&site_event_id=").Append(eventName);
        }
        else
        {
            url.Append("&site_event_name=").Append(Uri.EscapeUriString(eventName));
        }
    }

    // Append open log ids
    if (parameters.OpenLogId != null)
    {
        url.Append("&open_log_id=").Append(Uri.EscapeUriString(parameters.OpenLogId));
    }
    if (parameters.LastOpenLogId != null)
    {
        url.Append("&last_open_log_id=").Append(Uri.EscapeUriString(parameters.LastOpenLogId));
    }
    if (parameters.AllowDuplicates)
    {
        url.Append("&skip_dup=1");
    }
    if (parameters.DebugMode)
    {
        url.Append("&debug=1");
    }
    if (parameters.ExistingUser)
    {
        url.Append("&existing_user=1");
    }

    // Construct the data params that are appended after "&data=".
    StringBuilder data = new StringBuilder();

    // Add UNIX timestamp as system date
    long timestamp = UnixTimestamp();
    data.Append("&system_date=").Append(timestamp.ToString());

    // App name/version are skipped when unset (Uri.EscapeUriString throws on null).
    if (parameters.AppName != null)
    {
        data.Append("&app_name=").Append(Uri.EscapeUriString(parameters.AppName));
    }
    if (parameters.AppVersion != null)
    {
        data.Append("&app_version=").Append(Uri.EscapeUriString(parameters.AppVersion));
    }
    data.Append("&device_brand=").Append(Uri.EscapeUriString(parameters.DeviceBrand));
    data.Append("&device_model=").Append(Uri.EscapeUriString(parameters.DeviceModel));
    data.Append("&device_carrier=").Append(Uri.EscapeUriString(parameters.DeviceCarrier));
    data.Append("&device_screen_size=").Append(Uri.EscapeUriString(parameters.DeviceScreenSize));
    data.Append("&os_id=").Append(Uri.EscapeUriString(parameters.DeviceUniqueId));
    data.Append("&os_version=").Append(Uri.EscapeUriString(parameters.OSVersion));

    if (parameters.AppAdTracking)
    {
        data.Append("&app_ad_tracking=1");
    }
    else
    {
        data.Append("&app_ad_tracking=0");
    }

    if (revenue > 0)
    {
        data.Append("&revenue=").Append(Uri.EscapeUriString(revenue.ToString(invariant)));
    }
    if (currency != null)
    {
        // The key was previously the mis-encoded literal "¤cy_code=".
        data.Append("&currency_code=").Append(Uri.EscapeUriString(currency));
    }
    if (refId != null)
    {
        data.Append("&advertiser_ref_id=").Append(Uri.EscapeUriString(refId));
    }

    if (parameters.Age > 0)
    {
        data.Append("&age=").Append(Uri.EscapeUriString(parameters.Age.ToString()));
    }
    data.Append("&altitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Altitude, invariant)));
    if (parameters.EventContentType != null)
    {
        data.Append("&content_type=").Append(Uri.EscapeUriString(parameters.EventContentType));
    }
    if (parameters.EventContentId != null)
    {
        data.Append("&content_id=").Append(Uri.EscapeUriString(parameters.EventContentId));
    }
    data.Append("&level=").Append(parameters.EventLevel.ToString());
    data.Append("&quantity=").Append(parameters.EventQuantity.ToString());
    if (parameters.EventSearchString != null)
    {
        data.Append("&search_string=").Append(Uri.EscapeUriString(parameters.EventSearchString));
    }
    data.Append("&rating=").Append(Uri.EscapeUriString(Convert.ToString(parameters.EventRating, invariant)));
    if (parameters.EventDate1 != null)
    {
        data.Append("&date1=").Append(Uri.EscapeUriString(UnixTimestamp(parameters.EventDate1).ToString()));
    }
    if (parameters.EventDate2 != null)
    {
        data.Append("&date2=").Append(Uri.EscapeUriString(UnixTimestamp(parameters.EventDate2).ToString()));
    }
    if (parameters.EventAttribute1 != null)
    {
        data.Append("&attribute_sub1=").Append(Uri.EscapeUriString(parameters.EventAttribute1));
    }
    if (parameters.EventAttribute2 != null)
    {
        data.Append("&attribute_sub2=").Append(Uri.EscapeUriString(parameters.EventAttribute2));
    }
    if (parameters.EventAttribute3 != null)
    {
        data.Append("&attribute_sub3=").Append(Uri.EscapeUriString(parameters.EventAttribute3));
    }
    if (parameters.EventAttribute4 != null)
    {
        data.Append("&attribute_sub4=").Append(Uri.EscapeUriString(parameters.EventAttribute4));
    }
    if (parameters.EventAttribute5 != null)
    {
        data.Append("&attribute_sub5=").Append(Uri.EscapeUriString(parameters.EventAttribute5));
    }
    if (parameters.FacebookUserId != null)
    {
        data.Append("&facebook_user_id=").Append(Uri.EscapeUriString(parameters.FacebookUserId));
    }
    if (parameters.Gender != MATGender.NONE)
    {
        data.Append("&gender=").Append(Uri.EscapeUriString(parameters.Gender.ToString()));
    }
    if (parameters.GoogleUserId != null)
    {
        data.Append("&google_user_id=").Append(Uri.EscapeUriString(parameters.GoogleUserId));
    }
    if (parameters.IsPayingUser != false)
    {
        data.Append("&is_paying_user=1");
    }
    if (parameters.Latitude != 0)
    {
        data.Append("&latitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Latitude, invariant)));
    }
    if (parameters.Longitude != 0)
    {
        data.Append("&longitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Longitude, invariant)));
    }
    if (parameters.TwitterUserId != null)
    {
        data.Append("&twitter_user_id=").Append(Uri.EscapeUriString(parameters.TwitterUserId));
    }
    if (parameters.UserEmail != null)
    {
        data.Append("&user_email=").Append(Uri.EscapeUriString(parameters.UserEmail));
    }
    if (parameters.UserId != null)
    {
        data.Append("&user_id=").Append(Uri.EscapeUriString(parameters.UserId));
    }
    if (parameters.UserName != null)
    {
        data.Append("&user_name=").Append(Uri.EscapeUriString(parameters.UserName));
    }
    if (parameters.WindowsAid != null)
    {
        data.Append("&windows_aid=").Append(Uri.EscapeUriString(parameters.WindowsAid));
    }

    // Add event items to url as json string
    if (eventItems != null)
    {
        data.Append("&site_event_items=").Append(Uri.EscapeUriString(JsonConvert.SerializeObject(eventItems)));
    }

    if (parameters.matRequest != null)
    {
        parameters.matRequest.ParamsToBeEncrypted(data.ToString());
    }

    // Encryption of the data string (encrypt, then hex-encode the bytes) is
    // currently disabled; the parameters are appended in clear text.
    // NOTE(review): Uri.EscapeUriString leaves '&' and '=' unescaped inside values;
    // consider Uri.EscapeDataString once the server-side contract is confirmed.
    url.Append("&data=").Append(data.ToString());

    url.Append("&response_format=json");

    if (parameters.matRequest != null)
    {
        parameters.matRequest.ConstructedRequest(url.ToString());
    }

    URLInfo newURL = new URLInfo();
    newURL.url = url.ToString();
    newURL.retryAttempt = 0;
    return newURL;
}
/// <summary>
/// Builds the tracking request URL for the given action from the supplied event
/// values and SDK parameters. Phone number, e-mail and user name are sent only
/// as their MD5/SHA-1/SHA-256 values taken from <paramref name="parameters"/>.
/// </summary>
/// <param name="action">Action name; "conversion" additionally appends the event name or id.</param>
/// <param name="eventName">Event name, or a numeric event id (sent as <c>site_event_id</c>).</param>
/// <param name="revenue">Revenue amount; appended only when greater than zero.</param>
/// <param name="currency">Currency code; appended only when not null.</param>
/// <param name="refId">Advertiser reference id; appended only when not null.</param>
/// <param name="eventItems">Event items serialized as JSON; appended only when not null.</param>
/// <param name="parameters">SDK parameters supplying device, user and event values.</param>
/// <returns>A <c>URLInfo</c> holding the constructed URL with a retry count of zero.</returns>
internal static URLInfo BuildUrl(string action, string eventName, double revenue, string currency, string refId, List<MATEventItem> eventItems, MATParameters parameters)
{
    // Numbers are sent to a server, so they must not depend on the device locale
    // (e.g. "1,5" instead of "1.5" under comma-decimal cultures).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    StringBuilder url = new StringBuilder("https://");
    url.Append(Uri.EscapeUriString(parameters.advertiserId)).Append(".");
    if (parameters.DebugMode)
    {
        url.Append("debug.");
    }
    url.Append(MATConstants.DOMAIN).Append("/serve?sdk=").Append(MATConstants.SDK_TYPE).Append("&ver=").Append(MATConstants.SDK_VERSION);
    url.Append("&advertiser_id=").Append(Uri.EscapeUriString(parameters.advertiserId));
    url.Append("&mat_id=").Append(Uri.EscapeUriString(parameters.MatId));
    url.Append("&action=").Append(Uri.EscapeUriString(action));
    url.Append("&package_name=").Append(Uri.EscapeUriString(parameters.PackageName));
    url.Append("&transaction_id=").Append(Guid.NewGuid().ToString().ToUpper());

    // Append event name/ID for events: a purely numeric name is sent as an id.
    if (action.Equals("conversion"))
    {
        long value;
        if (long.TryParse(eventName, out value))
        {
            url.Append("&site_event_id=").Append(eventName);
        }
        else
        {
            url.Append("&site_event_name=").Append(Uri.EscapeUriString(eventName));
        }
    }

    // Append open log ids
    if (parameters.OpenLogId != null)
    {
        url.Append("&open_log_id=").Append(Uri.EscapeUriString(parameters.OpenLogId));
    }
    if (parameters.LastOpenLogId != null)
    {
        url.Append("&last_open_log_id=").Append(Uri.EscapeUriString(parameters.LastOpenLogId));
    }
    if (parameters.AllowDuplicates)
    {
        url.Append("&skip_dup=1");
    }
    if (parameters.DebugMode)
    {
        url.Append("&debug=1");
    }
    if (parameters.ExistingUser)
    {
        url.Append("&existing_user=1");
    }

    // Construct the data params that are appended after "&data=".
    StringBuilder data = new StringBuilder();

    // Add UNIX timestamp as system date
    long timestamp = UnixTimestamp();
    data.Append("&system_date=").Append(timestamp.ToString());

    if (parameters.AppName != null)
    {
        data.Append("&app_name=").Append(Uri.EscapeUriString(parameters.AppName));
    }
    if (parameters.AppVersion != null)
    {
        data.Append("&app_version=").Append(Uri.EscapeUriString(parameters.AppVersion));
    }
    data.Append("&device_brand=").Append(Uri.EscapeUriString(parameters.DeviceBrand));
    data.Append("&device_model=").Append(Uri.EscapeUriString(parameters.DeviceModel));
    data.Append("&device_carrier=").Append(Uri.EscapeUriString(parameters.DeviceCarrier));
    data.Append("&device_screen_size=").Append(Uri.EscapeUriString(parameters.DeviceScreenSize));
    data.Append("&os_id=").Append(Uri.EscapeUriString(parameters.DeviceUniqueId));
    data.Append("&os_version=").Append(Uri.EscapeUriString(parameters.OSVersion));

    if (parameters.AppAdTracking)
    {
        data.Append("&app_ad_tracking=1");
    }
    else
    {
        data.Append("&app_ad_tracking=0");
    }

    if (revenue > 0)
    {
        data.Append("&revenue=").Append(Uri.EscapeUriString(revenue.ToString(invariant)));
    }
    if (currency != null)
    {
        // The key was previously the mis-encoded literal "¤cy_code=".
        data.Append("&currency_code=").Append(Uri.EscapeUriString(currency));
    }
    if (refId != null)
    {
        data.Append("&advertiser_ref_id=").Append(Uri.EscapeUriString(refId));
    }

    if (parameters.Age > 0)
    {
        data.Append("&age=").Append(Uri.EscapeUriString(parameters.Age.ToString()));
    }
    data.Append("&altitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Altitude, invariant)));
    if (parameters.EventContentType != null)
    {
        data.Append("&content_type=").Append(Uri.EscapeUriString(parameters.EventContentType));
    }
    if (parameters.EventContentId != null)
    {
        data.Append("&content_id=").Append(Uri.EscapeUriString(parameters.EventContentId));
    }
    data.Append("&level=").Append(parameters.EventLevel.ToString());
    data.Append("&quantity=").Append(parameters.EventQuantity.ToString());
    if (parameters.EventSearchString != null)
    {
        data.Append("&search_string=").Append(Uri.EscapeUriString(parameters.EventSearchString));
    }
    data.Append("&rating=").Append(Uri.EscapeUriString(Convert.ToString(parameters.EventRating, invariant)));
    if (parameters.EventDate1 != null)
    {
        data.Append("&date1=").Append(Uri.EscapeUriString(UnixTimestamp(parameters.EventDate1).ToString()));
    }
    if (parameters.EventDate2 != null)
    {
        data.Append("&date2=").Append(Uri.EscapeUriString(UnixTimestamp(parameters.EventDate2).ToString()));
    }
    if (parameters.EventAttribute1 != null)
    {
        data.Append("&attribute_sub1=").Append(Uri.EscapeUriString(parameters.EventAttribute1));
    }
    if (parameters.EventAttribute2 != null)
    {
        data.Append("&attribute_sub2=").Append(Uri.EscapeUriString(parameters.EventAttribute2));
    }
    if (parameters.EventAttribute3 != null)
    {
        data.Append("&attribute_sub3=").Append(Uri.EscapeUriString(parameters.EventAttribute3));
    }
    if (parameters.EventAttribute4 != null)
    {
        data.Append("&attribute_sub4=").Append(Uri.EscapeUriString(parameters.EventAttribute4));
    }
    if (parameters.EventAttribute5 != null)
    {
        data.Append("&attribute_sub5=").Append(Uri.EscapeUriString(parameters.EventAttribute5));
    }
    if (parameters.FacebookUserId != null)
    {
        data.Append("&facebook_user_id=").Append(Uri.EscapeUriString(parameters.FacebookUserId));
    }
    if (parameters.Gender != MATGender.NONE)
    {
        data.Append("&gender=").Append(Uri.EscapeUriString(parameters.Gender.ToString()));
    }
    if (parameters.GoogleUserId != null)
    {
        data.Append("&google_user_id=").Append(Uri.EscapeUriString(parameters.GoogleUserId));
    }
    if (parameters.IsPayingUser != false)
    {
        data.Append("&is_paying_user=1");
    }
    if (parameters.Latitude != 0)
    {
        data.Append("&latitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Latitude, invariant)));
    }
    if (parameters.Longitude != 0)
    {
        data.Append("&longitude=").Append(Uri.EscapeUriString(Convert.ToString(parameters.Longitude, invariant)));
    }
    if (parameters.PhoneNumber != null)
    {
        data.Append("&user_phone_md5=").Append(Uri.EscapeUriString(parameters.PhoneNumberMd5));
        data.Append("&user_phone_sha1=").Append(Uri.EscapeUriString(parameters.PhoneNumberSha1));
        data.Append("&user_phone_sha256=").Append(Uri.EscapeUriString(parameters.PhoneNumberSha256));
    }
    if (parameters.TwitterUserId != null)
    {
        data.Append("&twitter_user_id=").Append(Uri.EscapeUriString(parameters.TwitterUserId));
    }
    if (parameters.UserEmail != null)
    {
        data.Append("&user_email_md5=").Append(Uri.EscapeUriString(parameters.UserEmailMd5));
        data.Append("&user_email_sha1=").Append(Uri.EscapeUriString(parameters.UserEmailSha1));
        data.Append("&user_email_sha256=").Append(Uri.EscapeUriString(parameters.UserEmailSha256));
    }
    if (parameters.UserId != null)
    {
        data.Append("&user_id=").Append(Uri.EscapeUriString(parameters.UserId));
    }
    if (parameters.UserName != null)
    {
        data.Append("&user_name_md5=").Append(Uri.EscapeUriString(parameters.UserNameMd5));
        data.Append("&user_name_sha1=").Append(Uri.EscapeUriString(parameters.UserNameSha1));
        data.Append("&user_name_sha256=").Append(Uri.EscapeUriString(parameters.UserNameSha256));
    }
    if (parameters.WindowsAid != null)
    {
        data.Append("&windows_aid=").Append(Uri.EscapeUriString(parameters.WindowsAid));
    }

    // Add event items to url as json string
    if (eventItems != null)
    {
        data.Append("&site_event_items=").Append(Uri.EscapeUriString(JsonConvert.SerializeObject(eventItems)));
    }

    if (parameters.matRequest != null)
    {
        parameters.matRequest.ParamsToBeEncrypted(data.ToString());
    }

    // Encryption of the data string (encrypt, then hex-encode the bytes) is
    // currently disabled; the parameters are appended in clear text.
    // NOTE(review): Uri.EscapeUriString leaves '&' and '=' unescaped inside values;
    // consider Uri.EscapeDataString once the server-side contract is confirmed.
    url.Append("&data=").Append(data.ToString());

    url.Append("&response_format=json");

    if (parameters.matRequest != null)
    {
        parameters.matRequest.ConstructedRequest(url.ToString());
    }

    URLInfo newURL = new URLInfo();
    newURL.url = url.ToString();
    newURL.retryAttempt = 0;
    return newURL;
}
/// <summary>
/// Builds a new <c>AirPricingSolution</c> by copying, field by field, the given
/// <c>AirService</c> pricing solution together with the itinerary segments it references.
/// </summary>
/// <remarks>
/// Only the first entry of <c>lowestPrice.AirPricingInfo</c> is converted (the loop ends
/// with an unconditional <c>break</c>). A fixed manual fare adjustment of +40 on the base
/// amount and a single "ADT" passenger are attached, both using a hard-coded traveler reference.
/// NOTE(review): the unqualified target types presumably come from the UniversalService
/// proxy namespace - confirm against the file's using directives.
/// </remarks>
/// <param name="lowestPrice">Pricing solution to copy.</param>
/// <param name="airItinerary">Itinerary whose segments are matched by key against the solution's segment references.</param>
/// <returns>The newly built pricing solution with its segments and pricing info set.</returns>
private AirPricingSolution AddAirPriceSolution(AirService.AirPricingSolution lowestPrice, AirService.AirItinerary airItinerary)
{
    // Top-level price fields are copied as-is.
    AirPricingSolution finalPrice = new AirPricingSolution()
    {
        Key = lowestPrice.Key,
        TotalPrice = lowestPrice.TotalPrice,
        BasePrice = lowestPrice.BasePrice,
        ApproximateTotalPrice = lowestPrice.ApproximateTotalPrice,
        ApproximateBasePrice = lowestPrice.ApproximateBasePrice,
        Taxes = lowestPrice.Taxes,
        ApproximateTaxes = lowestPrice.ApproximateTaxes,
        QuoteDate = lowestPrice.QuoteDate
    };

    List <typeBaseAirSegment> finalSegments = new List <typeBaseAirSegment>();
    List <AirPricingInfo> finalPriceInfo = new List <AirPricingInfo>();

    // For every segment reference, copy the first itinerary segment with the same key.
    foreach (var segmentRef in lowestPrice.AirSegmentRef)
    {
        foreach (var segment in airItinerary.AirSegment)
        {
            if (segmentRef.Key.CompareTo(segment.Key) == 0)
            {
                typeBaseAirSegment univSeg = new typeBaseAirSegment()
                {
                    ArrivalTime = segment.ArrivalTime,
                    AvailabilityDisplayType = segment.AvailabilityDisplayType,
                    AvailabilitySource = segment.AvailabilitySource,
                    Carrier = segment.Carrier,
                    ChangeOfPlane = segment.ChangeOfPlane,
                    ClassOfService = segment.ClassOfService,
                    DepartureTime = segment.DepartureTime,
                    Destination = segment.Destination,
                    Distance = segment.Distance,
                    Equipment = segment.Equipment,
                    FlightNumber = segment.FlightNumber,
                    FlightTime = segment.FlightTime,
                    Group = segment.Group,
                    Key = segment.Key,
                    LinkAvailability = segment.LinkAvailability,
                    OptionalServicesIndicator = segment.OptionalServicesIndicator,
                    Origin = segment.Origin,
                    ParticipantLevel = segment.ParticipantLevel,
                    PolledAvailabilityOption = segment.PolledAvailabilityOption,
                    ProviderCode = segment.ProviderCode,
                    TravelTime = segment.TravelTime,
                };

                finalSegments.Add(univSeg);
                break;
            }
        }
    }

    foreach (var priceInfo in lowestPrice.AirPricingInfo)
    {
        AirPricingInfo info = new AirPricingInfo()
        {
            ApproximateBasePrice = priceInfo.ApproximateBasePrice,
            ApproximateTotalPrice = priceInfo.ApproximateTotalPrice,
            BasePrice = priceInfo.BasePrice,
            ETicketability = (typeEticketability)priceInfo.ETicketability,
            IncludesVAT = priceInfo.IncludesVAT,
            Key = priceInfo.Key,
            LatestTicketingTime = priceInfo.LatestTicketingTime,
            //PlatingCarrier = priceInfo.PlatingCarrier, Optional but might be required for some carriers
            PricingMethod = (typePricingMethod)priceInfo.PricingMethod,
            ProviderCode = priceInfo.ProviderCode,
            Taxes = priceInfo.Taxes,
            TotalPrice = priceInfo.TotalPrice,
        };

        List <FareInfo> fareInfoList = new List <FareInfo>();
        List <ManualFareAdjustment> fareAdjustmentList = new List <ManualFareAdjustment>();

        // Fixed +40 amount adjustment on the base fare, tied to a hard-coded passenger reference.
        ManualFareAdjustment adjustment = new ManualFareAdjustment()
        {
            AdjustmentType = typeAdjustmentType.Amount,
            AppliedOn = typeAdjustmentTarget.Base,
            Value = +40,
            PassengerRef = "gr8AVWGCR064r57Jt0+8bA=="
        };

        fareAdjustmentList.Add(adjustment);

        info.AirPricingModifiers = new AirPricingModifiers()
        {
            ManualFareAdjustment = fareAdjustmentList.ToArray()
        };

        // Fare infos, including their rule key and (when present) endorsements.
        foreach (var fareInfo in priceInfo.FareInfo)
        {
            FareInfo createInfo = new FareInfo()
            {
                Amount = fareInfo.Amount,
                DepartureDate = fareInfo.DepartureDate,
                Destination = fareInfo.Destination,
                EffectiveDate = fareInfo.EffectiveDate,
                FareBasis = fareInfo.FareBasis,
                Key = fareInfo.Key,
                NotValidAfter = fareInfo.NotValidAfter,
                NotValidBefore = fareInfo.NotValidBefore,
                Origin = fareInfo.Origin,
                PassengerTypeCode = fareInfo.PassengerTypeCode,
                PrivateFare = (typePrivateFare)fareInfo.PrivateFare,
                PseudoCityCode = fareInfo.PseudoCityCode,
                FareRuleKey = new FareRuleKey()
                {
                    FareInfoRef = fareInfo.FareRuleKey.FareInfoRef,
                    ProviderCode = fareInfo.FareRuleKey.ProviderCode,
                    Value = fareInfo.FareRuleKey.Value
                }
            };

            List <Endorsement> endorsementList = new List <Endorsement>();

            // Endorsement stays unset on the copy when the source has none.
            if (fareInfo.Endorsement != null)
            {
                foreach (var endorse in fareInfo.Endorsement)
                {
                    Endorsement createEndorse = new Endorsement()
                    {
                        Value = endorse.Value
                    };

                    endorsementList.Add(createEndorse);
                }

                createInfo.Endorsement = endorsementList.ToArray();
            }

            fareInfoList.Add(createInfo);
        }

        info.FareInfo = fareInfoList.ToArray();

        // Booking infos.
        List <BookingInfo> bInfo = new List <BookingInfo>();

        foreach (var bookingInfo in priceInfo.BookingInfo)
        {
            BookingInfo createBookingInfo = new BookingInfo()
            {
                BookingCode = bookingInfo.BookingCode,
                CabinClass = bookingInfo.CabinClass,
                FareInfoRef = bookingInfo.FareInfoRef,
                SegmentRef = bookingInfo.SegmentRef
            };

            bInfo.Add(createBookingInfo);
        }

        info.BookingInfo = bInfo.ToArray();

        // Tax infos: only amount, category and key are copied.
        List <typeTaxInfo> taxes = new List <typeTaxInfo>();

        foreach (var tax in priceInfo.TaxInfo)
        {
            typeTaxInfo createTaxInfo = new typeTaxInfo()
            {
                Amount = tax.Amount,
                Category = tax.Category,
                Key = tax.Key
            };

            taxes.Add(createTaxInfo);
        }

        info.TaxInfo = taxes.ToArray();

        info.FareCalc = priceInfo.FareCalc;

        List <PassengerType> passengers = new List <PassengerType>();

        // Disabled: copying the passenger types from the source pricing info.
        /*foreach (var pass in priceInfo.PassengerType)
        {
            PassengerType passType = new PassengerType()
            {
                BookingTravelerRef = pass.BookingTravelerRef,
                Code = pass.BookingTravelerRef
            };

            passengers.Add(passType);
        }*/

        // Instead, a single adult ("ADT") passenger with a hard-coded traveler reference is used.
        passengers.Add(new PassengerType()
        {
            Code = "ADT",
            BookingTravelerRef = "gr8AVWGCR064r57Jt0+8bA=="
        });

        info.PassengerType = passengers.ToArray();

        // Only the penalty amount is carried over.
        if (priceInfo.ChangePenalty != null)
        {
            info.ChangePenalty = new typeFarePenalty()
            {
                Amount = priceInfo.ChangePenalty.Amount
            };
        }

        // Checked-baggage allowances, with their URL, text and per-bag details.
        List <BaggageAllowanceInfo> baggageInfoList = new List <BaggageAllowanceInfo>();

        foreach (var allowanceInfo in priceInfo.BaggageAllowances.BaggageAllowanceInfo)
        {
            BaggageAllowanceInfo createBaggageInfo = new BaggageAllowanceInfo()
            {
                Carrier = allowanceInfo.Carrier,
                Destination = allowanceInfo.Destination,
                Origin = allowanceInfo.Origin,
                TravelerType = allowanceInfo.TravelerType
            };

            List <URLInfo> urlInfoList = new List <URLInfo>();

            foreach (var url in allowanceInfo.URLInfo)
            {
                URLInfo urlInfo = new URLInfo()
                {
                    URL = url.URL
                };

                urlInfoList.Add(urlInfo);
            }

            createBaggageInfo.URLInfo = urlInfoList.ToArray();

            List <ConsoleApplication1.UniversalService.TextInfo> textInfoList = new List <UniversalService.TextInfo>();

            foreach (var textData in allowanceInfo.TextInfo)
            {
                ConsoleApplication1.UniversalService.TextInfo textInfo = new UniversalService.TextInfo()
                {
                    Text = textData.Text
                };

                textInfoList.Add(textInfo);
            }

            createBaggageInfo.TextInfo = textInfoList.ToArray();

            List <BagDetails> bagDetailsList = new List <BagDetails>();

            foreach (var bagDetails in allowanceInfo.BagDetails)
            {
                BagDetails bag = new BagDetails()
                {
                    ApplicableBags = bagDetails.ApplicableBags,
                    ApproximateBasePrice = bagDetails.ApproximateBasePrice,
                    ApproximateTotalPrice = bagDetails.ApproximateTotalPrice,
                    BasePrice = bagDetails.BasePrice,
                    TotalPrice = bagDetails.TotalPrice,
                };

                // Each restriction keeps only its text entries.
                List <BaggageRestriction> bagRestictionList = new List <BaggageRestriction>();

                foreach (var restriction in bagDetails.BaggageRestriction)
                {
                    List <ConsoleApplication1.UniversalService.TextInfo> restrictionTextList = new List <UniversalService.TextInfo>();

                    foreach (var bagResTextInfo in restriction.TextInfo)
                    {
                        ConsoleApplication1.UniversalService.TextInfo resText = new UniversalService.TextInfo()
                        {
                            Text = bagResTextInfo.Text
                        };

                        restrictionTextList.Add(resText);
                    }

                    BaggageRestriction bagRes = new BaggageRestriction()
                    {
                        TextInfo = restrictionTextList.ToArray()
                    };

                    bagRestictionList.Add(bagRes);
                }

                bag.BaggageRestriction = bagRestictionList.ToArray();

                bagDetailsList.Add(bag);
            }

            createBaggageInfo.BagDetails = bagDetailsList.ToArray();

            baggageInfoList.Add(createBaggageInfo);
        }

        // Carry-on allowances: only carrier, destination and origin are copied.
        List <CarryOnAllowanceInfo> carryOnAllowanceList = new List <CarryOnAllowanceInfo>();

        foreach (var carryOnBag in priceInfo.BaggageAllowances.CarryOnAllowanceInfo)
        {
            CarryOnAllowanceInfo carryOn = new CarryOnAllowanceInfo()
            {
                Carrier = carryOnBag.Carrier,
                Destination = carryOnBag.Destination,
                Origin = carryOnBag.Origin
            };

            carryOnAllowanceList.Add(carryOn);
        }

        // Embargo info is optional; an empty array is set when the source has none.
        List <BaseBaggageAllowanceInfo> embargoInfoList = new List <BaseBaggageAllowanceInfo>();

        if (priceInfo.BaggageAllowances.EmbargoInfo != null)
        {
            foreach (AirService.BaseBaggageAllowanceInfo embargoInfo in priceInfo.BaggageAllowances.EmbargoInfo)
            {
                BaseBaggageAllowanceInfo embargo = new BaseBaggageAllowanceInfo()
                {
                    Carrier = embargoInfo.Carrier,
                    Destination = embargoInfo.Destination,
                    Origin = embargoInfo.Origin
                };

                List <URLInfo> embargoURLList = new List <URLInfo>();

                foreach (var embargoUrl in embargoInfo.URLInfo)
                {
                    URLInfo url = new URLInfo()
                    {
                        URL = embargoUrl.URL,
                        Text = embargoUrl.Text
                    };

                    embargoURLList.Add(url);
                }

                embargo.URLInfo = embargoURLList.ToArray();

                List <ConsoleApplication1.UniversalService.TextInfo> embargoTextList = new List <UniversalService.TextInfo>();

                foreach (var embargoText in embargoInfo.TextInfo)
                {
                    ConsoleApplication1.UniversalService.TextInfo text = new UniversalService.TextInfo()
                    {
                        Text = embargoText.Text
                    };

                    embargoTextList.Add(text);
                }

                embargo.TextInfo = embargoTextList.ToArray();

                embargoInfoList.Add(embargo);
            }
        }

        info.BaggageAllowances = new BaggageAllowances()
        {
            BaggageAllowanceInfo = baggageInfoList.ToArray(),
            CarryOnAllowanceInfo = carryOnAllowanceList.ToArray(),
            EmbargoInfo = embargoInfoList.ToArray()
        };

        finalPriceInfo.Add(info);

        // Stop after the first pricing info; any further entries are ignored.
        break;
    }

    finalPrice.AirPricingInfo = finalPriceInfo.ToArray();
    finalPrice.AirSegment = finalSegments.ToArray();

    return(finalPrice);
}
// Extensions of resources that are never crawled (compared case-insensitively).
private static readonly string[] SkippedExtensions =
{
    ".png", ".gif", ".jpg", ".jpeg", ".zip", ".exe", ".xml", ".mp3", ".pdf",
    ".bmp", ".ico", ".css", ".h2drumkit", ".tar", ".gz", ".msi", ".dll",
    ".ogg", ".wav", ".mp4", ".psd", ".xls", ".js", ".py", ".xcf", ".tgz", ".dat"
};

/// <summary>
/// Extracts the anchor targets from an HTML page and returns those that belong
/// to the crawled site as <c>URLInfo</c> entries, in order of first appearance
/// and without duplicates.
/// </summary>
/// <remarks>
/// Fragments (<c>#...</c>) are dropped, relative links are prefixed with
/// <paramref name="siteRootUrl"/>, absolute links are kept only when they contain
/// the site root (or its host part for <c>http://www.</c> links), and
/// <c>mailto:</c> / <c>javascript:</c> links, the site-root alias and links to
/// the extensions in <c>SkippedExtensions</c> are ignored.
/// </remarks>
/// <param name="siteRootUrl">Root URL of the site being crawled, without a trailing slash.</param>
/// <param name="htmlSource">HTML source of the page; null or empty yields an empty list.</param>
/// <returns>The URLs to crawl, each with <c>LastCrawled</c> set to <see cref="DateTime.MinValue"/>.</returns>
public List<URLInfo> ParseUrls(String siteRootUrl, string htmlSource)
{
    List<URLInfo> urlinfos = new List<URLInfo>();
    if (string.IsNullOrEmpty(htmlSource))
    {
        return urlinfos;
    }

    // The list keeps discovery order; the set makes the duplicate check O(1).
    List<String> urls = new List<String>();
    HashSet<String> seen = new HashSet<String>();

    Regex regex = new Regex(@"<a[^>]*?href\s*=\s*[""']?([^'"">]+)['""]", RegexOptions.IgnoreCase);
    foreach (Match match in regex.Matches(htmlSource))
    {
        // Group 1 is the href value; group 0 (the whole tag) is not a URL.
        string url = match.Groups[1].Value;

        // Non-navigable schemes must not be turned into site-relative URLs.
        if (url.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase) ||
            url.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        int fragmentIndex = url.IndexOf('#');
        if (fragmentIndex >= 0)
        {
            url = url.Substring(0, fragmentIndex);
        }

        string candidate = null;
        if (url.StartsWith("/", StringComparison.Ordinal))
        {
            candidate = siteRootUrl + url;
        }
        else if (url.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                 url.StartsWith("https:", StringComparison.OrdinalIgnoreCase))
        {
            if (url.Contains(siteRootUrl))
            {
                candidate = url;
            }
            else if (url.StartsWith("http://www.", StringComparison.OrdinalIgnoreCase) &&
                     siteRootUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                     url.Contains(siteRootUrl.Substring(7)))
            {
                // Same site addressed through its "www." host name.
                candidate = url;
            }
            // Anything else is outside the crawl area.
        }
        else
        {
            candidate = siteRootUrl + "/" + url;
        }

        if (candidate != null && seen.Add(candidate))
        {
            urls.Add(candidate);
        }
    }

    foreach (String str in urls)
    {
        if (HasSkippedExtension(str))
        {
            continue;
        }

        // Skip slash as an alias for the site root.
        if (str == siteRootUrl + "/")
        {
            continue;
        }

        URLInfo newurl = new URLInfo();
        newurl.RootURL = siteRootUrl;
        newurl.URL = str.Replace(" ", "%20");
        newurl.LastCrawled = DateTime.MinValue;
        urlinfos.Add(newurl);
    }

    return urlinfos;
}

// Returns true when the URL ends with one of the extensions that are not crawled.
private static bool HasSkippedExtension(string url)
{
    foreach (string extension in SkippedExtensions)
    {
        if (url.EndsWith(extension, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Sets <c>PageTitle</c> to the trimmed text of the first <c>&lt;title&gt;</c>
/// element found in <c>PageData</c>, or to an empty string when the page has none.
/// </summary>
/// <param name="url">Page to inspect; nothing happens when it or its <c>PageData</c> is null.</param>
public void FindPageTitle(URLInfo url)
{
    if (url == null || url.PageData == null)
    {
        return;
    }

    // Lazy match so the title ends at the first closing tag, not at the last
    // "</title>" in the document.
    Regex regex = new Regex(@"<title\b[^>]*>([\s\S]*?)</title>", RegexOptions.IgnoreCase);
    Match match = regex.Match(url.PageData);

    // Regex.Match never returns null; Success tells whether a title was found.
    url.PageTitle = match.Success ? match.Groups[1].Value.Trim() : string.Empty;
}
/// <summary>
/// Sets <c>PageDescription</c> to the trimmed text of the last
/// <c>content=...</c> attribute found in <c>PageData</c>; leaves it untouched
/// when none is found.
/// </summary>
/// <remarks>
/// NOTE(review): the stored text is the whole attribute (including the
/// <c>content=</c> prefix) and any <c>content</c> attribute matches, not only
/// the one of a description meta tag - confirm what consumers expect before
/// narrowing this.
/// </remarks>
/// <param name="url">Page to inspect; nothing happens when it or its <c>PageData</c> is null.</param>
public void FindPageDescription(URLInfo url)
{
    if (url == null || url.PageData == null)
    {
        return;
    }

    // "\s*" before '=' allows optional whitespace; it was previously the literal "s*".
    Regex regex = new Regex(@"(?<name>content)\s*=\s*(\""(?<value>[^\""]*)\""|'(?<value>[^']*)'|(?<value>[^\""'<> ]+)\s*)+", RegexOptions.IgnoreCase);
    MatchCollection matches = regex.Matches(url.PageData);

    // Later matches overwrite earlier ones, so only the last one matters.
    if (matches.Count > 0)
    {
        url.PageDescription = matches[matches.Count - 1].Value.Trim();
    }
}
/// <summary>
/// Downloads an XML sitemap and returns one entry per <c>&lt;url&gt;</c> node
/// that has both a location and a parsable last-modified date, with the page
/// title fetched for each.
/// </summary>
/// <remarks>
/// When <c>LimitCount</c> is greater than zero, at most that many entries are
/// returned; zero (or a negative value) means no limit.
/// </remarks>
/// <param name="xmlSiteMapURL">Address of the sitemap to download.</param>
/// <returns>The collected entries; empty when the sitemap has no usable nodes.</returns>
public async Task<URLInfo[]> Load(string xmlSiteMapURL)
{
    List<URLInfo> result = new List<URLInfo>();

    string xmlBody = await _SiteMapWebDownloader.DownloadSiteMapBody(xmlSiteMapURL);

    var xmlDoc = new XmlDocument();
    // The sitemap is downloaded content: never resolve external entities/DTDs.
    xmlDoc.XmlResolver = null;
    xmlDoc.LoadXml(xmlBody);

    XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
    nsmgr.AddNamespace("x", "http://www.sitemaps.org/schemas/sitemap/0.9");

    // Select all <url> nodes.
    XmlNodeList urlNodes = xmlDoc.SelectNodes("//x:url", nsmgr);

    foreach (XmlNode node in urlNodes)
    {
        var page = new URLInfo();

        var locNode = node.SelectSingleNode("x:loc", nsmgr);
        if (locNode != null)
        {
            page.URL = locNode.InnerText;
        }

        var dateNode = node.SelectSingleNode("x:lastmod", nsmgr);
        if (dateNode != null)
        {
            DateTime dt;
            string dateVal = dateNode.InnerText;
            if (!string.IsNullOrWhiteSpace(dateVal) && DateTime.TryParse(dateVal, out dt))
            {
                page.LastModified = dt;
            }
        }

        // Entries without a location or without a usable date are ignored.
        if (page.URL == null || page.LastModified == DateTime.MinValue)
        {
            continue;
        }

        // Fetch the page title.
        string title = await _GetTitle(page.URL);
        if (title != null)
        {
            page.Title = title;
        }

        result.Add(page);

        // The limit counts collected entries, so skipped nodes can no longer
        // cause the limit to be missed. A limit of 0 never triggers, and the
        // whole sitemap is processed.
        if (LimitCount > 0 && result.Count >= LimitCount)
        {
            break;
        }
    }

    return result.ToArray();
}
/// <summary>
/// Builds a new <c>AirPricingSolution</c> from an <c>AirService.AirPricingSolution</c>:
/// copies the price totals, the itinerary segments referenced by the solution, and the
/// first pricing info with its fares, bookings, taxes and baggage allowances.
/// </summary>
/// <param name="lowestPrice">Pricing solution to copy from.</param>
/// <param name="airItinerary">Itinerary whose segments are matched by key against the solution's segment references.</param>
/// <returns>The newly built pricing solution.</returns>
/// <remarks>
/// Only the first entry of <c>lowestPrice.AirPricingInfo</c> is converted (the loop exits after one pass).
/// The passenger is hard-coded as a single "ADT" traveller and a fixed manual fare adjustment of +40
/// on the base amount is attached. Optional child collections that are null are treated as empty.
/// </remarks>
private AirPricingSolution AddAirPriceSolution(AirService.AirPricingSolution lowestPrice, AirService.AirItinerary airItinerary)
{
    AirPricingSolution finalPrice = new AirPricingSolution()
    {
        Key = lowestPrice.Key,
        TotalPrice = lowestPrice.TotalPrice,
        BasePrice = lowestPrice.BasePrice,
        ApproximateTotalPrice = lowestPrice.ApproximateTotalPrice,
        ApproximateBasePrice = lowestPrice.ApproximateBasePrice,
        Taxes = lowestPrice.Taxes,
        ApproximateTaxes = lowestPrice.ApproximateTaxes,
        QuoteDate = lowestPrice.QuoteDate
    };

    List<typeBaseAirSegment> finalSegments = new List<typeBaseAirSegment>();
    List<AirPricingInfo> finalPriceInfo = new List<AirPricingInfo>();

    // Copy each itinerary segment that the pricing solution references by key.
    foreach (var segmentRef in lowestPrice.AirSegmentRef)
    {
        foreach (var segment in airItinerary.AirSegment)
        {
            if (segmentRef.Key.CompareTo(segment.Key) == 0)
            {
                typeBaseAirSegment univSeg = new typeBaseAirSegment()
                {
                    ArrivalTime = segment.ArrivalTime,
                    AvailabilityDisplayType = segment.AvailabilityDisplayType,
                    AvailabilitySource = segment.AvailabilitySource,
                    Carrier = segment.Carrier,
                    ChangeOfPlane = segment.ChangeOfPlane,
                    ClassOfService = segment.ClassOfService,
                    DepartureTime = segment.DepartureTime,
                    Destination = segment.Destination,
                    Distance = segment.Distance,
                    Equipment = segment.Equipment,
                    FlightNumber = segment.FlightNumber,
                    FlightTime = segment.FlightTime,
                    Group = segment.Group,
                    Key = segment.Key,
                    LinkAvailability = segment.LinkAvailability,
                    OptionalServicesIndicator = segment.OptionalServicesIndicator,
                    Origin = segment.Origin,
                    ParticipantLevel = segment.ParticipantLevel,
                    PolledAvailabilityOption = segment.PolledAvailabilityOption,
                    ProviderCode = segment.ProviderCode,
                    TravelTime = segment.TravelTime,
                };
                finalSegments.Add(univSeg);
                break; // first segment with a matching key wins
            }
        }
    }

    foreach (var priceInfo in lowestPrice.AirPricingInfo)
    {
        AirPricingInfo info = new AirPricingInfo()
        {
            ApproximateBasePrice = priceInfo.ApproximateBasePrice,
            ApproximateTotalPrice = priceInfo.ApproximateTotalPrice,
            BasePrice = priceInfo.BasePrice,
            ETicketability = (typeEticketability)priceInfo.ETicketability,
            IncludesVAT = priceInfo.IncludesVAT,
            Key = priceInfo.Key,
            LatestTicketingTime = priceInfo.LatestTicketingTime,
            // PlatingCarrier is not copied: optional, but might be required for some carriers.
            PricingMethod = (typePricingMethod)priceInfo.PricingMethod,
            ProviderCode = priceInfo.ProviderCode,
            Taxes = priceInfo.Taxes,
            TotalPrice = priceInfo.TotalPrice,
        };

        // Fixed manual adjustment of +40 on the base amount for the hard-coded traveller reference.
        List<ManualFareAdjustment> fareAdjustmentList = new List<ManualFareAdjustment>();
        ManualFareAdjustment adjustment = new ManualFareAdjustment()
        {
            AdjustmentType = typeAdjustmentType.Amount,
            AppliedOn = typeAdjustmentTarget.Base,
            Value = +40,
            PassengerRef = "gr8AVWGCR064r57Jt0+8bA=="
        };
        fareAdjustmentList.Add(adjustment);
        info.AirPricingModifiers = new AirPricingModifiers()
        {
            ManualFareAdjustment = fareAdjustmentList.ToArray()
        };

        // Fare infos.
        List<FareInfo> fareInfoList = new List<FareInfo>();
        if (priceInfo.FareInfo != null)
        {
            foreach (var fareInfo in priceInfo.FareInfo)
            {
                FareInfo createInfo = new FareInfo()
                {
                    Amount = fareInfo.Amount,
                    DepartureDate = fareInfo.DepartureDate,
                    Destination = fareInfo.Destination,
                    EffectiveDate = fareInfo.EffectiveDate,
                    FareBasis = fareInfo.FareBasis,
                    Key = fareInfo.Key,
                    NotValidAfter = fareInfo.NotValidAfter,
                    NotValidBefore = fareInfo.NotValidBefore,
                    Origin = fareInfo.Origin,
                    PassengerTypeCode = fareInfo.PassengerTypeCode,
                    PrivateFare = (typePrivateFare)fareInfo.PrivateFare,
                    PseudoCityCode = fareInfo.PseudoCityCode
                };

                // The rule key is copied only when the source carries one.
                if (fareInfo.FareRuleKey != null)
                {
                    createInfo.FareRuleKey = new FareRuleKey()
                    {
                        FareInfoRef = fareInfo.FareRuleKey.FareInfoRef,
                        ProviderCode = fareInfo.FareRuleKey.ProviderCode,
                        Value = fareInfo.FareRuleKey.Value
                    };
                }

                if (fareInfo.Endorsement != null)
                {
                    List<Endorsement> endorsementList = new List<Endorsement>();
                    foreach (var endorse in fareInfo.Endorsement)
                    {
                        Endorsement createEndorse = new Endorsement()
                        {
                            Value = endorse.Value
                        };
                        endorsementList.Add(createEndorse);
                    }
                    createInfo.Endorsement = endorsementList.ToArray();
                }

                fareInfoList.Add(createInfo);
            }
        }
        info.FareInfo = fareInfoList.ToArray();

        // Booking infos.
        List<BookingInfo> bInfo = new List<BookingInfo>();
        if (priceInfo.BookingInfo != null)
        {
            foreach (var bookingInfo in priceInfo.BookingInfo)
            {
                BookingInfo createBookingInfo = new BookingInfo()
                {
                    BookingCode = bookingInfo.BookingCode,
                    CabinClass = bookingInfo.CabinClass,
                    FareInfoRef = bookingInfo.FareInfoRef,
                    SegmentRef = bookingInfo.SegmentRef
                };
                bInfo.Add(createBookingInfo);
            }
        }
        info.BookingInfo = bInfo.ToArray();

        // Tax infos.
        List<typeTaxInfo> taxes = new List<typeTaxInfo>();
        if (priceInfo.TaxInfo != null)
        {
            foreach (var tax in priceInfo.TaxInfo)
            {
                typeTaxInfo createTaxInfo = new typeTaxInfo()
                {
                    Amount = tax.Amount,
                    Category = tax.Category,
                    Key = tax.Key
                };
                taxes.Add(createTaxInfo);
            }
        }
        info.TaxInfo = taxes.ToArray();

        info.FareCalc = priceInfo.FareCalc;

        // The source passenger types are not copied; a single adult traveller is hard-coded.
        List<PassengerType> passengers = new List<PassengerType>();
        passengers.Add(new PassengerType()
        {
            Code = "ADT",
            BookingTravelerRef = "gr8AVWGCR064r57Jt0+8bA=="
        });
        info.PassengerType = passengers.ToArray();

        if (priceInfo.ChangePenalty != null)
        {
            info.ChangePenalty = new typeFarePenalty()
            {
                Amount = priceInfo.ChangePenalty.Amount
            };
        }

        // Baggage allowances are converted only when the source pricing info carries them.
        if (priceInfo.BaggageAllowances != null)
        {
            List<BaggageAllowanceInfo> baggageInfoList = new List<BaggageAllowanceInfo>();
            if (priceInfo.BaggageAllowances.BaggageAllowanceInfo != null)
            {
                foreach (var allowanceInfo in priceInfo.BaggageAllowances.BaggageAllowanceInfo)
                {
                    BaggageAllowanceInfo createBaggageInfo = new BaggageAllowanceInfo()
                    {
                        Carrier = allowanceInfo.Carrier,
                        Destination = allowanceInfo.Destination,
                        Origin = allowanceInfo.Origin,
                        TravelerType = allowanceInfo.TravelerType
                    };

                    // NOTE(review): only URL is copied here, while the embargo section below
                    // also copies Text - confirm whether that difference is intended.
                    List<URLInfo> urlInfoList = new List<URLInfo>();
                    if (allowanceInfo.URLInfo != null)
                    {
                        foreach (var allowanceUrl in allowanceInfo.URLInfo)
                        {
                            URLInfo urlInfo = new URLInfo()
                            {
                                URL = allowanceUrl.URL
                            };
                            urlInfoList.Add(urlInfo);
                        }
                    }
                    createBaggageInfo.URLInfo = urlInfoList.ToArray();

                    List<ConsoleApplication1.UniversalService.TextInfo> textInfoList = new List<UniversalService.TextInfo>();
                    if (allowanceInfo.TextInfo != null)
                    {
                        foreach (var textData in allowanceInfo.TextInfo)
                        {
                            ConsoleApplication1.UniversalService.TextInfo textInfo = new UniversalService.TextInfo()
                            {
                                Text = textData.Text
                            };
                            textInfoList.Add(textInfo);
                        }
                    }
                    createBaggageInfo.TextInfo = textInfoList.ToArray();

                    List<BagDetails> bagDetailsList = new List<BagDetails>();
                    if (allowanceInfo.BagDetails != null)
                    {
                        foreach (var bagDetails in allowanceInfo.BagDetails)
                        {
                            BagDetails bag = new BagDetails()
                            {
                                ApplicableBags = bagDetails.ApplicableBags,
                                ApproximateBasePrice = bagDetails.ApproximateBasePrice,
                                ApproximateTotalPrice = bagDetails.ApproximateTotalPrice,
                                BasePrice = bagDetails.BasePrice,
                                TotalPrice = bagDetails.TotalPrice,
                            };

                            List<BaggageRestriction> bagRestictionList = new List<BaggageRestriction>();
                            if (bagDetails.BaggageRestriction != null)
                            {
                                foreach (var restriction in bagDetails.BaggageRestriction)
                                {
                                    List<ConsoleApplication1.UniversalService.TextInfo> restrictionTextList = new List<UniversalService.TextInfo>();
                                    if (restriction.TextInfo != null)
                                    {
                                        foreach (var bagResTextInfo in restriction.TextInfo)
                                        {
                                            ConsoleApplication1.UniversalService.TextInfo resText = new UniversalService.TextInfo()
                                            {
                                                Text = bagResTextInfo.Text
                                            };
                                            restrictionTextList.Add(resText);
                                        }
                                    }

                                    BaggageRestriction bagRes = new BaggageRestriction()
                                    {
                                        TextInfo = restrictionTextList.ToArray()
                                    };
                                    bagRestictionList.Add(bagRes);
                                }
                            }
                            bag.BaggageRestriction = bagRestictionList.ToArray();

                            bagDetailsList.Add(bag);
                        }
                    }
                    createBaggageInfo.BagDetails = bagDetailsList.ToArray();

                    baggageInfoList.Add(createBaggageInfo);
                }
            }

            List<CarryOnAllowanceInfo> carryOnAllowanceList = new List<CarryOnAllowanceInfo>();
            if (priceInfo.BaggageAllowances.CarryOnAllowanceInfo != null)
            {
                foreach (var carryOnBag in priceInfo.BaggageAllowances.CarryOnAllowanceInfo)
                {
                    CarryOnAllowanceInfo carryOn = new CarryOnAllowanceInfo()
                    {
                        Carrier = carryOnBag.Carrier,
                        Destination = carryOnBag.Destination,
                        Origin = carryOnBag.Origin
                    };
                    carryOnAllowanceList.Add(carryOn);
                }
            }

            List<BaseBaggageAllowanceInfo> embargoInfoList = new List<BaseBaggageAllowanceInfo>();
            if (priceInfo.BaggageAllowances.EmbargoInfo != null)
            {
                foreach (AirService.BaseBaggageAllowanceInfo embargoInfo in priceInfo.BaggageAllowances.EmbargoInfo)
                {
                    BaseBaggageAllowanceInfo embargo = new BaseBaggageAllowanceInfo()
                    {
                        Carrier = embargoInfo.Carrier,
                        Destination = embargoInfo.Destination,
                        Origin = embargoInfo.Origin
                    };

                    List<URLInfo> embargoURLList = new List<URLInfo>();
                    if (embargoInfo.URLInfo != null)
                    {
                        foreach (var embargoUrl in embargoInfo.URLInfo)
                        {
                            URLInfo embargoUrlInfo = new URLInfo()
                            {
                                URL = embargoUrl.URL,
                                Text = embargoUrl.Text
                            };
                            embargoURLList.Add(embargoUrlInfo);
                        }
                    }
                    embargo.URLInfo = embargoURLList.ToArray();

                    List<ConsoleApplication1.UniversalService.TextInfo> embargoTextList = new List<UniversalService.TextInfo>();
                    if (embargoInfo.TextInfo != null)
                    {
                        foreach (var embargoText in embargoInfo.TextInfo)
                        {
                            ConsoleApplication1.UniversalService.TextInfo text = new UniversalService.TextInfo()
                            {
                                Text = embargoText.Text
                            };
                            embargoTextList.Add(text);
                        }
                    }
                    embargo.TextInfo = embargoTextList.ToArray();

                    embargoInfoList.Add(embargo);
                }
            }

            info.BaggageAllowances = new BaggageAllowances()
            {
                BaggageAllowanceInfo = baggageInfoList.ToArray(),
                CarryOnAllowanceInfo = carryOnAllowanceList.ToArray(),
                EmbargoInfo = embargoInfoList.ToArray()
            };
        }

        finalPriceInfo.Add(info);

        // Only the first pricing info is converted.
        break;
    }

    finalPrice.AirPricingInfo = finalPriceInfo.ToArray();
    finalPrice.AirSegment = finalSegments.ToArray();
    return finalPrice;
}
/// <summary>
/// Command-line entry point: crawls the site named by the first argument and writes the
/// discovered URLs and the un-crawlable URLs to text files.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: site to crawl (required);
/// <c>args[1]</c>: output file name (optional, defaults to "sitemap.txt");
/// <c>args[2]</c>: maximum number of URLs (optional, ignored when it is not a valid integer).
/// </param>
/// <returns>Always 0.</returns>
static int Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine("You need to supply the name of a site to crawl.");
        Console.WriteLine("Usage: CrawlMapper.exe <site>");
        return 0;
    }

    String outputfile = "sitemap.txt";
    if (args.Length > 1)
    {
        outputfile = args[1];
    }

    // Parse into a temporary so that a malformed value keeps the "no limit" default
    // instead of silently becoming 0 (TryParse zeroes its out argument on failure).
    int maxurls = Int32.MaxValue;
    if (args.Length > 2)
    {
        int requested;
        if (Int32.TryParse(args[2], out requested))
        {
            maxurls = requested;
        }
    }

    // NOTE(review): this call used to receive Int32.MaxValue, which discarded the value parsed
    // above. Assumes SetMaxUrls returns the effective limit derived from its argument - confirm.
    maxurls = ReleaseSettings.SetMaxUrls(maxurls);

    List<URLInfo> sitesToCrawl = new List<URLInfo>();
    URLInfo site = new URLInfo();
    site.RootURL = args[0];
    site.URL = args[0];
    site.LastCrawled = DateTime.MinValue;
    sitesToCrawl.Add(site);

    Console.WriteLine("Crawling Site: " + args[0]);

    Crawler crawler = new Crawler();
    List<URLInfo> badUrls;
    List<URLInfo> urls = crawler.Crawl(sitesToCrawl, out badUrls, maxurls);

    // "using" flushes and closes each writer even if a write fails part-way.
    using (TextWriter writer = new StreamWriter(outputfile))
    {
        Console.WriteLine(urls.Count + " URLs found.");
        foreach (URLInfo found in urls)
        {
            writer.WriteLine(found.URL);
        }
        Console.WriteLine("Crawl completed. Check the " + outputfile + " file in the executable directory.");
    }

    using (TextWriter writer = new StreamWriter("badurls.txt"))
    {
        Console.WriteLine(badUrls.Count + " bad URLs.");
        foreach (URLInfo bad in badUrls)
        {
            writer.WriteLine(bad.URL);
        }
        Console.WriteLine("Check the badurls.txt file in the executable directory for any un-crawlable URLs.");
    }

    return 0;
}
/// <summary>
/// Opens the transport for the current <c>url</c> and initialises <c>reader</c> and <c>writer</c>.
/// A script URL starts a child process and talks to its standard input/output; otherwise a TCP
/// connection is opened (optionally through the configured proxy, and wrapped in SSL for SSL URLs).
/// </summary>
/// <param name="method">Not used by this method.</param>
/// <remarks>
/// The process exits with code 1 when fetches are disabled. An <see cref="Exception"/> is thrown
/// when the proxy answers the CONNECT request with a malformed status line or a non-200 status.
/// </remarks>
private void doPreHeader(string method)
{
    if (!(FetchEnable.fetches_are_enabled()))
    {
        Console.Error.Write(
                "ERROR: An attempt was made to hit an external API " +
                "(URL {0}) while such hits were disabled because {1}.\n", url,
                FetchEnable.fetch_disable_reason());
        Environment.Exit(1);
    }

    URLInfo url_info = parseURL(url);

    if (url_info.is_script)
    {
        // Everything after the prefix is a command line whose tokens are separated by ' ' or '@'.
        // Splitting with RemoveEmptyEntries collapses runs of separators. The previous hand-written
        // scan tested "(next != ' ') || (next != '@')", which is always true, and indexed one
        // character past the end of the URL when the URL ended in a separator.
        int command_end = url_info.path_start + url_info.path_length;
        string command_line =
                url.Substring(url_info.host_start, command_end - url_info.host_start);
        string[] tokens = command_line.Split(
                new char[] { ' ', '@' }, StringSplitOptions.RemoveEmptyEntries);

        // First token is the executable; the remaining tokens are re-joined with single spaces.
        string file_name = (tokens.Length > 0) ? tokens[0] : "";
        string arguments = (tokens.Length > 1) ?
                string.Join(" ", tokens, 1, tokens.Length - 1) : "";

        Process child = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = file_name,
                Arguments = arguments,
                UseShellExecute = false,
                RedirectStandardInput = true,
                RedirectStandardOutput = true,
                CreateNoWindow = true
            }
        };
        child.Start();

        reader = new ScriptReader(child);
        writer = new FetchStreamWriter(child.StandardInput.BaseStream, reader);
        doSaveResponsesSetup();
        return;
    }

    if (Environment.GetEnvironmentVariable(
                "GOLDEN_RETRIEVER_SHOW_REAL_URL_HITS") != null)
    {
        Console.Error.Write("Golden Retriever hitting `{0}'.\n", url);
    }

    // When a proxy is configured the socket goes to the proxy, not to the target host.
    string final_hostname = url.Substring(url_info.host_start, url_info.host_length);
    string hostname = (use_proxy ? proxy_server : final_hostname);
    int port_to_use = (use_proxy ? proxy_port : url_info.port);
    NetworkStream stream = new TcpClient(hostname, port_to_use).GetStream();

    if (url_info.is_ssl)
    {
        if (use_proxy)
        {
            // Ask the proxy for a tunnel to the target before starting the SSL handshake.
            HTTPStreamReader proxy_reader = new HTTPStreamReader(stream, 1);
            FetchStreamWriter proxy_writer = new FetchStreamWriter(stream, proxy_reader);
            proxy_writer.write("CONNECT ");
            proxy_writer.write(final_hostname);
            proxy_writer.write(":" + url_info.port);
            proxy_writer.write(" HTTP/1.1\r\n");
            proxy_writer.write(user_agent_key);
            proxy_writer.write(": ");
            proxy_writer.write(
                    (proxy_user_agent != null) ? proxy_user_agent : user_agent);
            proxy_writer.write("\r\n");

            int param_count = proxy_parameters.Count;
            for (int param_num = 0; param_num < param_count; ++param_num)
            {
                proxy_writer.write(proxy_parameters[param_num].key);
                proxy_writer.write(": ");
                proxy_writer.write(proxy_parameters[param_num].value);
                proxy_writer.write("\r\n");
            }
            proxy_writer.write("\r\n");

            // Status line: "<version> <code> <reason...>".
            string first_line = proxy_reader.readLine();
            string[] first_components = first_line.Split(new Char[] { ' ' });
            if (first_components.Length < 3)
            {
                throw new Exception(
                        "Bad first line from proxy server: `" + first_line + "'.");
            }
            if (!(first_components[1].Equals("200")))
            {
                string message = "Proxy returned status code ";
                message += first_components[1];
                message += ":";
                for (int num = 2; num < first_components.Length; ++num)
                {
                    message += " " + first_components[num];
                }
                message += ".";
                throw new Exception(message);
            }

            // Skip the remaining proxy response headers up to the blank line.
            while (true)
            {
                string next_line = proxy_reader.readLine();
                if (next_line.Equals("") || next_line.Equals("\r"))
                {
                    break;
                }
            }
        }

        SslStream ssl_stream = new SslStream(stream);
        ssl_stream.AuthenticateAsClient(final_hostname);
        reader = new HTTPStreamReader(ssl_stream);
        writer = new FetchStreamWriter(ssl_stream, reader);
    }
    else
    {
        reader = new HTTPStreamReader(stream);
        writer = new FetchStreamWriter(stream, reader);
    }

    doSaveResponsesSetup();
}
/// <summary>
/// Splits <paramref name="url"/> into scheme flags, host, port and path, recording the host and
/// path as start/length offsets into the original string.
/// </summary>
/// <param name="url">URL beginning with the http, https or script prefix; must not be null.</param>
/// <returns>
/// The parsed layout. For http/https URLs without an explicit port, <c>port</c> is 80 or 443.
/// An explicit but empty port (":" directly followed by "/" or the end of the string) yields 0.
/// </returns>
/// <remarks>
/// Throws <see cref="Exception"/> when the prefix is not recognised, when the port contains a
/// non-digit, or when the port exceeds 65535.
/// </remarks>
public static URLInfo parseURL(string url)
{
    Debug.Assert(url != null);

    URLInfo result = new URLInfo();
    int follow;

    // Ordinal comparison: URL schemes are identifiers, not culture-dependent text.
    if (url.StartsWith(http_prefix, StringComparison.Ordinal))
    {
        result.is_ssl = false;
        result.is_script = false;
        follow = http_prefix.Length;
    }
    else if (url.StartsWith(https_prefix, StringComparison.Ordinal))
    {
        result.is_ssl = true;
        result.is_script = false;
        follow = https_prefix.Length;
    }
    else if (url.StartsWith(script_prefix, StringComparison.Ordinal))
    {
        result.is_script = true;
        follow = script_prefix.Length;
    }
    else
    {
        throw new Exception("URL prefix not recognized.");
    }

    result.host_start = follow;

    while (true)
    {
        // End of string while still in the host: no port and no path.
        if (url.Length == follow)
        {
            result.host_length = follow - result.host_start;
            if (!result.is_script)
            {
                result.port = (result.is_ssl ? 443 : 80);
            }
            result.path_start = follow;
            result.path_length = 0;
            return (result);
        }

        // Explicit port: decimal digits up to the next '/' or the end of the string.
        if (url[follow] == ':')
        {
            result.host_length = follow - result.host_start;
            ++follow;

            int local_port = 0;
            while ((url.Length > follow) && (url[follow] != '/'))
            {
                char this_char = url[follow];
                int digit;
                if ((this_char >= '0') && (this_char <= '9'))
                {
                    digit = this_char - '0';
                }
                else
                {
                    throw new Exception("Bad URL: non-digit in port.");
                }

                // Reject only when (local_port * 10 + digit) would exceed 65535. The previous test
                // ("local_port >= (65536 - digit) / 10") also rejected valid ports such as 65535.
                if (local_port > ((((1 << 16) - 1) - digit) / 10))
                {
                    throw new Exception("Overflow in port number.");
                }

                local_port = ((local_port * 10) + digit);
                ++follow;
            }

            result.port = local_port;
            break;
        }

        // Start of the path: the host ends here and the default port applies.
        if ((url.Length > follow) && (url[follow] == '/'))
        {
            result.host_length = follow - result.host_start;
            if (!result.is_script)
            {
                result.port = (result.is_ssl ? 443 : 80);
            }
            break;
        }

        ++follow;
    }

    // Whatever remains (possibly nothing) is the path.
    result.path_start = follow;
    result.path_length = url.Length - follow;
    return (result);
}