/// <summary>
/// Attempts to recover the analytics GUID carried inside an encrypted analytics cookie.
/// </summary>
/// <param name="encryptedAnalyticsCookie">Cookie string handed to <c>cookieParser.TryParseCookieString</c>.</param>
/// <param name="analyticsGuid">Receives the parsed GUID on success; <c>null</c> on every failure path.</param>
/// <returns>
/// <c>true</c> only when the cookie parses, decrypts to a non-empty buffer, splits into exactly
/// three '|'-separated fields, the third field starts with "ag=" and the remainder parses as a GUID.
/// </returns>
public bool TryExtractAnalyticsGuid(string encryptedAnalyticsCookie, out Guid? analyticsGuid)
{
    analyticsGuid = null;

    if (!cookieParser.TryParseCookieString(encryptedAnalyticsCookie))
    {
        return false;
    }

    // The buffer and its length are passed by reference and re-read after the call.
    uint decryptedLength = (uint)cookieParser.AnalyticsData.Length;
    bool decrypted = decryptProvider.DecryptString(
        (uint)cookieParser.KeyVersion,
        cookieParser.IV,
        ref cookieParser.AnalyticsData,
        ref decryptedLength);

    if (!decrypted || cookieParser.AnalyticsData == null || decryptedLength == 0)
    {
        // TODO: we have about 1600 rows with KeyVersion 1312998531, while the majority
        // of rows (7 mil) have KeyVersion 1275679797
        return false;
    }

    // The decrypted text should look like:
    // Ver=1.0|ts=1450665191|ag=9dc9229b101b40378c3f3d10f41f1aee
    string plainText = Encoding.ASCII.GetString(cookieParser.AnalyticsData, 0, (int)decryptedLength);
    string[] fields = plainText.Split('|');
    if (fields.Length != 3)
    {
        return false;
    }

    const string guidFieldPrefix = "ag=";
    string guidField = fields[2];
    if (!guidField.StartsWith(guidFieldPrefix, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    analyticsGuid = CommonUtils.ParseGuid(guidField.Substring(guidFieldPrefix.Length));
    return analyticsGuid.HasValue;
}
/// <summary>
/// Turns tab-separated log lines into serialized <c>UserIdCoverageShcema</c> rows that pair the
/// analytics GUID extracted from the analytics cookie with the GUID found in the "mid=" parameter
/// of the UET matching query string.
/// </summary>
/// <param name="data">
/// Input lines. The first non-empty tab-separated field of each line is Base64-decoded and parsed
/// as a <c>UserIdCoveragePair</c>.
/// </param>
/// <returns>
/// The serialized rows for every line that yields both GUIDs (other lines contribute nothing), or
/// <c>null</c> as soon as an empty line or a line with fewer than three non-empty fields is seen.
/// </returns>
public IEnumerable<string> GetData(IEnumerable<string> data)
{
    const string midPrefix = "mid=";
    char[] fieldSeparator = { '\t' };

    List<string> res = new List<string>();
    AnalyticsGuidExtractor analyticsGuidExtractor = new AnalyticsGuidExtractor();
    BondReader<UserIdCoveragePair> reader = new BondReader<UserIdCoveragePair>();

    foreach (var line in data)
    {
        // A malformed line aborts the whole batch with null, discarding rows collected so far.
        // NOTE(review): kept as-is because callers may rely on the null result - confirm intent.
        if (string.IsNullOrEmpty(line))
        {
            return null;
        }

        string[] values = line.Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
        if (values.Length < 3)
        {
            return null;
        }

        byte[] logRecord = Convert.FromBase64String(values[0]);
        UserIdCoveragePair uicPair;
        if (!reader.TryParse(logRecord, out uicPair)
            || uicPair == null
            || String.IsNullOrEmpty(uicPair.AnalyticsCookie)
            || String.IsNullOrEmpty(uicPair.UETMatchingQueryString))
        {
            continue;
        }

        Guid? analyticsGuid;
        if (!analyticsGuidExtractor.TryExtractAnalyticsGuid(uicPair.AnalyticsCookie, out analyticsGuid))
        {
            continue;
        }

        // Query-string keys are machine-readable, so match the prefix ordinally rather than
        // with the current culture's comparison rules.
        var mid = uicPair.UETMatchingQueryString
            .Split('&')
            .FirstOrDefault(s => s.StartsWith(midPrefix, StringComparison.Ordinal));
        if (mid == null)
        {
            continue;
        }

        var uetMatchingGuid = CommonUtils.ParseGuid(mid.Substring(midPrefix.Length));
        if (!uetMatchingGuid.HasValue)
        {
            continue;
        }

        var output = new UserIdCoverageShcema();
        output.UETMatchingGuid = uetMatchingGuid;
        output.AnalyticsGuid = analyticsGuid;
        res.Add(UserIdCoverageShcema.Serialize(output));
    }

    return res;
}
/// <summary>
/// Converts raw UET log lines into serialized <c>UETLogView</c> rows.
/// </summary>
/// <param name="data">
/// Input lines. The text before the first tab of each line is Base64-decoded and parsed as a
/// <c>UETLog</c>.
/// </param>
/// <returns>
/// One entry per input line, in input order: the serialized view, or <c>string.Empty</c> when the
/// line is rejected.
/// </returns>
public IEnumerable<string> GetData(IEnumerable<string> data)
{
    List<string> res = new List<string>();
    var ipDecryptor = new IPAddressDecryptor(keyFileName);
    var reader = new BondReader<UETLog>();
    var tagIdNameMap = new TagIdNameMap();
    var analyticsGuidExtractor = new AnalyticsGuidExtractor();

    foreach (var line in data)
    {
        res.Add(SerializeUetLogLine(line, reader, ipDecryptor, tagIdNameMap, analyticsGuidExtractor));
    }

    return res;
}

/// <summary>
/// Builds the serialized <c>UETLogView</c> for a single input line, or returns
/// <c>string.Empty</c> when the line fails parsing or tag validation.
/// </summary>
private string SerializeUetLogLine(
    string line,
    BondReader<UETLog> reader,
    IPAddressDecryptor ipDecryptor,
    TagIdNameMap tagIdNameMap,
    AnalyticsGuidExtractor analyticsGuidExtractor)
{
    var eqs = new EnumeratedQueryString();
    var uetLogByte = Convert.FromBase64String(line.Split('\t')[0]);

    UETLog log;
    if (!reader.TryParse(uetLogByte, out log))
    {
        return string.Empty;
    }

    if (!eqs.TryParse(log.QueryString))
    {
        return string.Empty;
    }

    UETLogView vSchema = new UETLogView();

    // Prefer the referrer carried in the query string; fall back to the one on the log record.
    vSchema.ReferrerURL = eqs.ReferrerURL;
    if (String.IsNullOrWhiteSpace(vSchema.ReferrerURL))
    {
        vSchema.ReferrerURL = log.ReferrerURL;
    }

    vSchema.TagId = eqs.TagId;
    vSchema.TagName = eqs.TagName;

    // The referrer / client IP / tag checks apply only when there is no app-install click id.
    if (String.IsNullOrWhiteSpace(eqs.AppInstallClickId))
    {
        if (String.IsNullOrWhiteSpace(vSchema.ReferrerURL)
            || log.ClientIP == null
            || (log.ClientIP.EncryptedIP == null && log.ClientIP.EncryptedIPv6 == null)
            || (vSchema.TagId <= 0 && String.IsNullOrWhiteSpace(vSchema.TagName)))
        {
            return string.Empty;
        }

        if (vSchema.TagId <= 0)
        {
            // No usable tag id: resolve it from (tag name, advertiser id) through the map.
            if (!eqs.AdvertiserId.HasValue)
            {
                return string.Empty;
            }

            Dictionary<int, int> customerIdToTagId;
            if (!tagIdNameMap.NameToIdMap.TryGetValue(vSchema.TagName, out customerIdToTagId))
            {
                return string.Empty;
            }

            if (!customerIdToTagId.TryGetValue(eqs.AdvertiserId.Value, out vSchema.TagId))
            {
                return string.Empty;
            }
        }

        if (!CommonUtils.IsNewUETTagId(vSchema.TagId))
        {
            return string.Empty;
        }

        if (String.IsNullOrWhiteSpace(vSchema.TagName)
            && !tagIdNameMap.IdToNameMap.TryGetValue(vSchema.TagId, out vSchema.TagName))
        {
            vSchema.TagName = string.Empty;
        }
    }

    vSchema.ANID = CommonUtils.ParseGuid(log.ANID);
    vSchema.ClientIP = log.ClientIP;
    vSchema.EventDateTime = CommonUtils.FromUtcUnixTimeToTicks(log.EventDateTime);
    vSchema.IsNewMUID = log.IsNewMUID;
    vSchema.LogServerName = log.LogServerName;
    vSchema.MUID = CommonUtils.ParseGuid(log.MUID);
    vSchema.QueryString = log.QueryString;
    vSchema.UserAgent = log.UserAgent;
    vSchema.AppInstallClickId = eqs.AppInstallClickId;
    vSchema.PageLoad = eqs.PageLoad;
    vSchema.PageTitle = eqs.PageTitle;
    vSchema.UETMatchingGuid = eqs.UETMatchingGuid;
    vSchema.Version = eqs.Version;

    vSchema.NavigatedFromURL = eqs.NavigatedFromURL;
    if (String.IsNullOrWhiteSpace(vSchema.NavigatedFromURL) && eqs.iframe)
    {
        vSchema.NavigatedFromURL = log.ReferrerURL;
    }

    CustomEvent customEvent = null;
    if (String.Equals(eqs.EventType, "custom", StringComparison.OrdinalIgnoreCase))
    {
        customEvent = new CustomEvent
        {
            EventCategory = eqs.EventCategory,
            EventLabel = eqs.EventLabel,
            EventAction = eqs.EventAction,
            EventValue = eqs.EventValue
        };
    }

    vSchema.customEvent = customEvent;

    // The event type is machine-readable data: lower-case it with the invariant culture so the
    // stored value does not depend on the culture of the thread doing the processing.
    vSchema.EventType = eqs.EventType == null ? null : eqs.EventType.ToLowerInvariant();
    vSchema.GoalValue = eqs.GoalValue;

    // Best effort: when extraction fails the out value stays null and the row is still emitted.
    Guid? analyticsGuid = null;
    if (!String.IsNullOrWhiteSpace(log.AnalyticsCookie))
    {
        analyticsGuidExtractor.TryExtractAnalyticsGuid(log.AnalyticsCookie, out analyticsGuid);
    }

    vSchema.AnalyticsGuid = analyticsGuid;

    string ip = null;
    if (log.ClientIP != null)
    {
        ip = DecryptIp(log.ClientIP, ipDecryptor);
    }

    vSchema.IP = string.IsNullOrWhiteSpace(ip) ? "hidden" : ip;

    // When decryption produced nothing, fall back to the still-encrypted address (IPv6 first,
    // then IPv4) as the input for the IP + user-agent GUID.
    if (String.IsNullOrWhiteSpace(ip) && log.ClientIP != null)
    {
        ip = String.IsNullOrWhiteSpace(log.ClientIP.EncryptedIPv6)
            ? log.ClientIP.EncryptedIP
            : log.ClientIP.EncryptedIPv6;
    }

    vSchema.UAIPId = !String.IsNullOrWhiteSpace(ip)
        ? CommonUtils.GetGuidFromIPUserAgent(ip, log.UserAgent)
        : Guid.Empty;

    vSchema.DedupKey = BuildUetDedupKey(eqs, log);
    return UETLogView.Serialize(vSchema);
}

/// <summary>
/// Builds the dedup key for a UET log record.
/// If there is mid and rn, and IsNewMUID is false, we still dedup on mid, rn and MUID.
/// If there is mid and rn, and IsNewMUID is true, we will only dedup on mid and rn.
/// If there is no mid or rn, we will always dedup on timestamp and MUID.
/// </summary>
private static string BuildUetDedupKey(EnumeratedQueryString eqs, UETLog log)
{
    string dedupKey;
    if (eqs.UETMatchingGuid.HasValue && !String.IsNullOrWhiteSpace(eqs.rn))
    {
        dedupKey = eqs.UETMatchingGuid.Value.ToString("N") + "-" + eqs.rn;
        if (log.IsNewMUID == false && !String.IsNullOrEmpty(log.MUID))
        {
            dedupKey += "-" + log.MUID;
        }
    }
    else
    {
        dedupKey = log.EventDateTime.ToString();
        if (!String.IsNullOrEmpty(log.MUID))
        {
            dedupKey += "-" + log.MUID;
        }
    }

    return dedupKey;
}
/// <summary>
/// Parses a UET query string and populates this instance's properties from it.
/// </summary>
/// <param name="queryString">The raw query string, optionally preceded by an /action/[AdvertiserId]? path.</param>
/// <param name="keyValueSeparator">Character separating a key from its value.</param>
/// <param name="groupSeparator">Character separating one key/value pair from the next.</param>
/// <returns>
/// <c>false</c> when the input is null/blank, contains no <paramref name="keyValueSeparator"/>, or
/// <c>ExtractKeyValuePairs</c> fails; otherwise <c>true</c>, with every property assigned from this
/// input.
/// </returns>
public bool TryParse(string queryString, char keyValueSeparator = '=', char groupSeparator = '&')
{
    if (String.IsNullOrWhiteSpace(queryString) || queryString.IndexOf(keyValueSeparator) == -1)
    {
        return false;
    }

    // Remove the /action/[AdvertiserId]? part from QueryString. When the end marker is absent,
    // ei is -1 and the whole string is handed to the extractor.
    var ei = queryString.IndexOf(QSParameterAdvertiserId_end);
    if (!ExtractKeyValuePairs(queryString.Substring(ei + 1), keyValueSeparator, groupSeparator))
    {
        return false;
    }

    // Extract AdvertiserId to be used for Version 1 logs to lookup (TagName, AdvertiserId) -> TagId map file.
    // The property is assigned on every successful parse so that a reused instance never keeps
    // the advertiser id of an earlier query string.
    int? advertiserId = null;
    var bi = queryString.IndexOf(QSParameterAdvertiserId_begin);
    if (bi != -1 && ei > bi)
    {
        var valueStart = bi + QSParameterAdvertiserId_begin.Length;

        // Guard against the end marker falling inside the begin marker's match, which would
        // otherwise ask Substring for a negative length.
        if (ei >= valueStart)
        {
            int advertiserIdInt;
            if (int.TryParse(queryString.Substring(valueStart, ei - valueStart), out advertiserIdInt))
            {
                advertiserId = advertiserIdInt;
            }
        }
    }

    AdvertiserId = advertiserId;

    // Enumerate string columns
    Version = LookupKeyString(QsParameterVersion);
    PageTitle = LookupKeyString(QsParameterPageTitle);

    // App-install query strings are recognised by the app-install path marker occurring anywhere
    // in the input. Cleared otherwise so a previous parse cannot leak its click id.
    AppInstallClickId = queryString.IndexOf(QSParameterAppInstallPS) != -1
        ? LookupKeyString(QSParameterBCLID)
        : null;

    // URL-decode the values that are stored decoded.
    NavigatedFromURL = WebUtility.UrlDecode(LookupKeyString(QsParameterNavigatedFromUrl));
    ReferrerURL = WebUtility.UrlDecode(LookupKeyString(QSParameterReferrerURL));
    EventCategory = WebUtility.UrlDecode(LookupKeyString(QsParameterEventCategory));
    EventAction = WebUtility.UrlDecode(LookupKeyString(QsParameterEventAction));
    EventLabel = WebUtility.UrlDecode(LookupKeyString(QsParameterEventLabel));
    EventType = LookupKeyString(QsParameterEventType);

    // If the event type is not specified but an event category is present, treat the event as
    // a custom event.
    if (String.IsNullOrEmpty(EventType) && !String.IsNullOrWhiteSpace(EventCategory))
    {
        EventType = "custom";
    }

    TagName = WebUtility.UrlDecode(LookupKeyString(QsParameterTagName));
    UETMatchingGuid = CommonUtils.ParseGuid(LookupKeyString(QSParameterUETMatchingMUID));
    rn = LookupKeyString(QSParameterRandomNumber);

    // Enumerate number columns
    EventValue = LookupKeyDouble(QsParameterEventValue);
    TagId = LookupKeyInt(QsParameterTagId);
    PageLoad = LookupKeySByte(QsParameterPageLoad);
    GoalValue = LookupKeyDouble(QsParameterGoalValue);
    iframe = LookupKeyString(QSParameterIframe) == "1";

    return true;
}