/// <summary>
/// Loads the stored access data for a device and date from the local folder.
/// </summary>
/// <param name="deviceId">Device identifier, passed to <c>ComputeJsonName</c> to build the file name.</param>
/// <param name="accessDate">Access date, passed to <c>ComputeJsonName</c> to build the file name.</param>
/// <returns>
/// The object deserialized from the JSON file, or <c>null</c> when either the
/// <c>localPath</c> directory or the JSON file inside it does not exist.
/// </returns>
private AccessDataDetail DeserializeJsonDataInLocal(string deviceId, string accessDate)
{
    // The file name is computed first, before any file-system check.
    string fileName = ComputeJsonName(deviceId, accessDate);

    if (Directory.Exists(localPath))
    {
        string candidatePath = Path.Combine(localPath, fileName);
        if (File.Exists(candidatePath))
        {
            string json = File.ReadAllText(candidatePath);
            return JsonConvert.DeserializeObject<AccessDataDetail>(json);
        }
    }

    // Either the directory or the file is missing.
    return null;
}
/// <summary>
/// Loads the stored access data for a device and date from blob storage.
/// </summary>
/// <param name="deviceId">Device identifier, passed to <c>ComputeJsonName</c> to build the blob name.</param>
/// <param name="accessDate">Access date, passed to <c>ComputeJsonName</c> to build the blob name.</param>
/// <returns>
/// The object deserialized from the blob's JSON content, or <c>null</c> when the container
/// holds no blob with the computed name.
/// </returns>
/// <remarks>
/// The container named by <c>containerName</c> is created if it does not exist yet.
/// The blob is addressed directly by name instead of listing the whole container.
/// </remarks>
public AccessDataDetail DeserializeJsonDataInCloud(string deviceId, string accessDate)
{
    string jsonName = ComputeJsonName(deviceId, accessDate);

    // Connect to the storage account and create a blob client.
    var storageAccount = CloudStorageAccount.Parse(connectionString);
    var blobClient = storageAccount.CreateCloudBlobClient();

    // Get a reference to the container, creating it on first use.
    var blobContainer = blobClient.GetContainerReference(containerName);
    blobContainer.CreateIfNotExists();

    // An empty name can never identify a blob; treat it as "not found".
    if (string.IsNullOrEmpty(jsonName))
    {
        return null;
    }

    // Ask the service about this one blob rather than enumerating every blob in the container.
    CloudBlob jsonBlob = blobContainer.GetBlobReference(jsonName);
    if (!jsonBlob.Exists())
    {
        return null;
    }

    using (StreamReader reader = new StreamReader(jsonBlob.OpenRead()))
    {
        string content = reader.ReadToEnd();
        return JsonConvert.DeserializeObject<AccessDataDetail>(content);
    }
}
/// <summary>
/// Parses an access log, aggregates per-date and per-module hit counts, merges them with any
/// previously stored data for the same date, and writes one JSON file per date to the temp folder.
/// </summary>
/// <param name="blobName">Name of the log blob; passed to <c>GetDeviceId</c> to obtain the device id.</param>
/// <param name="accessLogBlob">Stream over the access log text. It is read to the end but not closed here.</param>
/// <returns>
/// The full paths of the JSON files written, or <c>null</c> if producing any of the files failed.
/// </returns>
/// <remarks>
/// Lines that do not look like a valid log entry (too few tokens, first token not an IP address,
/// unparsable date, unexpected URL shape) are skipped. The method reads and updates
/// <c>dateRangeList</c>, <c>existingRecords</c> and <c>accessDataList</c>, which are declared outside
/// this method.
/// </remarks>
public IList<string> Parse(string blobName, Stream accessLogBlob)
{
    string deviceId = GetDeviceId(blobName);
    IList<string> result = new List<string>();

    // Placeholder far in the past; only used by the same-second comparison below.
    DateTime prevRecord = new DateTime(1900, 1, 1);

    // Built once instead of once per log line.
    Regex ipRegex = new Regex(@"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b");
    string[] ignoredExtensions = { ".png", ".jpg", ".bmp", ".gif", ".js", ".css" };

    // Not disposed on purpose: disposing the reader would also close the caller's stream.
    var file = new StreamReader(accessLogBlob);
    string lineEntry;
    while ((lineEntry = file.ReadLine()) != null)
    {
        string[] tokens = lineEntry.Split(' ');

        //TODO: We skip this for now - need to check if this is an error and log appropriately
        if (tokens.Length <= 6)
        {
            continue;
        }

        // Token 1 (tokens[0]) must look like an IP address, otherwise the line is not processed.
        if (!ipRegex.IsMatch(tokens[0]))
        {
            continue;
        }

        // Tokens 2 and 3 are ignored.
        // Token 4 (tokens[3]) holds the date, prefixed by one character that is dropped.
        string rawDate = tokens[3];
        DateTime dtObj;
        if (rawDate.Length == 0 ||
            !DateTime.TryParseExact(rawDate.Substring(1), "dd/MMM/yyyy:HH:mm:ss",
                                    CultureInfo.InvariantCulture, DateTimeStyles.None, out dtObj))
        {
            // Invalid date: skip the line instead of aborting the whole parse.
            continue;
        }

        string formattedDateValue = dtObj.ToString("yyyyMMdd", CultureInfo.InvariantCulture);

        // Tokens 5 and 6 are ignored.
        // Token 7 (tokens[6]) holds the requested URL with the module information.
        string[] moduleTokens = tokens[6].Split('/', '?');

        //TODO: We skip this for now - need to check if this is an error and log appropriately
        if (moduleTokens.Length <= 3)
        {
            continue;
        }

        // NOTE(review): the specification reportedly says the URL should *start* with "modules",
        // but this only checks that some segment equals "modules" - confirm which is intended.
        if (!moduleTokens.Contains("modules"))
        {
            continue;
        }

        // Ignore image, script and stylesheet requests. This is a substring test on the last
        // segment, so names such as "data.json" are skipped as well (they contain ".js").
        string lastSegment = moduleTokens[moduleTokens.Length - 1];
        if (ignoredExtensions.Any(ext => lastSegment.Contains(ext)))
        {
            continue;
        }

        // An HTML page pulls in many references within the same second; count one event per
        // second by ignoring a record whose timestamp equals the previously accepted one.
        if (prevRecord == dtObj)
        {
            continue;
        }
        prevRecord = dtObj;

        // Time of day of the entry. Parsing a time-only string yields the current date, so the
        // date part of this value is the processing date, not the log date.
        // NOTE(review): ranges loaded from previously stored JSON carry the date they were
        // processed on, so the overlap test below presumably only matches for data processed
        // on the same calendar day - confirm before changing, as this affects the stored format.
        DateTime timeOfDay = DateTime.Parse(
            dtObj.ToString("HH:mm:ss", CultureInfo.InvariantCulture),
            CultureInfo.InvariantCulture);

        // Track the first and last accepted time seen for this date.
        DateRange rangeObj;
        if (!dateRangeList.TryGetValue(formattedDateValue, out rangeObj))
        {
            dateRangeList.Add(formattedDateValue, new DateRange() { StartTime = timeOfDay, EndTime = timeOfDay });
        }
        else
        {
            rangeObj.EndTime = timeOfDay;
        }

        // Load the stored record for this date once; a null entry records "nothing stored".
        AccessDataDetail existingDetail;
        if (!existingRecords.TryGetValue(formattedDateValue, out existingDetail))
        {
            existingDetail = DeserializeJsonData(deviceId, formattedDateValue);
            existingRecords.Add(formattedDateValue, existingDetail);
        }

        // Entries falling inside the stored time range are treated as already counted.
        if (existingDetail != null &&
            timeOfDay >= existingDetail.StartTime &&
            timeOfDay <= existingDetail.EndTime)
        {
            continue;
        }

        // A request whose fourth URL segment contains ".htm" or "html" counts as a main module hit.
        bool isMainModule = moduleTokens[3].Contains(".htm") || moduleTokens[3].Contains("html");

        IDictionary<string, AccessData> modulesForDate;
        if (!accessDataList.TryGetValue(formattedDateValue, out modulesForDate))
        {
            modulesForDate = new Dictionary<string, AccessData>();
            accessDataList.Add(formattedDateValue, modulesForDate);
        }

        string moduleName = moduleTokens[2];
        AccessData accessDataObj;
        if (!modulesForDate.TryGetValue(moduleName, out accessDataObj))
        {
            accessDataObj = new AccessData()
            {
                ModuleName = moduleName,
                MainModuleCount = 0,
                SubModuleCount = 0,
                UpLoadTime = dtObj
            };
            modulesForDate.Add(moduleName, accessDataObj);
        }

        if (isMainModule)
        {
            accessDataObj.MainModuleCount += 1;
        }
        else
        {
            accessDataObj.SubModuleCount += 1;
        }
    }

    // The loop above only skips overlapping entries; the merge with stored data happens here,
    // once per date.
    foreach (var dateEntry in accessDataList)
    {
        string dateValue = dateEntry.Key;

        AccessDataDetail addObj = new AccessDataDetail();
        addObj.AccessDate = dateValue;
        addObj.DeviceId = deviceId;

        // Time range derived from the log that was just processed.
        DateRange currentRange;
        if (dateRangeList.TryGetValue(dateValue, out currentRange))
        {
            addObj.StartTime = currentRange.StartTime;
            addObj.EndTime = currentRange.EndTime;
        }

        // Unmerged counts from the log that was just processed.
        foreach (AccessData currentData in dateEntry.Value.Values)
        {
            addObj.AccessDetails.Add(currentData);
        }

        // Merge the stored record, if any, into the new one.
        AccessDataDetail storedDetail;
        if (existingRecords.TryGetValue(dateValue, out storedDetail) && storedDetail != null)
        {
            // Start time becomes the minimum of both, end time the maximum of both.
            if (storedDetail.StartTime <= addObj.StartTime)
            {
                addObj.StartTime = storedDetail.StartTime;
            }
            if (storedDetail.EndTime >= addObj.EndTime)
            {
                addObj.EndTime = storedDetail.EndTime;
            }

            foreach (AccessData storedData in storedDetail.AccessDetails)
            {
                bool matchFound = false;
                foreach (AccessData newData in addObj.AccessDetails)
                {
                    if (storedData.ModuleName == newData.ModuleName)
                    {
                        matchFound = true;
                        newData.MainModuleCount += storedData.MainModuleCount;
                        newData.SubModuleCount += storedData.SubModuleCount;
                        break;
                    }
                }

                // A module present only in the stored record is carried over unchanged.
                if (!matchFound)
                {
                    addObj.AccessDetails.Add(storedData);
                }
            }
        }

        // Serialize the merged record and write it to a JSON file in the temp folder.
        string jsonValue = JsonConvert.SerializeObject(addObj, Formatting.Indented);
        Console.WriteLine(jsonValue);
        Console.WriteLine();

        try
        {
            string tempPath = Path.GetTempPath();
            string fileName = ComputeJsonName(addObj.DeviceId, addObj.AccessDate);
            string filePath = Path.Combine(tempPath, fileName);

            // The using block closes the file even when the write throws.
            using (StreamWriter sw = new StreamWriter(filePath))
            {
                sw.WriteLine(jsonValue);
            }

            result.Add(filePath);
        }
        catch (Exception ex)
        {
            // Callers receive null on failure; report the cause instead of dropping it silently.
            Console.Error.WriteLine("Failed to write access data JSON for date " + dateValue + ": " + ex.Message);
            return null;
        }
    }

    return result;
}