/// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
/// <param name="Log_File"> Location for the log file to read </param>
/// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if analyzing
/// one of the dynamic (.aspx) hits raised an exception with a non-empty message </returns>
/// <remarks> The date of the log is parsed from the file name (without extension): the two-digit year
/// is read from index 4, the month from index 6 and the day from index 8. <br />
/// Hits whose URL does not contain ".aspx" are tallied as web content hits. All other hits are resolved
/// through <see cref="QueryString_Analyzer"/> and tallied as robot, item, bib, collection or institution
/// hits, with sessions keyed by IP address and renewed after 60 minutes without a hit. </remarks>
public SobekCM_Stats_DataSet Read_Log(string Log_File)
{
    // Start from empty hit and session collections for this log file
    hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();
    sessions = new Dictionary<string, SobekCM_Session>();

    // Create the return set
    SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

    // Get the date of the log file from its name. Only the trailing extension is
    // removed, which also works for file names that have no extension at all.
    FileInfo fileInfo = new FileInfo(Log_File);
    string name = Path.GetFileNameWithoutExtension(fileInfo.Name);
    int logYear = Convert.ToInt32("20" + name.Substring(4, 2));
    int logMonth = Convert.ToInt32(name.Substring(6, 2));
    int logDay = Convert.ToInt32(name.Substring(8, 2));
    returnValue.Date = new DateTime(logYear, logMonth, logDay);

    // Read the log file and hand each line to parse_line. The using block
    // releases the file handle even if parsing throws.
    using (StreamReader reader = new StreamReader(Log_File))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            parse_line(line);
        }
    }

    // Prefix stripped from static web content URLs before they are counted
    const string webcontentPrefix = "design/webcontent/";

    // Now, step through each hit in the list
    foreach (SobekCM_Hit hit in hits.Values)
    {
        // Anything which is not an .aspx request is counted as a web content hit
        if (hit.SobekCM_URL.IndexOf(".aspx", StringComparison.OrdinalIgnoreCase) < 0)
        {
            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Normalize the URL: no leading '/', lower case, no web content prefix.
            // The length check keeps an empty URL from throwing here.
            string staticUrl = hit.SobekCM_URL;
            if ((staticUrl.Length > 0) && (staticUrl[0] == '/'))
            {
                staticUrl = staticUrl.Substring(1);
            }
            staticUrl = staticUrl.ToLower();
            if (staticUrl.StartsWith(webcontentPrefix, StringComparison.Ordinal))
            {
                staticUrl = staticUrl.Substring(webcontentPrefix.Length);
            }
            hit.SobekCM_URL = staticUrl;

            // Add this as a webcontent hit
            returnValue.Add_WebContent_Hit(staticUrl);
            continue;
        }

        // Parse the (lower-cased) query string into name/value pairs. Pairs without
        // a name or without a value are ignored.
        NameValueCollection queryStringCollection = new NameValueCollection();
        foreach (string thisSplit in hit.Query_String.ToLower().Split('&'))
        {
            int equals_index = thisSplit.IndexOf('=');
            if ((equals_index <= 0) || (equals_index >= thisSplit.Length - 1))
            {
                continue;
            }

            string query_name = thisSplit.Substring(0, equals_index);
            string query_value = thisSplit.Substring(equals_index + 1);
            queryStringCollection[query_name] = query_value;

            // The portal value replaces the URL which is passed to the query analyzer
            if (query_name == "portal")
            {
                hit.SobekCM_URL = query_value;
            }
        }

        // Now, get the navigation object using the standard SobekCM method
        try
        {
            Navigation_Object currentMode = new Navigation_Object();
            QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL, new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases, Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy, Engine_ApplicationCache_Gateway.Settings.System.Custom_BibID_RegEx, null);
            currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);

            // Robot traffic is only tallied as a robot hit
            if (currentMode.Is_Robot)
            {
                returnValue.Add_Robot_Hit();
                continue;
            }

            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Increment the portal hits
            returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

            // Continue the existing session for this IP if its last hit was less than
            // 60 minutes before this one; otherwise begin (and count) a new session
            SobekCM_Session thisSession;
            if ((sessions.TryGetValue(hit.IP, out thisSession)) && (hit.Time.Subtract(thisSession.Last_Hit).TotalMinutes < 60))
            {
                thisSession.Last_Hit = hit.Time;
            }
            else
            {
                thisSession = new SobekCM_Session(hit.IP, hit.Time);
                sessions[hit.IP] = thisSession;
                returnValue.Increment_Sessions();
            }

            if ((currentMode.Mode == Display_Mode_Enum.Item_Display) || (currentMode.Mode == Display_Mode_Enum.Item_Print))
            {
                if (((currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0)) ||
                    ((!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID))))
                {
                    // Without a usable item id, try to look it up from the BibID:VID pair
                    if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
                    {
                        string bibVidKey = currentMode.BibID + ":" + currentMode.VID;
                        if (bib_vid_itemid_dictionary.ContainsKey(bibVidKey))
                        {
                            currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[bibVidKey];
                        }
                    }

                    // -1 is passed along when the item id is still unknown
                    int itemid = -1;
                    if (currentMode.ItemID_DEPRECATED.HasValue)
                    {
                        itemid = currentMode.ItemID_DEPRECATED.Value;
                    }

                    returnValue.Add_Item_Hit(itemid, currentMode.BibID, currentMode.VID, currentMode.ViewerCode, currentMode.Text_Search, thisSession.SessionID);
                }
                else if (!String.IsNullOrEmpty(currentMode.BibID))
                {
                    // Only the BibID is known, so this is counted at the title level
                    returnValue.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
                }
            }
            else
            {
                // An aggregation code which starts with 'I' (in either case) is counted as
                // an institution hit; any other non-empty code is counted as a collection hit
                string code = currentMode.Aggregation;
                string institution = String.Empty;
                if ((!String.IsNullOrEmpty(code)) && (char.ToUpper(code[0]) == 'I'))
                {
                    institution = code;
                    code = String.Empty;
                }

                if (!String.IsNullOrEmpty(code))
                {
                    returnValue.Add_Collection_Hit(code.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                }

                // Was this an institutional level hit?
                if (!String.IsNullOrEmpty(institution))
                {
                    returnValue.Add_Institution_Hit(institution.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                }

                // Is this a static "webcontent" top-level page?
                if ((currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS) &&
                    (currentMode.Info_Browse_Mode != "unknown") && (currentMode.Info_Browse_Mode != "default"))
                {
                    returnValue.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
                }

                // Add the writer type, if not normal HTML stuff
                switch (currentMode.Writer_Type)
                {
                    case Writer_Type_Enum.DataSet:
                    case Writer_Type_Enum.XML:
                        returnValue.Add_XML_Hit();
                        break;

                    case Writer_Type_Enum.OAI:
                        returnValue.Add_OAI_Hit();
                        break;

                    case Writer_Type_Enum.JSON:
                        returnValue.Add_JSON_Hit();
                        break;
                }
            }
        }
        catch (Exception ee)
        {
            // NOTE(review): any exception which carries a message abandons the whole log
            // and returns NULL to the caller; only message-less exceptions are skipped.
            // Confirm this is intended before relaxing it, since callers may rely on NULL.
            if (ee.Message.Length > 0)
            {
                return null;
            }
        }
    }

    return returnValue;
}
/// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
/// <param name="Log_File"> Location for the log file to read </param>
/// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if analyzing
/// one of the dynamic (.aspx) hits raised an exception with a non-empty message </returns>
/// <remarks> The date of the log is parsed from the file name (without extension): the two-digit year
/// is read from index 4, the month from index 6 and the day from index 8. <br />
/// Hits whose URL does not contain ".aspx" are tallied as web content hits. All other hits are resolved
/// through <see cref="QueryString_Analyzer"/> and tallied as robot, item, bib, collection or institution
/// hits, with sessions keyed by IP address and renewed after 60 minutes without a hit. </remarks>
public SobekCM_Stats_DataSet Read_Log(string Log_File)
{
    // Start from empty hit and session collections for this log file
    hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();
    sessions = new Dictionary<string, SobekCM_Session>();

    // Create the return set
    SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

    // Get the date of the log file from its name. Only the trailing extension is
    // removed, which also works for file names that have no extension at all.
    FileInfo fileInfo = new FileInfo(Log_File);
    string name = Path.GetFileNameWithoutExtension(fileInfo.Name);
    int logYear = Convert.ToInt32("20" + name.Substring(4, 2));
    int logMonth = Convert.ToInt32(name.Substring(6, 2));
    int logDay = Convert.ToInt32(name.Substring(8, 2));
    returnValue.Date = new DateTime(logYear, logMonth, logDay);

    // Read the log file and hand each line to parse_line. The using block
    // releases the file handle even if parsing throws.
    using (StreamReader reader = new StreamReader(Log_File))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            parse_line(line);
        }
    }

    // Prefix stripped from static web content URLs before they are counted
    const string webcontentPrefix = "design/webcontent/";

    // Now, step through each hit in the list
    foreach (SobekCM_Hit hit in hits.Values)
    {
        // Anything which is not an .aspx request is counted as a web content hit
        if (hit.SobekCM_URL.IndexOf(".aspx", StringComparison.OrdinalIgnoreCase) < 0)
        {
            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Normalize the URL: no leading '/', lower case, no web content prefix.
            // The length check keeps an empty URL from throwing here.
            string staticUrl = hit.SobekCM_URL;
            if ((staticUrl.Length > 0) && (staticUrl[0] == '/'))
            {
                staticUrl = staticUrl.Substring(1);
            }
            staticUrl = staticUrl.ToLower();
            if (staticUrl.StartsWith(webcontentPrefix, StringComparison.Ordinal))
            {
                staticUrl = staticUrl.Substring(webcontentPrefix.Length);
            }
            hit.SobekCM_URL = staticUrl;

            // Add this as a webcontent hit
            returnValue.Add_WebContent_Hit(staticUrl);
            continue;
        }

        // Parse the (lower-cased) query string into name/value pairs. Pairs without
        // a name or without a value are ignored.
        NameValueCollection queryStringCollection = new NameValueCollection();
        foreach (string thisSplit in hit.Query_String.ToLower().Split('&'))
        {
            int equals_index = thisSplit.IndexOf('=');
            if ((equals_index <= 0) || (equals_index >= thisSplit.Length - 1))
            {
                continue;
            }

            string query_name = thisSplit.Substring(0, equals_index);
            string query_value = thisSplit.Substring(equals_index + 1);
            queryStringCollection[query_name] = query_value;

            // The portal value replaces the URL which is passed to the query analyzer
            if (query_name == "portal")
            {
                hit.SobekCM_URL = query_value;
            }
        }

        // Now, get the navigation object using the standard SobekCM method
        try
        {
            Navigation_Object currentMode = new Navigation_Object();
            QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL, new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases, Engine_ApplicationCache_Gateway.Items, Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy, null);
            currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);

            // Robot traffic is only tallied as a robot hit
            if (currentMode.Is_Robot)
            {
                returnValue.Add_Robot_Hit();
                continue;
            }

            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Increment the portal hits
            returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

            // Continue the existing session for this IP if its last hit was less than
            // 60 minutes before this one; otherwise begin (and count) a new session
            SobekCM_Session thisSession;
            if ((sessions.TryGetValue(hit.IP, out thisSession)) && (hit.Time.Subtract(thisSession.Last_Hit).TotalMinutes < 60))
            {
                thisSession.Last_Hit = hit.Time;
            }
            else
            {
                thisSession = new SobekCM_Session(hit.IP, hit.Time);
                sessions[hit.IP] = thisSession;
                returnValue.Increment_Sessions();
            }

            if ((currentMode.Mode == Display_Mode_Enum.Item_Display) || (currentMode.Mode == Display_Mode_Enum.Item_Print))
            {
                if (((currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0)) ||
                    ((!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID))))
                {
                    // Without a usable item id, try to look it up from the BibID:VID pair
                    if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
                    {
                        string bibVidKey = currentMode.BibID + ":" + currentMode.VID;
                        if (bib_vid_itemid_dictionary.ContainsKey(bibVidKey))
                        {
                            currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[bibVidKey];
                        }
                    }

                    // -1 is passed along when the item id is still unknown
                    int itemid = -1;
                    if (currentMode.ItemID_DEPRECATED.HasValue)
                    {
                        itemid = currentMode.ItemID_DEPRECATED.Value;
                    }

                    returnValue.Add_Item_Hit(itemid, currentMode.BibID, currentMode.VID, currentMode.ViewerCode, currentMode.Text_Search, thisSession.SessionID);
                }
                else if (!String.IsNullOrEmpty(currentMode.BibID))
                {
                    // Only the BibID is known, so this is counted at the title level
                    returnValue.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
                }
            }
            else
            {
                // An aggregation code which starts with 'I' (in either case) is counted as
                // an institution hit; any other non-empty code is counted as a collection hit
                string code = currentMode.Aggregation;
                string institution = String.Empty;
                if ((!String.IsNullOrEmpty(code)) && (char.ToUpper(code[0]) == 'I'))
                {
                    institution = code;
                    code = String.Empty;
                }

                if (!String.IsNullOrEmpty(code))
                {
                    returnValue.Add_Collection_Hit(code.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                }

                // Was this an institutional level hit?
                if (!String.IsNullOrEmpty(institution))
                {
                    returnValue.Add_Institution_Hit(institution.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                }

                // Is this a static "webcontent" top-level page?
                if ((currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS) &&
                    (currentMode.Info_Browse_Mode != "unknown") && (currentMode.Info_Browse_Mode != "default"))
                {
                    returnValue.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
                }

                // Add the writer type, if not normal HTML stuff
                switch (currentMode.Writer_Type)
                {
                    case Writer_Type_Enum.DataSet:
                    case Writer_Type_Enum.XML:
                        returnValue.Add_XML_Hit();
                        break;

                    case Writer_Type_Enum.OAI:
                        returnValue.Add_OAI_Hit();
                        break;

                    case Writer_Type_Enum.JSON:
                        returnValue.Add_JSON_Hit();
                        break;
                }
            }
        }
        catch (Exception ee)
        {
            // NOTE(review): any exception which carries a message abandons the whole log
            // and returns NULL to the caller; only message-less exceptions are skipped.
            // Confirm this is intended before relaxing it, since callers may rely on NULL.
            if (ee.Message.Length > 0)
            {
                return null;
            }
        }
    }

    return returnValue;
}