/// <summary> Merge this data with another existing set of data </summary>
/// <remarks> Used when merging daily sets into a monthly set.  Rows which share a key with an
/// existing row have their numeric columns summed; rows with a new key are copied in and the
/// matching key-to-row dictionary is updated so later merges find them. </remarks>
/// <param name="Stats_DataSet"> Set of usage data to merge with this one </param>
public void Merge(SobekCM_Stats_DataSet Stats_DataSet)
{
    // Add overall stats ( single summary row - all six columns are counters to sum )
    foreach (DataRow thisRow in Stats_DataSet.SobekCM_Stats_Table.Rows)
    {
        Add_Columns(sobekcm_stats.Rows[0], thisRow, 0, 5);
    }

    // Add collection stats ( column 0 is the key, columns 1-5 are counters )
    foreach (DataRow thisRow in Stats_DataSet.Collection_Stats_Table.Rows)
    {
        string key = thisRow[0].ToString();
        DataRow matchRow;
        if (collection_rows.TryGetValue(key, out matchRow))
        {
            // Sum the two rows together
            Add_Columns(matchRow, thisRow, 1, 5);
        }
        else
        {
            // Add as a new row
            DataRow newCollectionRow = collection_stats.NewRow();
            Copy_Columns(newCollectionRow, thisRow, 5);
            collection_stats.Rows.Add(newCollectionRow);
            collection_rows.Add(key, newCollectionRow);
        }
    }

    // Add institutional stats ( same shape as the collection table )
    foreach (DataRow thisRow in Stats_DataSet.Institution_Stats_Table.Rows)
    {
        string key = thisRow[0].ToString();
        DataRow matchRow;
        if (institution_rows.TryGetValue(key, out matchRow))
        {
            // Sum the two rows together
            Add_Columns(matchRow, thisRow, 1, 5);
        }
        else
        {
            // Add as a new row
            DataRow newInstitutionRow = institution_stats.NewRow();
            Copy_Columns(newInstitutionRow, thisRow, 5);
            institution_stats.Rows.Add(newInstitutionRow);
            institution_rows.Add(key, newInstitutionRow);
        }
    }

    // Add the bib-level stats ( column 0 is the key, columns 1-2 are counters )
    foreach (DataRow thisRow in Stats_DataSet.Bib_Stats_Table.Rows)
    {
        string key = thisRow[0].ToString();
        DataRow matchRow;
        if (bib_rows.TryGetValue(key, out matchRow))
        {
            // Sum the two rows together
            Add_Columns(matchRow, thisRow, 1, 2);
        }
        else
        {
            // Add as a new row
            DataRow newBibRow = bib_stats.NewRow();
            Copy_Columns(newBibRow, thisRow, 2);
            bib_stats.Rows.Add(newBibRow);
            bib_rows.Add(key, newBibRow);
        }
    }

    // Add the item-level stats ( keyed by the integer itemID; columns 3-14 are counters,
    // columns 0-2 are identifiers which are only copied when a new row is created )
    foreach (DataRow thisRow in Stats_DataSet.Item_Stats_Table.Rows)
    {
        int itemId = Convert.ToInt32(thisRow["itemID"]);
        DataRow matchRow;
        if (item_rows.TryGetValue(itemId, out matchRow))
        {
            // Sum the two rows together
            Add_Columns(matchRow, thisRow, 3, 14);
        }
        else
        {
            // Add as a new row
            DataRow newItemRow = item_stats.NewRow();
            Copy_Columns(newItemRow, thisRow, 14);
            item_stats.Rows.Add(newItemRow);
            item_rows.Add(itemId, newItemRow);
        }
    }

    // Add the IP addresses ( column 0 is the IP / key, column 2 is the hit counter )
    foreach (DataRow thisRow in Stats_DataSet.IP_Addresses.Rows)
    {
        string key = thisRow[0].ToString();
        DataRow matchRow;
        if (ip_rows.TryGetValue(key, out matchRow))
        {
            // Sum the two rows together
            Add_Columns(matchRow, thisRow, 2, 2);
        }
        else
        {
            // Add as a new row
            DataRow newIpRow = ip_addresses.NewRow();
            Copy_Columns(newIpRow, thisRow, 2);
            ip_addresses.Rows.Add(newIpRow);
            ip_rows.Add(key, newIpRow);
        }
    }

    // Add the portal stats ( keyed by the UPPER-cased portal name; column 1 is the counter )
    if (Stats_DataSet.Portal_Stats_Table != null)
    {
        foreach (DataRow thisRow in Stats_DataSet.Portal_Stats_Table.Rows)
        {
            string portal = thisRow[0].ToString().ToUpper();
            DataRow matchRow;
            if (portal_rows.TryGetValue(portal, out matchRow))
            {
                // Sum the two rows together
                Add_Columns(matchRow, thisRow, 1, 1);
            }
            else
            {
                // Add as a new row - note the stored name is the upper-cased value
                DataRow newPortalRow = portal_stats.NewRow();
                newPortalRow[0] = portal;
                newPortalRow[1] = thisRow[1];
                portal_stats.Rows.Add(newPortalRow);
                portal_rows.Add(portal, newPortalRow);
            }
        }
    }

    // Add the webcontent stats ( column 0 is the page / key, column 1 is the counter )
    if (Stats_DataSet.WebContent_Stats_Table != null)
    {
        foreach (DataRow thisRow in Stats_DataSet.WebContent_Stats_Table.Rows)
        {
            string key = thisRow[0].ToString();
            DataRow matchRow;
            if (webcontent_rows.TryGetValue(key, out matchRow))
            {
                // Sum the two rows together
                Add_Columns(matchRow, thisRow, 1, 1);
            }
            else
            {
                // Add as a new row
                DataRow newWebContentRow = webcontent_stats.NewRow();
                Copy_Columns(newWebContentRow, thisRow, 1);
                webcontent_stats.Rows.Add(newWebContentRow);
                webcontent_rows.Add(key, newWebContentRow);
            }
        }
    }
}

/// <summary> Helper which adds the integer value of each column in the inclusive range
/// [FirstColumn..LastColumn] of Source into the matching column of Target </summary>
/// <param name="Target"> Row receiving the summed values </param>
/// <param name="Source"> Row whose values are added in </param>
/// <param name="FirstColumn"> First column index ( inclusive ) to sum </param>
/// <param name="LastColumn"> Last column index ( inclusive ) to sum </param>
private static void Add_Columns(DataRow Target, DataRow Source, int FirstColumn, int LastColumn)
{
    for (int i = FirstColumn; i <= LastColumn; i++)
    {
        Target[i] = Convert.ToInt32(Target[i]) + Convert.ToInt32(Source[i]);
    }
}

/// <summary> Helper which copies columns 0 through LastColumn ( inclusive ) from Source to Target </summary>
/// <param name="Target"> Newly created row receiving the values </param>
/// <param name="Source"> Row whose values are copied </param>
/// <param name="LastColumn"> Last column index ( inclusive ) to copy </param>
private static void Copy_Columns(DataRow Target, DataRow Source, int LastColumn)
{
    for (int i = 0; i <= LastColumn; i++)
    {
        Target[i] = Source[i];
    }
}
/// <summary> Process the IIS web logs to SQL insert commands </summary>
/// <remarks> Runs four sequential phases, reporting progress and errors through
/// <see cref="On_New_Status"/> ( any failure aborts the remaining phases ):
/// (1) pull the statistics lookup tables from the database,
/// (2) read each raw IIS log ( u_exYYMMDD.log ) for the requested months and write a
///     daily XML dataset ( yyyymmdd.xml ) if one does not already exist,
/// (3) combine the daily datasets into one monthly dataset ( yyyymm.xml ) plus a
///     highest-users report ( users_yyyymm.xml ),
/// (4) insert the monthly statistics into the database. </remarks>
public void Process_IIS_Logs()
{
    // **** READ THE LOOKUP TABLES FROM THE DATABASE **** //
    DataSet lookupTables;
    try
    {
        lookupTables = Engine_Database.Get_Statistics_Lookup_Tables();
        On_New_Status("Retrieved statistics lookup tables from the database", false);
    }
    catch (Exception ee)
    {
        On_New_Status("Error getting statistics lookup tables from the database. " + ee.Message, true);
        return;
    }

    // Ensure the lookup tables were not null
    if (lookupTables == null)
    {
        On_New_Status("Error getting statistics lookup tables from the database. Table is NULL", true);
        return;
    }

    // Determine, from the year_month, which logs to read ( prefix is 'u_ex' + YY + MM )
    List<string> logs_start = new List<string>();
    foreach (string thisYearMonth in year_months)
    {
        logs_start.Add("u_ex" + thisYearMonth.Substring(2, 2) + thisYearMonth.Substring(4, 2));
    }

    // ***** CODE BELOW READS ALL THE LOG FILES AND THEN WRITES THEM AS XML DATASETS *****//
    On_New_Status("Read all needed log files and write them as XML datasets", false);
    SobekCM_Log_Reader sobekcm_log_reader = new SobekCM_Log_Reader(lookupTables.Tables[0], sobekcm_web_location);
    string[] files = Directory.GetFiles(sobekcm_log_location, "u_ex*.log");
    try
    {
        foreach (string thisFile in files)
        {
            // First eight characters of the name are 'u_ex' + YYMM - used to match the requested months
            string filename_lower = Path.GetFileName(thisFile).ToLower().Substring(0, 8);
            if (logs_start.Contains(filename_lower))
            {
                // FIX: build the FileInfo once ( it was previously constructed twice for the same file )
                FileInfo fileInfo = new FileInfo(thisFile);
                On_New_Status("Processing " + fileInfo.Name, false);

                // Derive the date this log covers from the file name ( u_exYYMMDD )
                string name = fileInfo.Name.Replace(fileInfo.Extension, "");
                DateTime logDate = new DateTime(Convert.ToInt32("20" + name.Substring(4, 2)), Convert.ToInt32(name.Substring(6, 2)), Convert.ToInt32(name.Substring(8, 2)));

                // Only read this log if the daily XML dataset does not already exist
                string resultant_file = Path.Combine(dataset_location, logDate.Year.ToString() + logDate.Month.ToString().PadLeft(2, '0') + logDate.Day.ToString().PadLeft(2, '0') + ".xml");
                if (!File.Exists(resultant_file))
                    sobekcm_log_reader.Read_Log(thisFile).Write_XML(dataset_location);
            }
        }
    }
    catch (Exception ee)
    {
        On_New_Status("Error reading a log file and writing as XML dataset. " + ee.Message, true);
        return;
    }

    // ***** CODE BELOW READS ALL THE DAILY XML DATASETS AND COMBINES THEM INTO MONTHLY *****//
    // ***** DATASETS WHICH ARE SUBSEQUENTLY WRITTEN AS XML DATASETS AS WELL            *****//
    On_New_Status("Combining daily datasets into monthly datasets", false);
    try
    {
        foreach (string year_month in year_months)
        {
            On_New_Status("Combining " + year_month + " daily datasets into one month", false);
            string[] year_month_files = Directory.GetFiles(dataset_location, year_month + "*.xml");
            if (year_month_files.Length > 0)
            {
                SobekCM_Stats_DataSet combined = new SobekCM_Stats_DataSet();
                foreach (string file in year_month_files)
                {
                    // Skip the monthly rollup file itself ( yyyymm.xml ) if it already exists
                    if ((new FileInfo(file)).Name.IndexOf(year_month + ".xml") < 0)
                    {
                        SobekCM_Stats_DataSet daily = new SobekCM_Stats_DataSet();
                        daily.Read_XML(file);
                        combined.Merge(daily);
                    }
                }

                // Write the complete data set
                combined.Write_XML(dataset_location, year_month + ".xml");

                // Just write the highest users in a separate, more readable, file
                combined.Write_Highest_Users(dataset_location, "users_" + year_month + ".xml");
            }
        }
    }
    catch (Exception ee)
    {
        On_New_Status("Error combining daily datasets into monthly datasets. " + ee.Message, true);
        return;
    }

    //// ***** CODE BELOW READS THE MONTHLY DATASETS AND THEN WRITES THE SQL INSERTION SCRIPTS ***** //

    // Read all the data lists first for id lookups
    Dictionary<string, int> aggregationHash = Table_To_Hash(lookupTables.Tables[2]);
    Dictionary<string, int> bibHash = Table_To_Hash(lookupTables.Tables[1]);

    // Build the portal lookup ( upper-cased portal name --> portal id; first occurrence wins )
    Dictionary<string, int> portalHash = new Dictionary<string, int>();
    foreach (DataRow thisRow in lookupTables.Tables[3].Rows)
    {
        if (!portalHash.ContainsKey(thisRow[2].ToString().ToUpper()))
        {
            portalHash[thisRow[2].ToString().ToUpper()] = Convert.ToInt32(thisRow[0]);
        }
    }

    On_New_Status("Insert new statistics into database", false);
    try
    {
        foreach (string yearmonth in year_months)
        {
            On_New_Status("Writing statistics for " + yearmonth, false);
            string thisFile = Path.Combine(dataset_location, yearmonth + ".xml");
            if (File.Exists(thisFile))
            {
                SobekCM_Stats_DataSet monthly = new SobekCM_Stats_DataSet();
                monthly.Read_XML(thisFile);
                int year = Convert.ToInt32(yearmonth.Substring(0, 4));
                int month = Convert.ToInt32(yearmonth.Substring(4));
                monthly.Perform_SQL_Inserts(year, month, aggregationHash, bibHash, portalHash);
            }
        }
    }
    catch (Exception ee)
    {
        On_New_Status("Error saving new usage statistics into the database. " + ee.Message, true);
        On_New_Status("Trace from previous error: " + ee.StackTrace, true);
        return;
    }

    On_New_Status("COMPLETE!", false);
}
/// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
/// <param name="Log_File"> Location for the log file to read ( name must follow the u_exYYMMDD pattern ) </param>
/// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if an
/// exception with a non-empty message was encountered while analyzing a hit </returns>
public SobekCM_Stats_DataSet Read_Log(string Log_File)
{
    // Create the list of hits
    hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();

    // Create the list of sessions ( keyed by IP address )
    sessions = new Dictionary<string, SobekCM_Session>();

    // Create the return set
    SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

    // Get the date of the log file from the file name ( u_exYYMMDD )
    FileInfo fileInfo = new FileInfo(Log_File);
    string name = fileInfo.Name.Replace(fileInfo.Extension, "");
    DateTime logDate = new DateTime(Convert.ToInt32("20" + name.Substring(4, 2)), Convert.ToInt32(name.Substring(6, 2)), Convert.ToInt32(name.Substring(8, 2)));
    returnValue.Date = logDate;

    // Open a connection to the log file and save each hit
    // FIX: the StreamReader was never closed or disposed, leaking the file handle;
    // the using block guarantees it is released even if parse_line throws
    using (StreamReader reader = new StreamReader(Log_File))
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            parse_line(line);
            line = reader.ReadLine();
        }
    }

    // Now, step through each hit in the list
    foreach (SobekCM_Hit hit in hits.Values)
    {
        if (hit.SobekCM_URL.ToUpper().IndexOf(".ASPX") < 0)
        {
            // Non-ASPX request: counted as a static web content hit

            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Shouldn't start with '/'
            if (hit.SobekCM_URL[0] == '/')
            {
                hit.SobekCM_URL = hit.SobekCM_URL.Substring(1);
            }
            hit.SobekCM_URL = hit.SobekCM_URL.ToLower();

            // Strip the standard 'design/webcontent/' prefix ( 18 characters )
            if (hit.SobekCM_URL.IndexOf("design/webcontent/") == 0)
                hit.SobekCM_URL = hit.SobekCM_URL.Substring(18);

            // Add this as a webcontent hit
            returnValue.Add_WebContent_Hit(hit.SobekCM_URL);
        }
        else
        {
            // parse the url query string into a name/value collection
            string[] splitter = hit.Query_String.ToLower().Split("&".ToCharArray());
            NameValueCollection queryStringCollection = new NameValueCollection();
            foreach (string thisSplit in splitter)
            {
                int equals_index = thisSplit.IndexOf("=");
                if ((equals_index > 0) && (equals_index < thisSplit.Length - 1))
                {
                    string query_name = thisSplit.Substring(0, equals_index);
                    string query_value = thisSplit.Substring(equals_index + 1);
                    queryStringCollection[query_name] = query_value;

                    // A 'portal' query argument overrides the URL used for parsing
                    if (query_name.ToLower() == "portal")
                        hit.SobekCM_URL = query_value;
                }
            }

            // Now, get the navigation object using the standard SobekCM method
            try
            {
                Navigation_Object currentMode = new Navigation_Object();
                QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL, new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases, Engine_ApplicationCache_Gateway.Items, Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy, null);
                if (currentMode != null)
                    currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);
                if ((currentMode != null) && (!currentMode.Is_Robot))
                {
                    // Always increment the hits
                    returnValue.Increment_Hits();

                    // Add this IP hit
                    returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

                    // Increment the portal hits
                    returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

                    // Check for pre-existing session; a 60+ minute gap from the same IP starts a new session
                    SobekCM_Session thisSession;
                    if (sessions.ContainsKey(hit.IP))
                    {
                        SobekCM_Session possibleSession = sessions[hit.IP];
                        TimeSpan difference = hit.Time.Subtract(possibleSession.Last_Hit);
                        if (difference.TotalMinutes >= 60)
                        {
                            thisSession = new SobekCM_Session(hit.IP, hit.Time);
                            sessions[hit.IP] = thisSession;
                            returnValue.Increment_Sessions();
                        }
                        else
                        {
                            possibleSession.Last_Hit = hit.Time;
                            thisSession = possibleSession;
                        }
                    }
                    else
                    {
                        thisSession = new SobekCM_Session(hit.IP, hit.Time);
                        sessions.Add(hit.IP, thisSession);
                        returnValue.Increment_Sessions();
                    }

                    if ((currentMode.Mode == Display_Mode_Enum.Item_Display) || (currentMode.Mode == Display_Mode_Enum.Item_Print))
                    {
                        // Item-level hit, identified either by itemid or by a bibid/vid pair
                        if (((currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0)) || ((!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID))))
                        {
                            // If no usable item id, try to look it up from the bibid/vid dictionary
                            if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
                            {
                                if (bib_vid_itemid_dictionary.ContainsKey(currentMode.BibID + ":" + currentMode.VID))
                                {
                                    currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[currentMode.BibID + ":" + currentMode.VID];
                                }
                            }
                            int itemid = -1;
                            if (currentMode.ItemID_DEPRECATED.HasValue)
                                itemid = currentMode.ItemID_DEPRECATED.Value;
                            returnValue.Add_Item_Hit(itemid, currentMode.BibID, currentMode.VID, currentMode.ViewerCode, currentMode.Text_Search, thisSession.SessionID);
                        }
                        else if (!String.IsNullOrEmpty(currentMode.BibID))
                        {
                            returnValue.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
                        }
                    }
                    else
                    {
                        // Aggregation-level hit: codes starting with 'I' are institutions
                        string code = currentMode.Aggregation;
                        string institution = String.Empty;
                        if ((!String.IsNullOrEmpty(code)) && (code.ToUpper()[0] == 'I'))
                        {
                            institution = code;
                            code = String.Empty;
                        }

                        // NOTE(review): this branch can never fire, since institution is only
                        // assigned above when it already starts with 'I' - kept to preserve behavior
                        if ((!String.IsNullOrEmpty(institution)) && (institution.ToUpper()[0] != 'I'))
                            institution = "i" + institution;

                        // For some collections we are counting the institution hit and collection
                        // hit just so the full use of the site is recorded
                        if (!String.IsNullOrEmpty(code))
                        {
                            returnValue.Add_Collection_Hit(code.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                        }

                        // Was this an institutional level hit?
                        if (!String.IsNullOrEmpty(institution))
                        {
                            returnValue.Add_Institution_Hit(institution.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                        }

                        // Is this a static "webcontent" top-level page?
                        if (currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS)
                        {
                            if ((currentMode.Info_Browse_Mode != "unknown") && (currentMode.Info_Browse_Mode != "default"))
                            {
                                returnValue.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
                            }
                        }

                        // Add the write type, if not normal HTML stuff
                        switch (currentMode.Writer_Type)
                        {
                            case Writer_Type_Enum.DataSet:
                            case Writer_Type_Enum.XML:
                                returnValue.Add_XML_Hit();
                                break;

                            case Writer_Type_Enum.OAI:
                                returnValue.Add_OAI_Hit();
                                break;

                            case Writer_Type_Enum.JSON:
                                returnValue.Add_JSON_Hit();
                                break;
                        }
                    }
                }
                else
                {
                    if ((currentMode != null) && (currentMode.Is_Robot))
                        returnValue.Add_Robot_Hit();
                }
            }
            catch (Exception ee)
            {
                // NOTE(review): any exception with a non-empty message aborts the WHOLE log and
                // returns NULL, while exceptions with empty messages are silently ignored -
                // confirm this all-or-nothing behavior is intended before changing it
                if (ee.Message.Length > 0)
                    return null;
            }
        }
    }

    return returnValue;
}