Example #1
0
        /// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
        /// <param name="Log_File"> Location for the log file to read. The file name (without extension) must hold a two-digit year, month, and day at zero-based character offsets 4, 6, and 8 </param>
        /// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if an exception is raised while analyzing one of the .aspx hits </returns>
        /// <remarks> Hits whose URL does not contain ".aspx" are counted as web content hits.  All other hits are run
        /// through the query string analyzer; robot hits are only tallied as robot hits, while the remaining hits are
        /// counted against the portal, a per-IP session (a new session starts after 60 or more minutes without a hit
        /// from the same IP), and then the item, title, collection, or institution requested. </remarks>
        public SobekCM_Stats_DataSet Read_Log(string Log_File)
        {
            // Start with fresh working collections for this log
            hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();
            sessions = new Dictionary<string, SobekCM_Session>();

            // Create the return set
            SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

            // Get the date of the log file from its name.  Only the trailing extension is removed, so a
            // file without an extension (or with the extension text repeated in the name) is handled.
            FileInfo fileInfo = new FileInfo(Log_File);
            string name = Path.GetFileNameWithoutExtension(fileInfo.Name);
            int logYear = Convert.ToInt32("20" + name.Substring(4, 2));
            int logMonth = Convert.ToInt32(name.Substring(6, 2));
            int logDay = Convert.ToInt32(name.Substring(8, 2));
            returnValue.Date = new DateTime(logYear, logMonth, logDay);

            // Read the log file and save each hit; the using block releases the file handle
            // even if parsing a line throws
            using (StreamReader reader = new StreamReader(Log_File))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    parse_line(line);
                }
            }

            const string webContentPrefix = "design/webcontent/";

            // Now, step through each hit in the list
            foreach (SobekCM_Hit hit in hits.Values)
            {
                if (hit.SobekCM_URL.IndexOf(".aspx", StringComparison.OrdinalIgnoreCase) < 0)
                {
                    // Always increment the hits
                    returnValue.Increment_Hits();

                    // Add this IP hit
                    returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

                    // Normalize the URL: no leading '/', lower case, and no web content folder prefix
                    string contentUrl = hit.SobekCM_URL;
                    if ((contentUrl.Length > 0) && (contentUrl[0] == '/'))
                    {
                        contentUrl = contentUrl.Substring(1);
                    }
                    contentUrl = contentUrl.ToLower();
                    if (contentUrl.StartsWith(webContentPrefix, StringComparison.Ordinal))
                    {
                        contentUrl = contentUrl.Substring(webContentPrefix.Length);
                    }
                    hit.SobekCM_URL = contentUrl;

                    // Add this as a webcontent hit
                    returnValue.Add_WebContent_Hit(contentUrl);
                    continue;
                }

                // Split the (lower-cased) query string into name/value pairs, skipping any
                // pair that has an empty name or an empty value
                NameValueCollection queryStringCollection = new NameValueCollection();
                foreach (string thisSplit in hit.Query_String.ToLower().Split('&'))
                {
                    int equals_index = thisSplit.IndexOf('=');
                    if ((equals_index <= 0) || (equals_index >= thisSplit.Length - 1))
                    {
                        continue;
                    }

                    string query_name = thisSplit.Substring(0, equals_index);
                    string query_value = thisSplit.Substring(equals_index + 1);
                    queryStringCollection[query_name] = query_value;

                    // A portal argument replaces the URL handed to the query analyzer
                    if (String.Equals(query_name, "portal", StringComparison.Ordinal))
                    {
                        hit.SobekCM_URL = query_value;
                    }
                }

                // Now, get the navigation object using the standard SobekCM method
                try
                {
                    Navigation_Object currentMode = new Navigation_Object();
                    QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL,
                                                     new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases,
                                                     Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy,
                                                     Engine_ApplicationCache_Gateway.Settings.System.Custom_BibID_RegEx, null);

                    currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);

                    // Robots are only counted as robot hits
                    if (currentMode.Is_Robot)
                    {
                        returnValue.Add_Robot_Hit();
                        continue;
                    }

                    // Always increment the hits
                    returnValue.Increment_Hits();

                    // Add this IP hit
                    returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

                    // Increment the portal hits
                    returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

                    // Continue the existing session for this IP if its last hit was less than
                    // 60 minutes ago; otherwise start (and count) a new session
                    SobekCM_Session thisSession;
                    if ((sessions.TryGetValue(hit.IP, out thisSession)) &&
                        (hit.Time.Subtract(thisSession.Last_Hit).TotalMinutes < 60))
                    {
                        thisSession.Last_Hit = hit.Time;
                    }
                    else
                    {
                        thisSession = new SobekCM_Session(hit.IP, hit.Time);
                        sessions[hit.IP] = thisSession;

                        returnValue.Increment_Sessions();
                    }

                    if ((currentMode.Mode == Display_Mode_Enum.Item_Display) ||
                        (currentMode.Mode == Display_Mode_Enum.Item_Print))
                    {
                        bool hasPositiveItemId = (currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0);
                        bool hasBibAndVid = (!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID));

                        if (hasPositiveItemId || hasBibAndVid)
                        {
                            // Without a usable item id, try to resolve it from the BibID:VID lookup
                            if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
                            {
                                string bibVidKey = currentMode.BibID + ":" + currentMode.VID;
                                if (bib_vid_itemid_dictionary.ContainsKey(bibVidKey))
                                {
                                    currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[bibVidKey];
                                }
                            }

                            // -1 is passed along when the item id is still unknown
                            int itemid = -1;
                            if (currentMode.ItemID_DEPRECATED.HasValue)
                            {
                                itemid = currentMode.ItemID_DEPRECATED.Value;
                            }

                            returnValue.Add_Item_Hit(itemid, currentMode.BibID,
                                                     currentMode.VID, currentMode.ViewerCode,
                                                     currentMode.Text_Search, thisSession.SessionID);
                        }
                        else if (!String.IsNullOrEmpty(currentMode.BibID))
                        {
                            returnValue.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
                        }
                    }
                    else
                    {
                        // An aggregation code starting with 'i' (any case) is counted as an
                        // institution hit; every other non-empty code is a collection hit
                        string aggregation = currentMode.Aggregation;
                        if (!String.IsNullOrEmpty(aggregation))
                        {
                            if (Char.ToUpperInvariant(aggregation[0]) == 'I')
                            {
                                returnValue.Add_Institution_Hit(aggregation.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                            }
                            else
                            {
                                returnValue.Add_Collection_Hit(aggregation.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                            }
                        }

                        // Is this a static "webcontent" top-level page?
                        if ((currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS) &&
                            (currentMode.Info_Browse_Mode != "unknown") &&
                            (currentMode.Info_Browse_Mode != "default"))
                        {
                            returnValue.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
                        }

                        // Add the writer type, if not normal HTML stuff
                        switch (currentMode.Writer_Type)
                        {
                            case Writer_Type_Enum.DataSet:
                            case Writer_Type_Enum.XML:
                                returnValue.Add_XML_Hit();
                                break;

                            case Writer_Type_Enum.OAI:
                                returnValue.Add_OAI_Hit();
                                break;

                            case Writer_Type_Enum.JSON:
                                returnValue.Add_JSON_Hit();
                                break;
                        }
                    }
                }
                catch (Exception ee)
                {
                    // NOTE(review): any exception carrying a message abandons the whole log and
                    // returns null; only an exception with an empty message lets the loop carry on.
                    // Kept as-is because callers may depend on the null result - confirm the intent.
                    if (ee.Message.Length > 0)
                    {
                        return null;
                    }
                }
            }

            return returnValue;
        }
        /// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
        /// <param name="Log_File"> Location for the log file to read. The file name (without extension) must hold a two-digit year, month, and day at zero-based character offsets 4, 6, and 8 </param>
        /// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if an exception is raised while analyzing one of the .aspx hits </returns>
        /// <remarks> Hits whose URL does not contain ".aspx" are counted as web content hits.  All other hits are run
        /// through the query string analyzer; robot hits are only tallied as robot hits, while the remaining hits are
        /// counted against the portal, a per-IP session (a new session starts after 60 or more minutes without a hit
        /// from the same IP), and then the item, title, collection, or institution requested. </remarks>
        public SobekCM_Stats_DataSet Read_Log(string Log_File)
        {
            // Start with fresh working collections for this log
            hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();
            sessions = new Dictionary<string, SobekCM_Session>();

            // Create the return set
            SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

            // Get the date of the log file from its name.  Only the trailing extension is removed, so a
            // file without an extension (or with the extension text repeated in the name) is handled.
            FileInfo fileInfo = new FileInfo(Log_File);
            string name = Path.GetFileNameWithoutExtension(fileInfo.Name);
            int logYear = Convert.ToInt32("20" + name.Substring(4, 2));
            int logMonth = Convert.ToInt32(name.Substring(6, 2));
            int logDay = Convert.ToInt32(name.Substring(8, 2));
            returnValue.Date = new DateTime(logYear, logMonth, logDay);

            // Read the log file and save each hit; the using block releases the file handle
            // even if parsing a line throws
            using (StreamReader reader = new StreamReader(Log_File))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    parse_line(line);
                }
            }

            const string webContentPrefix = "design/webcontent/";

            // Now, step through each hit in the list
            foreach (SobekCM_Hit hit in hits.Values)
            {
                if (hit.SobekCM_URL.IndexOf(".aspx", StringComparison.OrdinalIgnoreCase) < 0)
                {
                    // Always increment the hits
                    returnValue.Increment_Hits();

                    // Add this IP hit
                    returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

                    // Normalize the URL: no leading '/', lower case, and no web content folder prefix
                    string contentUrl = hit.SobekCM_URL;
                    if ((contentUrl.Length > 0) && (contentUrl[0] == '/'))
                    {
                        contentUrl = contentUrl.Substring(1);
                    }
                    contentUrl = contentUrl.ToLower();
                    if (contentUrl.StartsWith(webContentPrefix, StringComparison.Ordinal))
                    {
                        contentUrl = contentUrl.Substring(webContentPrefix.Length);
                    }
                    hit.SobekCM_URL = contentUrl;

                    // Add this as a webcontent hit
                    returnValue.Add_WebContent_Hit(contentUrl);
                    continue;
                }

                // Split the (lower-cased) query string into name/value pairs, skipping any
                // pair that has an empty name or an empty value
                NameValueCollection queryStringCollection = new NameValueCollection();
                foreach (string thisSplit in hit.Query_String.ToLower().Split('&'))
                {
                    int equals_index = thisSplit.IndexOf('=');
                    if ((equals_index <= 0) || (equals_index >= thisSplit.Length - 1))
                    {
                        continue;
                    }

                    string query_name = thisSplit.Substring(0, equals_index);
                    string query_value = thisSplit.Substring(equals_index + 1);
                    queryStringCollection[query_name] = query_value;

                    // A portal argument replaces the URL handed to the query analyzer
                    if (String.Equals(query_name, "portal", StringComparison.Ordinal))
                    {
                        hit.SobekCM_URL = query_value;
                    }
                }

                // Now, get the navigation object using the standard SobekCM method
                try
                {
                    Navigation_Object currentMode = new Navigation_Object();
                    QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL,
                        new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases,
                        Engine_ApplicationCache_Gateway.Items, Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy, null);

                    currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);

                    // Robots are only counted as robot hits
                    if (currentMode.Is_Robot)
                    {
                        returnValue.Add_Robot_Hit();
                        continue;
                    }

                    // Always increment the hits
                    returnValue.Increment_Hits();

                    // Add this IP hit
                    returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

                    // Increment the portal hits
                    returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

                    // Continue the existing session for this IP if its last hit was less than
                    // 60 minutes ago; otherwise start (and count) a new session
                    SobekCM_Session thisSession;
                    if ((sessions.TryGetValue(hit.IP, out thisSession)) &&
                        (hit.Time.Subtract(thisSession.Last_Hit).TotalMinutes < 60))
                    {
                        thisSession.Last_Hit = hit.Time;
                    }
                    else
                    {
                        thisSession = new SobekCM_Session(hit.IP, hit.Time);
                        sessions[hit.IP] = thisSession;

                        returnValue.Increment_Sessions();
                    }

                    if ((currentMode.Mode == Display_Mode_Enum.Item_Display) ||
                        (currentMode.Mode == Display_Mode_Enum.Item_Print))
                    {
                        bool hasPositiveItemId = (currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0);
                        bool hasBibAndVid = (!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID));

                        if (hasPositiveItemId || hasBibAndVid)
                        {
                            // Without a usable item id, try to resolve it from the BibID:VID lookup
                            if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
                            {
                                string bibVidKey = currentMode.BibID + ":" + currentMode.VID;
                                if (bib_vid_itemid_dictionary.ContainsKey(bibVidKey))
                                {
                                    currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[bibVidKey];
                                }
                            }

                            // -1 is passed along when the item id is still unknown
                            int itemid = -1;
                            if (currentMode.ItemID_DEPRECATED.HasValue)
                            {
                                itemid = currentMode.ItemID_DEPRECATED.Value;
                            }

                            returnValue.Add_Item_Hit(itemid, currentMode.BibID,
                                                     currentMode.VID, currentMode.ViewerCode,
                                                     currentMode.Text_Search, thisSession.SessionID);
                        }
                        else if (!String.IsNullOrEmpty(currentMode.BibID))
                        {
                            returnValue.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
                        }
                    }
                    else
                    {
                        // An aggregation code starting with 'i' (any case) is counted as an
                        // institution hit; every other non-empty code is a collection hit
                        string aggregation = currentMode.Aggregation;
                        if (!String.IsNullOrEmpty(aggregation))
                        {
                            if (Char.ToUpperInvariant(aggregation[0]) == 'I')
                            {
                                returnValue.Add_Institution_Hit(aggregation.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                            }
                            else
                            {
                                returnValue.Add_Collection_Hit(aggregation.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
                            }
                        }

                        // Is this a static "webcontent" top-level page?
                        if ((currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS) &&
                            (currentMode.Info_Browse_Mode != "unknown") &&
                            (currentMode.Info_Browse_Mode != "default"))
                        {
                            returnValue.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
                        }

                        // Add the writer type, if not normal HTML stuff
                        switch (currentMode.Writer_Type)
                        {
                            case Writer_Type_Enum.DataSet:
                            case Writer_Type_Enum.XML:
                                returnValue.Add_XML_Hit();
                                break;

                            case Writer_Type_Enum.OAI:
                                returnValue.Add_OAI_Hit();
                                break;

                            case Writer_Type_Enum.JSON:
                                returnValue.Add_JSON_Hit();
                                break;
                        }
                    }
                }
                catch (Exception ee)
                {
                    // NOTE(review): any exception carrying a message abandons the whole log and
                    // returns null; only an exception with an empty message lets the loop carry on.
                    // Kept as-is because callers may depend on the null result - confirm the intent.
                    if (ee.Message.Length > 0)
                    {
                        return null;
                    }
                }
            }

            return returnValue;
        }