Example #1
0
        /*--------------------------------------------------------------------------
         * Functions for the CITATIONS Table.
         * Table Format:
         *     key: ArticleGuid or other unique string ID
         *     value: "ArticleCreateTimestamp|citation1|citation2|..."
         */

        /// <summary>
        /// Builds the CITATIONS-table record for a newly added Article. Meant to be called
        /// by the Crawler every time it adds an Article to the database, so that the
        /// CITATIONS table is populated for that Article at the same time.
        /// </summary>
        /// <remarks>
        /// The actual database write is still a placeholder: the method only computes the
        /// key/value pair that would be stored.
        /// </remarks>
        /// <param name="rankingType">Selects the layout of the stored value.</param>
        /// <param name="verticalId">Vertical the Article belongs to (not read by the current body).</param>
        /// <param name="articleId">Uniquely identifies the Article within the Vertical; used as the record key.</param>
        /// <param name="articleCreationTime">Creation time of the Article; first field of the stored value.</param>
        /// <param name="articleUrl">URL of the Article.</param>
        /// <param name="articleText">Text of the Article.</param>
        public static void WriteArticleRankingDataToDb(RankingType_T rankingType,
                                                       Grouping verticalId,
                                                       string articleId,
                                                       DateTimeOffset articleCreationTime,
                                                       string articleUrl,
                                                       string articleText)
        {
            string recordKey = articleId;
            string recordValue;

            if (rankingType != RankingType_T.Popularity)
            {
                // Every other ranking type keeps the raw inputs:
                // "<creation time><separator><url><separator><text>"
                recordValue = new StringBuilder(articleCreationTime.ToString())
                              .Append(RankingDataProcessor.Separator)
                              .Append(articleUrl)
                              .Append(RankingDataProcessor.Separator)
                              .Append(articleText)
                              .ToString();
            }
            else
            {
                // Popularity ranking delegates the value layout to the data processor.
                recordValue = RankingDataProcessor.GetCitationsDbString(articleCreationTime,
                                                                        articleUrl,
                                                                        articleText);
            }

            if (string.IsNullOrEmpty(recordValue))
            {
                // Nothing to store for this Article.
                return;
            }

            // Write <recordKey, recordValue> to database CITATIONS table for Vertical
        }
        /// <summary>
        /// Loads raw CITATIONS records into the Article URL index and the citation model.
        /// </summary>
        /// <param name="rankingType">Layout of the stored values; forwarded to
        /// <c>RankingDataProcessor.GetCitationsFromDbString</c>.</param>
        /// <param name="dbStr">Raw records: Article ID mapped to its stored DB value string.</param>
        /// <param name="to_time">Reference time. Only Articles dated at or after
        /// <c>to_time</c> minus <c>Ranker.RankingHoursConsidered</c> hours are added to the
        /// citation model; every record with a non-empty matching URL is still indexed.</param>
        /// <exception cref="OperationCanceledException">Cancellation was requested on the
        /// token held by <c>_PrlOpt</c>.</exception>
        public void LoadCitationsModel(RankingType_T rankingType,
                                       Dictionary <string, string> dbStr,
                                       DateTimeOffset to_time)
        {
            DateTimeOffset modelTimeLimit = to_time.AddHours(-1 * Ranker.RankingHoursConsidered);

            foreach (KeyValuePair <string, string> rec in dbStr)
            {
                _PrlOpt.CancellationToken.ThrowIfCancellationRequested();

                KeyValuePair <DateTimeOffset, List <string> > cit =
                    RankingDataProcessor.GetCitationsFromDbString(rankingType, rec.Value);

                // A record without any URL has no title Article to index and no citations
                // to add; skip it instead of failing the whole load on cit.Value[0].
                if (cit.Value == null || cit.Value.Count == 0)
                {
                    Log.Warn("Ranker init(): No URLs found in ranking data for Article: {0}",
                             rec.Key);
                    continue;
                }

                // The first entry is the title Article's own URL.
                string titleUrl = cit.Value[0];

                // Add the title Article's URL to the Index
                string matchingUrl = RankingDataProcessor.GetMatchingUrl(titleUrl);

                ArtcileIndexInfo idxval = new ArtcileIndexInfo();
                idxval.ArticleId       = rec.Key;
                idxval.ArtcileDatetime = cit.Key;

                try
                {
                    if (!string.IsNullOrEmpty(matchingUrl))
                    {
                        ArticleUrlIdIndex[matchingUrl] = idxval;
                    }
                }
                catch (Exception)
                {
                    // NOTE(review): the original comment said this fires when the Article is
                    // already in the Index; whether the indexer can throw at all depends on
                    // the type of ArticleUrlIdIndex - confirm.
                    Log.Warn("Ranker init(): Error adding Article for matching URL [{0}] to Index for Article: {1}",
                             matchingUrl,
                             rec.Key);
                }

                // Build the citation model from Articles inside the considered time window.
                if (idxval.ArtcileDatetime >= modelTimeLimit)
                {
                    try
                    {
                        AddCitationsInArticle(cit.Key, cit.Value);
                    }
                    catch (Exception)
                    {
                        // Best effort: one bad Article must not abort the whole model build.
                        // Log the title URL (previously the first character of the raw DB
                        // string was logged by mistake).
                        Log.Warn("Adding Citations in Article with ID [{0}] failed, URL: {1}",
                                 rec.Key, titleUrl);
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Called by the Ranker to read the citation records it needs to populate the
        /// Citations Model and do the ranking.
        /// </summary>
        /// <remarks>
        /// Reading is not implemented yet: the method currently always returns an empty
        /// dictionary and none of the parameters are used.
        /// </remarks>
        /// <param name="rankingType">Ranking type of the Vertical.</param>
        /// <param name="verticalId">Vertical whose citations are requested.</param>
        /// <param name="fromTime">Start of the requested period.</param>
        /// <param name="toTime">End of the requested period.</param>
        /// <returns>Pairs of (Article ID string, DB read value string) for the Vertical.</returns>
        public static Dictionary <string, string> ReadRankingDataFromDb(RankingType_T rankingType,
                                                                        Grouping verticalId,
                                                                        DateTimeOffset fromTime,
                                                                        DateTimeOffset toTime)
        {
            // Placeholder: extract the data from the citations table for the period
            // fromTime - toTime and store each row as an
            // (<Article ID string>, <DB read value string>) pair for this Vertical.
            Dictionary <string, string> result = new Dictionary <string, string>();

            return result;
        }
Example #4
0
        /// <summary>
        /// Returns the ranking type configured for a vertical in the application settings.
        /// </summary>
        /// <param name="vertical">Vertical whose ranking type is requested.</param>
        /// <returns>
        /// <c>RankingType_T.PopularitySimilarity</c> when the vertical's setting equals
        /// "popularitysimilarity" (case-insensitive); <c>RankingType_T.Popularity</c> in
        /// every other case, including verticals without a setting key, a missing setting,
        /// or a configuration read failure.
        /// </returns>
        public static RankingType_T GetRankingType(Grouping vertical)
        {
            string key;

            // Setting names are kept exactly as spelled in existing configuration files.
            switch (vertical)
            {
            case Grouping.FashionMen:
                key = "FasionMenRankingType";
                break;

            case Grouping.FashionWomen:
                key = "FasionWomenRankingType";
                break;

            case Grouping.HipHop:
                key = "FasionTbsrRankingType";
                break;

            default:
                // No per-vertical setting exists for this vertical.
                return(RankingType_T.Popularity);
            }

            try
            {
                // Read the vertical's own setting. The previous code computed the key but
                // then read the unrelated "RankerRunPeriodMins" setting instead.
                string value = ConfigurationManager.AppSettings[key];

                // Ordinal, case-insensitive comparison: null-safe and culture-independent.
                if (string.Equals(value, "popularitysimilarity", StringComparison.OrdinalIgnoreCase))
                {
                    return(RankingType_T.PopularitySimilarity);
                }
            }
            catch (Exception)
            {
                // Configuration could not be read; fall back to the default below.
            }

            return(RankingType_T.Popularity);
        }
        /// <summary>
        /// Builds the initial Citation Model and the Index.
        /// </summary>
        /// <param name="verticalId">Vertical this instance ranks; stored in <c>_verticalId</c>.</param>
        /// <param name="rankingType">Layout of the records in <paramref name="rankingData"/>.</param>
        /// <param name="rankingData">Raw CITATIONS records keyed by Article ID.</param>
        /// <param name="cancellationToken">Source whose token is used for the parallel
        /// options; also stored in <c>_cToken</c>.</param>
        /// <remarks>
        /// Any exception from loading the model or from the popularity calculation is logged
        /// and rethrown unchanged.
        /// </remarks>
        public void Init(Grouping verticalId,
                         RankingType_T rankingType,
                         Dictionary <string, string> rankingData,
                         CancellationTokenSource cancellationToken)
        {
            _verticalId = verticalId;

            _cToken = cancellationToken;
            _PrlOpt = new ParallelOptions()
            {
                MaxDegreeOfParallelism = System.Environment.ProcessorCount,
                CancellationToken      = cancellationToken.Token
            };

            // Reference time for the model window; LoadCitationsModel derives the lower
            // bound from it.
            DateTimeOffset toTime = DateTimeOffset.Now;

            // Build the initial citations model
            try
            {
                LoadCitationsModel(rankingType, rankingData, toTime);
            }
            catch (Exception)
            {
                Log.Error("Ranker Init(): LoadCitationsModel() failed.");
                throw;    // rethrow without resetting the stack trace
            }
            Log.Info("Initial Citation Model build completed.");

            try
            {
                SetTimeWeightedCitationPopularity();
            }
            catch (Exception)
            {
                Log.Error("Citation Popularity Calculation failed in Init(). ");
                throw;    // rethrow without resetting the stack trace
            }

            Log.Info("Citation Polularity scores set.");
        }
Example #6
0
        /// <summary>
        /// Extracts the Article creation time and the Article text from a stored DB value
        /// of the form "&lt;timestamp&gt;&lt;Separator&gt;&lt;url&gt;&lt;Separator&gt;&lt;text&gt;".
        /// </summary>
        /// <param name="rankingType">Must be <c>RankingType_T.PopularitySimilarity</c>;
        /// for any other value an error is logged and a default pair is returned.</param>
        /// <param name="dbStr">The stored DB value string.</param>
        /// <returns>The parsed timestamp paired with the third field (the text).</returns>
        /// <exception cref="FormatException"><paramref name="dbStr"/> has fewer than three
        /// fields, or its first field is not a valid date/time.</exception>
        public static KeyValuePair <DateTimeOffset, string> GetSimilarityDataFromDbString(
            RankingType_T rankingType,
            string dbStr)
        {
            if (rankingType != RankingType_T.PopularitySimilarity)
            {
                Ranker.Log.Error("[CitationProcessor.GetSimilarityDataFromDbString() failed. "
                                 + " RankingType does not include Similarity Ranking.");
                return(new KeyValuePair <DateTimeOffset, string>());
            }

            char[] delimiters = { Separator };

            // Split into at most three fields so that separator characters occurring
            // inside the text stay part of the text instead of truncating it.
            string[] parts = dbStr.Split(delimiters, 3);

            if (parts.Length < 3)
            {
                throw new FormatException(
                          "Similarity ranking data must contain a timestamp, a URL and the text.");
            }

            DateTimeOffset dt = DateTimeOffset.Parse(parts[0]);

            return(new KeyValuePair <DateTimeOffset, string>(dt, parts[2]));
        }
Example #7
0
        /// <summary>
        /// Converts the database string stored in the CITATIONS table into the Article's
        /// creation time and its list of URLs.
        /// </summary>
        /// <param name="rankingType">Layout of <paramref name="dbStr"/>. For
        /// <c>Popularity</c> every field after the timestamp is taken as a URL. For
        /// <c>PopularitySimilarity</c> the second field is the title Article's URL and the
        /// third field is raw text from which the citations are extracted. For any other
        /// value the returned list is empty.</param>
        /// <param name="dbStr">The database value string from the CITATIONS table.</param>
        /// <returns>The timestamp parsed from the first field, paired with the URL list.</returns>
        /// <exception cref="FormatException">The first field is not a valid date/time, or a
        /// <c>PopularitySimilarity</c> record has fewer than three fields.</exception>
        public static KeyValuePair <DateTimeOffset, List <string> > GetCitationsFromDbString(
            RankingType_T rankingType,
            string dbStr)
        {
            char[] delimiters = { Separator };
            bool   hasRawText = (rankingType == RankingType_T.PopularitySimilarity);

            // When the third field is raw text, split into at most three fields so that
            // separator characters inside the text do not cut it short.
            string[] parts = hasRawText ? dbStr.Split(delimiters, 3)
                                        : dbStr.Split(delimiters);

            DateTimeOffset dt = DateTimeOffset.Parse(parts[0]);

            List <string> urls = new List <string>();

            if (rankingType == RankingType_T.Popularity)
            {
                for (int i = 1; i < parts.Length; ++i)
                {
                    urls.Add(parts[i]);
                }
            }
            else if (hasRawText)
            {
                if (parts.Length < 3)
                {
                    throw new FormatException(
                              "Similarity ranking data must contain a timestamp, a URL and the text.");
                }

                urls.Add(parts[1]); // this is the title Article's URL

                // get citations from raw text
                urls.AddRange(RankingDataProcessor.GetCitations(parts[2], true));
            }

            return(new KeyValuePair <DateTimeOffset, List <string> >(dt, urls));
        }
Example #8
0
 /// <summary>
 /// Creates a Ranker, recording the vertical it works on and the ranking type to use.
 /// Declared without an access modifier.
 /// </summary>
 /// <param name="verticalId">Value assigned to <c>VerticalId</c>.</param>
 /// <param name="rankingType">Value assigned to <c>RankingType</c>.</param>
 Ranker(Grouping verticalId, RankingType_T rankingType)
 {
     VerticalId  = verticalId;
     RankingType = rankingType;
 }