Example #1
0
        /*--------------------------------------------------------------------------
         * Functions for the CITATIONS Table.
         * Table Format:
         *     key: ArticleGuid or other unique string ID
         *     value: "ArticleCreateTimestamp|citation1|citation2|..."
         */

        /// <summary>
        /// Builds the CITATIONS table record for a newly added Article. Intended to be called by
        /// the Crawler every time it adds a new Article to the database, so that the CITATIONS
        /// table is populated for that Article at the same time.
        /// </summary>
        /// <remarks>
        /// Only the record string is assembled here; the database write itself is still a
        /// placeholder at the end of the method.
        /// </remarks>
        /// <param name="rankingType">Selects how the record value is built.</param>
        /// <param name="verticalId">The Vertical the Article belongs to (not read by the current body).</param>
        /// <param name="articleId">Uniquely identifies the Article within the Vertical; used as the record key.</param>
        /// <param name="articleCreationTime">Creation timestamp of the Article.</param>
        /// <param name="articleUrl">URL of the Article.</param>
        /// <param name="articleText">Text of the Article.</param>
        public static void WriteArticleRankingDataToDb(RankingType_T rankingType,
                                                       Grouping verticalId,
                                                       string articleId, // Uniquely identifies the Article within the Vertical
                                                       DateTimeOffset articleCreationTime,
                                                       string articleUrl,
                                                       string articleText)
        {
            string key = articleId;
            string value;

            if (rankingType != RankingType_T.Popularity)
            {
                // Every other ranking type stores the raw fields, separator-delimited:
                // timestamp, Article URL, Article text.
                value = new StringBuilder(articleCreationTime.ToString())
                        .Append(RankingDataProcessor.Separator).Append(articleUrl)
                        .Append(RankingDataProcessor.Separator).Append(articleText)
                        .ToString();
            }
            else
            {
                // Popularity ranking delegates the record layout to the data processor.
                value = RankingDataProcessor.GetCitationsDbString(articleCreationTime,
                                                                  articleUrl,
                                                                  articleText);
            }

            // Nothing to store for an empty record.
            if (string.IsNullOrEmpty(value))
            {
                return;
            }

            // Write <key, value> to database CITATIONS table for Vertical
        }
        /// <summary>
        /// Loads CITATIONS table records into the Article URL index and the citation model.
        /// Every record with a usable URL is indexed; only Articles whose timestamp is at or
        /// after <paramref name="to_time"/> minus <c>Ranker.RankingHoursConsidered</c> hours
        /// contribute their citations to the model.
        /// </summary>
        /// <param name="rankingType">Ranking type, passed through to the record parser.</param>
        /// <param name="dbStr">CITATIONS records: key is the Article ID, value is the stored record string.</param>
        /// <param name="to_time">End of the time window considered for the citation model.</param>
        /// <exception cref="OperationCanceledException">
        /// Cancellation was requested on the token held by <c>_PrlOpt</c>.
        /// </exception>
        public void LoadCitationsModel(RankingType_T rankingType,
                                       Dictionary <string, string> dbStr,
                                       DateTimeOffset to_time)
        {
            DateTimeOffset modelTimeLimit = to_time.AddHours(-1 * Ranker.RankingHoursConsidered);

            foreach (KeyValuePair <string, string> rec in dbStr)
            {
                _PrlOpt.CancellationToken.ThrowIfCancellationRequested();

                KeyValuePair <DateTimeOffset, List <string> > cit =
                    RankingDataProcessor.GetCitationsFromDbString(rankingType, rec.Value);

                // Entry 0 of the parsed list is the title Article's own URL. A record without
                // it can be neither indexed nor mined for citations, so skip it instead of
                // letting an out-of-range access abort the whole load.
                if (cit.Value == null || cit.Value.Count == 0)
                {
                    Log.Warn("Ranker init(): No URL in CITATIONS record for Article: {0} (ranking type: {1})",
                             rec.Key,
                             rankingType.ToString());
                    continue;
                }

                // Add the title Article's URL to the Index
                string articleUrl  = cit.Value[0];
                string matchingUrl = RankingDataProcessor.GetMatchingUrl(articleUrl);

                ArtcileIndexInfo idxval = new ArtcileIndexInfo();
                idxval.ArticleId       = rec.Key;
                idxval.ArtcileDatetime = cit.Key;

                try
                {
                    if (!string.IsNullOrEmpty(matchingUrl))
                    {
                        ArticleUrlIdIndex[matchingUrl] = idxval;
                    }
                }
                catch (Exception)    // Best effort: one Article failing to index must not stop the load
                {
                    Log.Warn("Ranker init(): Error adding Article for matching URL [{0}] to Index for Article: {1}",
                             matchingUrl,
                             rec.Key);
                }

                // Build the citation model from Articles inside the time window only.
                if (idxval.ArtcileDatetime >= modelTimeLimit)
                {
                    try
                    {
                        AddCitationsInArticle(cit.Key, cit.Value);
                    }
                    catch (OperationCanceledException)
                    {
                        // Cancellation is not a per-Article failure; let it reach the caller.
                        throw;
                    }
                    catch (Exception)
                    {
                        // Log the Article's URL (previously this logged the first character of
                        // the raw record string).
                        Log.Warn("Adding Citations in Article with ID [{0}] failed, URL: {1}",
                                 rec.Key, articleUrl);
                    }
                }
            }
        }
        /// <summary>
        /// Registers every citation found in an Article with the Citations Model.
        /// </summary>
        /// <param name="timestamp">Timestamp of the parent Article; recorded with each citation.</param>
        /// <param name="citations">
        /// URL list for the Article. Element 0 is the parent Article's own URL and is skipped;
        /// the remaining elements are its citations.
        /// </param>
        private void AddCitationsInArticle(DateTimeOffset timestamp, List <string> citations)
        {
            // Start at 1: position 0 holds the parent Article's URL, not a citation.
            for (int pos = 1; pos < citations.Count; pos++)
            {
                _PrlOpt.CancellationToken.ThrowIfCancellationRequested();

                string citedUrl = citations[pos];
                string urlKey   = RankingDataProcessor.GetMatchingUrl(citedUrl);

                // Citations that yield no matching URL are ignored.
                if (string.IsNullOrEmpty(urlKey))
                {
                    continue;
                }

                try
                {
                    AddCitation(urlKey, timestamp);
                }
                catch (Exception)
                {
                    // A single failing citation is logged and does not stop the rest.
                    Log.Warn("AddCitation() failed at timestamp[{0}] for URL [{1}]",
                             timestamp.ToString("yyyy/MM/dd HH:mm:ss"), citedUrl);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Parses a value string from the CITATIONS table into the Article's timestamp and its
        /// list of URLs.
        /// </summary>
        /// <param name="rankingType">
        /// Selects the record layout. For <c>Popularity</c>, every field after the timestamp is
        /// taken as a URL. For <c>PopularitySimilarity</c>, the record is timestamp, Article URL
        /// and raw Article text, and the citations are extracted from that text. Any other value
        /// yields an empty URL list.
        /// </param>
        /// <param name="dbStr">The database value string from the CITATIONS table.</param>
        /// <returns>
        /// The Article timestamp paired with its URL list. When present, the first entry is the
        /// title Article's own URL and the remaining entries are the URLs it cited. Fields
        /// missing from a short record are simply absent from the list.
        /// </returns>
        public static KeyValuePair <DateTimeOffset, List <string> > GetCitationsFromDbString(
            RankingType_T rankingType,
            string dbStr)
        {
            char[] delimiters = { Separator };
            bool   hasRawText = (rankingType == RankingType_T.PopularitySimilarity);

            // A raw-text record has exactly three fields. Capping the split at three keeps any
            // separator characters that occur inside the Article text as part of the text,
            // instead of cutting the text off at the first one.
            string[] parts = hasRawText ? dbStr.Split(delimiters, 3) : dbStr.Split(delimiters);

            // NOTE(review): parsing uses the current culture, mirroring how the timestamp is
            // written with ToString() — confirm writer and reader always run under the same culture.
            DateTimeOffset dt = DateTimeOffset.Parse(parts[0]);

            List <string> urls = new List <string>();

            if (rankingType == RankingType_T.Popularity)
            {
                // Everything after the timestamp is already a URL.
                for (int i = 1; i < parts.Length; ++i)
                {
                    urls.Add(parts[i]);
                }
            }
            else if (hasRawText)
            {
                // Tolerate short records: take whatever fields are actually there.
                if (parts.Length > 1)
                {
                    urls.Add(parts[1]); // this is the title Article's URL
                }

                if (parts.Length > 2)
                {
                    // get citations from raw text
                    List <string> citations = RankingDataProcessor.GetCitations(parts[2], true);
                    if (citations != null)
                    {
                        urls.AddRange(citations);
                    }
                }
            }

            return new KeyValuePair <DateTimeOffset, List <string> >(dt, urls);
        }