/*--------------------------------------------------------------------------
 * Functions for the CITATIONS Table.
 * Table Format:
 *   key:   ArticleGuid or other unique string ID
 *   value: "ArticleCreateTimestamp|citation1|citation2|..."
 */

/// <summary>
/// Entry point the Crawler calls every time it adds a new Article to the database,
/// so that the CITATIONS table record for that Article is produced at the same time.
/// </summary>
/// <param name="rankingType">
/// Selects the stored value layout: for <c>Popularity</c> the value comes from
/// <c>RankingDataProcessor.GetCitationsDbString</c>; for any other type it is the
/// creation time, the URL and the raw text joined by <c>RankingDataProcessor.Separator</c>.
/// </param>
/// <param name="verticalId">Vertical the Article belongs to (not read by the current body).</param>
/// <param name="articleId">Uniquely identifies the Article within the Vertical; used as the record key.</param>
/// <param name="articleCreationTime">Creation time of the Article.</param>
/// <param name="articleUrl">URL of the Article.</param>
/// <param name="articleText">Raw text of the Article.</param>
public static void WriteArticleRankingDataToDb(RankingType_T rankingType,
                                               Grouping verticalId,
                                               string articleId,
                                               DateTimeOffset articleCreationTime,
                                               string articleUrl,
                                               string articleText)
{
    string dbKey = articleId;
    string dbValue;

    if (rankingType != RankingType_T.Popularity)
    {
        // Non-popularity rankings keep the raw text: "<timestamp><sep><url><sep><text>".
        dbValue = new StringBuilder()
                  .Append(articleCreationTime.ToString())
                  .Append(RankingDataProcessor.Separator)
                  .Append(articleUrl)
                  .Append(RankingDataProcessor.Separator)
                  .Append(articleText)
                  .ToString();
    }
    else
    {
        dbValue = RankingDataProcessor.GetCitationsDbString(articleCreationTime, articleUrl, articleText);
    }

    // Nothing to persist when no value could be produced.
    if (string.IsNullOrEmpty(dbValue))
    {
        return;
    }

    // Write <dbKey, dbValue> to database CITATIONS table for Vertical
}
/// <summary>
/// Walks the raw CITATIONS records, registers each Article under its matching URL in
/// <c>ArticleUrlIdIndex</c>, and adds the citations of sufficiently recent Articles
/// to the citation model.
/// </summary>
/// <param name="rankingType">Ranking type; passed to the parser to select the record layout.</param>
/// <param name="dbStr">Records keyed by Article ID; each value is the raw database string.</param>
/// <param name="to_time">
/// End of the ranking window. Articles dated earlier than
/// <c>Ranker.RankingHoursConsidered</c> hours before this time are indexed but their
/// citations are not added to the model.
/// </param>
/// <exception cref="OperationCanceledException">
/// Thrown between records when cancellation has been requested on <c>_PrlOpt.CancellationToken</c>.
/// </exception>
public void LoadCitationsModel(RankingType_T rankingType, Dictionary<string, string> dbStr, DateTimeOffset to_time)
{
    DateTimeOffset modelTimeLimit = to_time.AddHours(-1 * Ranker.RankingHoursConsidered);

    foreach (KeyValuePair<string, string> rec in dbStr)
    {
        _PrlOpt.CancellationToken.ThrowIfCancellationRequested();

        KeyValuePair<DateTimeOffset, List<string>> cit =
            RankingDataProcessor.GetCitationsFromDbString(rankingType, rec.Value);

        // The first parsed entry is the title Article's own URL; add it to the Index.
        string articleUrl  = cit.Value[0];
        string matchingUrl = RankingDataProcessor.GetMatchingUrl(articleUrl);

        ArtcileIndexInfo idxval = new ArtcileIndexInfo();
        idxval.ArticleId       = rec.Key;
        idxval.ArtcileDatetime = cit.Key;

        try
        {
            if (!string.IsNullOrEmpty(matchingUrl))
            {
                ArticleUrlIdIndex[matchingUrl] = idxval;
            }
        }
        catch (Exception) // This Article is already in Index
        {
            Log.Warn("Ranker init(): Error adding Article for matching URL [{0}] to Index for Article: {1}",
                     matchingUrl, rec.Key);
        }

        // Build the citation model from Articles inside the ranking window only.
        if (idxval.ArtcileDatetime >= modelTimeLimit)
        {
            try
            {
                AddCitationsInArticle(cit.Key, cit.Value);
            }
            catch (Exception)
            {
                // Best effort: one failing Article must not abort the whole load.
                // Log the Article's URL (previously this logged rec.Value[0], i.e. the
                // first character of the raw database string).
                Log.Warn("Adding Citations in Article with ID [{0}] failed, URL: {1}",
                         rec.Key, articleUrl);
            }
        }
    }
}
/// <summary>
/// Called by the Ranker to read the citations it needs to populate the Citations
/// Model and perform the ranking.
/// </summary>
/// <param name="rankingType">Ranking type the data is read for.</param>
/// <param name="verticalId">Vertical whose CITATIONS table is to be read.</param>
/// <param name="fromTime">Start of the period to read.</param>
/// <param name="toTime">End of the period to read.</param>
/// <returns>
/// A dictionary of (Article ID string, database value string) pairs. The database read
/// is not implemented in this body yet, so the dictionary is currently always empty.
/// </returns>
public static Dictionary<string, string> ReadRankingDataFromDb(RankingType_T rankingType,
                                                               Grouping verticalId,
                                                               DateTimeOffset fromTime,
                                                               DateTimeOffset toTime)
{
    Dictionary<string, string> rankingData = new Dictionary<string, string>();

    // Extract data from the citations table during the period fromTime - toTime and
    // store it as (<Article ID string>, <DB read value string>) pairs in the
    // dictionary for this Vertical.

    return rankingData;
}
/// <summary>
/// Resolves the ranking type configured for a Vertical from the application settings.
/// </summary>
/// <param name="vertical">Vertical whose ranking type is requested.</param>
/// <returns>
/// <c>PopularitySimilarity</c> when the Vertical's setting equals "popularitysimilarity"
/// (case-insensitive); <c>Popularity</c> in every other case, including Verticals with no
/// setting key, a missing setting, or a configuration read failure.
/// </returns>
public static RankingType_T GetRankingType(Grouping vertical)
{
    string key;

    // NOTE(review): the key spellings ("Fasion...", "...Tbsr...") are kept exactly as they
    // were because they must match the deployed configuration files - confirm before renaming.
    switch (vertical)
    {
    case Grouping.FashionMen:
        key = "FasionMenRankingType";
        break;

    case Grouping.FashionWomen:
        key = "FasionWomenRankingType";
        break;

    case Grouping.HipHop:
        key = "FasionTbsrRankingType";
        break;

    default:
        // No per-vertical setting exists for this Vertical.
        return RankingType_T.Popularity;
    }

    try
    {
        // Read the setting for THIS vertical. The previous code computed the key but then
        // read the unrelated "RankerRunPeriodMins" setting, so the configured ranking type
        // was never honoured.
        string value = ConfigurationManager.AppSettings[key];

        // Ordinal, case-insensitive comparison: safe for a missing (null) setting and
        // independent of the current culture, unlike ToLower().
        if (string.Equals(value, "popularitysimilarity", StringComparison.OrdinalIgnoreCase))
        {
            return RankingType_T.PopularitySimilarity;
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: an unreadable configuration falls back to the default.
    }

    return RankingType_T.Popularity;
}
/// <summary>
/// Builds the initial Citation Model and the Index, then computes the time-weighted
/// citation popularity.
/// </summary>
/// <param name="verticalId">Vertical this Ranker instance works on.</param>
/// <param name="rankingType">Ranking type used to interpret the ranking data.</param>
/// <param name="rankingData">Raw records (Article ID to database string) to load into the model.</param>
/// <param name="cancellationToken">
/// Source whose token is stored in the parallel options used by this instance.
/// </param>
/// <remarks>
/// Failures in either step are logged and rethrown unchanged to the caller.
/// </remarks>
public void Init(Grouping verticalId, RankingType_T rankingType, Dictionary<string, string> rankingData,
                 CancellationTokenSource cancellationToken)
{
    _verticalId = verticalId;
    _cToken     = cancellationToken;
    _PrlOpt     = new ParallelOptions()
    {
        MaxDegreeOfParallelism = System.Environment.ProcessorCount,
        CancellationToken      = cancellationToken.Token
    };

    // End of the timeframe considered for the model.
    DateTimeOffset toTime = DateTimeOffset.Now;

    // Build the initial citations model
    try
    {
        LoadCitationsModel(rankingType, rankingData, toTime);
    }
    catch (Exception)
    {
        Log.Error("Ranker Init(): LoadCitationsModel() failed.");
        // "throw;" (not "throw ex;") keeps the original stack trace.
        throw;
    }
    Log.Info("Initial Citation Model build completed.");

    try
    {
        SetTimeWeightedCitationPopularity();
    }
    catch (Exception)
    {
        Log.Error("Citation Popularity Calculation failed in Init(). ");
        throw;
    }
    Log.Info("Citation Polularity scores set.");
}
/// <summary>
/// Extracts the Article timestamp and raw text from a database string laid out as
/// "timestamp, URL, text" joined by <c>Separator</c>.
/// </summary>
/// <param name="rankingType">Must be <c>PopularitySimilarity</c>; other types carry no raw text.</param>
/// <param name="dbStr">The database value string.</param>
/// <returns>
/// The (timestamp, raw text) pair, or a default <c>KeyValuePair</c> (after logging an error)
/// when <paramref name="rankingType"/> is not <c>PopularitySimilarity</c>.
/// </returns>
/// <exception cref="FormatException">The first field is not a parsable date/time.</exception>
/// <exception cref="IndexOutOfRangeException">The string has fewer than three fields.</exception>
public static KeyValuePair<DateTimeOffset, string> GetSimilarityDataFromDbString(
    RankingType_T rankingType, string dbStr)
{
    if (rankingType != RankingType_T.PopularitySimilarity)
    {
        Ranker.Log.Error("[CitationProcessor.GetSimilarityDataFromDbString() failed. " +
                         " RankingType does not include Similarity Ranking.");
        return new KeyValuePair<DateTimeOffset, string>();
    }

    char[] delimiters = { Separator };

    // Split into at most three fields so that separator characters occurring inside the
    // article text stay part of the text instead of truncating it.
    string[] parts = dbStr.Split(delimiters, 3);

    DateTimeOffset dt = DateTimeOffset.Parse(parts[0]);

    return new KeyValuePair<DateTimeOffset, string>(dt, parts[2]);
}
/// <summary>
/// Converts the database string stored in the CITATIONS table into the Article's
/// timestamp and its list of URLs.
/// </summary>
/// <param name="rankingType">
/// Selects the layout of <paramref name="dbStr"/>: for <c>Popularity</c> every field after
/// the timestamp is returned as a URL; for <c>PopularitySimilarity</c> the second field is
/// the title Article's URL and the third is raw text from which citations are extracted.
/// </param>
/// <param name="dbStr">The database value string from the CITATIONS table.</param>
/// <returns>
/// The timestamp paired with the URL list. The list is empty for any other ranking type.
/// </returns>
/// <exception cref="FormatException">The first field is not a parsable date/time.</exception>
/// <exception cref="IndexOutOfRangeException">
/// A <c>PopularitySimilarity</c> string has fewer than three fields.
/// </exception>
public static KeyValuePair<DateTimeOffset, List<string>> GetCitationsFromDbString(
    RankingType_T rankingType, string dbStr)
{
    char[] delimiters = { Separator };
    bool   hasRawText = rankingType == RankingType_T.PopularitySimilarity;

    // The similarity layout ends with free text that may itself contain the separator,
    // so split it into at most three fields to keep that text intact. The popularity
    // layout is a plain list and is split completely.
    string[] parts = hasRawText ? dbStr.Split(delimiters, 3) : dbStr.Split(delimiters);

    DateTimeOffset dt   = DateTimeOffset.Parse(parts[0]);
    List<string>   urls = new List<string>();

    if (rankingType == RankingType_T.Popularity)
    {
        for (int i = 1; i < parts.Length; ++i)
        {
            urls.Add(parts[i]);
        }
    }
    else if (hasRawText)
    {
        urls.Add(parts[1]); // this is the title Article's URL

        // get citations from raw text
        List<string> citations = RankingDataProcessor.GetCitations(parts[2], true);
        foreach (string citation in citations)
        {
            urls.Add(citation);
        }
    }

    return new KeyValuePair<DateTimeOffset, List<string>>(dt, urls);
}
/// <summary>
/// Creates a Ranker by storing the given Vertical and ranking type in
/// <c>VerticalId</c> and <c>RankingType</c>. Declared without an access modifier.
/// </summary>
/// <param name="verticalId">Vertical assigned to <c>VerticalId</c>.</param>
/// <param name="rankingType">Ranking type assigned to <c>RankingType</c>.</param>
Ranker(Grouping verticalId, RankingType_T rankingType) { VerticalId = verticalId; RankingType = rankingType; }