Ejemplo n.º 1
0
    /// <summary>
    ///   Queries the distinct author_channel_id values of YouTube comments (joined to video_latest) for which no row
    ///   exists in the user table.
    /// </summary>
    /// <param name="ctx">Collect context supplying the database connection and the YouTube configuration.</param>
    /// <param name="limit">Maximum number of ids to return. When null, <c>ctx.YtCfg.MaxMissingUsers</c> is used.</param>
    /// <returns>A task producing the author channel ids returned by the query.</returns>
    public static Task<IReadOnlyCollection<string>> MissingUsers(this YtCollectDbCtx ctx, int? limit = null)
    {
        limit ??= ctx.YtCfg.MaxMissingUsers;

        // The limit is an integer, so interpolating it into the SQL text cannot inject arbitrary SQL.
        // NOTE(review): Dot is a project helper; presumably it applies the lambda only when the value is
        // non-null, which would omit the limit clause when the configured maximum is also null - confirm.
        return ctx.Db.Query<string>("missing users", @$"
select distinct author_channel_id
from comment t
join video_latest v on v.video_id = t.video_id
where
 platform = 'YouTube'
 and not exists(select * from user where user_id = author_channel_id)
{limit.Dot(i => $" limit {i} ")}
");
    }
Ejemplo n.º 2
0
    public static async Task <ChannelUpdatePlan[]> DiscoverChannelsViaRecs(this YtCollectDbCtx ctx)
    {
        var toAdd = await ctx.Db.Query <(string channel_id, string channel_title, string source)>("channels to classify",
                                                                                                  @"with review_channels as (
  select channel_id
       , channel_title -- probably missing values. reviews without channels don't have titles
  from channel_review r
  where not exists(select * from channel_stage c where c.v:ChannelId::string=r.channel_id)
)
   , rec_channels as (
  select to_channel_id as channel_id, any_value(to_channel_title) as channel_title
  from rec r
  where to_channel_id is not null
    and not exists(select * from channel_stage c where c.v:ChannelId::string=r.to_channel_id)
  group by to_channel_id
)
   , s as (
  select channel_id, channel_title, 'review' as source
  from review_channels sample (:remaining rows)
  union all
  select channel_id, channel_title, 'rec' as source
  from rec_channels sample (:remaining rows)
)
select *
from s
limit :remaining", new { remaining = ctx.YtCfg.DiscoverChannels });

        ctx.Log.Debug("Collect - found {Channels} new channels for discovery", toAdd.Count);

        var toDiscover = toAdd
                         .Select(c => new ChannelUpdatePlan {
            Channel = new() {
                ChannelId    = c.channel_id,
                ChannelTitle = c.channel_title
            },
            ChannelUpdate = c.source == "review" ? ChannelUpdateType.Standard : ChannelUpdateType.Discover
        })
Ejemplo n.º 3
0
        public static Task <IReadOnlyCollection <string> > MissingUsers(this YtCollectDbCtx ctx) =>
        ctx.Db.Query <string>("missing users", @$ "
select distinct author_channel_id
Ejemplo n.º 4
0
        /// <summary>
        ///   Existing reviewed channels with information on the last updates to extra parts: for each selected channel,
        ///   the latest staged channel record (or a stub holding only the channel id) together with the most recent
        ///   video, caption, recommendation and comment update times found in the stage tables.
        /// </summary>
        /// <param name="ctx">Collect context supplying the database connection the query runs on.</param>
        /// <param name="chans">
        ///   Optional channel ids. Only used when <paramref name="channelSelect"/> is null: a non-empty collection
        ///   restricts the default selection to these ids; null or empty selects channels flagged meets_review_criteria.
        /// </param>
        /// <param name="channelSelect">
        ///   By default will return YouTube channels that meet review criteria. To override, specify a select
        ///   query that returns rows with a column named channel_id.
        /// </param>
        /// <returns>The update plans built from the rows returned by the query, materialized as an array.</returns>
        public static async Task<IReadOnlyCollection<ChannelUpdatePlan>> ChannelUpdateStats(this YtCollectDbCtx ctx,
            IReadOnlyCollection<string> chans = null, string channelSelect = null)
        {
            if (channelSelect is null)
            {
                // chans defaults to null, so check for null before looking at the count.
                var channelFilter = chans is null || chans.Count == 0
                    ? " meets_review_criteria "
                    : $" channel_id in ({SqlList(chans)}) ";

                channelSelect = @$"
select channel_id from channel_latest  
where platform = 'YouTube' and {channelFilter}";
            }

            // SECURITY NOTE(review): channelSelect is spliced into the SQL text verbatim by design, so it must only
            // ever come from trusted code. SqlList is assumed to quote/escape the ids it renders - confirm.
            // NOTE(review): channel_collection_days_back is joined on v:ChannelId, which is null for channels without
            // a staged row (the case the coalesce below handles); joining on r.channel_id may be what is intended.
            var channels = await ctx.Db.Query<(string j, long? daysBack,
                DateTime? lastVideoUpdate, DateTime? lastCaptionUpdate, DateTime? lastRecUpdate, DateTime? lastCommentUpdate)>(
                "channels - previous",
                $@"
with channels_raw as (
  select distinct channel_id from ({channelSelect})
  where channel_id is not null
)
, stage_latest as (
  select v
  from channel_stage -- query from stage because it can be deserialized without modification
  where exists(select * from channels_raw r where r.channel_id=v:ChannelId)
    qualify row_number() over (partition by v:ChannelId::string order by v:Updated::timestamp_ntz desc)=1
)
select coalesce(v, object_construct('ChannelId', r.channel_id)) channel_json
     , b.daily_update_days_back
     , (select max(v:Updated::timestamp_ntz) from video_stage where v:ChannelId=r.channel_id) last_video_update
     , (select max(v:Updated::timestamp_ntz) from caption_stage where v:ChannelId=r.channel_id) last_caption_update
     , (select max(v:Updated::timestamp_ntz) from rec_stage where v:FromChannelId=r.channel_id) last_rec_update
      , (select max(v:Updated::timestamp_ntz) from comment_stage where v:ChannelId=r.channel_id) last_comment_update
from channels_raw r
       left join stage_latest on v:ChannelId=r.channel_id
       left join channel_collection_days_back b on b.channel_id=v:ChannelId
");

            // Read the clock once so every plan's VideosFrom is measured from the same instant.
            var now = DateTime.UtcNow;

            return channels.Select(r => new ChannelUpdatePlan {
                Channel = r.j.ToObject<Channel>(IJsonlStore.JCfg),
                // No configured days-back for the channel means no lower bound on videos.
                VideosFrom = r.daysBack != null ? now - r.daysBack.Value.Days() : null,
                LastVideoUpdate = r.lastVideoUpdate,
                LastCaptionUpdate = r.lastCaptionUpdate,
                LastRecUpdate = r.lastRecUpdate,
                LastCommentUpdate = r.lastCommentUpdate
            }).ToArray();
        }