/// <summary>
/// Persists the uploader (UP) profile carried by a parsed "card" response.
/// A new <c>UP</c> row is inserted when the id is unknown; otherwise the
/// existing row is refreshed with the values from the response.
/// </summary>
/// <param name="context">Data-flow context of the current request (not used here).</param>
/// <param name="parseObj">Deserialized response; ignored when it carries no card data.</param>
public override void OnHanlder(DataFlowContext context, Root parseObj)
{
    var card = parseObj?.data?.card;
    if (card == null)
    {
        return;
    }

    // The API delivers mid as text; it is expected to be a numeric id.
    var upId = int.Parse(card.mid);

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    var up = db.SingleById<UP>(upId);
    var isNew = up == null;
    if (isNew)
    {
        up = new UP { Id = upId };
    }

    // "fans" is the audience size (follower) and "attention" is the number of
    // accounts this uploader follows (following). The previous code stored them
    // the other way round, which disagreed with every other writer/reader of
    // UP.follower. NOTE(review): confirm against the card API payload.
    up.follower = card.fans;
    up.following = card.attention;
    up.friend = card.friend;
    up.video = parseObj.data.archive_count;
    up.name = card.name;
    up.sex = card.sex;
    up.face = card.face;
    up.sign = card.sign;
    up.level = card.level_info.current_level;

    // One write per call: insert the fully populated row or update the existing one.
    if (isNew)
    {
        db.Insert(up);
    }
    else
    {
        db.Update(up);
    }
}
/// <summary>
/// Saves the state of the tag currently shown: merges the tag names of all
/// its OpenCV parts into one distinct CSV list, marks the tag as completed,
/// stores it and then advances to the next image.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
private void btnSave_Click(object sender, RoutedEventArgs e)
{
    if (currentShowTag == null)
    {
        return;
    }

    // Parts without tag names contribute nothing; duplicates collapse in the set.
    var distinctNames = new HashSet<string>(
        currentShowTag.OpenCvParts
            .Where(part => part.TagNames != null)
            .SelectMany(part => part.TagNames.Split(',')));

    currentShowTag.TagsName = distinctNames.ToArray().ToCsv();

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    currentShowTag.Status = "completed";
    db.Update(currentShowTag);

    btnNextImage_Click(null, null);
}
/// <summary>
/// Loads every completed image tag when the window opens and shows how many
/// were found.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
private void Window_Loaded(object sender, RoutedEventArgs e)
{
    using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        tags = db.Select<ImageTag>(tag => tag.Status == "completed");
    }

    labOutput.Content = $"当前有效tag数量:{tags.Count}";
}
/// <summary>
/// Fills the view model with row counts: videos, uploaders, detected images
/// and detected images already added to the face database.
/// </summary>
public void OnGet()
{
    using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        var avTotal = db.Count<AV>();
        VModel.AVCount = avTotal.ToString();

        var upTotal = db.Count<UP>();
        VModel.UPCount = upTotal.ToString();

        var detectTotal = db.Count<ImageDetect>();
        VModel.ImageDetectCount = detectTotal.ToString();

        var inFaceDbTotal = db.Count<ImageDetect>(o => o.AddToFaceDB);
        VModel.DbFaceCount = inFaceDbTotal.ToString();
    }
}
/// <summary>
/// Exports all completed image tags as training data into the "TrainData/"
/// folder below the default data folder and notifies the user when done.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
private void btnOpenOutput_Click(object sender, RoutedEventArgs e)
{
    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);

    var completedTags = db.Select<ImageTag>(tag => tag.Status == "completed");
    var outputFolder = Path.Combine(Utils.DefaultDataFolder, "TrainData/");

    TagsDataSet.WriteSourceData2(completedTags, outputFolder);
    MessageBox.Show("输出完成!");
}
/// <summary>
/// Runs the OpenCV analysis on every image tag whose status is
/// "downfile_finish" and stores the analysed tag back into the database.
/// </summary>
public static void Do()
{
    // The connection was previously never disposed; scope it to this call.
    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);

    foreach (var imgTag in db.Select<ImageTag>(o => o.Status == "downfile_finish"))
    {
        ImageProcess.OpenCVAnalysis(imgTag);
        db.Update(imgTag);

        // Kept from the original implementation.
        // NOTE(review): presumably releases native image buffers between
        // iterations - confirm it is still needed before removing.
        GC.Collect();
    }
}
/// <summary>
/// Handles the response of an image download request: writes the bytes to
/// the local image folder, runs the OpenCV analysis and stores the tag with
/// status "downfile_finish" (insert when the URL is unknown, update otherwise).
/// Requests whose "requestType" property is not "image", or that carry no
/// <c>ImageTag</c>, are ignored.
/// </summary>
/// <param name="context">Data-flow context holding the request properties and the response body.</param>
protected override async Task ParseAsync(DataFlowContext context)
{
    var properties = context.Request.Properties;

    object type;
    if (!properties.TryGetValue("requestType", out type))
    {
        return;
    }

    if (type as string != "image")
    {
        return;
    }

    // A missing key is treated like a missing tag instead of throwing.
    object rawTag;
    if (!properties.TryGetValue(typeof(ImageTag).Name, out rawTag))
    {
        return;
    }

    var image = rawTag as ImageTag;
    if (image == null)
    {
        return;
    }

    var imageBytes = context.Response.Content.Bytes;
    var trueFile = Path.Combine(DefaultImagePath, image.LocalFileName);
    var fi = new FileInfo(trueFile);
    if (!fi.Directory.Exists)
    {
        fi.Directory.Create();
    }

    File.WriteAllBytes(trueFile, imageBytes);

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    var dbItem = db.Single<ImageTag>(o => o.ImageUrl == image.ImageUrl);

    // Analyse the instance that is actually persisted. Previously the
    // "row exists" branch analysed `image` but saved `dbItem`, so the
    // analysis result never reached the database.
    // NOTE(review): assumes the stored row points at the same local file as
    // the request's tag - confirm.
    var target = dbItem ?? image;
    target.Status = "downfile_finish";
    OpenCVAnalysis(target);

    if (dbItem == null)
    {
        db.Insert(target);
    }
    else
    {
        db.Update(target);
    }
}
/// <summary>
/// Stores the tags returned for one video: writes the tag ids/names as CSV
/// onto the video row and inserts or refreshes one <c>Tag</c> row per tag.
/// </summary>
/// <param name="context">Data-flow context; its request must carry the "avId" property.</param>
/// <param name="parseObj">Deserialized tag response; ignored on a non-zero code or missing data.</param>
public override void OnHanlder(DataFlowContext context, Root parseObj)
{
    // Either condition alone makes the payload unusable. The former `&&`
    // let a null `data` with code 0 through to a null dereference.
    if (parseObj == null || parseObj.code != 0 || parseObj.data == null)
    {
        return;
    }

    var avId = (int)context.Request.Properties["avId"];
    var tags = parseObj.data;

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);

    // The video row may be missing; the tag table is still worth refreshing.
    var av = db.SingleById<AV>(avId);
    if (av != null)
    {
        av.tagIds = tags.Select(o => o.tag_id).ToCsv();
        av.tagNames = tags.Select(o => o.tag_name).ToCsv();
        db.Update(av);
    }

    // Maintain the stand-alone tag table.
    foreach (var tag in tags)
    {
        var dbTag = db.SingleById<Tag>(tag.tag_id);
        var isNew = dbTag == null;
        if (isNew)
        {
            dbTag = new Tag { Id = tag.tag_id };
        }

        dbTag.name = tag.tag_name;
        dbTag.ctime = tag.ctime;
        dbTag.cover = tag.cover;
        dbTag.head_cover = tag.head_cover;
        dbTag.subscribed_count = tag.subscribed_count;
        dbTag.archive_count = tag.archive_count;
        dbTag.featured_count = tag.featured_count;
        dbTag.alpha = tag.alpha;
        dbTag.color = tag.color;
        dbTag.tag_type = tag.tag_type;
        dbTag.content = tag.content;
        dbTag.short_content = tag.short_content;
        dbTag.use = tag.count.use;

        // One write per tag: insert the populated row or update the existing one.
        if (isNew)
        {
            db.Insert(dbTag);
        }
        else
        {
            db.Update(dbTag);
        }
    }
}
/// <summary>
/// Stores the description text returned for one video on its <c>AV</c> row.
/// </summary>
/// <param name="context">Data-flow context; its request must carry the "avId" property.</param>
/// <param name="parseObj">Deserialized description response; ignored on a non-zero code.</param>
public override void OnHanlder(DataFlowContext context, Root parseObj)
{
    // A failed response must never overwrite the stored description. The
    // former guard only returned when the failure also carried text, so a
    // failed response with empty data wiped the existing description.
    if (parseObj == null || parseObj.code != 0)
    {
        return;
    }

    var avId = (int)context.Request.Properties["avId"];

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    var av = db.SingleById<AV>(avId);
    if (av == null)
    {
        // Nothing to attach the description to.
        return;
    }

    av.desc = parseObj.data;
    db.Update(av);
}
/// <summary>
/// Registers uploaders in the "zhaiwu" face database, using detected images
/// that contain exactly one face of sufficient quality and have not been
/// added yet. Stops when <c>isExit</c> is set or after <paramref name="count"/>
/// images have been submitted.
/// </summary>
/// <param name="count">Maximum number of images to submit in this run.</param>
public static void AddUPToFaceDb2(int count = 100)
{
    var faceDb = new FaceDb2("zhaiwu");
    using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        var images = db.Select<ImageDetect>(o => o.face_num == 1 && o.max_quality > 0.5 && o.AddToFaceDB == false);
        foreach (var item in images)
        {
            if (isExit)
            {
                break;
            }

            var ret = faceDb.AddUserByURL(item.UpId.ToString(), item.Url);
            var errorCode = ret["error_code"].ToString();

            // "0" is success. 223105 / 222210 are also treated as "done" so the
            // image is not retried.
            // NOTE(review): presumably "face already exists" / "face limit
            // reached" - confirm against the provider's error code list.
            var added = errorCode == "0";
            if (added || errorCode == "223105" || errorCode == "222210")
            {
                item.AddToFaceDB = true;
                db.Update(item);
                if (added)
                {
                    Console.WriteLine($"Add:{item.UpId}");
                }
            }

            Console.WriteLine(JsonConvert.SerializeObject(ret, Formatting.Indented));

            // Throttle calls to the remote service.
            Thread.Sleep(500);

            // Pre-decrement so that exactly `count` images are submitted; the
            // former `count-- < 0` let two extra images through.
            if (--count <= 0)
            {
                return;
            }
        }
    }
}
/// <summary>
/// Refills <c>images</c> with tags in status "opencv_finish", considering at
/// most the first 100 query results and keeping only those whose image file
/// exists on disk.
/// </summary>
private void Load100ImageDatas()
{
    images.Clear();

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    var candidates = db.Select<ImageTag>(tag => tag.Status == "opencv_finish").Take(100);

    // Skip entries whose file is not present locally.
    foreach (var tag in candidates.Where(t => File.Exists(t.GetTrueImageFile())))
    {
        images.Add(tag);
    }
}
/// <summary>
/// Exports image files: selects up to 100 "opencv_finish" tags ordered by the
/// owning video's creation time (newest first), hands them to the exporting
/// overload, marks every selected tag as "export" and prints both the number
/// of selected tags and the value returned by the exporter.
/// </summary>
private static void ExportImageFiles()
{
    const string querySQL = @" SELECT ImageTag.* FROM ImageTag, AV WHERE ImageTag.`Status` = 'opencv_finish' AND ImageTag.AvId = AV.Id ORDER BY AV.ctime DESC LIMIT 100 ";

    using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        var selected = db.Select<ImageTag>(querySQL);
        Console.WriteLine(selected.Count);

        var exported = ExportImageFiles(@"V:\Data\ExportCalssifiedImage", selected);

        // Every selected tag is flagged, regardless of the exporter's result.
        selected.ForEach(tag =>
        {
            tag.Status = "export";
            db.Update(tag);
        });

        Console.WriteLine(exported);
    }
}
/// <summary>
/// Handles one page of a section's video listing. Every video that is not yet
/// in the database is inserted and follow-up requests are queued for its
/// uploader card, tags, description and cover image. The next page is
/// requested until 20 consecutive pages contained no new video.
/// </summary>
/// <param name="context">Data-flow context; its request must carry "pageNo" and "rid".</param>
/// <param name="parseObj">Deserialized listing; ignored when it has no archives.</param>
public override void OnHanlder(DataFlowContext context, BilibiliListRet parseObj)
{
    // Guard the whole chain: a response without data/archives is simply skipped.
    var archives = parseObj?.data?.archives;
    if (archives == null || archives.Length == 0)
    {
        return;
    }

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    int newCount = 0;
    foreach (var item in archives)
    {
        var av = new AV()
        {
            Id = item.aid,
            bvId = item.bvid,
            copyright = item.copyright,
            ctime = item.ctime,
            ctime2 = item.ctime.UnixToDateTime().ToLongDateString(),
            pic = item.pic,
            title = item.title,
            videos = item.videos,
            view = item.stat.view,
            rid = item.tid,
            UpId = item.owner.mid,
            cid = item.cid,
        };
        av.stat = item.stat;

        var existsDB = db.SingleById<AV>(av.Id);
        if (existsDB != null)
        {
            // Already known: nothing to do for this video.
            Console.WriteLine($"exits {existsDB.title}");
            continue;
        }

        db.Insert(av);
        newCount++;

        // Basic uploader info: https://api.bilibili.com/x/web-interface/card?mid=3630684&photo=1
        // No "already known" check here - every new video refreshes its uploader.
        var request = UpProcess.CreateRquerst(av.UpId);
        context.AddFollowRequests(request);

        // Tag info; probably only worth refreshing during the first month after upload:
        // https://api.bilibili.com/x/web-interface/view/detail/tag?aid=286927170
        request = TagProcess.CreateRquerst(av.Id);
        context.AddFollowRequests(request);

        // TODO: multi-part videos need their part list: https://api.bilibili.com/x/player/pagelist?bvid=BV1wf4y1X7ka
        // Video description: https://api.bilibili.com/x/web-interface/archive/desc?aid=286927170
        request = DescProcess.CreateRquerst(av.Id);
        context.AddFollowRequests(request);

        // TODO: periodically refresh video stats: https://api.bilibili.com/x/web-interface/archive/stat?aid=286927170
        // TODO: read comments: https://api.bilibili.com/x/v2/reply?pn=2&type=1&oid=286927170&sort=0
        //       comment reply paging: https://api.bilibili.com/x/v2/reply/reply?&pn=2&type=1&oid=244913305&ps=10&root=3602777175
        // TODO: fetch danmaku (uses cid): http://comment.bilibili.com/245666614.xml

        // Cover image.
        request = ImageProcess.CreateRequest(av);
        context.AddFollowRequests(request);
    }

    // Count consecutive pages without any new video; a page with new videos
    // resets the streak. The former test `newCount > -1` was always true, so
    // the reset branch was unreachable and paging stopped after 20 pages no
    // matter what was found.
    // NOTE(review): if the hard 20-page cap was intentional, reintroduce it
    // as an explicit page limit.
    if (newCount == 0)
    {
        notfindCount++;
    }
    else
    {
        notfindCount = 0;
    }

    if (notfindCount < 20)
    {
        Console.WriteLine("getNextPage");
        var page = (int)context.Request.Properties["pageNo"];
        var tid = (int)context.Request.Properties["rid"];

        var nextPage = CreateListRequest(tid, page + 1);
        context.AddFollowRequests(nextPage);
    }
}
/// <summary>
/// Searches the face database with either an uploaded image or an image URL.
/// For an upload, the raw service answer is placed in <c>ErrorMessage</c>.
/// For a URL, every returned user that exists in the local <c>UP</c> table is
/// listed in <c>FindUP</c>. Service errors put the service's message into
/// <c>ErrorMessage</c>.
/// </summary>
/// <param name="files">Uploaded files; only the first one is used.</param>
/// <param name="imageUrl">Image URL, used when no file was uploaded.</param>
public void OnPost(List<IFormFile> files, string imageUrl)
{
    this.ViewData["error"] = -1;

    if (files != null && files.Count > 0)
    {
        // Read the whole upload. A single Stream.Read call may return fewer
        // bytes than requested, and the stream must be disposed afterwards.
        byte[] bytes;
        using (var stream = files[0].OpenReadStream())
        using (var buffer = new System.IO.MemoryStream())
        {
            stream.CopyTo(buffer);
            bytes = buffer.ToArray();
        }

        var ret = faceDb.SereachUserByImage(bytes);
        if ((int)ret["error_code"] != 0)
        {
            this.ViewData["error"] = -1;
            ErrorMessage = ret["error_msg"].ToString();
            return;
        }

        // A matching face was found: show the raw answer.
        ErrorMessage = JsonConvert.SerializeObject(ret, Formatting.Indented);
    }
    else if (!string.IsNullOrEmpty(imageUrl))
    {
        var ret = faceDb.SereachUserByUrl(imageUrl);
        if ((int)ret["error_code"] != 0)
        {
            this.ViewData["error"] = -1;
            ErrorMessage = ret["error_msg"].ToString();
            return;
        }

        List<UPModel> ups = new List<UPModel>();
        var arr = ret["result"]["user_list"] as JArray;
        if (arr != null && arr.Count > 0)
        {
            using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
            {
                foreach (var item in arr)
                {
                    // user_id comes from an external service; skip entries
                    // that are not numeric instead of failing the request.
                    if (!int.TryParse(item["user_id"].ToString(), out var id))
                    {
                        continue;
                    }

                    var dbItem = db.SingleById<UP>(id);
                    if (dbItem != null)
                    {
                        ups.Add(new UPModel
                        {
                            UPId = id.ToString(),
                            Name = dbItem.name,
                            FaceUrl = dbItem.face,
                            Rate = ((double)item["score"]).ToString("f2")
                        });
                    }
                }
            }
        }

        FindUP = ups.ToArray();
    }
    else
    {
        ErrorMessage = "上传图片或者url,你总得设置一个吧";
    }
}
/// <summary>
/// Sets the status of the given image tag and stores the tag.
/// </summary>
/// <param name="item">Tag whose status is changed and persisted.</param>
/// <param name="status">New status value.</param>
private void UpdateIamgeTagStatus(ImageTag item, string status)
{
    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    item.Status = status;

    // Persist the tag that was modified. The former code saved
    // `currentShowTag`, which silently dropped the change whenever `item`
    // was a different instance.
    db.Update(item);
}
/// <summary>
/// Training worker: writes the completed tags as training data, loads the
/// data set, then fits the model in steps of five epochs until either
/// <c>isRuning</c> is cleared or accuracy exceeds 0.95 with loss below 0.1.
/// Progress is pushed to the UI through the dispatcher; the weights are saved
/// at the end and the trained model is handed to <c>TagsDataSet</c>.
/// </summary>
void TreadTrain()
{
    // All UI access goes through the dispatcher.
    void RunOnUi(Action uiWork) => this.Dispatcher.Invoke(uiWork);

    RunOnUi(() => { labStatus.Content = "保存训练数据中..."; });

    using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);
    var completedTags = db.Select<ImageTag>(o => o.Status == "completed");
    var folder = Path.Combine(Utils.DefaultDataFolder, "TrainData/");
    var saveResult = TagsDataSet.WriteSourceData2(completedTags, folder);

    RunOnUi(() => { labStatus.Content = "开始加载训练数据"; });

    var dataSet = TagsDataSet.ReadDataSets(folder);
    var model = TFModels.GetBilibiliModelV1(dataSet.Width, dataSet.Height, dataSet.LabNames.Length);
    var weightsFileName = Path.Combine(folder, "bilibili.h5");

    isRuning = true;
    var epochs = 0;
    var epochsStep = 5;

    RunOnUi(() =>
    {
        labStatus.Content = "开始训练";
        lab_pic.Content = $"训练用图片:{saveResult.PicNum}";
        var labelLines = saveResult.LabelCount
            .OrderByDescending(o => o.Value)
            .Select(o => $"{o.Key}({o.Value}) ");
        lab_label.Content = string.Join("\n", labelLines);
    });

    DateTime start = DateTime.Now;
    var goodEnough = false;
    while (isRuning && !goodEnough)
    {
        model.fit(dataSet.Images, dataSet.Lables, epochs: epochsStep);

        // metrics[0] is read as loss and metrics[1] as accuracy.
        var metrices = model.metrics.ToArray();
        var loss = (float)metrices[0].result();
        var accuracy = (float)metrices[1].result();
        epochs += epochsStep;

        RunOnUi(() =>
        {
            labStatus.Content = $"训练中: {epochs} 轮 用时:{(DateTime.Now - start)}";
            labLoss.Content = $"损失率: {loss * 100}";
            labAccuracy.Content = $"准确率: {accuracy * 100}";
            pb.Value = accuracy * 100;
        });

        // Stop as soon as the model is accurate enough.
        goodEnough = accuracy > 0.95 && loss < 0.1;
    }

    model.save_weights(weightsFileName);
    TagsDataSet.ChangeModel(model, dataSet);
    isSaveWeights = true;

    RunOnUi(() => { btnQuit.Content = "退出"; });
}
/// <summary>
/// Downloads one page (50 entries) of the newest videos of a section and
/// synchronises it with the databases: videos are inserted or refreshed,
/// unknown uploaders are fetched and inserted, and every listing entry is
/// appended to the video's history record.
/// </summary>
/// <param name="page">Page index passed to the downloader.</param>
/// <param name="rid">Section id passed to the downloader and stored on new videos.</param>
/// <returns><c>true</c> when at least one previously unknown video was inserted.</returns>
public static bool GetZhaiWuNewList(int page = 0, int rid = 20)
{
    var listing = new Newlist().Download(page, 50, rid);
    bool hasNew = false;

    using (var historyCon = DBSet.GetCon(DBSet.SqliteDBName.History))
    using (var con = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        foreach (var item in listing.archives)
        {
            // --- video row ---
            var video = con.SingleById<AV>(item.aid);
            if (video != null)
            {
                UpdateAVData(video, item);
                con.Update(video);
            }
            else
            {
                video = new AV()
                {
                    Id = item.aid,
                    UpId = item.owner.mid,
                    rid = rid,
                };
                Console.WriteLine(item.title);
                UpdateAVData(video, item);
                con.Insert(video);
                hasNew = true;
            }

            // --- uploader row (only fetched when unknown) ---
            var uploader = con.SingleById<UP>(item.owner.mid);
            if (uploader == null)
            {
                var upspider = new UpSpider();
                int mid = item.owner.mid;
                Console.WriteLine($"get up {mid}");

                var info = upspider.GetUpInfo(mid);
                var face = upspider.GetFaceStat(mid);
                var upstat = upspider.GetUpStat(mid);

                uploader = new UP
                {
                    Id = mid,
                    follower = face.follower,
                    following = face.following,
                    face = info.face,
                    jointime = info.jointime,
                    // NOTE(review): level is filled from jointime - looks like a
                    // copy-paste slip; confirm which field of `info` holds the level.
                    level = info.jointime,
                    name = info.name,
                    rank = info.rank,
                    sex = info.sex,
                    sign = info.sign,
                    views = upstat.archive.view,
                };
                con.Insert(uploader);
            }

            // --- history record ---
            var history = historyCon.SingleById<AVHistory>(item.aid);
            if (history != null)
            {
                history.History.Add(item);
                historyCon.Update(history);
            }
            else
            {
                history = new AVHistory
                {
                    Id = item.aid,
                    History = new List<ArchivesItem> { item }
                };
                historyCon.Insert(history);
            }
        }
    }

    return hasNew;
}
/// <summary>
/// Sends video cover images to the remote face-detection service and stores
/// one <c>ImageDetect</c> row per analysed cover. Uploaders with more than
/// 3000 followers are processed in descending follower order; covers that
/// already have a detection row (matched by local file name) are skipped.
/// Stops when <c>isExit</c> is set or after <paramref name="maxGetCount"/>
/// rows have been stored.
/// </summary>
/// <param name="maxGetCount">Maximum number of detection rows to store in this run.</param>
private static void DetectFace2(int maxGetCount = 60 * 2 * 30) // default: 30 minutes of data
{
    var baiduai = new FaceDetect();
    DateTime nextCallTime = DateTime.Now;
    using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
    {
        foreach (var up in db.Select<UP>(o => o.follower > 3000).OrderByDescending(o => o.follower).ToArray())
        {
            foreach (var av in db.Select<AV>(o => o.UpId == up.Id))
            {
                // Leave the whole run. A plain `break` only left the inner
                // loop and kept querying the videos of every remaining uploader.
                if (isExit)
                {
                    return;
                }

                // Detection rows are matched by local file name only.
                var pic = new Uri(av.pic).AbsolutePath.Replace("/", "_");
                var detect = db.Single<ImageDetect>(o => o.LocalFile == pic);
                if (detect != null)
                {
                    continue;
                }

                byte[] bytes = null;
                if (FromWeb)
                {
                    try
                    {
                        // Dispose the client after each download.
                        using (var client = new WebClient())
                        {
                            bytes = client.DownloadData(av.pic);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: a failed download just skips this cover.
                        Console.WriteLine(e);
                    }
                }
                else
                {
                    // Covers stored locally. Per the original author's estimate,
                    // keeping all covers of the section on disk would take well
                    // over 100 GB, more than the server disk can hold.
                    var imagePath = SpiderConfig.GetPath($"imgs/{av.UpId}/{av.Id}");
                    var imageFile = Path.Combine(imagePath, pic);
                    if (!File.Exists(imageFile))
                    {
                        continue;
                    }

                    bytes = File.ReadAllBytes(imageFile);
                }

                if (bytes == null)
                {
                    continue;
                }

                // Keep at least 500 ms between calls; the original comment
                // says the free service tier allows only 2 requests per second.
                var wait = (int)(nextCallTime - DateTime.Now).TotalMilliseconds + 1;
                if (wait > 0)
                {
                    Console.WriteLine($"wait {wait}");
                    Thread.Sleep(wait);
                }

                var start = DateTime.Now;
                var ret = baiduai.DetectFromBytes(bytes);
                Console.Write($"useTime:{(DateTime.Now - start).TotalMilliseconds} ms ");
                nextCallTime = DateTime.Now.AddMilliseconds(500);

                if (ret == null)
                {
                    continue;
                }

                var dbItem = new ImageDetect
                {
                    AVId = av.Id,
                    UpId = av.UpId,
                    LocalFile = pic,
                    Url = av.pic,
                    Detect = ret.result,
                };
                if (ret.error_code == 0)
                {
                    dbItem.face_num = ret.result.face_num;
                    if (ret.result.face_num > 0)
                    {
                        dbItem.max_face_probability = ret.result.face_list.Max(o => o.face_probability);
                        dbItem.max_quality = ret.result.face_list.Max(o => GetQuality(o));
                    }
                }

                db.Insert(dbItem);
                Console.WriteLine(av.title);

                // Pre-decrement so that exactly `maxGetCount` rows are stored;
                // the former `maxGetCount-- < 0` let two extra rows through.
                if (--maxGetCount <= 0)
                {
                    return;
                }
            }
        }
    }
}