コード例 #1
0
        /// <summary>
        /// Creates an empty similarity map for every news item that is newer than the most
        /// recent map update and does not own a map yet.
        /// </summary>
        /// <returns>The value returned by <c>db.SaveChanges()</c>.</returns>
        public int SimilarityMapInitial()
        {
            // Placeholder timestamp: used as the cut-off when no map exists yet and as the
            // initial UpdateTime of every freshly created map.
            DateTime initialTime = new DateTime(2000, 1, 1);

            DateTime lastUpdateTime = db.NewsSimilarityMaps.Any()
                ? db.NewsSimilarityMaps.Max(o => o.UpdateTime)
                : initialTime;

            // Freshly created maps keep the placeholder UpdateTime until CollectSimilarity
            // stamps them, so the time filter alone would select the same news again when this
            // method runs twice in a row. Excluding news that already own a map prevents
            // duplicate map rows.
            var newAddedNewsIDs = (from o in db.NewsContainer
                                   where o.Time > lastUpdateTime
                                         && !db.NewsSimilarityMaps.Any(m => m.OwingNewsId == o.Id)
                                   select o.Id).ToList();
            int currentUpdateCount = 0;

            foreach (var newsId in newAddedNewsIDs)
            {
                db.NewsSimilarityMaps.Add(new NewsSimilarityMap
                {
                    OwingNewsId = newsId,
                    UpdateTime  = initialTime
                });
                currentUpdateCount++;

                // Progress report every 1000 queued maps.
                if (currentUpdateCount % 1000 == 0)
                {
                    Console.WriteLine("{0}条新闻相似度映射已被添加!", currentUpdateCount);
                }
            }
            return db.SaveChanges();
        }
コード例 #2
0
        /// <summary>
        /// Collects, for every news item, the news in the same category that are most similar to it.
        /// </summary>
        /// <remarks>
        /// The similarity of two news items is 2 when <c>IsSimilaritySource</c> accepts their
        /// sources (0 otherwise) plus the result of <c>GetKeyWordSimilarity</c> on their keywords.
        /// Pairs scoring 0 are ignored. The new scores are merged with the ones already stored in
        /// the item's similarity map, ordered by descending score, truncated to
        /// <c>similarityNum</c> entries and written back to the map.
        /// </remarks>
        /// <param name="isFirstCollected">
        /// When <c>true</c>, each item is compared with every other item of its category;
        /// otherwise only with items whose <c>Time</c> is later than its own.
        /// </param>
        /// <returns>The value returned by <c>db.SaveChanges()</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// A news item has no similarity map.
        /// </exception>
        public int CollectSimilarity(bool isFirstCollected = false)
        {
            // Materialize the categories up front so their query is not still being enumerated
            // while the loop body issues further queries on the same context (this fails on
            // connections that do not allow multiple active readers).
            IEnumerable <NewsCategory> typeDB = db.NewsCategorys.ToList();

            foreach (var type in typeDB)
            {
                // ToList() never returns null; an empty category simply yields no iterations.
                IEnumerable <News> typeNewsContainer = db.NewsContainer.Where(o => o.Category == type.Name).ToList();

                foreach (var item in typeNewsContainer)
                {
                    // Candidates to compare against: the whole category on the first collection,
                    // otherwise only the news that are newer than the current item.
                    IEnumerable <News> candidates = isFirstCollected
                        ? typeNewsContainer
                        : typeNewsContainer.Where(o => o.Time > item.Time);

                    // Related news id -> similarity score computed in this run.
                    var updateMapNews = new Dictionary <int, int>();

                    foreach (var readyItem in candidates.Except(new[] { item }))
                    {
                        // Similarity strategy: a similar source is worth 2 points,
                        // keyword similarity is added on top.
                        int similarity = IsSimilaritySource(item.Source, readyItem.Source) ? 2 : 0;
                        similarity += GetKeyWordSimilarity(item.Keywords, readyItem.Keywords);
                        if (similarity != 0)
                        {
                            updateMapNews.Add(readyItem.Id, similarity);
                        }
                    }

                    NewsSimilarityMap map = db.NewsSimilarityMaps.FirstOrDefault(o => o.OwingNewsId == item.Id);
                    if (map == null)
                    {
                        throw new InvalidOperationException(string.Format("不存在ID为{0}的新闻相似度映射", item.Id));
                    }

                    var oldMapNews = Serialization.ConvertStrToDic(map.StrRelatedNewsIds, map.StrSimilarities);

                    // Merge by related news id, preferring the freshly computed score. A plain
                    // Union compares key AND value, so an id whose score changed would be kept
                    // twice and could occupy two of the top slots.
                    var currentMapNews = updateMapNews
                                         .Concat(oldMapNews)
                                         .GroupBy(o => o.Key)
                                         .Select(g => g.First())
                                         .OrderByDescending(o => o.Value)
                                         .Take(similarityNum);

                    // The out modifier cannot be applied to properties, hence the temporaries.
                    string tmpRelatedNewIds, tmpSimilarities;
                    Serialization.ConvertEnumToStr(currentMapNews, out tmpRelatedNewIds, out tmpSimilarities);
                    map.StrRelatedNewsIds = tmpRelatedNewIds;
                    map.StrSimilarities   = tmpSimilarities;

                    // Stamp the map and flag it as modified so SaveChanges persists it.
                    map.UpdateTime      = DateTime.Now;
                    db.Entry(map).State = EntityState.Modified;
                }
            }
            return db.SaveChanges();
        }