private static void SimpleTest(Enums.MediaType type)
        {
            // Reference article: loaded from 1.txt and offered to the detector first.
            ItemToDuplication probe = new ItemToDuplication(new Item());
            FillItemFromFile(probe, "source", type, @"1.txt");
            myDetector.TestAndTryAdd(probe);

            // Candidate article: the same object is re-populated from 2.txt
            // (no second instance is created) and run through the detector again.
            FillItemFromFile(probe, "test", type, @"2.txt");
            string matchedId = myDetector.TestAndTryAdd(probe);

            // A non-null result is the ID reported by the detector for the matching article.
            Console.WriteLine(matchedId == null ? "Not copied." : "Copied Item: " + matchedId);
        }

        /// <summary>
        /// Populates <paramref name="target"/> from a text file: the first line becomes the title,
        /// all remaining lines are concatenated (without separators) into the body text, and the
        /// publication date is set to the current local time.
        /// </summary>
        /// <param name="target">Item whose fields are overwritten.</param>
        /// <param name="itemId">Value assigned to <c>ItemID</c>.</param>
        /// <param name="type">Value assigned to <c>MediaType</c>.</param>
        /// <param name="path">Path of the text file to read.</param>
        private static void FillItemFromFile(ItemToDuplication target, string itemId, Enums.MediaType type, string path)
        {
            string[] lines = File.ReadAllLines(path);

            target.ItemID = itemId;
            target.MediaType = type;
            target.SpliteTitle = lines[0];
            target.PubDate = DateTime.Now;

            // Everything after the title line, joined with no separator.
            target.SpliteText = string.Join(string.Empty, lines, 1, lines.Length - 1);
        }
        /// <summary>
        /// Checks whether <paramref name="item"/> duplicates an article already known to the detector
        /// for its media type and, when it does not and the item is recent enough, registers it.
        /// </summary>
        /// <param name="item">Item to test; its <c>MediaType</c> selects both the lock and the detector.</param>
        /// <param name="TITLE_WEIGHT">Title weight; a negative value means "use the detector's configured value".</param>
        /// <param name="THRESHOLD">Similarity threshold; a negative value means "use the detector's configured value".</param>
        /// <returns>
        /// The value produced by <c>IsItemCopied</c> through its out parameter (presumably the ID of the
        /// matching article), or <c>null</c> when the item has neither title nor text, when no match is
        /// reported, or when an exception was caught and logged.
        /// </returns>
        public string TestAndTryAdd(ItemToDuplication item, double TITLE_WEIGHT = -1, double THRESHOLD = -1)
        {
            if (string.IsNullOrEmpty(item.SpliteText) && string.IsNullOrEmpty(item.SpliteTitle))
                return null;

            string DupItemID = null;
            ReaderWriterLockSlim targetLock = GetLock(item.MediaType);

            // Tracks whether this thread currently owns the read lock, so the finally block never
            // calls ExitReadLock on a lock it does not hold (which would throw from finally).
            bool readLockHeld = false;
            try
            {
                targetLock.EnterReadLock();
                readLockHeld = true;

                Detector curDetector = GetCurDetector(item.MediaType);
                if (TITLE_WEIGHT < 0)
                    TITLE_WEIGHT = curDetector.contextParameters.TITLE_WEIGHT;
                if (THRESHOLD < 0)
                    THRESHOLD = curDetector.contextParameters.THRESHOLD;

                int[] sentenceTitle, sentenceContext, kwordsTitle, kwordsContext;
                curDetector.GetFingerPrints(item, out sentenceTitle, out sentenceContext, out kwordsTitle, out kwordsContext);

                bool copied = curDetector.IsItemCopied(sentenceTitle, sentenceContext, kwordsTitle, kwordsContext, TITLE_WEIGHT, THRESHOLD, out DupItemID);

                // Only items published within DetectPeriod are remembered for future comparisons.
                if (!copied && (DateTime.Now - item.PubDate) < DetectPeriod)
                {
                    // The read lock must be released before the write lock is requested.
                    // NOTE(review): another thread may register an equivalent article in the gap
                    // between the two locks; the check is not repeated under the write lock.
                    targetLock.ExitReadLock();
                    readLockHeld = false;

                    EnterWriteLock(targetLock);
                    try
                    {
                        curDetector.RegisterArticle(item, sentenceTitle, sentenceContext, kwordsTitle, kwordsContext);
                    }
                    finally
                    {
                        // Always release the write lock, even when registration throws.
                        ExitWriteLock(targetLock);
                    }
                }
            }
            catch (Exception e)
            {
                Logger.Error(string.Format("DetectorFacade IsItemCopied Exp:{0}\n{1}", e.Message, e.StackTrace));
            }
            finally
            {
                if (readLockHeld)
                    targetLock.ExitReadLock();
            }
            return DupItemID;
        }
 /// <summary>
 /// Gets all in-memory articles that are similar to the given article.
 /// </summary>
 /// <param name="item">Article to compare; its <c>MediaType</c> selects both the lock and the detector.</param>
 /// <param name="TITLE_WEIGHT">Title weight; a negative value means "use the detector's configured value".</param>
 /// <param name="THRESHOLD">Similarity threshold; a negative value means "use the detector's configured value".</param>
 /// <returns>
 /// The array returned by the detector's <c>GetSimilarItems</c>, or <c>null</c> when an exception
 /// was caught and logged.
 /// </returns>
 public string[] GetSimilarItemIDs(ItemToDuplication item, double TITLE_WEIGHT = -1, double THRESHOLD = -1)
 {
     ReaderWriterLockSlim targetLock = GetLock(item.MediaType);
     string[] result = null;

     // Only release the read lock in finally if it was actually acquired.
     bool readLockHeld = false;
     try
     {
         targetLock.EnterReadLock();
         readLockHeld = true;

         Detector curDetector = GetCurDetector(item.MediaType);
         if (TITLE_WEIGHT < 0)
             TITLE_WEIGHT = curDetector.contextParameters.TITLE_WEIGHT;
         if (THRESHOLD < 0)
             THRESHOLD = curDetector.contextParameters.THRESHOLD;
         result = curDetector.GetSimilarItems(item, TITLE_WEIGHT, THRESHOLD);
     }
     catch (Exception e)
     {
         // The placeholders must be {0} and {1}: with only two arguments, an index of {2}
         // makes string.Format throw FormatException and the original error is never logged.
         Logger.Error(string.Format("DetectorFacade GetSimilarItemIDs Exp:{0}\n{1}", e.Message, e.StackTrace));
     }
     finally
     {
         if (readLockHeld)
             targetLock.ExitReadLock();
     }
     return result;
 }
        /// <summary>
        /// Stores an article's fingerprints in both the sentence holder and the k-words holder.
        /// </summary>
        /// <param name="item">Article being registered; supplies the ID and publication date.</param>
        /// <param name="sentenceTitle">Sentence fingerprint of the title (may be null).</param>
        /// <param name="sentenceContext">Sentence fingerprint of the body text (may be null).</param>
        /// <param name="kwordsTitle">K-words fingerprint of the title (may be null).</param>
        /// <param name="kwordsContext">K-words fingerprint of the body text (may be null).</param>
        public void RegisterArticle(ItemToDuplication item, int[] sentenceTitle, int[] sentenceContext, int[] kwordsTitle, int[] kwordsContext)
        {
            // Nothing to store when every fingerprint is missing.
            bool hasAnyFingerPrint = sentenceTitle != null
                                     || sentenceContext != null
                                     || kwordsTitle != null
                                     || kwordsContext != null;
            if (!hasAnyFingerPrint)
            {
                return;
            }

            // Register under the DuplicationID when the item has one, otherwise under its own ItemID.
            string registeredId = item.DuplicationID == null ? item.ItemID : item.DuplicationID;

            sentenceHolder.RegisterArticleFingerPrint(sentenceTitle, sentenceContext, registeredId, item.PubDate);
            kwordsHolder.RegisterArticleFingerPrint(kwordsTitle, kwordsContext, registeredId, item.PubDate);
        }
        /// <summary>
        /// Looks up articles similar to <paramref name="item"/> in both fingerprint holders and
        /// merges the two result lists.
        /// </summary>
        /// <param name="item">Article to compare.</param>
        /// <param name="TITLE_WEIGHT">Title weight forwarded to both holders.</param>
        /// <param name="THRESHOLD">Similarity threshold forwarded to both holders.</param>
        /// <returns>
        /// The result of <c>MergeArticleNames</c> over both holders' matches, or <c>null</c> when the
        /// item has neither title nor text.
        /// </returns>
        public string[] GetSimilarItems(ItemToDuplication item, double TITLE_WEIGHT, double THRESHOLD)
        {
            bool nothingToCompare = string.IsNullOrEmpty(item.SpliteText) && string.IsNullOrEmpty(item.SpliteTitle);
            if (nothingToCompare)
            {
                return null;
            }

            int[] titleSentences, bodySentences, titleKwords, bodyKwords;
            GetFingerPrints(item, out titleSentences, out bodySentences, out titleKwords, out bodyKwords);

            // Sentence-based matches are queried first, then k-words-based matches.
            return MergeArticleNames(
                sentenceHolder.GetSimilarArticles(titleSentences, bodySentences, TITLE_WEIGHT, THRESHOLD),
                kwordsHolder.GetSimilarArticles(titleKwords, bodyKwords, TITLE_WEIGHT, THRESHOLD));
        }
 /// <summary>
 /// Computes the sentence and k-words fingerprints for an item's title and body text.
 /// </summary>
 /// <param name="item">Item whose <c>SpliteTitle</c> and <c>SpliteText</c> are fingerprinted.</param>
 /// <param name="sentenceTitle">Sentence fingerprint of the title; null when the title is null or empty.</param>
 /// <param name="sentenceContext">Sentence fingerprint of the body text; null when the text is null or empty.</param>
 /// <param name="kwordsTitle">K-words fingerprint of the title; null when the title is null or empty.</param>
 /// <param name="kwordsContext">K-words fingerprint of the body text; null when the text is null or empty.</param>
 public void GetFingerPrints(ItemToDuplication item, out int[] sentenceTitle, out int[] sentenceContext, out int[] kwordsTitle, out int[] kwordsContext)
 {
     sentenceTitle = sentenceContext = kwordsTitle = kwordsContext = null;

     bool hasText = !string.IsNullOrEmpty(item.SpliteText);

     if (!string.IsNullOrEmpty(item.SpliteTitle))
     {
         // The title length is only inspected here, where the title is known to be non-null;
         // checking it unconditionally throws NullReferenceException for an item with no title and no text.
         // A title-only item whose title exceeds MAX_TITLE_LENGTH is fingerprinted with the body-text
         // parameters; every other title uses dedicated title parameters.
         // NOTE(review): the meaning of the two leading 1s passed to Parameters is not visible here.
         Parameters titleParameters = (!hasText && item.SpliteTitle.Length > Parameters.MAX_TITLE_LENGTH)
             ? contextParameters
             : new Parameters(1, 1, contextParameters.TITLE_WEIGHT, contextParameters.THRESHOLD);

         sentenceTitle = FingerPrintBuilder.GetSentenceFingerPrint(item.SpliteTitle, titleParameters, true);
         kwordsTitle = FingerPrintBuilder.GetK_WordsFingerPrint(item.SpliteTitle, titleParameters, true);
     }

     if (hasText)
     {
         sentenceContext = FingerPrintBuilder.GetSentenceFingerPrint(item.SpliteText, contextParameters, false);
         kwordsContext = FingerPrintBuilder.GetK_WordsFingerPrint(item.SpliteText, contextParameters, false);
     }
 }
 // NOTE: LoadDaily_Mongo and LoadPeriod_Mongo below are disabled (commented-out) MongoDB loaders.
 // They are kept for reference only and are not compiled.
 //static void LoadDaily_Mongo(DateTime Date, DetectorFacade detector)
 //{
 //    //左闭右开
 //    //QueryConditionList low = Query.GTE("PubDate", Date.Date);
 //    //QueryConditionList high = Query.LT("PubDate", Date.Date.AddDays(1));
 //    //var query = Query.And(low, high);
 //    //这里就已经挂了
 //    //int count = MongoItemAccess.Items.Count(query);
 //    //总count>4000要再分批
 //    //const int MaxSetSize = 4000;
 //    //if (count < MaxSetSize)
 //    //    LoadPeriod_Mongo(Date.Date, Date.Date.AddDays(1), Items);
 //    //else
 //    {
 //        int HourStep = 1; //24 / ((count + MaxSetSize - 1) / MaxSetSize);
 //        int Hour = 0;
 //        while (Hour < 24)
 //        {
 //            LoadPeriod_Mongo(Date.Date.AddHours(Hour), Date.Date.AddHours(Hour + HourStep > 24 ? 24 : Hour + HourStep), detector);
 //            Hour += HourStep;
 //            Console.Write('*');
 //        }
 //    }
 //}
 //static void LoadPeriod_Mongo(DateTime lowTime, DateTime highTime, DetectorFacade detector)
 //{
 //    //左闭右开
 //    QueryConditionList low = Query.GTE("FetchTime", lowTime);
 //    QueryConditionList high = Query.LT("FetchTime", highTime);
 //    var query = Query.And(low, high);
 //    var sort = SortBy.Ascending("FetchTime");
 //    //int count = MongoItemAccess.Items.Count(query);
 //    //const int packsize = 1000;   //每次读取条数
 //    const int sleepMS = 200;   //两次读取的间隔时间
 //    //int skip = 0;
 //    //while (skip < count)
 //        try
 //        {
 //            var result = MongoItemAccess.Items.Find(query)./*SetSortOrder(sort).*/SetFields("ItemID", "PubDate", "DuplicationID", "SpliteTitle", "SpliteText", "ProsdDuplication");
 //                //.Take(packsize).Skip(skip);
 //            foreach (var item in result)
 //                detector.AddItem(new ItemToDuplication(item));
 //            //skip += packsize;
 //        }
 //        catch (Exception e)
 //        {
 //            Logger.Error(string.Format(@"Duplication读取文章失败[{0}-{1}]:{2}", lowTime, highTime, e.Message));
 //            //break;
 //        }
 //        Thread.Sleep(sleepMS);
 //}
 /// <summary>
 /// (Instance method) Repost detection based on text fingerprints.
 /// </summary>
 /// <param name="Item">Item to test against the fingerprint detector.</param>
 /// <returns>
 /// Whatever <c>FingerDetector.TestAndTryAdd</c> returns for the item, or <c>null</c> when that
 /// call throws (the exception is logged, not rethrown).
 /// </returns>
 string IsDuplication_FingerPrint(ItemToDuplication Item)
 {
     string duplicateId = null;
     try
     {
         // Test the item; the detector adds it when it is not already present.
         duplicateId = FingerDetector.TestAndTryAdd(Item);
     }
     catch (Exception e)
     {
         string message = string.Format("Dup Err:{0} ItemID:{1}\n{2}", e.Message, Item.ItemID, e.StackTrace);
         Logger.Error(message);
     }
     return duplicateId;
 }