/// <summary>
 /// Creates a facade with empty per-media-type lock and detector tables.
 /// The original note on this constructor states that every media type except
 /// Unknown uses the supplied default parameters; that selection is not performed
 /// here, this constructor only stores the value.
 /// </summary>
 /// <param name="defaultContextParameters">Default parameters kept for later use.</param>
 public DetectorFacade(Parameters defaultContextParameters)
 {
     this.defaultContextParameters = defaultContextParameters;

     // Both tables start empty; nothing is registered by the constructor itself.
     detectorOFMedia = new ConcurrentDictionary<Enums.MediaType, Detector>();
     detectorLock = new ConcurrentDictionary<Enums.MediaType, ReaderWriterLockSlim>();

     // The detection period is a whole number of days taken from Duplication.nBackwardDays.
     int backwardDays = Duplication.nBackwardDays;
     DetectPeriod = new TimeSpan(backwardDays, 0, 0, 0);
 }
 /// <summary>
 /// Thins out a sequence of hash codes by keeping, for every sliding window of
 /// <c>parameters.G - parameters.K + 1</c> consecutive values, the smallest value in that window.
 /// </summary>
 /// <param name="hashCodes">Hash codes to select from, in sequence order.</param>
 /// <param name="parameters">Supplies <c>G</c> and <c>K</c>, which determine the window size.</param>
 /// <param name="isTitle">Kept for interface compatibility; it does not influence the result.</param>
 /// <returns>
 /// One minimum per window position (window positions = length - window size + 1).
 /// When the input is shorter than the window, a single element holding the minimum of the
 /// whole input. An empty array when <paramref name="hashCodes"/> is null or empty.
 /// </returns>
 private static int[] GetSimplifiedFingerPrint(int[] hashCodes, Parameters parameters, bool isTitle)
 {
     // Nothing to select from; indexing element 0 would throw.
     if (hashCodes == null || hashCodes.Length == 0)
         return new int[0];

     // A window must span at least one element. A non-positive size (G smaller than K)
     // is clamped to 1, which degenerates to "keep every hash code".
     int windowSize = parameters.G - parameters.K + 1;
     if (windowSize < 1)
         windowSize = 1;

     int windowCount = hashCodes.Length - windowSize + 1;
     if (windowCount < 1)
         windowCount = 1;
     int[] result = new int[windowCount];

     // Monotonic deque of indices into hashCodes: the referenced values are strictly
     // increasing from head to tail, so the head always points at the window minimum.
     // Each index is pushed once, so hashCodes.Length slots are sufficient.
     int[] candidates = new int[hashCodes.Length];
     int head = 0, tail = -1;

     for (int i = 0; i < hashCodes.Length; ++i)
     {
         // Drop candidates that can never be a minimum again (not smaller than the newcomer).
         while (tail >= head && hashCodes[i] <= hashCodes[candidates[tail]])
             tail--;
         candidates[++tail] = i;

         // Drop candidates that slid out of the window ending at i.
         // The newest entry (index i) is never evicted, hence head < tail.
         while (head < tail && candidates[head] + windowSize <= i)
             head++;

         // The first complete window ends at index windowSize - 1.
         int slot = i - windowSize + 1;
         if (slot >= 0)
             result[slot] = hashCodes[candidates[head]];
     }

     // Input shorter than one window: no slot was written above, so report the
     // minimum of everything that was seen.
     if (hashCodes.Length < windowSize)
         result[0] = hashCodes[candidates[head]];

     return result;
 }
 /// <summary>
 /// Builds a fingerprint with one hash per word-ID list returned by
 /// <c>WordList.GetWordIDs</c> (presumably one list per sentence - confirm against WordList).
 /// Lists with fewer than <c>parameters.K</c> entries are ignored.
 /// </summary>
 /// <param name="content">Text to fingerprint.</param>
 /// <param name="parameters">Supplies <c>K</c>, the minimum list length that is hashed.</param>
 /// <param name="isTitle">Not used by this method.</param>
 /// <returns>The distinct hash values, or <c>null</c> when no list was long enough.</returns>
 internal static int[] GetSentenceFingerPrint(string content, Parameters parameters, bool isTitle)
 {
     var fingerPrints = new HashSet<int>();
     foreach (List<int> wordIds in WordList.GetWordIDs(content))
     {
         if (wordIds.Count >= parameters.K)
         {
             // The hash length equals the list length, so only element 0 is taken
             // (presumably the hash of the whole list - verify in GetKarpRabinHashCode).
             int[] wholeListHashes = GetKarpRabinHashCode(wordIds, wordIds.Count);
             fingerPrints.Add(wholeListHashes[0]); // Add ignores values already present
         }
     }
     return fingerPrints.Count == 0 ? null : fingerPrints.ToArray();
 }
 /// <summary>
 /// Builds a k-words fingerprint: every word-ID list returned by <c>WordList.GetWordIDs</c>
 /// that has at least <c>parameters.K</c> entries is hashed with <c>GetKarpRabinHashCode</c>
 /// using length <c>K</c>, and the hashes are then reduced by <c>GetSimplifiedFingerPrint</c>.
 /// </summary>
 /// <param name="content">Text to fingerprint.</param>
 /// <param name="parameters">Supplies <c>K</c> and is forwarded to the reduction step.</param>
 /// <param name="isTitle">Forwarded unchanged to <c>GetSimplifiedFingerPrint</c>.</param>
 /// <returns>The distinct selected hash values, or <c>null</c> when nothing was selected.</returns>
 internal static int[] GetK_WordsFingerPrint(string content, Parameters parameters, bool isTitle)
 {
     var selected = new HashSet<int>();
     foreach (List<int> wordIds in WordList.GetWordIDs(content))
     {
         // Too short to form even one K-length group.
         if (wordIds.Count < parameters.K)
             continue;

         int[] rollingHashes = GetKarpRabinHashCode(wordIds, parameters.K);
         foreach (int fingerPrint in GetSimplifiedFingerPrint(rollingHashes, parameters, isTitle))
             selected.Add(fingerPrint); // duplicates are ignored by the set
     }

     if (selected.Count == 0)
         return null;
     return selected.ToArray();
 }
 /// <summary>
 /// Computes the sentence-level and k-words fingerprints for an item's title and text.
 /// </summary>
 /// <param name="item">Item whose <c>SpliteTitle</c> and <c>SpliteText</c> are fingerprinted.</param>
 /// <param name="sentenceTitle">Sentence fingerprint of the title; <c>null</c> when the title is null or empty.</param>
 /// <param name="sentenceContext">Sentence fingerprint of the text; <c>null</c> when the text is null or empty.</param>
 /// <param name="kwordsTitle">K-words fingerprint of the title; <c>null</c> when the title is null or empty.</param>
 /// <param name="kwordsContext">K-words fingerprint of the text; <c>null</c> when the text is null or empty.</param>
 public void GetFingerPrints(ItemToDuplication item, out int[] sentenceTitle, out int[] sentenceContext, out int[] kwordsTitle, out int[] kwordsContext)
 {
     sentenceTitle = sentenceContext = kwordsTitle = kwordsContext = null;

     string title = item.SpliteTitle;
     string text = item.SpliteText;
     bool hasTitle = !string.IsNullOrEmpty(title);
     bool hasText = !string.IsNullOrEmpty(text);

     if (hasTitle)
     {
         // The title parameters are chosen only once the title is known to be non-empty,
         // so an item with neither title nor text no longer dereferences a null title.
         Parameters titleParameters;
         if (!hasText && title.Length > Parameters.MAX_TITLE_LENGTH)
         {
             // A long title with no accompanying text is fingerprinted with the text parameters.
             titleParameters = contextParameters;
         }
         else
         {
             titleParameters = new Parameters(1, 1, contextParameters.TITLE_WEIGHT, contextParameters.THRESHOLD);
         }

         sentenceTitle = FingerPrintBuilder.GetSentenceFingerPrint(title, titleParameters, true);
         kwordsTitle = FingerPrintBuilder.GetK_WordsFingerPrint(title, titleParameters, true);
     }

     if (hasText)
     {
         sentenceContext = FingerPrintBuilder.GetSentenceFingerPrint(text, contextParameters, false);
         kwordsContext = FingerPrintBuilder.GetK_WordsFingerPrint(text, contextParameters, false);
     }
 }
 /// <summary>
 /// Creates a detector that keeps two fingerprint holders on the same collection.
 /// </summary>
 /// <param name="_contextParameters">Parameters stored for later fingerprinting calls.</param>
 /// <param name="collectionName">Collection name handed to both fingerprint holders.</param>
 public Detector(Parameters _contextParameters, string collectionName)
 {
     contextParameters = _contextParameters;

     // The second constructor argument distinguishes the two holders (0 for sentence,
     // 2 for k-words); its exact meaning is defined by FingerPrintHolder.
     sentenceHolder = new FingerPrintHolder(collectionName, 0);
     kwordsHolder = new FingerPrintHolder(collectionName, 2);
 }
 /// <summary>
 /// Ensures a detector exists for the bucket that <paramref name="mediaType"/> belongs to.
 /// SearchWeibo is treated as Weibo, SearchForum as Forum, and every other type as Unknown.
 /// Does nothing when the bucket already has a detector.
 /// </summary>
 /// <param name="mediaType">Media type to register; normalised to its bucket first.</param>
 /// <param name="contextParameters">Parameters passed to a newly created detector.</param>
 private void RegisterMediaType(Enums.MediaType mediaType, Parameters contextParameters)
 {
     Enums.MediaType bucket;
     string collectionName;

     if (mediaType == Enums.MediaType.Weibo || mediaType == Enums.MediaType.SearchWeibo)
     {
         bucket = Enums.MediaType.Weibo;
         collectionName = "FingerPrint_Weibo";
     }
     else if (mediaType == Enums.MediaType.Forum || mediaType == Enums.MediaType.SearchForum)
     {
         bucket = Enums.MediaType.Forum;
         collectionName = "FingerPrint_Forum";
     }
     else
     {
         bucket = Enums.MediaType.Unknown;
         collectionName = "FingerPrint_Other";
     }

     if (!detectorOFMedia.ContainsKey(bucket))
     {
         // TryAdd leaves an existing entry untouched if one appeared after the check above.
         detectorOFMedia.TryAdd(bucket, new Detector(contextParameters, collectionName));
     }
 }