/// <summary>
/// Loads per-language gram distributions from a GZip-compressed binary file.
/// </summary>
/// <remarks>
/// The stream is read in this order: tokenizer name (string), <c>lo</c> (int32),
/// <c>hi</c> (int32), <c>tlc</c> (bool), language count (int32); then, for each
/// language, its name (string), its gram count (int64) and that many
/// (gram string, occurrence count int64) pairs. For every language only the
/// first <paramref name="cap"/> pairs are kept; the remaining pairs are still
/// read so that the stream stays aligned for the next language.
/// </remarks>
/// <param name="distroFile">Path of the compressed distribution file to read.</param>
/// <param name="cap">Maximum number of grams kept per language, or -1 to keep all of them.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="cap"/> is smaller than -1.
/// </exception>
public LanguageIdentifier(string distroFile, int cap)
{
    // A cap below -1 would otherwise become a negative array length further down
    // and fail with an opaque OverflowException; reject it up front instead.
    if (cap < -1)
    {
        throw new System.ArgumentOutOfRangeException("cap", cap, "cap must be -1 (no limit) or a non-negative gram count.");
    }

    // NOTE(review): cap is only used locally in this constructor. If other members
    // read a cap field of this class, confirm that it is assigned somewhere.

    // Stacked usings: each stream is disposed even if wrapping it in the next one throws.
    using (var file = new FileStream(distroFile, FileMode.Open, FileAccess.Read))
    using (var gzip = new GZipStream(file, CompressionMode.Decompress))
    using (var br = new BinaryReader(gzip))
    {
        this.tokenizer = Tokenization.Tokenizer(br.ReadString());
        this.lo        = br.ReadInt32();
        this.hi        = br.ReadInt32();
        this.tlc       = br.ReadBoolean();

        var numLangs = br.ReadInt32();
        this.langs   = new string[numLangs];
        this.distros = new Dictionary <string, double> [numLangs];
        this.numOccs = new long[numLangs];

        for (int i = 0; i < numLangs; i++)
        {
            this.langs[i] = br.ReadString();

            long numGrams = br.ReadInt64();
            long maxRank  = (cap == -1 || numGrams < cap) ? numGrams : cap;
            var  arr      = new GramFreq[maxRank];

            // The counter is a long because the gram count is stored as an int64.
            for (long j = 0; j < numGrams; j++)
            {
                var gram = br.ReadString();
                var occs = br.ReadInt64();

                // Entries past the cap are consumed above but not recorded.
                if (j < maxRank)
                {
                    this.numOccs[i] += occs;
                    arr[j]           = new GramFreq {
                        gram = gram, freq = (double)occs
                    };
                }
            }

            this.distros[i] = PopulateDistro(arr);
        }
    }
}
        /// <summary>
        /// Computes a similarity value for every entry of <paramref name="res"/> by
        /// comparing the gram frequencies of <paramref name="s"/> with each stored
        /// language distribution.
        /// </summary>
        /// <param name="res">
        /// Result slots; <c>res[j].sim</c> is overwritten with the value that
        /// <c>rankSim</c> returns for <c>this.distros[j]</c>.
        /// </param>
        /// <param name="s">Text to tokenize and score.</param>
        protected override void FillInSims(LangSim[] res, string s)
        {
            // Count how often each token occurs in the input text.
            var docFreqs = new Dictionary <string, long>();

            foreach (var tok in this.tokenizer(s, this.tlc, this.lo, this.hi))
            {
                // TryGetValue leaves seen at 0 for a new token, so a single lookup
                // plus a single store covers both the first and repeated sightings.
                long seen;
                docFreqs.TryGetValue(tok, out seen);
                docFreqs[tok] = seen + 1;
            }

            // Turn the counts into a GramFreq array, walking the dictionary in its
            // own enumeration order.
            var arr  = new GramFreq[docFreqs.Count];
            int next = 0;

            foreach (var entry in docFreqs)
            {
                arr[next++] = new GramFreq {
                    gram = entry.Key, freq = entry.Value
                };
            }

            // Ordering is whatever GramFreq's own comparison defines.
            Array.Sort <GramFreq>(arr);

            // Consider at most cap grams of the document; -1 means all of them.
            var docMaxRank = (cap == -1 || arr.Length < cap) ? arr.Length : cap;

            for (int j = 0; j < res.Length; j++)
            {
                res[j].sim = this.rankSim(arr, docMaxRank, this.distros[j], this.maxPos);
            }
        }