Example #1
    public void TestCount()
    {
        MultiSet<string> empty = new MultiSet<string>();
        Assert.AreEqual<int>(0, empty.Count);

        empty.Add("foo");
        empty.Add("bar");
        Assert.AreEqual<int>(2, empty.Count);

        empty.Add("foo");
        empty.Add("foo");
        Assert.AreEqual<int>(4, empty.Count);
    }
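
The test above exercises only two members: Add and a Count property that includes duplicates. For orientation, here is a minimal counting-bag sketch with that same surface; the class name CountingBag<T> and its internals are illustrative assumptions, not the MultiSet<T> implementation used by these examples.

    // Minimal illustration: a bag backed by Dictionary<T, int> whose Count
    // includes duplicates, mirroring the behaviour TestCount checks.
    using System.Collections.Generic;
    using System.Linq;

    public class CountingBag<T>
    {
        private readonly Dictionary<T, int> _counts = new Dictionary<T, int>();

        // Add one occurrence of the item.
        public void Add(T item)
        {
            int count;
            _counts.TryGetValue(item, out count);
            _counts[item] = count + 1;
        }

        // Total number of occurrences, duplicates included.
        public int Count
        {
            get { return _counts.Values.Sum(); }
        }
    }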
Example #2
    public Potion(Aspects.Primary primary1, Aspects.Primary primary2, Aspects.Primary primary3,
                  MultiSet<Aspects.Secondary> secondaries, PotionSlot slot, Aspects.Secondary type, Effect effect)
    {
        primaries = new MultiSet<Aspects.Primary>();
        primaries.Add(primary1);
        primaries.Add(primary2);
        primaries.Add(primary3);

        this.secondaries = new MultiSet<Aspects.Secondary>(secondaries);

        _slot = slot;
        _type = type;
        _effect = effect;
    }
Example #3
    public void add(Aspects.Primary primary1, Aspects.Primary primary2, Aspects.Primary primary3, MultiSet<Aspects.Secondary> secondaries, PotionSlot slot, Aspects.Secondary type, Effect effect)
    {
        Potion potion = new Potion(primary1, primary2, primary3, secondaries, slot, type, effect);
        MultiSet<Aspects.Primary> primaries = new MultiSet<Aspects.Primary>();
        primaries.Add(primary1);
        primaries.Add(primary2);
        primaries.Add(primary3);

        if (!potions.ContainsKey(primaries)) {
            potions.Add(primaries, new List<Potion>());
        }

        potions[primaries].Add(potion);
    }
Example #4
 public static string GetCharRange(string text)
 {
     MultiSet<string> counter = new MultiSet<string>();
     foreach (int ch in text)
     {
         if (ch >= mCharRanges.Count) { counter.Add("Other"); continue; }
         counter.Add(mCharRanges[ch]);
     }
     int max = 0;
     string charRange = "Other";
     foreach (KeyValuePair<string, int> item in counter)
     {
         if (item.Value > max) { max = item.Value; charRange = item.Key; }
     }
     return charRange;
 }
Example #5
    public Potion createPotion(IngredientType ingredient1, IngredientType ingredient2, IngredientType ingredient3)
    {
        IngredientData data1 = Ingredients.instance().getIngredient(ingredient1);
        IngredientData data2 = Ingredients.instance().getIngredient(ingredient2);
        IngredientData data3 = Ingredients.instance().getIngredient(ingredient3);

        MultiSet<Aspects.Secondary> secondaries = new MultiSet<Aspects.Secondary>();
        secondaries.Add(data1.secondary);
        secondaries.Add(data2.secondary);
        secondaries.Add(data3.secondary);

        Potion createdPotion = getBestMatch(data1.primary, data2.primary, data3.primary, secondaries);
        logPotionCreation(createdPotion, ingredient1, ingredient2, ingredient3);

        if (!hasDoneAutoPause && createdPotion != defaultPotion) {
            hasDoneAutoPause = true;
            GameObject.FindObjectOfType<PauseMenuController>().pause();
        }
        return createdPotion;
    }
Example #6
        public static void GetVocabularyRichness(Text text, out double ttr, out double hl, out double honore, out double brunet, bool lemmas)
        {
            // type-token ratio (TTR)
            MultiSet <string> tokens = new MultiSet <string>();
            int n = 0;

            foreach (Sentence sentence in text.mSentences)
            {
                foreach (Token token in sentence.mTokens)
                {
                    if (!token.mIsPunctuation)
                    {
                        if (lemmas)
                        {
                            tokens.Add(token.mLemma.ToLower());
                        }
                        else
                        {
                            tokens.Add(token.mTokenStr.ToLower());
                        }
                        n++;
                    }
                }
            }
            int v = tokens.CountUnique;

            ttr = (double)v / (double)n;
            // hapax legomena
            int v1 = tokens.ToList().Count(x => x.Key == 1);

            hl = (double)v1 / (double)n;
            // Honore's statistic: R = 100 x log(N) / (1 - V1 / V)
            honore = 100.0 * Math.Log(n) / (1.0 - (double)v1 / (double)v);
            // Brunet's index: W = N^(V^-0.165)
            brunet = Math.Pow(n, Math.Pow(v, -0.165));
        }
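
All four statistics above depend on just three quantities: the token count N, the type count V, and the hapax legomena count V1 (types that occur exactly once). A stand-alone check of the formulas with made-up counts (the numbers are hypothetical and only illustrate the arithmetic; the fragment assumes `using System;`):

            // Sanity check of the formulas above with hypothetical counts.
            int n = 1000, v = 400, v1 = 150;
            double ttr    = (double)v / n;                                  // type-token ratio
            double hl     = (double)v1 / n;                                 // hapax legomena ratio
            double honore = 100.0 * Math.Log(n) / (1.0 - (double)v1 / v);   // R = 100 * log(N) / (1 - V1/V)
            double brunet = Math.Pow(n, Math.Pow(v, -0.165));               // W = N^(V^-0.165)
            Console.WriteLine("TTR={0:F3} HL={1:F3} Honore={2:F1} Brunet={3:F1}", ttr, hl, honore, brunet);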
Example #7
        public void Remove()
        {
            IMultiSet <string> multiSet = new MultiSet <string>();

            multiSet.Add("itemToDecrease", 3);
            multiSet.Add("itemToDelete", 3);
            multiSet.Add("itemToDelete_negative", 3);
            multiSet.Add("itemNotChanged", 3);

            Assert.AreEqual(3, multiSet.Remove("itemToDecrease", 1));
            Assert.IsTrue(multiSet.Remove("itemToDecrease"));
            Assert.AreEqual(3, multiSet.Remove("itemToDelete", 3));
            Assert.IsFalse(multiSet.Remove("itemToDelete"));
            Assert.AreEqual(3, multiSet.Remove("itemToDelete_negative", 4));
            Assert.AreEqual(3, multiSet.Remove("itemNotChanged", 0));
            Assert.AreEqual(0, multiSet.Remove("itemNotExist", 1));
            Assert.IsFalse(multiSet.Remove("itemNotExist"));

            Assert.AreEqual(1, multiSet.Count(i => i == "itemToDecrease"));
            Assert.AreEqual(3, multiSet.Count(i => i == "itemNotChanged"));
            Assert.IsFalse(multiSet.Any(i => i == "itemToDelete"));
            Assert.IsFalse(multiSet.Any(i => i == "itemToDelete_negative"));
            Assert.IsFalse(multiSet.Any(i => i == "itemNotExist"));
        }
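
The assertions above pin down a Remove contract: Remove(item, count) appears to return the count the item had before the call (0 when the item is absent), delete the key outright when the requested count meets or exceeds it, and otherwise just decrease the count, while Remove(item) drops a single occurrence and reports whether the item was present. A sketch of that contract, inferred from the assertions only and continuing the illustrative CountingBag<T> from Example #1 (not the library's actual code):

        // Illustration only: Remove semantics inferred from the test above.
        public int Remove(T item, int count)
        {
            int before;
            if (!_counts.TryGetValue(item, out before))
            {
                return 0;                        // absent items report a previous count of 0
            }
            if (count >= before)
            {
                _counts.Remove(item);            // removing the full count (or more) deletes the key
            }
            else if (count > 0)
            {
                _counts[item] = before - count;  // otherwise decrease the count
            }
            return before;                       // the pre-call count is returned either way
        }

        // Remove a single occurrence; true if the item was present at all.
        public bool Remove(T item)
        {
            return Remove(item, 1) > 0;
        }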
Example #8
        public void SetItemCount_ExpectedCountMisMatch_DoesNotUpdate()
        {
            IMultiSet <string> multiSet = new MultiSet <string>();

            multiSet.Add("item", 3);

            Assert.IsFalse(multiSet.SetItemCount("item", 4, 5));
            Assert.IsFalse(multiSet.SetItemCount("item", 0, 5));

            Assert.IsFalse(multiSet.SetItemCount("itemNotExist", 1, 5));
            Assert.IsFalse(multiSet.SetItemCount("itemNotExist", 5, 5));

            Assert.AreEqual(3, multiSet.Count(i => i == "item"));
            Assert.IsFalse(multiSet.Any(i => i == "itemNotExist"));
        }
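
SetItemCount(item, expectedCount, newCount) reads like a compare-and-set: the test only shows the failure path, where a mismatch between the expected and the actual count (a missing item reading as 0) leaves the bag untouched and returns false. A sketch of that inferred contract, again on the illustrative CountingBag<T>; the success path, which this test does not exercise, is a guess:

        // Illustration only: conditional count update inferred from the test above.
        public bool SetItemCount(T item, int expectedCount, int newCount)
        {
            int current;
            _counts.TryGetValue(item, out current);  // a missing item reads as count 0
            if (current != expectedCount)
            {
                return false;                        // mismatch: leave the bag unchanged
            }
            _counts[item] = newCount;                // matched: overwrite the count (guessed behaviour)
            return true;
        }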
Example #9
        public static string GetCharRange(string text)
        {
            MultiSet <string> counter = new MultiSet <string>();

            foreach (int ch in text)
            {
                if (ch >= mCharRanges.Count)
                {
                    counter.Add("Other"); continue;
                }
                counter.Add(mCharRanges[ch]);
            }
            int    max       = 0;
            string charRange = "Other";

            foreach (KeyValuePair <string, int> item in counter)
            {
                if (item.Value > max)
                {
                    max = item.Value; charRange = item.Key;
                }
            }
            return(charRange);
        }
Example #10
        public void Train(ILabeledExampleCollection <LblT, ExT> dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            MultiSet <LblT> counter = new MultiSet <LblT>(mLblCmp);

            foreach (LabeledExample <LblT, ExT> lblEx in dataset)
            {
                counter.Add(lblEx.Label);
            }
            mPrediction = new Prediction <LblT>();
            foreach (KeyValuePair <LblT, int> keyVal in counter)
            {
                mPrediction.Inner.Add(new KeyDat <double, LblT>((double)keyVal.Value / (double)dataset.Count, keyVal.Key));
            }
            mPrediction.Inner.Sort(DescSort <KeyDat <double, LblT> > .Instance);
        }
Example #11
        public void Train(ILabeledExampleCollection <LblT, SparseVector <double> > dataset)
        {
            Utils.ThrowException(dataset == null ? new ArgumentNullException("dataset") : null);
            Utils.ThrowException(dataset.Count == 0 ? new ArgumentValueException("dataset") : null);
            Dispose();
            int[] trainSet = new int[dataset.Count];
            int[] labels   = new int[dataset.Count];
            Dictionary <LblT, int> lblToIdx = new Dictionary <LblT, int>(mLblCmp);
            MultiSet <int>         lblCount = new MultiSet <int>();
            int j = 0;

            foreach (LabeledExample <LblT, SparseVector <double> > lblEx in dataset)
            {
                SparseVector <double> vec = lblEx.Example;
                int[]   idx = new int[vec.Count];
                float[] val = new float[vec.Count];
                for (int i = 0; i < vec.Count; i++)
                {
                    idx[i] = vec.InnerIdx[i] + 1;
                    val[i] = (float)vec.InnerDat[i]; // *** cast to float
                }
                int lbl;
                if (!lblToIdx.TryGetValue(lblEx.Label, out lbl))
                {
                    lblToIdx.Add(lblEx.Label, lbl = lblToIdx.Count);
                    mIdxToLbl.Add(lblEx.Label);
                }
                Utils.ThrowException(lbl == 2 ? new ArgumentValueException("dataset") : null);
                trainSet[j++] = SvmLightLib.NewFeatureVector(idx.Length, idx, val, lbl == 0 ? 1 : -1);
                lblCount.Add(lbl == 0 ? 1 : -1);
            }
            string costFactor = "";

            if (mBiasedCostFunction)
            {
                costFactor = "-j " + ((double)lblCount.GetCount(-1) / (double)lblCount.GetCount(1));
            }
            mModelId = SvmLightLib.TrainModel(string.Format(CultureInfo.InvariantCulture, "-v {0} -c {1} -t {2} -g {3} -d {4} -s {5} -r {6} -b {7} -e {8} -# {9} {10} {11}",
                                                            (int)mVerbosityLevel, mC, (int)mKernelType, mKernelParamGamma, mKernelParamD, mKernelParamS, mKernelParamC, mBiasedHyperplane ? 1 : 0,
                                                            mEps, mMaxIter, mCustomParams, costFactor), trainSet.Length, trainSet);
            // delete training vectors
            foreach (int vecIdx in trainSet)
            {
                SvmLightLib.DeleteFeatureVector(vecIdx);
            }
        }
Example #12
        public void AddSBTest()
        {
            StringBuilder        sb   = new StringBuilder("aaa");
            StringBuilder        sb1  = new StringBuilder("bbb");
            StringBuilder        sb2  = new StringBuilder("ccc");
            List <StringBuilder> list = new List <StringBuilder>()
            {
                sb, sb1, sb2
            };
            MultiSet <StringBuilder> ms = new MultiSet <StringBuilder>();

            foreach (var s in list)
            {
                ms.Add(s);
            }
            string output = "aaa, bbb, ccc";

            Assert.AreEqual(output, ms.ToString());
            Assert.AreEqual(3, ms.Count);
        }
Example #13
    private Potion getBestMatch(Aspects.Primary primary1, Aspects.Primary primary2, Aspects.Primary primary3, MultiSet<Aspects.Secondary> secondaries)
    {
        MultiSet<Aspects.Primary> primaries = new MultiSet<Aspects.Primary>();
        primaries.Add(primary1);
        primaries.Add(primary2);
        primaries.Add(primary3);

        if (!potions.ContainsKey(primaries)) {
            return defaultPotion;
        }

        List<Potion> primaryMatches = potions[primaries];

        Potion bestMatch = null;
        foreach (Potion primaryMatch in primaryMatches) {
            bool match = (primaryMatch.getSecondaries().Except(secondaries).Count == 0);

            if (match && (bestMatch == null || bestMatch.getSecondaries().Count() < primaryMatch.getSecondaries().Count())) {
                bestMatch = primaryMatch;
            }
        }

        return bestMatch;
    }
Example #14
        private void PrecomputeProbabilities(ILabeledExampleCollection <LblT, BinaryVector> dataset)
        {
            mFeaturePriors = new Dictionary <int, double>();
            ArrayList <LblT>       tmp      = new ArrayList <LblT>();
            Dictionary <LblT, int> lblToIdx = new Dictionary <LblT, int>(mLblCmp);

            foreach (LabeledExample <LblT, BinaryVector> labeledExample in dataset)
            {
                if (!lblToIdx.ContainsKey(labeledExample.Label))
                {
                    lblToIdx.Add(labeledExample.Label, lblToIdx.Count);
                    tmp.Add(labeledExample.Label);
                }
            }
            // prepare counters
            mExampleCount = new int[tmp.Count];
            mFeatureProb  = new Dictionary <int, double> [tmp.Count];
            for (int j = 0; j < mFeatureProb.Length; j++)
            {
                mFeatureProb[j] = new Dictionary <int, double>();
            }
            MultiSet <int> featureCounter = new MultiSet <int>();
            // count features
            int i = 0;

            foreach (LabeledExample <LblT, BinaryVector> labeledExample in dataset)
            {
                mLogger.ProgressFast(Logger.Level.Info, /*sender=*/ this, "PrecomputeProbabilities", "Processing example {0} / {1}", ++i, dataset.Count);
                int lblIdx = lblToIdx[labeledExample.Label];
                mExampleCount[lblIdx]++;
                double val;
                foreach (int idx in labeledExample.Example)
                {
                    featureCounter.Add(idx);
                    if (mFeatureProb[lblIdx].TryGetValue(idx, out val))
                    {
                        mFeatureProb[lblIdx][idx] = val + 1;
                    }
                    else
                    {
                        mFeatureProb[lblIdx].Add(idx, 1);
                    }
                }
            }
            // estimate probabilities
            i = 0;
            foreach (Dictionary <int, double> probVec in mFeatureProb)
            {
                foreach (int featIdx in new ArrayList <int>(probVec.Keys))
                {
                    double p0 = ((double)featureCounter.GetCount(featIdx) + 1.0) / ((double)dataset.Count + 2.0); // rule of succession (feature prior)
                    double p  = (probVec[featIdx] + 2.0 * p0) / ((double)mExampleCount[i] + 2.0);                 // m-estimate (m = 2)
                    probVec[featIdx] = p;
                    if (!mFeaturePriors.ContainsKey(featIdx))
                    {
                        mFeaturePriors.Add(featIdx, p0);
                    }
                }
                i++;
            }
            mIdxToLbl = tmp.ToArray();
        }
Example #15
 public void AddSegment(Vector2 _left, Vector2 _right)
 {
     m_segements.Add(_left, _right);
 }
Example #16
    public void TestEquals()
    {
        MultiSet<String> requirements = new MultiSet<string>();
        requirements.Add("DAIRY");
        requirements.Add("PLANT");
        requirements.Add("PLANT");

        MultiSet<String> available = new MultiSet<string>();
        available.Add("PLANT");
        available.Add("PLANT");
        available.Add("DAIRY");

        Assert.AreEqual(requirements, available);
        Assert.AreEqual<int>(requirements.GetHashCode(), available.GetHashCode());
    }
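
TestEquals expects structural, order-independent equality: two multisets are equal when every element occurs the same number of times, and equal multisets must produce the same hash code. That is also what lets Examples #3 and #13 use a MultiSet as a Dictionary key. One way to achieve it is to compare per-element counts and to combine element hashes with a commutative operation; a sketch on the illustrative CountingBag<T> (not the library's implementation):

    // Illustration only: value equality and an order-independent hash.
    public override bool Equals(object obj)
    {
        CountingBag<T> other = obj as CountingBag<T>;
        if (other == null || other._counts.Count != _counts.Count)
        {
            return false;
        }
        foreach (KeyValuePair<T, int> pair in _counts)
        {
            int otherCount;
            if (!other._counts.TryGetValue(pair.Key, out otherCount) || otherCount != pair.Value)
            {
                return false;
            }
        }
        return true;
    }

    public override int GetHashCode()
    {
        int hash = 0;
        foreach (KeyValuePair<T, int> pair in _counts)
        {
            // addition is commutative, so enumeration order does not affect the result
            unchecked { hash += pair.Key.GetHashCode() ^ pair.Value; }
        }
        return hash;
    }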
Example #17
        static void Main(string[] args)
        {
            Console.WriteLine(TimePoint.FromTicks(16087186800000000).ToString());

            return;


#if true
            var h = new HashSet <TimePoint>();
            var d = new Dictionary <TimePoint, int>();
            h.Add(new TimePoint(2));
            h.Add(new TimePoint(2));

            d.Add(new TimePoint(302342342341), 2);
            d.Add(new TimePoint(302342342340), 3);

            foreach (var i in d)
            {
                Console.WriteLine(i.Key.ToString() + i.Value);
            }
#elif true // Json
            var jsonObjectCollection = (JsonDataObject)JsonParser.Parse("{\"a\" : FalSe}");

            Console.WriteLine(jsonObjectCollection["a"].GetBool());

            //foreach (var i in jsonObjectCollection)
            //{
            //    var a = (JsonDataArray)i;
            //    foreach (var jj in a)
            //    {
            //        var n = (JsonDataNumber)jj;
            //        var Number = n.GetUInt32();
            //        Console.WriteLine(Number);
            //    }
            //}
            return;

            string JsonStr = @"

[3],
[4]

";

            var j = JsonParser.Parse(JsonStr);
            Console.WriteLine(j.ToString());

//            string JsonStr = @"{
//    """" : """",
//  ""boolName"" : true,
//  ""boolName2"" : True,
//  ""boolName3"" : false,
//  ""boolName4"" : False,
//    ""name"" : ""string: ,@'  t value"",
//    ""name2"":12.34 ,
//    ""name3"" :5667,
//    ""objname"" :{},
//    ""array"": [123,2234,""ok"",false,true,{},[]]
//}";

//            var j = JsonParser.Parse(JsonStr);

            // The tests below cannot run until nested containers are supported.
            //var pa = new SPoint[2];
            //for (int i = 0; i < pa.Length; ++i)
            //    pa[i] = new SPoint(1, 2);
            //j.Push("PointArray", pa);

            //var sa = new string[2];
            //for (int i = 0; i < sa.Length; ++i)
            //    sa[i] = i.ToString();
            //j.Push("StringArray", sa);

            //var sl = new List<string>();
            //for (int i = 0; i < 2; ++i)
            //    sl.Add(i.ToString());
            //j.Push("StringList", sl);

            //var hs = new HashSet<string>();
            //for (int i = 0; i < 2; ++i)
            //    hs.Add(i.ToString());
            //j.Push("StringHashSet", hs);

            //var mss = new MultiSet<string>();
            //for (int i = 0; i < 2; ++i)
            //    mss.Add(i.ToString());
            //j.Push("StringMultiSet", mss);

            //var pd = new Dictionary<string, SPoint>();
            //for (int i = 0; i < 2; ++i)
            //    pd.Add(i.ToString(), new SPoint(1, 1));
            //j.Push("PointDictionary", pd);

            //var pmm = new CMultiMap<string, SPoint>();
            //for (int i = 0; i < 2; ++i)
            //    pmm.Add(i.ToString(), new SPoint(i, i));
            //j.Push("PointMultiMap", pmm);

            //var l = new List<int>();
            //l.Add(1000);
            //l.Add(2000);
            //j.Push("ListList", l);

            //var dd = new Dictionary<int, int>();
            //dd.Add(1000, 1000);
            //dd.Add(2000, 2000);
            //var dhs = new HashSet<Dictionary<int, int>>();
            //for (int i = 0; i < 2; ++i)
            //    dhs.Add(dd);
            //j.Push("DictionaryHashSet", dhs);

//            Console.WriteLine(j.ToString());
#elif false // MultiMap
            int v;

            Console.WriteLine("MultiSet");
            var ms = new MultiSet <int>();
            ms.Add(3);
            ms.Add(3);
            ms.Add(3);
            ms.Add(2);
            ms.Add(4);
            Console.WriteLine("Count : " + ms.Count);
            ms.RemoveLast();
            ms.RemoveLast();
            ms.RemoveLast();
            ms.RemoveLast();

            Console.WriteLine("All Datas");
            foreach (var i in ms)
            {
                Console.WriteLine(i);
            }

            Console.WriteLine("MultiMap");
            var mm = new MultiMap <int, int>();
            mm.Add(3, 4);
            mm.Add(3, 5);
            mm.Add(3, 6);
            mm.Add(2, 2);

            Console.WriteLine("All Datas : " + mm.Count);
            foreach (var i in mm)
            {
                Console.WriteLine(i);
            }

            Console.WriteLine("ToArray");
            var a = mm.ToArray(3);
            foreach (var i in a)
            {
                Console.WriteLine(i);
            }

            var it = mm.First();
            Console.WriteLine("First");
            Console.WriteLine(it);
            mm.RemoveFirst();
            Console.WriteLine("First Removed Count : " + mm.Count);
            foreach (var i in mm)
            {
                Console.WriteLine(i);
            }
#elif true // Resize
            var l = new List <int>();
            l.Resize(4);
#endif
        }
Example #18
    public void TestExcept()
    {
        MultiSet<String> requirements = new MultiSet<string>();
        requirements.Add("DAIRY");
        requirements.Add("PLANT");
        requirements.Add("PLANT");

        MultiSet<String> available = new MultiSet<string>();
        available.Add("PLANT");
        available.Add("PLANT");
        available.Add("DAIRY");

        Assert.AreEqual(requirements.Except(available), new MultiSet<string>());
        Assert.AreEqual(available.Except(requirements), new MultiSet<String>());
    }
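
Except here behaves as multiset difference: each occurrence in the argument cancels one occurrence in the receiver, so two bags with identical contents subtract to an empty bag in both directions, which is exactly what the test asserts. The same idea drives getBestMatch in Example #13, where an empty difference means every required secondary aspect is available. A sketch of the operation on the illustrative CountingBag<T>:

    // Illustration only: multiset difference, where each occurrence in `other`
    // cancels at most one matching occurrence in this bag.
    public CountingBag<T> Except(CountingBag<T> other)
    {
        CountingBag<T> result = new CountingBag<T>();
        foreach (KeyValuePair<T, int> pair in _counts)
        {
            int otherCount;
            other._counts.TryGetValue(pair.Key, out otherCount);
            int remaining = pair.Value - otherCount;
            if (remaining > 0)
            {
                result._counts[pair.Key] = remaining;  // keep only the surplus occurrences
            }
        }
        return result;
    }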
Example #19
        private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
        {
            var rleSource = new List <NibbleRun>();
            var counts    = new SortedList <NibbleRun, long>();

            using (IEnumerator <byte> unpacked = Unpacked(input))
            {
                // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
                // Maximum run length is 8, meaning 7 repetitions.
                if (unpacked.MoveNext())
                {
                    NibbleRun current = new NibbleRun(unpacked.Current, 0);
                    while (unpacked.MoveNext())
                    {
                        NibbleRun next = new NibbleRun(unpacked.Current, 0);
                        if (next.Nibble != current.Nibble || current.Count >= 7)
                        {
                            rleSource.Add(current);
                            long count;
                            counts.TryGetValue(current, out count);
                            counts[current] = count + 1;
                            current         = next;
                        }
                        else
                        {
                            ++current.Count;
                        }
                    }
                }
            }

            // We will use the Package-merge algorithm to build the optimal length-limited
            // Huffman code for the current file. To do this, we must map the current
            // problem onto the Coin Collector's problem.
            // Build the basic coin collection.
            var qt = new List <EncodingCodeTreeNode>();

            foreach (var kvp in counts)
            {
                // No point in including anything with weight less than 2, as they
                // would actually increase compressed file size if used.
                if (kvp.Value > 1)
                {
                    qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
                }
            }

            qt.Sort();

            // The base coin collection for the length-limited Huffman coding has
            // one coin list per character in length of the limitation. Each coin list
            // has a constant "face value", and each coin in a list has its own
            // "numismatic value". The "face value" is unimportant in the way the code
            // is structured below; the "numismatic value" of each coin is the number
            // of times the underlying nibble run appears in the source file.

            // This will hold the Huffman code map.
            // NOTE: while the codes that will be written in the header will not be
            // longer than 8 bits, it is possible that a supplementary code map will
            // add "fake" codes that are longer than 8 bits.
            var codeMap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();

            // Size estimate. This is used to build the optimal compressed file.
            long sizeEstimate = long.MaxValue;

            // We will solve the Coin Collector's problem several times, each time
            // ignoring more of the least frequent nibble runs. This allows us to find
            // *the* lowest file size.
            while (qt.Count > 1)
            {
                // Make a copy of the basic coin collection.
                var q0 = new List <EncodingCodeTreeNode>(qt);

                // Ignore the lowest weighted item. Will only affect the next iteration
                // of the loop. If it can be proven that there is a single global
                // minimum (and no local minima for file size), then this could be
                // simplified to a binary search.
                qt.RemoveAt(qt.Count - 1);

                // We now solve the Coin collector's problem using the Package-merge
                // algorithm. The solution goes here.
                var solution = new List <EncodingCodeTreeNode>();

                // This holds the packages from the last iteration.
                var q = new List <EncodingCodeTreeNode>(q0);

                int target = (q0.Count - 1) << 8, idx = 0;
                while (target != 0)
                {
                    // Gets lowest bit set in its proper place:
                    int val = (target & -target), r = 1 << idx;

                    // Is the current denomination equal to the least denomination?
                    if (r == val)
                    {
                        // If yes, take the least valuable node and put it into the solution.
                        solution.Add(q[q.Count - 1]);
                        q.RemoveAt(q.Count - 1);
                        target -= r;
                    }

                    // The coin collection has coins of values 1 to 8; copy from the
                    // original in those cases for the next step.
                    var q1 = new List <EncodingCodeTreeNode>();
                    if (idx < 7)
                    {
                        q1.AddRange(q0);
                    }

                    // Split the current list into pairs and insert the packages into
                    // the next list.
                    while (q.Count > 1)
                    {
                        EncodingCodeTreeNode child1 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        EncodingCodeTreeNode child0 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        q1.Add(new EncodingCodeTreeNode(child0, child1));
                    }

                    idx++;
                    q.Clear();
                    q.AddRange(q1);
                    q.Sort();
                }

                // The Coin Collector's problem has been solved. Now it is time to
                // map the solution back into the length-limited Huffman coding problem.

                // To do that, we iterate through the solution and count how many times
                // each nibble run has been used (remember that the coin collection had
                // had multiple coins associated with each nibble run) -- this number
                // is the optimal bit length for the nibble run.
                var baseSizeMap = new SortedList <NibbleRun, long>();
                foreach (var item in solution)
                {
                    item.Traverse(baseSizeMap);
                }

                // With the length-limited Huffman coding problem solved, it is now time
                // to build the code table. As input, we have a map associating a nibble
                // run to its optimal encoded bit length. We will build the codes using
                // the canonical Huffman code.

                // To do that, we must invert the size map so we can sort it by code size.
                var sizeOnlyMap = new MultiSet <long>();

                // This map contains lots more information, and is used to associate
                // the nibble run with its optimal code. It is sorted by code size,
                // then by frequency of the nibble run, then by the nibble run.
                var sizeMap = new MultiSet <SizeMapItem>();

                foreach (var item in baseSizeMap)
                {
                    long size = item.Value;
                    sizeOnlyMap.Add(size);
                    sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
                }

                // We now build the canonical Huffman code table.
                // "baseCode" is the code for the first nibble run with a given bit length.
                // "carry" is how many nibble runs were demoted to a higher bit length
                // at an earlier step.
                // "cnt" is how many nibble runs have a given bit length.
                long baseCode = 0;
                long carry = 0, cnt;

                // This list contains the codes sorted by size.
                var codes = new List <KeyValuePair <long, byte> >();
                for (byte j = 1; j <= 8; j++)
                {
                    // How many nibble runs have the desired bit length.
                    cnt   = sizeOnlyMap.Count(j) + carry;
                    carry = 0;

                    for (int k = 0; k < cnt; k++)
                    {
                        // Sequential binary numbers for codes.
                        long code = baseCode + k;
                        long mask = (1L << j) - 1;

                        // We do not want any codes composed solely of 1's or which
                        // start with 111111, as that sequence is reserved.
                        if ((j <= 6 && code == mask) ||
                            (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                        {
                            // We must demote this many nibble runs to a longer code.
                            carry = cnt - k;
                            cnt   = k;
                            break;
                        }

                        codes.Add(new KeyValuePair <long, byte>(code, j));
                    }

                    // This is the beginning bit pattern for the next bit length.
                    baseCode = (baseCode + cnt) << 1;
                }

                // With the canonical table built, the codemap can finally be built.
                var tempCodemap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();
                using (IEnumerator <SizeMapItem> enumerator = sizeMap.GetEnumerator())
                {
                    int pos = 0;
                    while (enumerator.MoveNext() && pos < codes.Count)
                    {
                        tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                        ++pos;
                    }
                }

                // We now compute the final file size for this code table.
                // 2 bytes at the start of the file, plus 1 byte at the end of the
                // code table.
                long tempsize_est = 3 * 8;
                byte last         = 0xff;

                // Start with any nibble runs with their own code.
                foreach (var item in tempCodemap)
                {
                    // Each new nibble needs an extra byte.
                    if (item.Key.Nibble != last)
                    {
                        tempsize_est += 8;
                        last          = item.Key.Nibble;
                    }

                    // 2 bytes per nibble run in the table.
                    tempsize_est += 2 * 8;

                    // How many bits this nibble run uses in the file.
                    tempsize_est += counts[item.Key] * item.Value.Value;
                }

                // Supplementary code map for the nibble runs that can be broken up into
                // shorter nibble runs with a smaller bit length than inlining.
                var supCodemap = new Dictionary <NibbleRun, KeyValuePair <long, byte> >();

                // Now we will compute the size requirements for inline nibble runs.
                foreach (var item in counts)
                {
                    if (!tempCodemap.ContainsKey(item.Key))
                    {
                        // Nibble run does not have its own code. We need to find out if
                        // we can break it up into smaller nibble runs with total code
                        // size less than 13 bits or if we need to inline it (13 bits).
                        if (item.Key.Count == 0)
                        {
                            // If this is a nibble run with zero repeats, we can't break
                            // it up into smaller runs, so we inline it.
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else if (item.Key.Count == 1)
                        {
                            // We stand a chance of breaking the nibble run.

                            // This case is rather trivial, so we hard-code it.
                            // We can break this up only as 2 consecutive runs of a nibble
                            // run with count == 0.
                            KeyValuePair <long, byte> value;
                            if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                            {
                                // The smaller nibble run either does not have its own code
                                // or it results in a longer bit code when doubled up than
                                // would result from inlining the run. In either case, we
                                // inline the nibble run.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                            else
                            {
                                // The smaller nibble run has a small enough code that it is
                                // more efficient to use it twice than to inline our nibble
                                // run. So we do exactly that, by adding a (temporary) entry
                                // in the supplementary codemap, which will later be merged
                                // into the main codemap.
                                long code = value.Key;
                                byte len  = value.Value;
                                code                 = (code << len) | code;
                                len                <<= 1;
                                tempsize_est        += len * item.Value;
                                supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | len));
                            }
                        }
                        else
                        {
                            // We stand a chance of breaking the nibble run.
                            byte n = item.Key.Count;

                            // This is a linear optimization problem subject to 2
                            // constraints. If the number of repeats of the current nibble
                            // run is N, then we have N dimensions.
                            // Reference to table of linear coefficients. This table has
                            // N columns for each line.
                            byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                            int rows = myLinearCoeffs.GetLength(0);

                            byte nibble = item.Key.Nibble;

                            // List containing the code length of each nibble run, or 13
                            // if the nibble run is not in the codemap.
                            var runlen = new List <long>();

                            // Initialize the list.
                            for (byte i = 0; i < n; i++)
                            {
                                // Is this run in the codemap?
                                KeyValuePair <long, byte> value;
                                if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                {
                                    // It is.
                                    // Put code length in the vector.
                                    runlen.Add(value.Value);
                                }
                                else
                                {
                                    // It is not.
                                    // Put inline length in the vector.
                                    runlen.Add(6 + 7);
                                }
                            }

                            // Now go through the linear coefficient table and tally up
                            // the total code size, looking for the best case.
                            // The best size is initialized to be the inlined case.
                            long bestSize = 6 + 7;
                            int  bestLine = -1;
                            for (int i = 0; i < rows; i++)
                            {
                                // Tally up the code length for this coefficient line.
                                long len = 0;
                                for (byte j = 0; j < n; j++)
                                {
                                    byte c = myLinearCoeffs[i, j];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    len += c * runlen[j];
                                }

                                // Is the length better than the best yet?
                                if (len < bestSize)
                                {
                                    // If yes, store it as the best.
                                    bestSize = len;
                                    bestLine = i;
                                }
                            }

                            // Have we found a better code than inlining?
                            if (bestLine >= 0)
                            {
                                // We have; use it. To do so, we have to build the code
                                // and add it to the supplementary code table.
                                long code = 0, len = 0;
                                for (byte i = 0; i < n; i++)
                                {
                                    byte c = myLinearCoeffs[bestLine, i];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    // Is this run in the codemap?
                                    KeyValuePair <long, byte> value;
                                    if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                    {
                                        // It is; it MUST be, as the other case is impossible
                                        // by construction.
                                        for (int j = 0; j < c; j++)
                                        {
                                            len   += value.Value;
                                            code <<= value.Value;
                                            code  |= value.Key;
                                        }
                                    }
                                }

                                if (len != bestSize)
                                {
                                    // ERROR! DANGER! THIS IS IMPOSSIBLE!
                                    // But just in case...
                                    tempsize_est += (6 + 7) * item.Value;
                                }
                                else
                                {
                                    // By construction, best_size is at most 12.
                                    byte c = (byte)bestSize;

                                    // Add it to supplementary code map.
                                    supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | c));
                                    tempsize_est        += bestSize * item.Value;
                                }
                            }
                            else
                            {
                                // No, we will have to inline it.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                        }
                    }
                }

                // Merge the supplementary code map into the temporary code map.
                foreach (var item in supCodemap)
                {
                    tempCodemap[item.Key] = item.Value;
                }

                // Round up to a full byte.
                tempsize_est = (tempsize_est + 7) & ~7;

                // Is this iteration better than the best?
                if (tempsize_est < sizeEstimate)
                {
                    // If yes, save the codemap and file size.
                    codeMap      = tempCodemap;
                    sizeEstimate = tempsize_est;
                }
            }

            // We now have a prefix-free code map associating the RLE-encoded nibble
            // runs with their code. Now we write the file.
            // Write header.
            BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));
            byte lastNibble = 0xff;

            foreach (var item in codeMap)
            {
                byte length = item.Value.Value;

                // length with bit 7 set is a special device for further reducing file size, and
                // should NOT be on the table.
                if ((length & 0x80) != 0)
                {
                    continue;
                }

                NibbleRun nibbleRun = item.Key;
                if (nibbleRun.Nibble != lastNibble)
                {
                    // 0x80 marks byte as setting a new nibble.
                    NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
                    lastNibble = nibbleRun.Nibble;
                }

                long code = item.Value.Key;
                NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
                NeutralEndian.Write1(output, (byte)code);
            }

            // Mark end of header.
            NeutralEndian.Write1(output, 0xff);

            // Write the encoded bitstream.
            UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

            // The RLE-encoded source makes for a far faster encode as we simply
            // use the nibble runs as an index into the map, meaning a quick binary
            // search gives us the code to use (if in the map) or tells us that we
            // need to use inline RLE.
            foreach (var nibbleRun in rleSource)
            {
                KeyValuePair <long, byte> value;
                if (codeMap.TryGetValue(nibbleRun, out value))
                {
                    long code = value.Key;
                    byte len  = value.Value;

                    // len with bit 7 set is a device to bypass the code table at the
                    // start of the file. We need to clear the bit here before writing
                    // the code to the file.
                    len &= 0x7f;

                    // We can have codes in the 9-12 range due to the break up of large
                    // inlined runs into smaller non-inlined runs. Deal with those high
                    // bits first, if needed.
                    if (len > 8)
                    {
                        bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                        len = 8;
                    }

                    bitStream.Write((byte)(code & 0xff), len);
                }
                else
                {
                    bitStream.Write(0x3f, 6);
                    bitStream.Write(nibbleRun.Count, 3);
                    bitStream.Write(nibbleRun.Nibble, 4);
                }
            }

            // Fill remainder of last byte with zeroes and write if needed.
            bitStream.Flush(false);
        }
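
The code-table construction in the middle of this example assigns canonical Huffman codes: lengths are processed in increasing order, codes of the same length are consecutive integers, and the starting code for the next length is the previous start plus the count, shifted left by one bit. A tiny stand-alone illustration with made-up code lengths, ignoring the reserved 111111 prefix that the example above has to work around:

            // Canonical code assignment for hypothetical code lengths {2, 2, 3, 3, 3, 4}.
            int[] countPerLength = { 0, 0, 2, 3, 1 };  // index = bit length, value = number of codes
            long baseCode = 0;
            for (int len = 1; len < countPerLength.Length; len++)
            {
                for (int k = 0; k < countPerLength[len]; k++)
                {
                    long code = baseCode + k;
                    Console.WriteLine("length {0}: {1}", len, Convert.ToString(code, 2).PadLeft(len, '0'));
                }
                baseCode = (baseCode + countPerLength[len]) << 1;
            }
            // Prints the prefix-free set: 00, 01, 100, 101, 110, 1110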
Example #20
        static void Main(string[] args)
        {
            MultiSet <string> urlCount
                = new MultiSet <string>();
            MultiSet <string> domainCount
                = new MultiSet <string>();
            Dictionary <string, Set <string> > domainToUrlMapping
                = new Dictionary <string, Set <string> >();
            Dictionary <string, Dictionary <string, Set <string> > > data
                = new Dictionary <string, Dictionary <string, Set <string> > >();
            Dictionary <string, Dictionary <string, Set <string> > > domainData
                = new Dictionary <string, Dictionary <string, Set <string> > >();

            using (SqlConnection connection = new SqlConnection(Utils.GetConfigValue("DbConnectionString")))
            {
                connection.Open();
                using (SqlCommand cmd = new SqlCommand(@"SELECT name, responseUrl from Documents", connection))
                {
                    cmd.CommandTimeout = 0;
                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        //foreach (string fileName in Directory.GetFiles(Utils.GetConfigValue("DataFolder", ".").TrimEnd('\\'), "*.xml.gz", SearchOption.AllDirectories))
                        while (reader.Read())
                        {
                            //Console.WriteLine(fileName);
                            //Document doc = new Document("", "");
                            //doc.ReadXmlCompressed(fileName);
                            //Console.WriteLine(doc.Name);
                            Console.WriteLine(reader.GetValue <string>("name"));
                            //string url = doc.Features.GetFeatureValue("responseUrl");
                            string url = reader.GetValue <string>("responseUrl");
                            //Console.WriteLine(url);
                            string             left;
                            ArrayList <string> path;
                            ArrayList <KeyDat <string, string> > qParsed;
                            ParseUrl(url, out left, out path, out qParsed);
                            string urlKey = UrlAsString(left, path, qParsed, new Set <string>());
                            urlCount.Add(urlKey);
                            domainCount.Add(left);
                            if (!domainToUrlMapping.ContainsKey(left))
                            {
                                domainToUrlMapping.Add(left, new Set <string>());
                            }
                            domainToUrlMapping[left].Add(urlKey);
                            if (!data.ContainsKey(urlKey))
                            {
                                data.Add(urlKey, new Dictionary <string, Set <string> >());
                            }
                            if (!domainData.ContainsKey(left))
                            {
                                domainData.Add(left, new Dictionary <string, Set <string> >());
                            }
                            Dictionary <string, Set <string> > urlInfo    = data[urlKey];
                            Dictionary <string, Set <string> > domainInfo = domainData[left];
                            foreach (KeyDat <string, string> item in qParsed)
                            {
                                //Console.WriteLine(item.Key + "=" + item.Dat);
                                if (!urlInfo.ContainsKey(item.Key))
                                {
                                    urlInfo.Add(item.Key, new Set <string>());
                                }
                                urlInfo[item.Key].Add(item.Dat);
                                if (!domainInfo.ContainsKey(item.Key))
                                {
                                    domainInfo.Add(item.Key, new Set <string>());
                                }
                                domainInfo[item.Key].Add(item.Dat);
                            }
                        }
                    }
                }
            }

            Set <string> paramShitList
                = new Set <string>(Utils.GetConfigValue("ExcludeUrlArgs", "utm_campaign,feedName,mod,rss_id,comment,commentid,partner").Split(','));

            StreamWriter w = new StreamWriter(Utils.GetConfigValue("OutputFileName", "reportDomains.txt"));

            foreach (KeyValuePair <string, Dictionary <string, Set <string> > > item in domainData)
            {
                bool found = false;
                foreach (KeyValuePair <string, Set <string> > paramInfo in item.Value)
                {
                    if (paramInfo.Value.Count > 1 && !paramShitList.Contains(paramInfo.Key.ToLower()))
                    {
                        found = true;
                        break;
                    }
                }
                if (found)
                {
                    bool          __found = false;
                    StringBuilder s       = new StringBuilder();
                    s.AppendLine("********************** Domain Info **********************");
                    s.AppendLine();
                    s.AppendLine(item.Key + " (" + domainCount.GetCount(item.Key) + ")");
                    foreach (KeyValuePair <string, Set <string> > paramInfo in item.Value)
                    {
                        if (!paramShitList.Contains(paramInfo.Key) && paramInfo.Value.Count > 1)
                        {
                            s.AppendLine("\t" + paramInfo.Key + "\t" + paramInfo.Value.Count + "\t" + paramInfo.Value);
                        }
                    }
                    s.AppendLine();
                    s.AppendLine("*** Details ***");
                    s.AppendLine();
                    foreach (string url in domainToUrlMapping[item.Key])
                    {
                        bool _found = false;
                        foreach (KeyValuePair <string, Set <string> > paramInfo in data[url])
                        {
                            if (paramInfo.Value.Count > 1 && !paramShitList.Contains(paramInfo.Key))
                            {
                                _found = true;
                                break;
                            }
                        }
                        if (_found)
                        {
                            __found = true;
                            s.AppendLine(url + " (" + urlCount.GetCount(url) + ")");
                            foreach (KeyValuePair <string, Set <string> > paramInfo in data[url])
                            {
                                if (paramInfo.Value.Count > 1)
                                {
                                    s.AppendLine("\t" + paramInfo.Key + "\t" + paramInfo.Value.Count + "\t" + paramInfo.Value);
                                }
                            }
                            s.AppendLine();
                        }
                    }
                    s.AppendLine();
                    if (__found)
                    {
                        w.Write(s.ToString());
                    }
                }
            }
            w.Close();
        }