/// <summary>
/// Make sure we find exactly the known data we're after
/// </summary>
static void CanLocateKnownData(TestContext context)
{
    var index = new Dictionary<string, byte[]>();
    var encoder = new Base128Encoder();

    // The term "cat" matches documents 2,4,6,8
    var docsWithCat = new List<uint>() { 2, 4, 6, 8 };
    index["cat"] = encoder.EncodeList(docsWithCat);

    // The term "dog" matches documents 6,8,10,12
    var docsWithDog = new List<uint>() { 6, 8, 10, 12 };
    index["dog"] = encoder.EncodeList(docsWithDog);

    // Query for docs that have both "cat" and "dog"
    var results = new HashSet<uint>();
    var firstPass = true;
    foreach (var pair in index)
    {
        var idList = encoder.DecodeList(pair.Value);
        if (firstPass)
        {
            firstPass = false;
            results.UnionWith(idList);
        }
        else
        {
            results.IntersectWith(idList);
        }
    }

    // Should have found only 6 and 8.
    // HashSet enumeration order is not guaranteed, so assert membership
    // rather than relying on element positions.
    Assert.AreEqual(2, results.Count);
    Assert.IsTrue(results.Contains(6u));
    Assert.IsTrue(results.Contains(8u));
}
/// <summary>
/// Helper method to add index entries
/// </summary>
static void addIndexValue(string termName, string termValue, uint docId, uint position,
    Dictionary<uint, TermValuePair> termHashMap, Dictionary<uint, DocPositionPair> docHashMap,
    Dictionary<uint, byte[]> index, Base128Encoder encoder)
{
    // Track the (term, value) pair by its hash key
    var termPair = new TermValuePair(termName, termValue);
    var termPairHash = termPair.ComputeHashValue();
    if (!termHashMap.ContainsKey(termPairHash))
    {
        termHashMap[termPairHash] = termPair;
    }

    // Track the (docId, position) pair by its hash key
    var docPair = new DocPositionPair(docId, position);
    var docPairHash = docPair.ComputeHashValue();
    if (!docHashMap.ContainsKey(docPairHash))
    {
        docHashMap[docPairHash] = docPair;
    }

    // Append the doc/position key to the compressed posting list for this term
    if (!index.ContainsKey(termPairHash))
    {
        index[termPairHash] = encoder.EncodeList(new List<uint>() { docPairHash });
    }
    else
    {
        var currentList = encoder.DecodeList(index[termPairHash]);
        currentList.Add(docPairHash);
        index[termPairHash] = encoder.EncodeList(currentList);
    }
}
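// TermValuePair and DocPositionPair are not defined in this file. The types
// below are hypothetical sketches written only to match the surface that
// addIndexValue uses (a two-argument constructor plus a ComputeHashValue()
// method returning a uint key); the real types and their hashing may differ.
class TermValuePair
{
    public string TermName { get; }
    public string TermValue { get; }

    public TermValuePair(string termName, string termValue)
    {
        TermName = termName;
        TermValue = termValue;
    }

    // Illustrative only: fold the two string hash codes into a single uint key.
    public uint ComputeHashValue()
    {
        unchecked
        {
            return (uint)((TermName.GetHashCode() * 397) ^ TermValue.GetHashCode());
        }
    }
}

class DocPositionPair
{
    public uint DocId { get; }
    public uint Position { get; }

    public DocPositionPair(uint docId, uint position)
    {
        DocId = docId;
        Position = position;
    }

    // Illustrative only: fold docId and position into a single uint key.
    public uint ComputeHashValue()
    {
        unchecked
        {
            return (DocId * 397) ^ Position;
        }
    }
}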
/// <summary>
/// Tests the varint decoding performance.
/// </summary>
static void LoadTestVarintDecoding(TestContext context)
{
    // Test config
    var TERMS = 50000;
    var DOCS_PER_TERM = 10000;
    var TOTAL_DOCS = 100000;

    // Test support
    var sw = new Stopwatch();
    var rng = new Random();

    // Our simple inverted index and an encoder.
    // Note the index does not map term to list-of-docid...
    // Instead, it maps term to a compressed range.
    var index = new Dictionary<string, byte[]>();
    var encoder = new Base128Encoder();

    ////////////////////////////////////////////////////////
    //
    // Create index data
    //
    for (var j = 0; j < TERMS; j++)
    {
        var docLinks = new List<uint>();

        // Add document links
        for (var m = 0; m < DOCS_PER_TERM; m++)
        {
            docLinks.Add((uint)rng.Next(1, TOTAL_DOCS));
        }

        // Compress the docID list.
        // We'll just use 'j' in string form as our term.
        index[j.ToString()] = encoder.EncodeList(docLinks);
    }

    ////////////////////////////////////////////////////////
    //
    // Search index data
    //
    var decodeCount = 0;
    sw.Start();

    // How about just a simple linear scan for now
    foreach (var termPair in index)
    {
        var decodedList = encoder.DecodeList(termPair.Value);
        decodeCount += decodedList.Count;
    }

    ////////////////////////////////////////////////////////
    //
    // Results summary
    //
    sw.Stop();
    context.WriteLine(string.Format("Decoded {0} keys in {1} ms", decodeCount, sw.ElapsedMilliseconds));
}
/// <summary>
/// Tests a decode / set-operation scenario
/// </summary>
static void LoadTestScenario(TestContext context)
{
    // Test config
    var TERMS = 50000;
    var DOCS_PER_TERM = 10000;
    var TOTAL_DOCS = 100000;

    // Test support
    var sw = new Stopwatch();
    var rng = new Random();

    // Our simple inverted index and an encoder.
    // Note the index does not map term to list-of-docid...
    // Instead, it maps term to a compressed range.
    var index = new Dictionary<string, byte[]>();
    var encoder = new Base128Encoder();

    ////////////////////////////////////////////////////////
    //
    // Create index data
    //
    for (var j = 0; j < TERMS; j++)
    {
        var docLinks = new List<uint>();

        // Add document links
        for (var m = 0; m < DOCS_PER_TERM; m++)
        {
            docLinks.Add((uint)rng.Next(1, TOTAL_DOCS));
        }

        // Compress the docID list.
        // We'll just use 'j' in string form as our term.
        index[j.ToString()] = encoder.EncodeList(docLinks);
    }

    ////////////////////////////////////////////////////////
    //
    // Search index data
    //
    // Let's search for 'terms' that begin with '5000'.
    // We then want the docs that appear in every matching term.
    var query = new Predicate<string>((s) => s.StartsWith("5000"));
    var outSet = new HashSet<uint>();
    var firstPass = true;
    sw.Start();

    // How about just a simple linear scan for now
    foreach (var termPair in index)
    {
        if (query(termPair.Key))
        {
            // Decode the posting list for this term
            var decodedList = encoder.DecodeList(termPair.Value);

            // Intersecting with an empty set would always yield an empty
            // result, so seed the result set with a union on the first pass.
            if (firstPass)
            {
                firstPass = false;
                outSet.UnionWith(decodedList);
            }
            else
            {
                outSet.IntersectWith(decodedList);
            }
        }
    }

    ////////////////////////////////////////////////////////
    //
    // Results summary
    //
    sw.Stop();
    context.WriteLine(string.Format("Found {0} results", outSet.Count));
    context.WriteLine(string.Format("Query time: {0}ms", sw.ElapsedMilliseconds));
}
/// <summary>
/// Make sure we can get a list back in the same form.
/// </summary>
static void ListEncodesAndDecodes(TestContext context)
{
    var rawList = new List<uint>();
    for (var j = 0; j < 100; j++)
    {
        rawList.Add((uint)j);
    }

    var encoder = new Base128Encoder();
    var decodedList = encoder.DecodeList(encoder.EncodeList(rawList));

    rawList.Sort();
    decodedList.Sort();

    Assert.AreEqual(rawList.Count, decodedList.Count);
    for (var m = 0; m < rawList.Count; m++)
    {
        Assert.AreEqual(rawList[m], decodedList[m]);
    }
}
/// <summary>
/// Just a check to see how much space we're saving
/// </summary>
static void CompressionYieldsLessStorage(TestContext context)
{
    // Create a list of 2,000,000 uints.
    // At 4 bytes per uint, that's 8,000,000 bytes (~7.6 MiB) uncompressed.
    var TEST_SIZE = 2000000;
    var docIds = new List<UInt32>(TEST_SIZE);
    var rng = new Random();
    for (var j = 0; j < TEST_SIZE; j++)
    {
        docIds.Add((uint)rng.Next(1, Int32.MaxValue));
    }

    // Now compress that list
    var encoder = new Base128Encoder();
    var packedData = encoder.EncodeList(docIds);

    // How much space did we save?
    context.WriteLine(String.Format("Raw size: {0} bytes", TEST_SIZE * 4));
    context.WriteLine(String.Format("Packed size: {0} bytes", packedData.Length));
}
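// Base128Encoder itself is not shown in this file. The sketch below is a
// hypothetical implementation, assuming a plain base-128 varint (LEB128)
// scheme, written only to match the EncodeList/DecodeList surface the tests
// above exercise; the real encoder may differ (for example, it might
// delta-encode sorted doc IDs before writing varints). It relies on
// System.IO for MemoryStream.
class Base128Encoder
{
    // Encode each uint as 1-5 bytes: seven payload bits per byte, with the
    // high bit set on every byte except the last byte of a value.
    public byte[] EncodeList(List<uint> values)
    {
        using (var stream = new MemoryStream())
        {
            foreach (var value in values)
            {
                var v = value;
                while (v >= 0x80)
                {
                    stream.WriteByte((byte)(v | 0x80));
                    v >>= 7;
                }
                stream.WriteByte((byte)v);
            }
            return stream.ToArray();
        }
    }

    // Reverse of EncodeList: accumulate seven bits per byte until a byte
    // with a clear high bit terminates the current value.
    public List<uint> DecodeList(byte[] data)
    {
        var values = new List<uint>();
        uint current = 0;
        var shift = 0;
        foreach (var b in data)
        {
            current |= (uint)(b & 0x7F) << shift;
            if ((b & 0x80) == 0)
            {
                values.Add(current);
                current = 0;
                shift = 0;
            }
            else
            {
                shift += 7;
            }
        }
        return values;
    }
}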