Ejemplo n.º 1
0
        // Sets bits 0..127 one at a time and checks after each step that the
        // cardinality has grown by one and that earlier bits are still reported.
        public void TestCardinality1()
        {
            var bits = new BitSet();
            Assert.AreEqual(0, bits.Cardinality());

            int index = 0;
            while (index < 128)
            {
                bits.Set(index);

                Assert.AreEqual(index + 1, bits.Cardinality());
                // Bit 0 is set on the first pass and never cleared afterwards.
                Assert.AreEqual(0, bits.NextSetBit(0));

                if (index > 0)
                {
                    // The bit set on the previous pass must be found at its own position.
                    int previous = index - 1;
                    Assert.AreEqual(previous, bits.NextSetBit(previous));
                }

                index++;
            }
        }
Ejemplo n.º 2
0
        // Feeds the even numbers 0..198 through MyFilteredDocSetIterator and expects
        // the iterator to yield exactly the multiples of ten below 200.
        public void TestFilteredDocSetIterator()
        {
            var set1 = new IntArrayDocIdSet();

            for (int i = 0; i < 100; i++)
            {
                set1.AddDoc(2 * i); // 100 even numbers
            }

            var filteredIter = new MyFilteredDocSetIterator(set1.Iterator());

            // Expected documents: 0, 10, 20, ..., 190.
            var expected = new BitSet(200);

            for (int n = 0; n < 200; n += 10)
            {
                expected.Set(n);
            }

            // Deliberately no try/catch around the checks: a catch-all that calls
            // Assert.Fail(e.Message) also traps the assertion exceptions raised below
            // and re-reports them without their stack trace. Letting exceptions
            // propagate still fails the test and keeps the full diagnostics.
            int doc;
            while ((doc = filteredIter.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                if (!expected.Get(doc))
                {
                    Assert.Fail("failed: " + doc + " not in expected set");
                    return;
                }

                // Tick the document off so leftovers can be detected afterwards.
                expected.Clear(doc);
            }

            int leftover = expected.Cardinality();
            if (leftover > 0)
            {
                Assert.Fail("failed: leftover cardinality: " + leftover);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Stores <paramref name="floats"/> in this instance, picking a dense or a
        /// sparse layout depending on the share of non-zero entries.
        /// </summary>
        /// <remarks>
        /// When the non-zero share is below <c>ON_RATIO_CUTOFF</c> only the non-zero
        /// values are kept in <c>_floats</c>, <c>_bits</c> marks their original
        /// positions, and <c>_referencePoints[r]</c> holds the number of non-zero
        /// entries located before position <c>(r + 1) * REFERENCE_POINT_EVERY</c>.
        /// If there are no non-zero values at all, all three fields are set to null.
        /// Otherwise the input array itself is kept and <c>_bits</c> is set to null.
        /// </remarks>
        /// <param name="floats">values to store; the length must equal <c>_capacity</c></param>
        /// <exception cref="ArgumentException">the array length differs from <c>_capacity</c></exception>
        protected virtual void Condense(float[] floats)
        {
            int total = floats.Length;
            if (total != _capacity)
            {
                throw new ArgumentException("bad input float array of length " + floats.Length + " for capacity: " + _capacity);
            }

            // Mark and count the non-zero slots in one pass.
            var nonZero      = new BitSet(total);
            int nonZeroCount = 0;
            for (int idx = 0; idx < total; idx++)
            {
                if (floats[idx] == 0f)
                {
                    continue;
                }
                nonZero.Set(idx);
                nonZeroCount++;
            }

            if (!(((float)nonZeroCount) / ((float)total) < ON_RATIO_CUTOFF))
            {
                // Not sparse enough to be worth compressing: keep the input array.
                _floats = floats;
                _bits   = null;
                return;
            }

            if (nonZeroCount == 0)
            {
                // Nothing but zeros: no storage needed, the capacity alone is enough.
                _floats          = null;
                _bits            = null;
                _referencePoints = null;
                return;
            }

            _bits            = nonZero;
            _floats          = new float[_bits.Cardinality()];
            _referencePoints = new int[total / REFERENCE_POINT_EVERY];

            int packedIdx = 0; // next free slot in _floats
            int refIdx    = 0; // next reference point to fill
            int cursor    = 0; // position in the input array
            while (cursor < total)
            {
                cursor = _bits.NextSetBit(cursor);
                if (cursor < 0)
                {
                    break;
                }

                _floats[packedIdx] = floats[cursor];

                // Fill every reference point whose boundary lies at or before this position.
                int boundary = cursor / REFERENCE_POINT_EVERY;
                while (refIdx < boundary)
                {
                    _referencePoints[refIdx++] = packedIdx;
                }

                packedIdx++;
                cursor++;
            }

            // Reference points past the last non-zero entry all see the full count.
            while (refIdx < _referencePoints.Length)
            {
                _referencePoints[refIdx++] = packedIdx;
            }
        }
Ejemplo n.º 4
0
        // Feeds the even numbers 0..198 through MyFilteredDocSetIterator and expects
        // the iterator to yield exactly the multiples of ten below 200.
        public void TestFilteredDocSetIterator()
        {
            var set1 = new IntArrayDocIdSet();
            for (int i = 0; i < 100; i++)
            {
                set1.AddDoc(2 * i); // 100 even numbers
            }

            var filteredIter = new MyFilteredDocSetIterator(set1.Iterator());

            // Expected documents: 0, 10, 20, ..., 190.
            var expected = new BitSet(200);
            for (int n = 0; n < 200; n += 10)
            {
                expected.Set(n);
            }

            // Deliberately no try/catch around the checks: a catch-all that calls
            // Assert.Fail(e.Message) also traps the assertion exceptions raised below
            // and re-reports them without their stack trace. Letting exceptions
            // propagate still fails the test and keeps the full diagnostics.
            int doc;
            while ((doc = filteredIter.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                if (!expected.Get(doc))
                {
                    Assert.Fail("failed: " + doc + " not in expected set");
                    return;
                }

                // Tick the document off so leftovers can be detected afterwards.
                expected.Clear(doc);
            }

            int leftover = expected.Cardinality();
            if (leftover > 0)
            {
                Assert.Fail("failed: leftover cardinality: " + leftover);
            }
        }
Ejemplo n.º 5
0
 // Stores status of decommissioning.
 // If node is not decommissioning, do not use this object for anything.
 /// <summary>
 /// Marks the RPC described by <paramref name="context"/> as seen for its block
 /// report. When the report id differs from the one currently tracked, tracking
 /// restarts with a fresh bit set sized to the report's total RPC count.
 /// </summary>
 /// <param name="context">identifies the report, the current RPC and the total RPC count</param>
 /// <returns>the number of distinct RPCs recorded so far for the tracked report</returns>
 public virtual int UpdateBlockReportContext(BlockReportContext context)
 {
     var reportId = context.GetReportId();

     bool isNewReport = curBlockReportId != reportId;
     if (isNewReport)
     {
         // A different report has started: forget what was seen for the old one.
         curBlockReportId       = reportId;
         curBlockReportRpcsSeen = new BitSet(context.GetTotalRpcs());
     }

     curBlockReportRpcsSeen.Set(context.GetCurRpc());

     int rpcsSeen = curBlockReportRpcsSeen.Cardinality();
     return rpcsSeen;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Returns the unique alternative predicted by all alternative subsets in
        /// <paramref name="altsets"/>. If no such alternative exists, this method
        /// returns <see cref="ATN.InvalidAltNumber"/>.
        /// </summary>
        /// <param name="altsets">a collection of alternative subsets</param>
        /// <returns>
        /// the single alternative contained in the set produced by <c>GetAlts</c>, or
        /// <see cref="ATN.InvalidAltNumber"/> when that set does not hold exactly one
        /// alternative
        /// </returns>
        public static int GetUniqueAlt(IEnumerable <BitSet> altsets)
        {
            BitSet combined = GetAlts(altsets);

            bool exactlyOne = combined.Cardinality() == 1;
            return exactlyOne ? combined.NextSetBit(0) : ATN.InvalidAltNumber;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Returns the unique alternative predicted by all alternative subsets in
        /// <paramref name="altsets"/>. If no such alternative exists, this method
        /// returns <see cref="ATN.INVALID_ALT_NUMBER"/>.
        /// </summary>
        /// <param name="altsets">a collection of alternative subsets</param>
        /// <returns>
        /// the single alternative contained in the set produced by <c>GetAlts</c>, or
        /// <see cref="ATN.INVALID_ALT_NUMBER"/> when that set does not hold exactly
        /// one alternative
        /// </returns>
        public static int GetUniqueAlt(IEnumerable <BitSet> altsets)
        {
            BitSet combined = GetAlts(altsets);

            bool exactlyOne = combined.Cardinality() == 1;
            return exactlyOne ? combined.NextSetBit(0) : ATN.INVALID_ALT_NUMBER;
        }
Ejemplo n.º 8
0
            /// <summary>
            /// Collects every rule reachable from <paramref name="rule"/> by
            /// repeatedly following the <c>parents</c> table.
            /// </summary>
            /// <param name="rule">index into <c>parents</c> of the starting rule</param>
            /// <returns>a new set holding the direct and indirect parents found</returns>
            public BitSet GetAncestors(int rule)
            {
                var result = new BitSet();
                result.Or(parents[rule]);

                // Keep merging the parents of every member until a full pass adds nothing.
                int sizeBefore;
                do
                {
                    sizeBefore = result.Cardinality();

                    int member = result.NextSetBit(0);
                    while (member >= 0)
                    {
                        result.Or(parents[member]);
                        member = result.NextSetBit(member + 1);
                    }
                }
                while (result.Cardinality() != sizeBefore);

                return result;
            }
Ejemplo n.º 9
0
            /// <summary>
            /// Collects every rule reachable from <paramref name="rule"/> by
            /// repeatedly following the <c>children</c> table.
            /// </summary>
            /// <param name="rule">index into <c>children</c> of the starting rule</param>
            /// <returns>a new set holding the direct and indirect children found</returns>
            public BitSet GetDescendants(int rule)
            {
                var result = new BitSet();
                result.Or(children[rule]);

                // Keep merging the children of every member until a full pass adds nothing.
                int sizeBefore;
                do
                {
                    sizeBefore = result.Cardinality();

                    int member = result.NextSetBit(0);
                    while (member >= 0)
                    {
                        result.Or(children[member]);
                        member = result.NextSetBit(member + 1);
                    }
                }
                while (result.Cardinality() != sizeBefore);

                return result;
            }
Ejemplo n.º 10
0
        /// <summary>
        /// Looks at the lowest alternative of each subset in <paramref name="altsets"/>
        /// and reports it when all subsets share the same one.
        /// </summary>
        /// <param name="altsets">a collection of alternative subsets</param>
        /// <returns>
        /// the lowest alternative common to all subsets, or
        /// <see cref="ATN.INVALID_ALT_NUMBER"/> as soon as two subsets disagree on
        /// their lowest alternative
        /// </returns>
        public static int GetSingleViableAlt(IEnumerable <BitSet> altsets)
        {
            var minimums = new BitSet();

            foreach (BitSet candidate in altsets)
            {
                minimums.Set(candidate.NextSetBit(0));

                if (minimums.Cardinality() <= 1)
                {
                    continue;
                }

                // more than 1 viable alt
                return ATN.INVALID_ALT_NUMBER;
            }

            return minimums.NextSetBit(0);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Looks at the lowest alternative of each subset in <paramref name="altsets"/>
        /// and reports it when all subsets share the same one.
        /// </summary>
        /// <param name="altsets">a collection of alternative subsets</param>
        /// <returns>
        /// the lowest alternative common to all subsets, or
        /// <see cref="ATN.InvalidAltNumber"/> as soon as two subsets disagree on
        /// their lowest alternative
        /// </returns>
        public static int GetSingleViableAlt([NotNull] IEnumerable <BitSet> altsets)
        {
            var minimums = new BitSet();

            foreach (BitSet candidate in altsets)
            {
                minimums.Set(candidate.NextSetBit(0));

                if (minimums.Cardinality() <= 1)
                {
                    continue;
                }

                // more than 1 viable alt
                return ATN.InvalidAltNumber;
            }

            return minimums.NextSetBit(0);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// For a range of file counts and requested split counts, asks the
        /// multi-file input format for splits and checks that each file appears in
        /// at most one split, that file lengths match the recorded ones, and that
        /// each split's reported length equals the sum of its files' lengths.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestFormat()
        {
            Log.Info("Test started");
            Log.Info("Max split count           = " + MaxSplitCount);
            Log.Info("Split count increment     = " + SplitCountIncr);
            Log.Info("Max bytes per file        = " + MaxBytes);
            Log.Info("Max number of files       = " + MaxNumFiles);
            Log.Info("Number of files increment = " + NumFilesIncr);
            MultiFileInputFormat <Text, Text> format = new TestMultiFileInputFormat.DummyMultiFileInputFormat
                                                           (this);
            FileSystem fs = FileSystem.GetLocal(job);

            for (int numFiles = 1; numFiles < MaxNumFiles; numFiles += (NumFilesIncr / 2) + rand
                                                                       .Next(NumFilesIncr / 2))
            {
                Path dir = InitFiles(fs, numFiles, -1);
                try
                {
                    BitSet bits = new BitSet(numFiles);
                    for (int i = 1; i < MaxSplitCount; i += rand.Next(SplitCountIncr) + 1)
                    {
                        Log.Info("Running for Num Files=" + numFiles + ", split count=" + i);
                        MultiFileSplit[] splits = (MultiFileSplit[])format.GetSplits(job, i);
                        // Start from a clean slate for every split count.
                        bits.Clear();
                        foreach (MultiFileSplit split in splits)
                        {
                            long splitLength = 0;
                            foreach (Path p in split.GetPaths())
                            {
                                long length = fs.GetContentSummary(p).GetLength();
                                // Expected value first: the recorded length is the reference.
                                NUnit.Framework.Assert.AreEqual(lengths[p.GetName()], length);
                                splitLength += length;
                                string name  = p.GetName();
                                // The file index is the text following the "file_" prefix.
                                int    index = System.Convert.ToInt32(Sharpen.Runtime.Substring(name, name.LastIndexOf
                                                                                                    ("file_") + 5));
                                // A file must not be handed out by more than one split.
                                NUnit.Framework.Assert.IsFalse(bits.Get(index));
                                bits.Set(index);
                            }
                            NUnit.Framework.Assert.AreEqual(splitLength, split.GetLength());
                        }
                    }
                    // Checked once per file count, against the bits left by the last split count.
                    NUnit.Framework.Assert.AreEqual(numFiles, bits.Cardinality());
                }
                finally
                {
                    // Remove the generated files even when an assertion above fails.
                    fs.Delete(dir, true);
                }
            }
            Log.Info("Test Finished");
        }
Ejemplo n.º 13
0
        // Calls UScript.GetScriptExtensions for invalid and valid code points and
        // reports an error whenever the return value or the filled script set
        // differs from what is expected.
        public void TestGetScriptExtensions()
        {
            var scripts = new BitSet(UScript.CodeLimit);
            int result;
            bool ok;

            /* invalid code points */
            result = UScript.GetScriptExtensions(-1, scripts);
            ok     = result == UScript.Unknown && scripts.Cardinality() == 1 &&
                     scripts.Get(UScript.Unknown);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
            }

            result = UScript.GetScriptExtensions(0x110000, scripts);
            ok     = result == UScript.Unknown && scripts.Cardinality() == 1 &&
                     scripts.Get(UScript.Unknown);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
            }

            /* normal usage */
            result = UScript.GetScriptExtensions(0x063f, scripts);
            ok     = result == UScript.Arabic && scripts.Cardinality() == 1 &&
                     scripts.Get(UScript.Arabic);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
            }

            // Only lower bounds are checked here: a return value of -3 or below and
            // at least three scripts in the set.
            result = UScript.GetScriptExtensions(0x0640, scripts);
            ok     = result <= -3 && scripts.Cardinality() >= 3 &&
                     scripts.Get(UScript.Arabic) && scripts.Get(UScript.Syriac) &&
                     scripts.Get(UScript.Mandaic);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(U+0640) failed");
            }

            result = UScript.GetScriptExtensions(0xfdf2, scripts);
            ok     = result == -2 && scripts.Cardinality() == 2 &&
                     scripts.Get(UScript.Arabic) && scripts.Get(UScript.Thaana);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(U+FDF2) failed");
            }

            result = UScript.GetScriptExtensions(0xff65, scripts);
            ok     = result == -6 && scripts.Cardinality() == 6 &&
                     scripts.Get(UScript.Bopomofo) && scripts.Get(UScript.Yi);
            if (!ok)
            {
                Errln("UScript.getScriptExtensions(U+FF65) failed");
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Writes BZip2-compressed "key TAB value" text files of varying record
        /// counts and checks that <c>KeyValueTextInputFormat</c> reports them as
        /// splittable and that no record is read twice while exactly
        /// <c>length</c> distinct values are seen across the splits.
        /// </summary>
        public virtual void TestSplitableCodecs()
        {
            Job           job  = Job.GetInstance(defaultConf);
            Configuration conf = job.GetConfiguration();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"
                                                                                          ), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure as the inner exception so the cause is not lost.
                throw new IOException("Illegal codec!", e);
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            int  seed = new Random().Next();

            // The seed is logged so a failing run can be reproduced.
            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int MaxLength = 500000;

            FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create
                                                                                        (file)));
                try
                {
                    // Each line is "<2*i> TAB <i>", so every key is even and key / 2 == value.
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable", format.IsSplitable(job
                                                                                                , file));
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // numSplits is only logged; GetSplits(job) below does not take it.
                    // The draw is kept so the random sequence stays the same.
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    IList <InputSplit> splits = format.GetSplits(job);
                    Log.Info("splitting: got =        " + splits.Count);
                    // check each split
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Count; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                        .GetConfiguration());
                        RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context);
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text
                                                                                           , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                                                                                                   MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                        reader.Initialize(splits[j], mcontext);
                        Text key   = null;
                        Text value = null;
                        try
                        {
                            int count = 0;
                            while (reader.NextKeyValue())
                            {
                                key   = reader.GetCurrentKey();
                                value = reader.GetCurrentValue();
                                int k = System.Convert.ToInt32(key.ToString());
                                int v = System.Convert.ToInt32(value.ToString());
                                NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                                NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                                Log.Debug("read " + k + "," + v);
                                // A value already marked means the record was served by two splits.
                                NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.", bits.Get
                                                                   (v));
                                bits.Set(v);
                                count++;
                            }
                            // Non-empty splits are logged at info level, empty ones at debug level.
                            if (count > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every written value must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Writes sequence files with varying record counts and checks, for several
        /// requested split counts, that <c>SequenceFileAsTextInputFormat</c> hands
        /// out readers of type <c>SequenceFileAsTextRecordReader</c>, that no key is
        /// read twice, and that exactly <c>length</c> distinct keys are seen.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf    job      = new JobConf(conf);
            FileSystem fs       = FileSystem.GetLocal(conf);
            Path       dir      = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file     = new Path(dir, "test.seq");
            Reporter   reporter = Reporter.Null;
            int        seed     = new Random().Next();
            Random     random   = new Random(seed);

            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);

            // Try a variety of record counts.
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                // Write a file holding `length` entries: key i, value 10 * i.
                SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(IntWritable),
                                                                       typeof(LongWritable));
                try
                {
                    int written = 0;
                    while (written < length)
                    {
                        IntWritable  key   = new IntWritable(written);
                        LongWritable value = new LongWritable(10 * written);
                        writer.Append(key, value);
                        written++;
                    }
                }
                finally
                {
                    writer.Close();
                }

                // Split the file in a variety of sizes.
                InputFormat <Text, Text> format = new SequenceFileAsTextInputFormat();
                for (int attempt = 0; attempt < 3; attempt++)
                {
                    int          requestedSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
                    InputSplit[] splits          = format.GetSplits(job, requestedSplits);

                    // One bit per key; a bit that is already set means a duplicate read.
                    BitSet seenKeys = new BitSet(length);
                    foreach (InputSplit split in splits)
                    {
                        RecordReader <Text, Text> reader = format.GetRecordReader(split, job, reporter);
                        Type readerClass = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("reader class is SequenceFileAsTextRecordReader.",
                                                        typeof(SequenceFileAsTextRecordReader), readerClass);
                        Text value = reader.CreateValue();
                        Text key   = reader.CreateKey();
                        try
                        {
                            while (reader.Next(key, value))
                            {
                                int keyInt = System.Convert.ToInt32(key.ToString());
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seenKeys.Get(keyInt));
                                seenKeys.Set(keyInt);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }

                    int distinctKeys = seenKeys.Cardinality();
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, distinctKeys);
                }
            }
        }
Ejemplo n.º 16
0
        // A reporter that does nothing
        /// <summary>
        /// Creates text files via <c>CreateFiles</c> and checks, for three random
        /// requested split counts, that <c>CombineTextInputFormat</c> returns a
        /// single <c>CombineFileSplit</c> from which no value is read twice and
        /// exactly <c>length</c> distinct values are read.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job    = new JobConf(defaultConf);
            Random  random = new Random();
            long    seed   = random.NextLong();

            // The seed is logged so a failing run can be reproduced.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);

            int length   = 10000;
            int numFiles = 10;
            CreateFiles(length, numFiles, random);

            // Build a combined split for the files.
            CombineTextInputFormat format = new CombineTextInputFormat();
            LongWritable           key    = new LongWritable();
            Text                   value  = new Text();

            int round = 0;
            while (round < 3)
            {
                int requestedSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + requestedSplits);
                InputSplit[] splits = format.GetSplits(job, requestedSplits);
                Log.Info("splitting: got =        " + splits.Length);

                // A single split is expected because the data is comfortably smaller
                // than the block size.
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit),
                                                split.GetType());

                // One bit per value; a bit that is already set means a duplicate read.
                BitSet seen = new BitSet(length);
                Log.Debug("split= " + split);
                RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter);
                try
                {
                    int count = 0;
                    while (reader.Next(key, value))
                    {
                        int v = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);

                        bool duplicate = seen.Get(v);
                        if (duplicate)
                        {
                            // Log where the clash happened before the assertion fires.
                            Log.Warn("conflict with " + v + " at position " + reader.GetPos());
                        }
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", duplicate);

                        seen.Set(v);
                        count++;
                    }
                    Log.Info("splits=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }

                int distinctValues = seen.Cardinality();
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, distinctValues);

                round++;
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Creates sequence files via <c>CreateFiles</c> and checks, for three random
        /// requested split counts, that <c>CombineSequenceFileInputFormat</c> returns
        /// a single <c>CombineFileSplit</c> from which no key is read twice and
        /// exactly <c>length</c> distinct keys are read.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf  job      = new JobConf(conf);
            Reporter reporter = Reporter.Null;
            Random   random   = new Random();
            long     seed     = random.NextLong();

            // The seed is logged so a failing run can be reproduced.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);

            int length   = 10000;
            int numFiles = 10;
            // Create files with various lengths.
            CreateFiles(length, numFiles, random);

            // Build a combined split for the files.
            InputFormat <IntWritable, BytesWritable> format =
                new CombineSequenceFileInputFormat <IntWritable, BytesWritable>();
            IntWritable   key   = new IntWritable();
            BytesWritable value = new BytesWritable();

            int round = 0;
            while (round < 3)
            {
                int requestedSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1;
                Log.Info("splitting: requesting = " + requestedSplits);
                InputSplit[] splits = format.GetSplits(job, requestedSplits);
                Log.Info("splitting: got =        " + splits.Length);

                // A single split is expected because the data is comfortably smaller
                // than the block size.
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit),
                                                split.GetType());

                // One bit per key; a bit that is already set means a duplicate read.
                BitSet seen = new BitSet(length);
                RecordReader <IntWritable, BytesWritable> reader = format.GetRecordReader(split, job, reporter);
                try
                {
                    while (reader.Next(key, value))
                    {
                        int current = key.Get();
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seen.Get(current));
                        seen.Set(current);
                    }
                }
                finally
                {
                    reader.Close();
                }

                int distinctKeys = seen.Cardinality();
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, distinctKeys);

                round++;
            }
        }
Ejemplo n.º 18
0
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestSplitableCodecs()
        {
            // Writes files of consecutive integers (one per line) through the output stream of
            // the BZip2 codec, asks TextInputFormat to split each file a random number of ways,
            // and verifies that every integer is read exactly once across all of the splits.
            JobConf conf = new JobConf(defaultConf);
            int     seed = new Random().Next();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(
                    conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
            }
            catch (TypeLoadException e)
            {
                // Chain the original failure so the reason the codec could not be loaded is not lost.
                throw new IOException("Illegal codec!", e);
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;

            // The seed is logged and then used for every random choice below.
            Log.Info("seed = " + seed);
            Random     random  = new Random(seed);
            FileSystem localFs = FileSystem.GetLocal(conf);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(conf, workDir);
            int MaxLength = 500000;

            // for a variety of lengths
            for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                TextInputFormat format = new TextInputFormat();
                format.Configure(conf);
                LongWritable key   = new LongWritable();
                Text         value = new Text();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(conf, numSplits);
                    Log.Info("splitting: got =        " + splits.Length);
                    // check each split; bit v is set once the value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], conf, reporter);
                        try
                        {
                            int counter = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos());
                                }
                                // NUnit takes the condition first and the message last.
                                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                                bits.Set(v);
                                counter++;
                            }
                            if (counter > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every value in [0, length) must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Writes tab-separated "2*i \t i" lines to a text file for a range of file lengths, splits
        /// the file with <c>KeyValueTextInputFormat</c> for several randomly chosen split counts, and
        /// verifies that every value is read exactly once across all of the splits.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job  = new JobConf();
            Path    file = new Path(workDir, "test.txt");
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            int      seed     = new Random().Next();

            // The seed is logged and then used for every random choice below.
            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                Log.Debug("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(localFs.Create(file));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                format.Configure(job);
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 20) + 1;
                    Log.Debug("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(job, numSplits);
                    Log.Debug("splitting: got =        " + splits.Length);
                    // check each split; bit v is set once the value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter);
                        Type readerClass = reader.GetType();
                        // NUnit takes (expected, actual, message); the message goes last.
                        NUnit.Framework.Assert.AreEqual(typeof(KeyValueLineRecordReader), readerClass,
                                                        "reader class is KeyValueLineRecordReader.");
                        Text key        = reader.CreateKey();
                        Type keyClass   = key.GetType();
                        Text value      = reader.CreateValue();
                        Type valueClass = value.GetType();
                        NUnit.Framework.Assert.AreEqual(typeof(Text), keyClass, "Key class is Text.");
                        NUnit.Framework.Assert.AreEqual(typeof(Text), valueClass, "Value class is Text.");
                        try
                        {
                            int count = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos());
                                }
                                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                                bits.Set(v);
                                count++;
                            }
                            Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every value in [0, length) must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Stores <paramref name="floats"/> in this instance, switching to a sparse layout when the
 /// fraction of non-zero entries is below ON_RATIO_CUTOFF.
 /// </summary>
 /// <remarks>
 /// Sparse layout: _floats holds only the non-zero values in index order, _bits marks their
 /// original positions, and _referencePoints[r] holds the number of non-zero values located
 /// before index (r + 1) * REFERENCE_POINT_EVERY. When every entry is zero, all three fields
 /// are set to null. When the array is not sparse enough, the input array itself is kept
 /// (not copied) and _bits is set to null.
 /// </remarks>
 /// <param name="floats">Values to store; its length must equal _capacity.</param>
 /// <exception cref="ArgumentException">
 /// The length of <paramref name="floats"/> differs from _capacity.
 /// </exception>
 protected virtual void Condense(float[] floats)
 {
     int length = floats.Length;
     if (length != _capacity)
     {
         throw new ArgumentException("bad input float array of length " + floats.Length + " for capacity: " + _capacity);
     }

     // Mark the position of every non-zero entry and count them as we go.
     var nonZeroBits = new BitSet(length);
     int nonZeroCount = 0;
     for (int pos = 0; pos < length; pos++)
     {
         if (floats[pos] == 0f)
         {
             continue;
         }
         nonZeroBits.Set(pos);
         nonZeroCount++;
     }

     bool sparseEnough = ((float)nonZeroCount) / ((float)length) < ON_RATIO_CUTOFF;
     if (!sparseEnough)
     {
         // Not worth compressing: keep the caller's array as the backing store.
         _floats = floats;
         _bits = null;
         return;
     }

     if (nonZeroCount == 0)
     {
         // Nothing but zeros: no storage is needed at all; the capacity alone describes the array.
         _floats = null;
         _bits = null;
         _referencePoints = null;
         return;
     }

     _bits = nonZeroBits;
     _floats = new float[_bits.Cardinality()];
     _referencePoints = new int[length / REFERENCE_POINT_EVERY];

     int packedIdx = 0;
     int refSlot = 0;
     // Walk the set bits in ascending order, packing each non-zero value into _floats.
     for (int pos = 0; pos < length && (pos = _bits.NextSetBit(pos)) >= 0; pos++)
     {
         _floats[packedIdx] = floats[pos];

         // Every reference point that lies entirely before this position gets the number of
         // values packed so far (the current value not included).
         int slotsBefore = pos / REFERENCE_POINT_EVERY;
         while (refSlot < slotsBefore)
         {
             _referencePoints[refSlot] = packedIdx;
             refSlot++;
         }
         packedIdx++;
     }

     // Reference points past the last non-zero entry all hold the total packed count.
     for (; refSlot < _referencePoints.Length; refSlot++)
     {
         _referencePoints[refSlot] = packedIdx;
     }
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Writes tab-separated "2*i \t i" lines to a text file for a range of file lengths, reads
        /// the file back through <c>KeyValueTextInputFormat</c> record readers, and verifies that
        /// each record has key == 2 * value and that every value is read exactly once across all
        /// of the splits.
        /// </summary>
        public virtual void TestFormat()
        {
            Job  job  = Job.GetInstance(new Configuration(defaultConf));
            Path file = new Path(workDir, "test.txt");
            int  seed = new Random().Next();

            // The seed is logged and then used for every random choice below.
            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int MaxLength = 10000;

            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                Log.Debug("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(localFs.Create(file));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // numSplits is only logged: GetSplits(job) takes no split-count argument.
                    // The draw is kept so the random sequence (and thus the file lengths) is unchanged.
                    int numSplits = random.Next(MaxLength / 20) + 1;
                    Log.Debug("splitting: requesting = " + numSplits);
                    IList <InputSplit> splits = format.GetSplits(job);
                    Log.Debug("splitting: got =        " + splits.Count);
                    // check each split; bit v is set once the value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Count; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                            job.GetConfiguration());
                        RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context);
                        Type clazz = reader.GetType();
                        // NUnit takes (expected, actual, message); the message goes last.
                        NUnit.Framework.Assert.AreEqual(typeof(KeyValueLineRecordReader), clazz,
                                                        "reader class is KeyValueLineRecordReader.");
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text, Text>(
                            job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                            MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                        reader.Initialize(splits[j], mcontext);
                        try
                        {
                            int count = 0;
                            while (reader.NextKeyValue())
                            {
                                Text key = reader.GetCurrentKey();
                                clazz = key.GetType();
                                NUnit.Framework.Assert.AreEqual(typeof(Text), clazz, "Key class is Text.");
                                Text value = reader.GetCurrentValue();
                                clazz = value.GetType();
                                NUnit.Framework.Assert.AreEqual(typeof(Text), clazz, "Value class is Text.");
                                int k = System.Convert.ToInt32(key.ToString());
                                int v = System.Convert.ToInt32(value.ToString());
                                // The writer above emitted key = 2 * value for every line.
                                NUnit.Framework.Assert.AreEqual(0, k % 2, "Bad key");
                                NUnit.Framework.Assert.AreEqual(k / 2, v, "Mismatched key/value");
                                Log.Debug("read " + v);
                                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                                bits.Set(v);
                                count++;
                            }
                            Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every value in [0, length) must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Creates a set of input files via <c>CreateFiles</c>, asks <c>CombineTextInputFormat</c>
        /// for splits three times, and verifies each time that a single <c>CombineFileSplit</c> is
        /// returned and that reading it yields <c>length</c> distinct values.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job    job    = Job.GetInstance(new Configuration(defaultConf));
            Random random = new Random();
            long   seed   = random.NextLong();

            // The seed is logged and then installed for every random choice below.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create files with various lengths
            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();

            for (int i = 0; i < 3; i++)
            {
                // numSplits is only logged: GetSplits(job) takes no split-count argument.
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                IList <InputSplit> splits = format.GetSplits(job);
                Log.Info("splitting: got =        " + splits.Count);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                // NUnit takes (expected, actual, message); the message goes last.
                NUnit.Framework.Assert.AreEqual(1, splits.Count, "We got more than one splits!");
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(),
                                                "It should be CombineFileSplit");
                // check the split; bit v is set once the value v has been read
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                    job.GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context);
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileRecordReader), reader.GetType(),
                                                "reader class is CombineFileRecordReader.");
                MapContext <LongWritable, Text, LongWritable, Text> mcontext =
                    new MapContextImpl <LongWritable, Text, LongWritable, Text>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        LongWritable key = reader.GetCurrentKey();
                        // The object under test goes first; with the message first the check
                        // would be applied to the message string instead of the key.
                        NUnit.Framework.Assert.IsNotNull(key, "Key should not be null.");
                        Text value = reader.GetCurrentValue();
                        int  v     = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                        bits.Set(v);
                        count++;
                    }
                    Log.Debug("split=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                // All length distinct values must have been read from the single split.
                NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Writes sequence files of (IntWritable i, LongWritable 10 * i) records for a range of
        /// lengths, reads them back through <c>SequenceFileAsTextInputFormat</c> under several
        /// randomly chosen maximum split sizes, and verifies that every key is read exactly once
        /// across all of the splits.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job        job    = Job.GetInstance(conf);
            FileSystem fs     = FileSystem.GetLocal(conf);
            Path       dir    = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file   = new Path(dir, "test.seq");
            int        seed   = new Random().Next();
            Random     random = new Random(seed);

            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                // create a file with length entries
                SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(IntWritable),
                                                                       typeof(LongWritable));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        IntWritable  key   = new IntWritable(i);
                        LongWritable value = new LongWritable(10 * i);
                        writer.Append(key, value);
                    }
                }
                finally
                {
                    writer.Close();
                }
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                    job.GetConfiguration());
                // try splitting the file in a variety of sizes
                InputFormat <Text, Text> format = new SequenceFileAsTextInputFormat();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // check each split; bit k is set once the key k has been read
                    BitSet bits      = new BitSet(length);
                    int    numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
                    // Cap the split size at file length / numSplits (numSplits is always >= 1).
                    FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(file).GetLen() / numSplits);
                    foreach (InputSplit split in format.GetSplits(job))
                    {
                        RecordReader <Text, Text>           reader   = format.CreateRecordReader(split, context);
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text, Text>(
                            job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                            MapReduceTestUtil.CreateDummyReporter(), split);
                        reader.Initialize(split, mcontext);
                        Type readerClass = reader.GetType();
                        // NUnit takes (expected, actual, message); the message goes last.
                        NUnit.Framework.Assert.AreEqual(typeof(SequenceFileAsTextRecordReader), readerClass,
                                                        "reader class is SequenceFileAsTextRecordReader.");
                        try
                        {
                            while (reader.NextKeyValue())
                            {
                                Text key = reader.GetCurrentKey();
                                int keyInt = System.Convert.ToInt32(key.ToString());
                                NUnit.Framework.Assert.IsFalse(bits.Get(keyInt), "Key in multiple partitions.");
                                bits.Set(keyInt);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every key in [0, length) must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }