Пример #1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POIFSDocument"/> class.
        /// </summary>
        /// <param name="name">the name of the POIFSDocument</param>
        /// <param name="stream">the InputStream we read data from</param>
        public POIFSDocument(string name, Stream stream)
        {
            ArrayList collected = new ArrayList();

            this._size = 0;
            while (true)
            {
                DocumentBlock current = new DocumentBlock(stream);
                int           length  = current.Size;

                // Only blocks reporting a positive size are stored and counted.
                if (length > 0)
                {
                    collected.Add(current);
                    this._size += length;
                }

                // Reading stops after the first block flagged as partially read.
                if (current.PartiallyRead)
                {
                    break;
                }
            }

            DocumentBlock[] blocks = (DocumentBlock[])collected.ToArray(typeof(DocumentBlock));

            this._big_store         = new BigBlockStore(this, blocks);
            this._property          = new DocumentProperty(name, this._size);
            this._property.Document = this;

            if (!this._property.ShouldUseSmallBlocks)
            {
                // The big-block store built above is kept; the small store stays empty.
                this._small_store = new SmallBlockStore(this, new BlockWritable[0]);
                return;
            }

            // The property asks for small blocks: convert what was read and
            // replace the big-block store with an empty one.
            this._small_store = new SmallBlockStore(this, SmallDocumentBlock.Convert(blocks, this._size));
            this._big_store   = new BigBlockStore(this, new DocumentBlock[0]);
        }
Пример #2
0
 /// <summary>
 /// Creates a <see cref="DocumentAnomalyDetector"/> for the given document.
 /// </summary>
 /// <param name="document">the document handed to the detector</param>
 /// <param name="windowSize">window size handed to the detector; defaults to 3</param>
 /// <returns>the newly constructed detector</returns>
 public IDocumentAnomalyDetector CreateSimple(DocumentBlock document, int windowSize = 3)
 {
     // Collaborators are created in the same order as the original argument list.
     AnomalyFilterFactory  filters       = new AnomalyFilterFactory(documentVector);
     DocumentReconstructor reconstructor = new DocumentReconstructor();

     return new DocumentAnomalyDetector(document, filters, reconstructor, windowSize);
 }
Пример #3
0
 /// <summary>
 /// Wraps every supplied block in a new <see cref="DocumentBlock"/>, keeping the input order.
 /// </summary>
 /// <param name="blocks">blocks to convert; each element is cast to <see cref="RawDataBlock"/></param>
 /// <returns>an array with the same length as <paramref name="blocks"/></returns>
 private static DocumentBlock[] ConvertRawBlocksToBigBlocks(ListManagedBlock[] blocks)
 {
     DocumentBlock[] converted = new DocumentBlock[blocks.Length];
     int             position  = 0;

     foreach (ListManagedBlock raw in blocks)
     {
         converted[position] = new DocumentBlock((RawDataBlock)raw);
         position++;
     }
     return converted;
 }
        public void AnomalySentimentDocument()
        {
            // Read docs.json from the Data folder under the test directory.
            string     dataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "docs.json");
            Document[] loaded   = JsonConvert.DeserializeObject <Document[]>(File.ReadAllText(dataFile));

            document = new DocumentBlock(loaded);

            // Run detection with the SVM and sentiment filters.
            var detector = instance.CreateSimple(document);
            var outcome  = detector.Detect(FilterTypes.Svm, FilterTypes.Sentiment);

            Assert.AreEqual(784, document.Sentences.Length);
            Assert.AreEqual(601, outcome.Document.Sentences.Count);
            Assert.AreEqual(36, outcome.Anomaly.Length);
        }
Пример #5
0
 /// <summary>
 /// read data from the internal stores
 /// </summary>
 /// <param name="buffer">the buffer to write to</param>
 /// <param name="offset">the offset into our storage to read from</param>
 public virtual void Read(byte[] buffer, int offset)
 {
     // The property decides which of the two stores is read.
     if (!this._property.ShouldUseSmallBlocks)
     {
         DocumentBlock.Read(this._big_store.Blocks, buffer, offset);
         return;
     }

     SmallDocumentBlock.Read(this._small_store.Blocks, buffer, offset);
 }
        public void AnomalySvm()
        {
            // Read docs.json from the Data folder under the test directory.
            string     dataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "docs.json");
            Document[] loaded   = JsonConvert.DeserializeObject <Document[]>(File.ReadAllText(dataFile));

            document = new DocumentBlock(loaded);

            // Run detection with the SVM filter only.
            var detector = instance.CreateSimple(document);
            var outcome  = detector.Detect(FilterTypes.Svm);

            Assert.AreEqual(784, document.Sentences.Length);
            Assert.AreEqual(627, outcome.Document.Sentences.Count);
            Assert.AreEqual(28, outcome.Anomaly.Length);

            // Spot-check the first reported anomaly.
            Assert.AreEqual(9, outcome.Anomaly[0].Sentences.Length);
            Assert.AreEqual("Kevin DiCiurcio, CFA Joshua M.", outcome.Anomaly[0].Sentences[0].Text);
        }
Пример #7
0
        public void TestConvert1()
        {
            MemoryStream  input  = new MemoryStream(_testdata);
            ArrayList     loaded = new ArrayList();
            DocumentBlock current;

            // Every block is kept, including the one flagged as partially read,
            // which also ends the loop.
            do
            {
                current = new DocumentBlock(input);
                loaded.Add(current);
            }while (!current.PartiallyRead);

            BlockWritable[]      bigBlocks = (BlockWritable[])loaded.ToArray(typeof(DocumentBlock));
            SmallDocumentBlock[] results   = SmallDocumentBlock.Convert(bigBlocks, _testdata_size);

            // One small block is expected per started group of 64 bytes.
            Assert.AreEqual((_testdata_size + 63) / 64, results.Length, "checking correct result size: ");

            MemoryStream output = new MemoryStream();

            foreach (SmallDocumentBlock small in results)
            {
                small.WriteBlocks(output);
            }
            byte[] output_array = output.ToArray();

            Assert.AreEqual(64 * results.Length,
                            output_array.Length, "checking correct output size: ");

            // The leading bytes must reproduce the test data ...
            int index = 0;

            while (index < _testdata_size)
            {
                Assert.AreEqual(_testdata[index],
                                output_array[index], "checking output " + index);
                index++;
            }

            // ... and everything after it is expected to be 0xff.
            while (index < output_array.Length)
            {
                Assert.AreEqual((byte)0xff,
                                output_array[index], "checking output " + index);
                index++;
            }
        }
Пример #8
0
        /// <summary>
        /// Looks up the data input block for the given offset in whichever store is in use.
        /// </summary>
        /// <param name="offset">offset into the document</param>
        /// <returns>
        /// the block obtained from the small or big block store, or <c>null</c> when
        /// <paramref name="offset"/> equals the document size
        /// </returns>
        /// <exception cref="Exception">when <paramref name="offset"/> is greater than the document size</exception>
        public DataInputBlock GetDataInputBlock(int offset)
        {
            // An offset beyond the document size is an error.
            if (offset > _size)
            {
                throw new Exception("Request for Offset " + offset + " doc size is " + _size);
            }

            // An offset equal to the document size yields no block.
            if (offset == _size)
            {
                return null;
            }

            if (!_property.ShouldUseSmallBlocks)
            {
                return DocumentBlock.GetDataInputBlock(_big_store.Blocks, offset);
            }

            return SmallDocumentBlock.GetDataInputBlock(_small_store.Blocks, offset);
        }
Пример #9
0
        public void TestConvert1()
        {
            MemoryStream         input  = new MemoryStream(testData);
            List <DocumentBlock> loaded = new List <DocumentBlock>();
            DocumentBlock        current;

            // Every block is kept, including the one flagged as partially read,
            // which also ends the loop.
            do
            {
                current = new DocumentBlock(input, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);
                loaded.Add(current);
            }while (!current.PartiallyRead);

            SmallDocumentBlock[] results =
                SmallDocumentBlock.Convert(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS,
                                           loaded.ToArray(), testDataSize);

            // One small block is expected per started group of 64 bytes.
            Assert.AreEqual((testDataSize + 63) / 64, results.Length, "checking correct result size: ");

            MemoryStream output = new MemoryStream();

            foreach (SmallDocumentBlock small in results)
            {
                small.WriteBlocks(output);
            }
            byte[] output_array = output.ToArray();

            Assert.AreEqual(64 * results.Length,
                            output_array.Length, "checking correct output size: ");

            // The leading bytes must reproduce the test data ...
            int index = 0;

            while (index < testDataSize)
            {
                Assert.AreEqual(testData[index],
                                output_array[index], "checking output " + index);
                index++;
            }

            // ... and everything after it is expected to be 0xff.
            while (index < output_array.Length)
            {
                Assert.AreEqual((byte)0xff,
                                output_array[index], "checking output " + index);
                index++;
            }
        }
Пример #10
0
        public void TestConstructor()
        {
            MemoryStream input    = new MemoryStream(_testdata);
            int          total    = 0;
            bool         finished = false;

            // Walk the test data in 512-byte steps, comparing each block read
            // from the stream with the matching slice of the source array.
            for (int start = 0; !finished; start += 512)
            {
                int    length   = Math.Min(_testdata.Length - start, 512);
                byte[] expected = new byte[length];

                Array.Copy(_testdata, start, expected, 0, expected.Length);
                DocumentBlock block = new DocumentBlock(input, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);

                verifyOutput(block, expected);
                total += block.Size;

                // The first partially read block ends the walk.
                finished = block.PartiallyRead;
            }

            // The block sizes together must account for every byte of test data.
            Assert.AreEqual(_testdata.Length, total);
        }
Пример #11
0
        /// <summary>
        /// Builds a <see cref="POIFSDocument"/> by reading document blocks from a stream.
        /// </summary>
        /// <param name="name">name passed to the document's <see cref="DocumentProperty"/></param>
        /// <param name="bigBlockSize">block size details used for reading and for both block stores</param>
        /// <param name="stream">stream the document blocks are read from</param>
        public POIFSDocument(string name, POIFSBigBlockSize bigBlockSize, Stream stream)
        {
            _size            = 0;
            _bigBigBlockSize = bigBlockSize;

            List <DocumentBlock> collected = new List <DocumentBlock>();
            DocumentBlock        current;

            // Read until a block is flagged as partially read; only blocks
            // reporting a positive size are stored and counted.
            do
            {
                current = new DocumentBlock(stream, bigBlockSize);
                int length = current.Size;

                if (length > 0)
                {
                    collected.Add(current);
                    _size += length;
                }
            }while (!current.PartiallyRead);

            DocumentBlock[] bigBlocks = collected.ToArray();

            _big_store         = new BigBlockStore(bigBlockSize, bigBlocks);
            _property          = new DocumentProperty(name, _size);
            _property.Document = this;

            if (!_property.ShouldUseSmallBlocks)
            {
                // The big-block store built above is kept; the small store stays empty.
                _small_store = new SmallBlockStore(bigBlockSize, EMPTY_SMALL_BLOCK_ARRAY);
                return;
            }

            // The property asks for small blocks: convert what was read and
            // replace the big-block store with an empty one.
            _small_store = new SmallBlockStore(bigBlockSize, SmallDocumentBlock.Convert(bigBlockSize, bigBlocks, _size));
            _big_store   = new BigBlockStore(bigBlockSize, new DocumentBlock[0]);
        }
Пример #12
0
        public void TestRead()
        {
            MemoryStream stream    = new MemoryStream(_testdata);
            ArrayList    documents = new ArrayList();

            // Read big blocks until one is flagged as partially read; that
            // block is still added before the loop ends.
            while (true)
            {
                DocumentBlock block = new DocumentBlock(stream);

                documents.Add(block);
                if (block.PartiallyRead)
                {
                    break;
                }
            }
            SmallDocumentBlock[] blocks =
                SmallDocumentBlock
                .Convert((BlockWritable[])documents
                         .ToArray(typeof(DocumentBlock)), _testdata_size);

            // Try a range of buffer sizes (1, 39, 77, ...) and, for each size,
            // read as many whole buffers as fit into the test data.
            for (int j = 1; j <= _testdata_size; j += 38)
            {
                byte[] buffer = new byte[j];
                int    offset = 0;

                for (int k = 0; k < (_testdata_size / j); k++)
                {
                    SmallDocumentBlock.Read(blocks, buffer, offset);
                    for (int n = 0; n < buffer.Length; n++)
                    {
                        // The index is parenthesised so the message shows the
                        // numeric sum; without the parentheses n was appended
                        // as text to (k * j), giving a misleading position.
                        Assert.AreEqual(_testdata[(k * j) + n], buffer[n],
                                        "checking byte " + ((k * j) + n));
                    }
                    offset += j;
                }
            }
        }
 public void Setup()
 {
     document = new DocumentBlock(Global.InitDocument("cv002_17424.txt"));

     // Pass "Data" and "model.bin" as separate segments so Path.Combine inserts
     // the platform's directory separator. A hard-coded backslash only works as
     // a separator on Windows.
     string modelPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "model.bin");

     instance = new AnomalyFactory(new EmbeddingVectorSource(WordModel.Load(modelPath)));
 }