/// <summary>
/// Initializes a new instance of the <see cref="POIFSDocument"/> class,
/// draining <paramref name="stream"/> into big blocks.
/// </summary>
/// <param name="name">the name of the POIFSDocument</param>
/// <param name="stream">the InputStream we read data from</param>
public POIFSDocument(string name, Stream stream)
{
    ArrayList collected = new ArrayList();

    this._size = 0;
    while (true)
    {
        // Read one big block at a time until the stream is exhausted
        // (signalled by a partially-filled block).
        DocumentBlock current = new DocumentBlock(stream);
        int bytesRead = current.Size;

        if (bytesRead > 0)
        {
            collected.Add(current);
            this._size += bytesRead;
        }
        if (current.PartiallyRead)
        {
            break;
        }
    }

    DocumentBlock[] bigBlocks =
        (DocumentBlock[])collected.ToArray(typeof(DocumentBlock));

    this._big_store = new BigBlockStore(this, bigBlocks);
    this._property = new DocumentProperty(name, this._size);
    this._property.Document = this;

    if (this._property.ShouldUseSmallBlocks)
    {
        // Small documents are re-packed into small blocks; the big store
        // is replaced with an empty one so only one representation holds
        // the data.
        this._small_store = new SmallBlockStore(this, SmallDocumentBlock.Convert(bigBlocks, this._size));
        this._big_store = new BigBlockStore(this, new DocumentBlock[0]);
    }
    else
    {
        this._small_store = new SmallBlockStore(this, new BlockWritable[0]);
    }
}
/// <summary>
/// Builds a <see cref="DocumentAnomalyDetector"/> for <paramref name="document"/>
/// using this factory's vector source and a fresh reconstructor.
/// </summary>
/// <param name="document">document to analyze</param>
/// <param name="windowSize">sliding window size used by the detector (default 3)</param>
public IDocumentAnomalyDetector CreateSimple(DocumentBlock document, int windowSize = 3)
{
    var filterFactory = new AnomalyFilterFactory(documentVector);
    var reconstructor = new DocumentReconstructor();
    return new DocumentAnomalyDetector(document, filterFactory, reconstructor, windowSize);
}
/// <summary>
/// Wraps each raw data block in a <see cref="DocumentBlock"/>, preserving order.
/// </summary>
/// <param name="blocks">raw blocks to convert (each must be a RawDataBlock)</param>
/// <returns>one DocumentBlock per input block</returns>
private static DocumentBlock[] ConvertRawBlocksToBigBlocks(ListManagedBlock[] blocks)
{
    DocumentBlock[] converted = new DocumentBlock[blocks.Length];
    int index = 0;
    foreach (ListManagedBlock raw in blocks)
    {
        converted[index++] = new DocumentBlock((RawDataBlock)raw);
    }
    return converted;
}
public void AnomalySentimentDocument()
{
    // Arrange: load the fixture document set from the test data folder.
    var dataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "docs.json");
    document = new DocumentBlock(JsonConvert.DeserializeObject<Document[]>(File.ReadAllText(dataFile)));
    var detector = instance.CreateSimple(document);

    // Act: run detection with both SVM and sentiment filters.
    var result = detector.Detect(FilterTypes.Svm, FilterTypes.Sentiment);

    // Assert: sentence counts before/after filtering, plus anomaly count.
    Assert.AreEqual(784, document.Sentences.Length);
    Assert.AreEqual(601, result.Document.Sentences.Count);
    Assert.AreEqual(36, result.Anomaly.Length);
}
/// <summary>
/// read data from the internal stores
/// </summary>
/// <param name="buffer">the buffer to write to</param>
/// <param name="offset">the offset into our storage to read from</param>
public virtual void Read(byte[] buffer, int offset)
{
    // The document lives in exactly one store, chosen by its size.
    if (!this._property.ShouldUseSmallBlocks)
    {
        DocumentBlock.Read(this._big_store.Blocks, buffer, offset);
        return;
    }
    SmallDocumentBlock.Read(this._small_store.Blocks, buffer, offset);
}
public void AnomalySvm()
{
    // Arrange: load the fixture document set from the test data folder.
    var dataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "docs.json");
    document = new DocumentBlock(JsonConvert.DeserializeObject<Document[]>(File.ReadAllText(dataFile)));
    var detector = instance.CreateSimple(document);

    // Act: run detection with the SVM filter only.
    var result = detector.Detect(FilterTypes.Svm);

    // Assert: overall counts plus the shape and text of the first anomaly.
    Assert.AreEqual(784, document.Sentences.Length);
    Assert.AreEqual(627, result.Document.Sentences.Count);
    Assert.AreEqual(28, result.Anomaly.Length);
    Assert.AreEqual(9, result.Anomaly[0].Sentences.Length);
    Assert.AreEqual("Kevin DiCiurcio, CFA Joshua M.", result.Anomaly[0].Sentences[0].Text);
}
public void TestConvert1()
{
    // Read the whole test buffer into big blocks.
    MemoryStream input = new MemoryStream(_testdata);
    ArrayList bigBlocks = new ArrayList();
    while (true)
    {
        DocumentBlock next = new DocumentBlock(input);
        bigBlocks.Add(next);
        if (next.PartiallyRead)
        {
            break;
        }
    }

    // Convert to small (64-byte) blocks and check the block count.
    SmallDocumentBlock[] converted = SmallDocumentBlock.Convert(
        (BlockWritable[])bigBlocks.ToArray(typeof(DocumentBlock)), _testdata_size);
    Assert.AreEqual((_testdata_size + 63) / 64, converted.Length, "checking correct result size: ");

    // Serialize the small blocks back out.
    MemoryStream output = new MemoryStream();
    foreach (SmallDocumentBlock small in converted)
    {
        small.WriteBlocks(output);
    }
    byte[] written = output.ToArray();
    Assert.AreEqual(64 * converted.Length, written.Length, "checking correct output size: ");

    // Data bytes must round-trip; the tail of the last block is 0xff padding.
    int index = 0;
    for (; index < _testdata_size; index++)
    {
        Assert.AreEqual(_testdata[index], written[index], "checking output " + index);
    }
    for (; index < written.Length; index++)
    {
        Assert.AreEqual((byte)0xff, written[index], "checking output " + index);
    }
}
/// <summary>
/// Returns a reader positioned at <paramref name="offset"/> within this
/// document's data, drawn from whichever store (small or big blocks) holds it.
/// Returns null when the offset is exactly the end of the document.
/// </summary>
/// <param name="offset">byte offset into the document's data</param>
/// <exception cref="ArgumentOutOfRangeException">
/// if <paramref name="offset"/> is past the end of the document
/// </exception>
public DataInputBlock GetDataInputBlock(int offset)
{
    if (offset >= _size)
    {
        if (offset > _size)
        {
            // Was a bare System.Exception; an out-of-range argument is the
            // specific failure, and catch (Exception) callers still match.
            throw new ArgumentOutOfRangeException(nameof(offset),
                "Request for Offset " + offset + " doc size is " + _size);
        }
        // offset == _size: end-of-document sentinel.
        return null;
    }
    if (_property.ShouldUseSmallBlocks)
    {
        return SmallDocumentBlock.GetDataInputBlock(_small_store.Blocks, offset);
    }
    return DocumentBlock.GetDataInputBlock(_big_store.Blocks, offset);
}
public void TestConvert1()
{
    // Read the whole test buffer into (smaller-variant) big blocks.
    MemoryStream input = new MemoryStream(testData);
    List<DocumentBlock> bigBlocks = new List<DocumentBlock>();
    while (true)
    {
        DocumentBlock next = new DocumentBlock(input, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);
        bigBlocks.Add(next);
        if (next.PartiallyRead)
        {
            break;
        }
    }

    // Convert to small (64-byte) blocks and check the block count.
    SmallDocumentBlock[] converted = SmallDocumentBlock.Convert(
        POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, bigBlocks.ToArray(), testDataSize);
    Assert.AreEqual((testDataSize + 63) / 64, converted.Length, "checking correct result size: ");

    // Serialize the small blocks back out.
    MemoryStream output = new MemoryStream();
    foreach (SmallDocumentBlock small in converted)
    {
        small.WriteBlocks(output);
    }
    byte[] written = output.ToArray();
    Assert.AreEqual(64 * converted.Length, written.Length, "checking correct output size: ");

    // Data bytes must round-trip; the tail of the last block is 0xff padding.
    int index = 0;
    for (; index < testDataSize; index++)
    {
        Assert.AreEqual(testData[index], written[index], "checking output " + index);
    }
    for (; index < written.Length; index++)
    {
        Assert.AreEqual((byte)0xff, written[index], "checking output " + index);
    }
}
public void TestConstructor()
{
    MemoryStream input = new MemoryStream(_testdata);
    int position = 0;
    int totalRead = 0;

    while (true)
    {
        // Expected contents of the next block: up to 512 bytes of test data.
        byte[] expected = new byte[Math.Min(_testdata.Length - position, 512)];
        Array.Copy(_testdata, position, expected, 0, expected.Length);

        DocumentBlock block = new DocumentBlock(input, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);
        verifyOutput(block, expected);
        totalRead += block.Size;

        // A partially-read block marks the end of the stream.
        if (block.PartiallyRead)
        {
            break;
        }
        position += 512;
    }

    Assert.AreEqual(_testdata.Length, totalRead);
}
/// <summary>
/// Initializes a new <see cref="POIFSDocument"/> for the given big-block
/// size, draining <paramref name="stream"/> into big blocks.
/// </summary>
/// <param name="name">the name of the POIFSDocument</param>
/// <param name="bigBlockSize">big-block size descriptor for this filesystem</param>
/// <param name="stream">the stream we read data from</param>
public POIFSDocument(string name, POIFSBigBlockSize bigBlockSize, Stream stream)
{
    List<DocumentBlock> collected = new List<DocumentBlock>();

    _size = 0;
    _bigBigBlockSize = bigBlockSize;
    DocumentBlock current;
    do
    {
        // Read one big block at a time; a partially-filled block signals
        // the end of the stream.
        current = new DocumentBlock(stream, bigBlockSize);
        int bytesRead = current.Size;
        if (bytesRead > 0)
        {
            collected.Add(current);
            _size += bytesRead;
        }
    } while (!current.PartiallyRead);

    DocumentBlock[] bigBlocks = collected.ToArray();
    _big_store = new BigBlockStore(bigBlockSize, bigBlocks);
    _property = new DocumentProperty(name, _size);
    _property.Document = this;

    if (_property.ShouldUseSmallBlocks)
    {
        // Small documents are re-packed into small blocks; the big store
        // is replaced with an empty one so only one representation holds
        // the data.
        _small_store = new SmallBlockStore(bigBlockSize, SmallDocumentBlock.Convert(bigBlockSize, bigBlocks, _size));
        _big_store = new BigBlockStore(bigBlockSize, new DocumentBlock[0]);
    }
    else
    {
        _small_store = new SmallBlockStore(bigBlockSize, EMPTY_SMALL_BLOCK_ARRAY);
    }
}
public void TestRead()
{
    // Read the whole test buffer into big blocks, then convert to small
    // (64-byte) blocks, which is what we actually read from below.
    MemoryStream stream = new MemoryStream(_testdata);
    ArrayList documents = new ArrayList();

    while (true)
    {
        DocumentBlock block = new DocumentBlock(stream);
        documents.Add(block);
        if (block.PartiallyRead)
        {
            break;
        }
    }
    SmallDocumentBlock[] blocks = SmallDocumentBlock
                                  .Convert((BlockWritable[])documents
                                           .ToArray(typeof(DocumentBlock)), _testdata_size);

    // Read the data back through buffers of varying sizes j, verifying
    // every byte against the original test data.
    for (int j = 1; j <= _testdata_size; j += 38)
    {
        byte[] buffer = new byte[j];
        int offset = 0;

        for (int k = 0; k < (_testdata_size / j); k++)
        {
            SmallDocumentBlock.Read(blocks, buffer, offset);
            for (int n = 0; n < buffer.Length; n++)
            {
                // BUG FIX: the message previously concatenated (k * j) and n
                // as strings ("checking byte " + (k * j) + n), reporting a
                // bogus index on failure; parenthesize so they are summed.
                Assert.AreEqual(_testdata[(k * j) + n], buffer[n],
                                "checking byte " + ((k * j) + n));
            }
            offset += j;
        }
    }
}
public void Setup()
{
    // Build the fixture: a sample document plus an AnomalyFactory backed by
    // the embedding model shipped with the test data.
    var modelPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"Data\model.bin");
    document = new DocumentBlock(Global.InitDocument("cv002_17424.txt"));
    instance = new AnomalyFactory(new EmbeddingVectorSource(WordModel.Load(modelPath)));
}