/// <summary>
/// Demo: enriches in-memory orders with their customer id via a
/// LookupTransformation whose match/retrieve columns are configured through
/// attributes on CustomerWithAttr.
/// </summary>
public void UsingLookupWithAttributes()
{
    Prepare();

    // In-memory source rows
    var orderSource = new MemorySource<Order>();
    orderSource.DataAsList.Add(new Order() { OrderNumber = 815, CustomerName = "John" });
    orderSource.DataAsList.Add(new Order() { OrderNumber = 4711, CustomerName = "Jim" });

    // Lookup data comes from the database table
    var lookupSource = new DbSource<CustomerWithAttr>(SqlConnection, "CustomerTable");
    var lookup = new LookupTransformation<Order, CustomerWithAttr>();
    lookup.Source = lookupSource;

    var dest = new MemoryDestination<Order>();
    orderSource.LinkTo(lookup).LinkTo(dest);
    Network.Execute(orderSource);

    foreach (var row in dest.Data)
    {
        Console.WriteLine($"Order:{row.OrderNumber} Name:{row.CustomerName} Id:{row.CustomerId}");
    }

    //Output
    //Order:815 Name:John Id:1
    //Order:4711 Name:Jim Id:2
}
/// <summary>
/// Builds a small ETL network: six in-memory rows -> row transformation
/// (logs each row, short delay) -> XML file destination.
/// </summary>
/// <param name="targetFileName">Path of the XML file the network writes to.</param>
/// <returns>A Network whose entry point is the memory source.</returns>
static Network CreateNetwork(string targetFileName)
{
    var source = new MemorySource<MyLinkingRow>();
    for (int i = 0; i <= 5; i++)
    {
        source.DataAsList.Add(new MyLinkingRow() { Col1 = i, Col2 = $"Test{i}" });
    }

    var row = new RowTransformation<MyLinkingRow>();
    // FIX: the lambda parameter was also named 'row', which collides with the
    // enclosing local 'row' (compile error CS0136) - renamed to 'r'.
    row.TransformationFunc = r =>
    {
        Console.WriteLine($"Sending row {r.Col1}|{r.Col2} into {targetFileName}");
        // Deliberate slow-down so the demo output is readable
        Task.Delay(10).Wait();
        return r;
    };

    var dest = new XmlDestination<MyLinkingRow>(targetFileName, ResourceType.File);
    source.LinkTo(row).LinkTo(dest);
    return new Network(source);
}
/// <summary>
/// Demo: resolves each order's customer id with a per-row database query
/// inside a RowTransformation.
/// </summary>
public void UsingRowTransformation()
{
    Prepare();

    var orderSource = new MemorySource<Order>();
    orderSource.DataAsList.Add(new Order() { OrderNumber = 815, CustomerName = "John" });
    orderSource.DataAsList.Add(new Order() { OrderNumber = 4711, CustomerName = "Jim" });

    var rowTrans = new RowTransformation<Order>(row =>
    {
        // NOTE(review): the name is interpolated straight into the SQL - fine for
        // this demo's fixed data, but real user input would need parameterization.
        int? id = SqlTask.ExecuteScalar<int>(SqlConnection,
            sql: $"SELECT Id FROM CustomerTable WHERE Name='{row.CustomerName}'");
        row.CustomerId = id;
        return row;
    });

    /* Delete below here */
    var dest = new MemoryDestination<Order>();
    orderSource.LinkTo(rowTrans).LinkTo(dest);
    orderSource.Execute();
}
/// <summary>
/// Writing into a table that does not exist (described only by a TableDefinition)
/// must surface an InvalidOperationException.
/// </summary>
public void UnknownTableViaTableDefinition()
{
    //Arrange
    var def = new TableDefinition("UnknownTable",
        new List<TableColumn>() { new TableColumn("id", "INT") });
    string[] data = { "1", "2" };
    var source = new MemorySource<string[]>();
    source.Data.Add(data);
    var dest = new DbDestination<string[]>(def, SqlConnection);
    source.LinkTo(dest);

    //Act & Assert
    Assert.Throws<System.InvalidOperationException>(() =>
    {
        try
        {
            source.Execute();
            dest.Wait();
        }
        catch (AggregateException e)
        {
            // Unwrap so the assert sees the real dataflow exception
            throw e.InnerException;
        }
    });
}
/// <summary>
/// Creates the frame collection with a single root frame built on top of the
/// provided memory source.
/// </summary>
internal ImageFrameCollection(Image<TPixel> parent, int width, int height, MemorySource<TPixel> memorySource)
{
    this.parent = parent ?? throw new ArgumentNullException(nameof(parent));

    // Frames are already cloned within the caller
    var rootFrame = new ImageFrame<TPixel>(parent.GetConfiguration(), width, height, memorySource);
    this.frames.Add(rootFrame);
}
/// <summary>
/// Rows whose xml (first array element) violates the schema are routed to the
/// error destination; the two valid rows pass through.
/// </summary>
public void ValidateSchemaForArray()
{
    //Arrange
    var source = new MemorySource<string[]>();
    source.DataAsList.Add(new string[] { _validXml });
    source.DataAsList.Add(new string[] { _invalidXml });
    source.DataAsList.Add(new string[] { _validXml });
    var dest = new MemoryDestination<string[]>();
    var error = new MemoryDestination<ETLBoxError>();

    //Act
    var schemaValidation = new XmlSchemaValidation<string[]>();
    schemaValidation.XmlSelector = row => row[0];
    schemaValidation.XmlSchema = xsdMarkup;
    source.LinkTo(schemaValidation);
    schemaValidation.LinkTo(dest);
    schemaValidation.LinkErrorTo(error);
    source.Execute();
    dest.Wait();
    error.Wait();

    //Assert
    Assert.True(dest.Data.Count == 2);
    Assert.True(error.Data.Count == 1);
}
/// <summary>
/// A row mixing numeric, date, string, char, enum and null values round-trips
/// into the destination table.
/// </summary>
public void MixedTypes(IConnectionManager connection)
{
    CreateTestTable(connection, "datatypedestination");

    //Arrange
    var row = new MyDataTypeRow()
    {
        IntCol = 1,
        LongCol = -1,
        DecimalCol = 2.3M,
        DoubleCol = 5.4,
        DateTimeCol = new DateTime(2010, 1, 1, 10, 10, 10),
        DateCol = new DateTime(2020, 1, 1),
        StringCol = "Test",
        CharCol = 'T',
        DecimalStringCol = "13.4566",
        NullCol = null,
        EnumCol = EnumType.Value2
    };
    var source = new MemorySource<MyDataTypeRow>(new List<MyDataTypeRow>() { row });

    //Act
    var dest = new DbDestination<MyDataTypeRow>(connection, "datatypedestination");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    AssertFirstRow(connection, "datatypedestination");
}
/// <summary>
/// Writing into a non-existing table raises an ETLBoxException (unwrapped here
/// from the dataflow's AggregateException).
/// </summary>
public void UnknownTable()
{
    //Arrange
    string[] data = { "1", "2" };
    var source = new MemorySource();
    source.Data.Add(data);
    var dest = new DBDestination(SqlConnection, "UnknownTable");
    source.LinkTo(dest);

    //Act & Assert
    Assert.Throws<ETLBoxException>(() =>
    {
        try
        {
            source.Execute();
            dest.Wait();
        }
        catch (AggregateException e)
        {
            // Rethrow the inner exception so the assert can match its type
            throw e.InnerException;
        }
    });
}
/// <summary>
/// Captures a window over the weights buffer: the flat start offset is the row
/// ('index') times the buffer width, plus the left edge.
/// </summary>
internal WeightsWindow(int index, int left, Buffer2D<float> buffer, int length)
{
    this.Left = left;
    this.Length = length;
    this.buffer = buffer.MemorySource;
    this.flatStartIndex = (index * buffer.Width) + left;
}
/// <summary>
/// Serializing one row to an HTTP endpoint sends exactly one request through
/// the injected (mocked) HttpClient.
/// </summary>
public void WriteIntoHttpClient()
{
    //Arrange
    Mock<HttpMessageHandler> handlerMock = CreateHandlerMoq();
    HttpClient httpClient = CreateHttpClient(handlerMock);
    var source = new MemorySource<MySimpleRow>();
    source.DataAsList.Add(new MySimpleRow() { Col1 = 1, Col2 = "Test1" });

    //Act
    var dest = new JsonDestination<MySimpleRow>("http://test.test", ResourceType.Http);
    dest.HttpClient = httpClient;
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    // NOTE(review): verifying HttpMethod.Get for a destination that WRITES data
    // looks suspicious (POST/PUT would be expected) - confirm against the
    // JsonDestination implementation before changing.
    handlerMock.Protected().Verify(
        "SendAsync",
        Times.Exactly(1),
        ItExpr.Is<HttpRequestMessage>(req =>
            req.Method == HttpMethod.Get &&
            req.RequestUri.Equals(new Uri("http://test.test"))),
        ItExpr.IsAny<CancellationToken>()
    );
}
/// <summary>
/// An exception thrown inside the store-key action of an Aggregation must
/// bubble up as an AggregateException when the flow completes.
/// </summary>
public void ExceptionInStoreKeyFunction()
{
    //Arrange
    MemorySource<MyRow> source = new MemorySource<MyRow>();
    source.DataAsList = new List<MyRow>()
    {
        new MyRow { Id = 1, ClassName = "Class1", DetailValue = 3.5 }
    };

    //Act
    // FIX: the store-key lambda parameter was named 'agg', colliding with the
    // local 'agg' being declared (compile error CS0136) - renamed to 'aggRow'.
    Aggregation<MyRow, MyAggRow> agg = new Aggregation<MyRow, MyAggRow>(
        (row, aggValue) => aggValue.AggValue += row.DetailValue,
        row => row.ClassName,
        (key, aggRow) => throw new Exception("Test")
    );
    MemoryDestination<MyAggRow> dest = new MemoryDestination<MyAggRow>();
    source.LinkTo(agg);
    agg.LinkTo(dest);

    //Assert
    Assert.Throws<AggregateException>(() =>
    {
        source.Execute();
        dest.Wait();
    });
}
/// <summary>
/// An exception thrown by the aggregation action surfaces as an
/// AggregateException when the flow is executed.
/// </summary>
public void ExceptionInAggregationFunction()
{
    //Arrange
    var source = new MemorySource<MyRow>();
    source.DataAsList = new List<MyRow>()
    {
        new MyRow { Id = 1, DetailValue = 3.5 },
    };

    //Act
    var agg = new Aggregation<MyRow, MyAggRow>(
        (row, aggRow) => throw new Exception("Test")
    );
    var dest = new MemoryDestination<MyAggRow>();

    //Assert
    source.LinkTo(agg);
    agg.LinkTo(dest);
    Assert.Throws<AggregateException>(() =>
    {
        source.Execute();
        dest.Wait();
    });
}
/// <summary>
/// Null entries in a string[] source are skipped, so the written JSON matches
/// the two-column reference file.
/// </summary>
public void IgnoreWithStringArray()
{
    //Arrange
    var source = new MemorySource();
    source.Data = new List<string[]>()
    {
        null,
        new string[] { "1", "Test1" },
        null,
        new string[] { "2", "Test2" },
        new string[] { "3", "Test3" },
        null
    };

    //Act
    var dest = new JsonDestination("./IgnoreNullValuesStringArray.json");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Equal(File.ReadAllText("./IgnoreNullValuesStringArray.json"),
        File.ReadAllText("res/JsonDestination/TwoColumnsStringArray.json"));
}
/// <summary>
/// Builds 'componentCount' single-row buffers filled with seeded random floats
/// in [minVal, maxVal) and wraps them as jpeg color-converter component values.
/// </summary>
private static JpegColorConverter.ComponentValues CreateRandomValues(
    int componentCount,
    int inputBufferLength,
    int seed,
    float minVal = 0f,
    float maxVal = 255f)
{
    var rnd = new Random(seed);
    var buffers = new Buffer2D<float>[componentCount];

    for (int i = 0; i < componentCount; i++)
    {
        var values = new float[inputBufferLength];
        for (int j = 0; j < inputBufferLength; j++)
        {
            values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
        }

        // no need to dispose when buffer is not array owner
        var source = new MemorySource<float>(new Memory<float>(values));
        buffers[i] = new Buffer2D<float>(source, values.Length, 1);
    }

    return new JpegColorConverter.ComponentValues(buffers, 0);
}
/// <summary>
/// Aggregation grouped by the first array element sums the (parsed) second
/// element per group; both groups must end up with a sum of 10.
/// </summary>
public void GroupingUsingStringArray()
{
    //Arrange
    MemorySource<string[]> source = new MemorySource<string[]>();
    source.DataAsList.Add(new string[] { "Class1", "3.5" });
    source.DataAsList.Add(new string[] { "Class1", "6.5" });
    source.DataAsList.Add(new string[] { "Class2", "10" });
    // FIX: the store-key lambda parameter was named 'agg', colliding with the
    // local 'agg' being declared (compile error CS0136) - renamed to 'aggRow'.
    // NOTE(review): Convert.ToDouble uses the current culture; "3.5" parses
    // differently under comma-decimal cultures.
    Aggregation<string[], MyAggRow> agg = new Aggregation<string[], MyAggRow>(
        (row, aggValue) => aggValue.AggValue += Convert.ToDouble(row[1]),
        row => row[0],
        (key, aggRow) => aggRow.GroupName = (string)key
    );
    MemoryDestination<MyAggRow> dest = new MemoryDestination<MyAggRow>();

    //Act
    source.LinkTo(agg);
    agg.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Collection<MyAggRow>(dest.Data,
        ar => Assert.True(ar.AggValue == 10 && ar.GroupName == "Class1"),
        ar => Assert.True(ar.AggValue == 10 && ar.GroupName == "Class2")
    );
}
/// <summary>
/// A LookupTransformation without a lookup source must fail with an
/// ETLBoxException.
/// </summary>
public void NoLookupSource()
{
    //Arrange
    var source = new MemorySource<MyDataRow>();
    source.DataAsList.Add(new MyDataRow() { Col1 = 1, Col2 = "Test1" });

    //Act
    var lookup = new LookupTransformation<MyDataRow, MyLookupRow>();
    var dest = new MemoryDestination<MyDataRow>();

    //Assert
    Assert.Throws<ETLBoxException>(() =>
    {
        try
        {
            source.LinkTo(lookup);
            lookup.LinkTo(dest);
            source.Execute();
            dest.Wait();
        }
        catch (AggregateException e)
        {
            throw e.InnerException;
        }
    });
}
/// <summary>
/// An aggregation without grouping folds all detail values into a single
/// result row (3.5 + 4.5 + 2.0 = 10).
/// </summary>
public void AggregateSimple()
{
    //Arrange
    var source = new MemorySource<MyRow>();
    source.DataAsList = new List<MyRow>()
    {
        new MyRow { Id = 1, DetailValue = 3.5 },
        new MyRow { Id = 2, DetailValue = 4.5 },
        new MyRow { Id = 3, DetailValue = 2.0 },
    };
    var agg = new Aggregation<MyRow, MyAggRow>(
        (row, aggRow) => aggRow.AggValue += row.DetailValue
    );
    var dest = new MemoryDestination<MyAggRow>();

    //Act
    source.LinkTo(agg);
    agg.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Collection<MyAggRow>(dest.Data,
        ar => Assert.True(ar.AggValue == 10)
    );
}
/// <summary>
/// Initializes a new instance of the <see cref="Image{TPixel}"/> class
/// wrapping an external <see cref="MemorySource{T}"/>
/// </summary>
/// <param name="configuration">The configuration providing initialization code which allows extending the library.</param>
/// <param name="memorySource">The memory source.</param>
/// <param name="width">The width of the image in pixels.</param>
/// <param name="height">The height of the image in pixels.</param>
/// <param name="metadata">The images metadata.</param>
internal Image(Configuration configuration, MemorySource<TPixel> memorySource, int width, int height, ImageMetaData metadata)
{
    this.configuration = configuration;
    this.MetaData = metadata;

    // Bits per pixel derived from the size of the pixel struct
    this.PixelType = new PixelTypeInfo(Unsafe.SizeOf<TPixel>() * 8);
    this.Frames = new ImageFrameCollection<TPixel>(this, width, height, memorySource);
}
/// <summary>
/// Null rows from the memory source are ignored when writing to the database;
/// the three non-null rows land in the destination table.
/// </summary>
public void IgnoreWithStringArray()
{
    //Arrange
    var d2c = new TwoColumnsTableFixture(SqlConnection, "DestIgnoreNullValuesStringArray");
    var source = new MemorySource<string[]>();
    source.DataAsList = new List<string[]>()
    {
        null,
        new string[] { "1", "Test1" },
        null,
        new string[] { "2", "Test2" },
        new string[] { "3", "Test3" },
        null
    };
    var dest = new DbDestination<string[]>(SqlConnection, "DestIgnoreNullValuesStringArray");

    //Act
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    d2c.AssertTestData();
}
/// <summary>
/// A MemorySource fed via its Data property pushes every list entry into the
/// database destination.
/// </summary>
public void DataIsFromList()
{
    //Arrange
    var dest2Columns = new TwoColumnsTableFixture("MemoryDestination");
    var source = new MemorySource<MySimpleRow>();
    var dest = new DBDestination<MySimpleRow>(SqlConnection, "MemoryDestination");

    //Act
    source.Data = new List<MySimpleRow>()
    {
        new MySimpleRow() { Col1 = 1, Col2 = "Test1" },
        new MySimpleRow() { Col1 = 2, Col2 = "Test2" },
        new MySimpleRow() { Col1 = 3, Col2 = "Test3" }
    };
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    dest2Columns.AssertTestData();
}
/// <summary>
/// Null entries in a string[] source are skipped, so the written CSV matches
/// the headerless two-column reference file.
/// </summary>
public void IgnoreWithStringArray()
{
    //Arrange
    var source = new MemorySource<string[]>();
    source.DataAsList = new List<string[]>()
    {
        null,
        new string[] { "1", "Test1" },
        null,
        new string[] { "2", "Test2" },
        new string[] { "3", "Test3" },
        null
    };

    //Act
    var dest = new CsvDestination<string[]>("./IgnoreNullValuesStringArray.csv");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Equal(File.ReadAllText("./IgnoreNullValuesStringArray.csv"),
        File.ReadAllText("res/CsvDestination/TwoColumnsNoHeader.csv"));
}
/// <summary>
/// Null object rows from the memory source are ignored when writing to the
/// database; only the three real rows arrive.
/// </summary>
public void IgnoreWithObject()
{
    //Arrange
    var d2c = new TwoColumnsTableFixture(SqlConnection, "DestIgnoreNullValues");
    var source = new MemorySource<MySimpleRow>();
    source.DataAsList = new List<MySimpleRow>()
    {
        null,
        new MySimpleRow() { Col1 = 1, Col2 = "Test1" },
        null,
        new MySimpleRow() { Col1 = 2, Col2 = "Test2" },
        new MySimpleRow() { Col1 = 3, Col2 = "Test3" },
        null
    };
    var dest = new DbDestination<MySimpleRow>(SqlConnection, "DestIgnoreNullValues");

    //Act
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    d2c.AssertTestData();
}
/// <summary>
/// With no error output linked, the flow is expected to throw when one of the
/// rows cannot be written to the CSV destination.
/// </summary>
public void NoErrorHandling()
{
    //Arrange
    var source = new MemorySource<MySimpleRow>();
    source.DataAsList = new List<MySimpleRow>()
    {
        new MySimpleRow() { Col1 = "X" },
        new MySimpleRow() { Col1 = "1" },
        new MySimpleRow() { Col1 = null }
    };
    var dest = new CsvDestination<MySimpleRow>("ErrorFileNoError.csv");

    //Act
    //Assert
    Assert.ThrowsAny<Exception>(() =>
    {
        source.LinkTo(dest);
        source.Execute();
        dest.Wait();
    });
}
/// <summary>
/// Null object rows are skipped, so the written CSV matches the two-column
/// reference file.
/// </summary>
public void IgnoreWithObject()
{
    //Arrange
    var source = new MemorySource<MySimpleRow>();
    source.DataAsList = new List<MySimpleRow>()
    {
        null,
        new MySimpleRow() { Col1 = 1, Col2 = "Test1" },
        null,
        new MySimpleRow() { Col1 = 2, Col2 = "Test2" },
        new MySimpleRow() { Col1 = 3, Col2 = "Test3" },
        null
    };

    //Act
    var dest = new CsvDestination<MySimpleRow>("./IgnoreNullValues.csv");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Equal(File.ReadAllText("./IgnoreNullValues.csv"),
        File.ReadAllText("res/CsvDestination/TwoColumns.csv"));
}
/// <summary>
/// Full merge with dynamic rows: the delta table must report existing, updated,
/// inserted and deleted rows, each with the proper ChangeAction and key.
/// </summary>
public void SimpleMergeWithDynamic()
{
    //Arrange
    MemorySource source = new MemorySource();
    source.DataAsList.Add(CreateDynamicRow(1, "Test1"));
    source.DataAsList.Add(CreateDynamicRow(2, "Test2"));
    source.DataAsList.Add(CreateDynamicRow(3, "Test3"));
    TwoColumnsTableFixture d2c = new TwoColumnsTableFixture(SqlConnection, "DBMergeDynamicDestination");
    d2c.InsertTestDataSet3();

    //Act
    DbMerge dest = new DbMerge(SqlConnection, "DBMergeDynamicDestination");
    dest.MergeProperties.IdPropertyNames.Add("Col1");
    dest.MergeProperties.ComparePropertyNames.Add("Col2");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    Assert.Equal(3, RowCountTask.Count(SqlConnection, "DBMergeDynamicDestination",
        $"{d2c.QB}Col1{d2c.QE} BETWEEN 1 AND 7 AND {d2c.QB}Col2{d2c.QE} LIKE 'Test%'"));
    d2c.AssertTestData();

    // Local helper: builds an inspector asserting change action and key of a delta row
    Action<ExpandoObject> Expect(ChangeAction action, int col1) =>
        row =>
        {
            dynamic r = row as ExpandoObject;
            Assert.True(r.ChangeAction == action && r.Col1 == col1);
        };

    Assert.Collection<ExpandoObject>(dest.DeltaTable,
        Expect(ChangeAction.Exists, 1),
        Expect(ChangeAction.Update, 2),
        Expect(ChangeAction.Insert, 3),
        Expect(ChangeAction.Delete, 4),
        Expect(ChangeAction.Delete, 10)
    );
}
/// <summary>
/// Delta-mode merge with dynamic rows: updates, inserts and rows flagged via
/// the "Delete" property all show up in the delta table.
/// </summary>
public void DeltaLoadWithDeletion()
{
    //Arrange
    MemorySource source = new MemorySource();
    source.DataAsList.Add(CreateDynamicRow(2, "Test2"));
    source.DataAsList.Add(CreateDynamicRow(3, "Test3"));
    source.DataAsList.Add(CreateDynamicRow(4, delete: true));
    source.DataAsList.Add(CreateDynamicRow(10, delete: true));
    TwoColumnsTableFixture d2c = new TwoColumnsTableFixture(SqlConnection, "DBMergeDynamicDeltaDestination");
    d2c.InsertTestDataSet3();

    //Act
    DbMerge dest = new DbMerge(SqlConnection, "DBMergeDynamicDeltaDestination")
    {
        DeltaMode = MergeMode.Delta
    };
    dest.MergeProperties.IdPropertyNames.Add("Col1");
    dest.MergeProperties.ComparePropertyNames.Add("Col2");
    dest.MergeProperties.DeletionProperties.Add("Delete", true);
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    d2c.AssertTestData();

    // Local helper: builds an inspector asserting change action and key of a delta row
    Action<ExpandoObject> Expect(ChangeAction action, int col1) =>
        row =>
        {
            dynamic r = row as ExpandoObject;
            Assert.True(r.ChangeAction == action && r.Col1 == col1);
        };

    Assert.Collection<ExpandoObject>(dest.DeltaTable,
        Expect(ChangeAction.Update, 2),
        Expect(ChangeAction.Insert, 3),
        Expect(ChangeAction.Delete, 4),
        Expect(ChangeAction.Delete, 10)
    );
}
/// <summary>
/// A DbDestination created with a table name only (no connection manager) must
/// raise an ETLBoxException on execution.
/// </summary>
public void DbDestination()
{
    //Arrange
    string[] data = { "1", "2" };
    var source = new MemorySource<string[]>();
    source.DataAsList.Add(data);
    var dest = new DbDestination<string[]>("test");
    source.LinkTo(dest);

    //Act & Assert
    Assert.Throws<ETLBoxException>(() =>
    {
        try
        {
            source.Execute();
            dest.Wait();
        }
        catch (AggregateException e)
        {
            throw e.InnerException;
        }
    });
}
/// <summary>
/// Shared driver: builds a lookup flow for lookup-row type T with an empty
/// lookup source and asserts that execution fails with an ETLBoxException.
/// </summary>
private static void RunExceptionFlowWithType<T>()
{
    //Arrange
    var source = new MemorySource<InputDataRow>();
    source.DataAsList.Add(new InputDataRow() { LookupId = 1 });
    var lookupSource = new MemorySource<T>();
    var lookup = new LookupTransformation<InputDataRow, T>(lookupSource);
    var dest = new MemoryDestination<InputDataRow>();
    source.LinkTo(lookup);
    lookup.LinkTo(dest);

    //Act && Assert
    Assert.Throws<ETLBoxException>(() =>
    {
        try
        {
            source.Execute();
            dest.Wait();
        }
        catch (AggregateException e)
        {
            throw e.InnerException;
        }
    });
}
/// <summary>
/// The mixed-type row built as a dynamic ExpandoObject round-trips into the
/// destination table.
/// </summary>
public void MixedTypesWithDynamic(IConnectionManager connection)
{
    CreateTestTable(connection, "datatypedestinationdynamic");

    //Arrange
    MemorySource source = new MemorySource();
    dynamic d1 = new ExpandoObject();
    d1.IntCol = 1;
    d1.LongCol = -1;
    d1.DecimalCol = 2.3M;
    d1.DoubleCol = 5.4;
    d1.DateTimeCol = new DateTime(2010, 1, 1, 10, 10, 10);
    d1.DateCol = new DateTime(2020, 1, 1);
    d1.StringCol = "Test";
    d1.CharCol = 'T';
    d1.DecimalStringCol = "13.4566";
    d1.NullCol = null;
    d1.EnumCol = EnumType.Value2;
    // FIX: the row was built but never handed to the source, so the destination
    // received nothing and AssertFirstRow could not succeed.
    source.DataAsList.Add(d1);

    //Act
    DbDestination dest = new DbDestination(connection, "datatypedestinationdynamic");
    source.LinkTo(dest);
    source.Execute();
    dest.Wait();

    //Assert
    AssertFirstRow(connection, "datatypedestinationdynamic");
}
/// <summary>
/// Schema validation on dynamic rows: the single row holding invalid xml is
/// redirected to the error destination, the two valid ones pass through.
/// </summary>
public void ValidateSchemaForDynamicObject()
{
    //Arrange
    var source = new MemorySource();
    foreach (string xml in new[] { _validXml, _validXml, _invalidXml })
    {
        dynamic n = new ExpandoObject();
        n.Xml = xml;
        source.DataAsList.Add(n);
    }
    MemoryDestination dest = new MemoryDestination();
    MemoryDestination<ETLBoxError> error = new MemoryDestination<ETLBoxError>();

    //Act
    XmlSchemaValidation schemaValidation = new XmlSchemaValidation();
    schemaValidation.XmlSelector = row =>
    {
        dynamic r = row as ExpandoObject;
        return r.Xml;
    };
    schemaValidation.XmlSchema = xsdMarkup;
    source.LinkTo(schemaValidation);
    schemaValidation.LinkTo(dest);
    schemaValidation.LinkErrorTo(error);
    source.Execute();
    dest.Wait();
    error.Wait();

    //Assert
    Assert.True(dest.Data.Count == 2);
    Assert.True(error.Data.Count == 1);
}
private void btnCheckForUpdates_Click(object sender, EventArgs e)
{
    // For this demonstration the update feed is read from a local file and handed
    // to UpdateManager through a MemorySource. Without an explicit IUpdateSource,
    // CheckForUpdates would try to retrieve the feed from the URL configured in
    // SimpleWebSource (which we did not provide).
    string feedXml = System.IO.File.ReadAllText("SampleUpdateFeed.xml");
    IUpdateSource feedSource = new MemorySource(feedXml);
    CheckForUpdates(feedSource);
}
/*public IDataSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> ImputeEmotionalContent(List<List<string>> texts, uint repeats) { MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> inputed = new MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>(); ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> combo = new ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>(source, inputed); for (uint ii = 0; ii < repeats; ii++) { Dictionary<string, double> wordVnumers = new Dictionary<string, double>(), wordVdenoms = new Dictionary<string, double>(), wordAnumers = new Dictionary<string, double>(), wordAdenoms = new Dictionary<string, double>(), wordDnumers = new Dictionary<string, double>(), wordDdenoms = new Dictionary<string, double>(), wordVsumvar = new Dictionary<string, double>(), wordVcounts = new Dictionary<string, double>(), wordAsumvar = new Dictionary<string, double>(), wordAcounts = new Dictionary<string, double>(), wordDsumvar = new Dictionary<string, double>(), wordDcounts = new Dictionary<string, double>(); uint jj = 0; foreach (List<string> words in texts) { jj++; if (jj % 1000 == 0) Console.WriteLine("#" + jj); double textVnumer = 0, textVdenom = 0, textAnumer = 0, textAdenom = 0, textDnumer = 0, textDdenom = 0; double textVsumvar = 0, textVcount = 0, textAsumvar = 0, textAcount = 0, textDsumvar = 0, textDcount = 0; foreach (string word in words) { if (word.StartsWith(" ") || word.Length <= 2) continue; ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> vad; if (!TryGetWordOrStem(combo, word, out vad)) continue; textVnumer += vad.one.Mean / vad.one.Variance; textVdenom += 1 / vad.one.Variance; textVsumvar += vad.one.Variance; textVcount++; textAnumer += vad.two.Mean / 
vad.two.Variance; textAdenom += 1 / vad.two.Variance; textAsumvar += vad.two.Variance; textAcount++; textDnumer += vad.three.Mean / vad.three.Variance; textDdenom += 1 / vad.three.Variance; textDsumvar += vad.three.Variance; textDcount++; } double vmean = textVnumer / textVdenom, amean = textAnumer / textAdenom, dmean = textDnumer / textDdenom; double vvar = textVsumvar / textVcount, avar = textAsumvar / textAcount, dvar = textDsumvar / textDcount; if (double.IsNaN(vmean) || double.IsNaN(amean) || double.IsNaN(dmean)) continue; foreach (string word in words) { if (word.StartsWith(" ") || word.Length <= 2) continue; ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> vad; if (TryGetWordOrStem(source, word, out vad)) continue; string stem = stemmer.stemTerm(word); AddTextNumerDenom(stem, wordVnumers, wordVdenoms, wordVsumvar, wordVcounts, vmean, vvar); AddTextNumerDenom(stem, wordAnumers, wordAdenoms, wordAsumvar, wordAcounts, amean, avar); AddTextNumerDenom(stem, wordDnumers, wordDdenoms, wordDsumvar, wordDcounts, dmean, dvar); } } foreach (string stem in wordVnumers.Keys) { ContinuousDistribution valence = new ClippedGaussianDistribution(wordVnumers[stem] / wordVdenoms[stem], wordVsumvar[stem] / wordVcounts[stem], 0, 1); ContinuousDistribution arousal = new ClippedGaussianDistribution(wordAnumers[stem] / wordAdenoms[stem], wordAsumvar[stem] / wordAcounts[stem], 0, 1); ContinuousDistribution dominance = new ClippedGaussianDistribution(wordDnumers[stem] / wordDdenoms[stem], wordDsumvar[stem] / wordDcounts[stem], 0, 1); inputed[stem] = new ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance); } } source = combo; return inputed; } public void AddTextNumerDenom(string stem, Dictionary<string, double> wordXnumers, Dictionary<string, double> wordXdenoms, Dictionary<string, double> wordXsumvar, Dictionary<string, double> wordXcounts, double xmean, double xvar) { double numer, denom, 
sumvar, count; if (!wordXnumers.TryGetValue(stem, out numer)) { numer = 0; denom = 0; sumvar = 0; count = 0; } else { wordXdenoms.TryGetValue(stem, out denom); wordXsumvar.TryGetValue(stem, out sumvar); wordXcounts.TryGetValue(stem, out count); xvar += (xmean - numer / denom) * (xmean - numer / denom); } numer += xmean / xvar; denom += 1 / xvar; sumvar += xvar; count++; wordXnumers[stem] = numer; wordXdenoms[stem] = denom; wordXsumvar[stem] = sumvar; wordXcounts[stem] = count; }*/

/// <summary>
/// Iteratively imputes valence/arousal/dominance distributions for word stems:
/// each pass runs AnalyzeWords over every text against the combined
/// (base 'source' + 'imputed') lookup, then AnalyzeSentences rebuilds the
/// imputed entries and writes them to the 'imputesave' file.
/// </summary>
/// <param name="texts">Tokenized texts, one word list per text.</param>
/// <param name="repeats">Number of imputation passes over the corpus.</param>
/// <param name="imputesave">Path AnalyzeSentences writes imputed values to (recreated each pass).</param>
/// <returns>The memory source holding the imputed distributions.</returns>
public IDataSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> ImputeEmotionalContent(List<List<string>> texts, uint repeats, string imputesave)
{
    MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> imputed = new MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>();
    // Combined view over the base source and the imputed store
    // (NOTE(review): precedence between the two is defined by ComboSource - confirm there)
    ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> combo = new ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>(source, imputed);
    for (uint ii = 0; ii < repeats; ii++)
    {
        // Per-stem weighted samples collected by AnalyzeWords for this pass
        Dictionary<string, List<KeyValuePair<double, double>>> sentencesV = new Dictionary<string, List<KeyValuePair<double, double>>>(),
            sentencesA = new Dictionary<string, List<KeyValuePair<double, double>>>(),
            sentencesD = new Dictionary<string, List<KeyValuePair<double, double>>>();
        uint jj = 0; // progress counter, logged every 1000 texts
        foreach (List<string> words in texts)
        {
            jj++;
            if (jj % 1000 == 0)
                Console.WriteLine("#" + jj);
            AnalyzeWords(words, combo, sentencesV, sentencesA, sentencesD);
        }
        AnalyzeSentences(imputed, sentencesV, sentencesA, sentencesD, imputesave);
    }
    // From now on this instance reads through the combined source.
    source = combo;
    return imputed;
}
/// <summary>
/// Like ImputeEmotionalContent, but streams the texts from a data file (text in
/// column 'column') and first reloads any previously imputed values from the
/// 'imputesave' file before iterating.
/// </summary>
/// <param name="filename">Data file read row-by-row via DataReader.</param>
/// <param name="column">Index of the row element containing the text.</param>
/// <param name="repeats">Number of imputation passes over the file.</param>
/// <param name="imputesave">File that both seeds and receives imputed values (rewritten by AnalyzeSentences).</param>
/// <returns>The memory source holding the imputed distributions.</returns>
public IDataSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> ImputeEmotionalContentFromFile(string filename, uint column, uint repeats, string imputesave)
{
    MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> imputed = new MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>();
    ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> combo = new ComboSource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>>(source, imputed);

    // Check for existing imputed file
    // Expected row layout: stem, vMean, vVar, aMean, aVar, dMean, dVar
    DataReader imputereader = new DataReader(imputesave);
    uint kk = 0;
    for (string[] row = imputereader.ReadRow(); row != null; row = imputereader.ReadRow())
    {
        kk++;
        if (kk % 1000 == 0)
            Console.WriteLine("#" + kk);
        double meanv = double.Parse(row[1]), varv = double.Parse(row[2]),
            meana = double.Parse(row[3]), vara = double.Parse(row[4]),
            meand = double.Parse(row[5]), vard = double.Parse(row[6]);
        ContinuousDistribution valence = new ClippedGaussianDistribution(meanv, varv, 0, 1);
        ContinuousDistribution arousal = new ClippedGaussianDistribution(meana, vara, 0, 1);
        ContinuousDistribution dominance = new ClippedGaussianDistribution(meand, vard, 0, 1);
        imputed[row[0]] = new ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance);
    }
    imputereader.Close();

    for (uint ii = 0; ii < repeats; ii++)
    {
        // Per-stem weighted samples collected by AnalyzeWords during this pass
        Dictionary<string, List<KeyValuePair<double, double>>> sentencesV = new Dictionary<string, List<KeyValuePair<double, double>>>(),
            sentencesA = new Dictionary<string, List<KeyValuePair<double, double>>>(),
            sentencesD = new Dictionary<string, List<KeyValuePair<double, double>>>();
        DataReader reader = new DataReader(filename);
        uint jj = 0; // progress counter, logged every 1000 rows
        for (string[] row = reader.ReadRow(); row != null; row = reader.ReadRow())
        {
            jj++;
            if (jj % 1000 == 0)
                Console.WriteLine("#" + jj + ": " + sentencesV.Count + ", " + imputed.Count);
            List<string> words = TwitterUtilities.SplitWords(row[column].ToLower());
            AnalyzeWords(words, combo, sentencesV, sentencesA, sentencesD);
        }
        reader.Close();
        AnalyzeSentences(imputed, sentencesV, sentencesA, sentencesD, imputesave);
    }

    // From now on this instance reads through the combined source.
    source = combo;
    return imputed;
}
/// <summary>
/// Registers the noun data sources (noun type, gender and number) on the plugin
/// environment, all backed by word-list files under 'basedir', and adds the
/// handlers that allow new singular/plural forms to be recorded at runtime.
/// </summary>
/// <param name="env">Plugin environment that receives the data sources and actions.</param>
/// <param name="basedir">Directory containing the noun word-list files.</param>
public void InitializeNouns(PluginEnvironment env, string basedir)
{
    int key_len = 256; // maximum key length passed to every AlphabeticFileSet
    char[] tokenizer = new char[] { ' ' };

    // Noun type source
    IDataSource<string, Nouns.NounType> ambigSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "person_ambig.txt", tokenizer, key_len, Nouns.NounType.ProperEither);
    IDataSource<string, Nouns.NounType> femaleSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "person_female.txt", tokenizer, key_len, Nouns.NounType.ProperFemale);
    IDataSource<string, Nouns.NounType> maleSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "person_male.txt", tokenizer, key_len, Nouns.NounType.ProperMale);
    IDataSource<string, Nouns.NounType> citySource = new AlphabeticFileSet<Nouns.NounType>(basedir + "city.txt", tokenizer, key_len, Nouns.NounType.ProperCity);
    IDataSource<string, Nouns.NounType> countrySource = new AlphabeticFileSet<Nouns.NounType>(basedir + "country.txt", tokenizer, key_len, Nouns.NounType.ProperCountry);
    IDataSource<string, Nouns.NounType> regionSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "region.txt", tokenizer, key_len, Nouns.NounType.ProperProvince);
    IDataSource<string, Nouns.NounType> countSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "count_nouns.txt", tokenizer, key_len, Nouns.NounType.Count);
    IDataSource<string, Nouns.NounType> massSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "mass_nouns.txt", tokenizer, key_len, Nouns.NounType.Mass);
    IDataSource<string, Nouns.NounType> theSource = new AlphabeticFileSet<Nouns.NounType>(basedir + "the_nouns.txt", tokenizer, key_len, Nouns.NounType.The);
    // All nine type sources merged pairwise into one combined lookup
    env.SetDataSource<string, Nouns.NounType>(Nouns.NounTypeSourceName, new ComboSource<string, Nouns.NounType>(new ComboSource<string, Nouns.NounType>(
        new ComboSource<string, Nouns.NounType>(new ComboSource<string, Nouns.NounType>(ambigSource, femaleSource), maleSource),
        new ComboSource<string, Nouns.NounType>(new ComboSource<string, Nouns.NounType>(citySource, countrySource), regionSource)),
        new ComboSource<string, Nouns.NounType>(new ComboSource<string, Nouns.NounType>(countSource, massSource), theSource)));

    // Gender source
    IDataSource<string, Nouns.Gender> feminineSource = new AlphabeticFileSet<Nouns.Gender>(basedir + "nouns_female.txt", tokenizer, key_len, Nouns.Gender.Female);
    IDataSource<string, Nouns.Gender> masculineSource = new AlphabeticFileSet<Nouns.Gender>(basedir + "nouns_male.txt", tokenizer, key_len, Nouns.Gender.Male);
    IDataSource<string, Nouns.Gender> eitherSource = new AlphabeticFileSet<Nouns.Gender>(basedir + "nouns_malefem.txt", tokenizer, key_len, Nouns.Gender.Either);
    env.SetDataSource<string, Nouns.Gender>(Nouns.GenderSourceName, new ComboSource<string, Nouns.Gender>(new ComboSource<string, Nouns.Gender>(feminineSource, masculineSource), eitherSource));

    // Number source: nouns_number.txt is loaded into two in-memory maps
    MemorySource<string, string> toSingular = new MemorySource<string, string>();
    MemorySource<string, string> toPlural = new MemorySource<string, string>();
    ReadNounNumber(basedir + "nouns_number.txt", toSingular, toPlural);
    // NOTE(review): toSingular is registered with Number.Plural (and toPlural with
    // Number.Singular) - presumably because the KEYS of the to-singular map are the
    // plural forms; confirm against ReadNounNumber/MapDataSource semantics.
    env.SetDataSource<string, Nouns.Number>(Nouns.NumberSourceName, new ComboSource<string, Nouns.Number>(new MapDataSource<string, string, Nouns.Number>(toSingular, ToShared, Nouns.Number.Plural), new MapDataSource<string, string, Nouns.Number>(toPlural, ToShared, Nouns.Number.Singular)));
    // Runtime hooks so newly encountered forms can be added to the maps
    env.AddAction(new ChangeNounHandler(Nouns.Number.Singular, toSingular));
    env.AddAction(new ChangeNounHandler(Nouns.Number.Plural, toPlural));
}
/// <summary>
/// Collapses the per-stem weighted samples (value/weight pairs) gathered for
/// valence, arousal and dominance into clipped Gaussian distributions on [0, 1],
/// stores them in 'inputed' (sic - presumably "imputed") and writes one
/// comma-separated line per stem to 'imputesave' (file recreated, not appended).
/// </summary>
public void AnalyzeSentences(MemorySource<string, ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>> inputed, Dictionary<string, List<KeyValuePair<double, double>>> sentencesV, Dictionary<string, List<KeyValuePair<double, double>>> sentencesA, Dictionary<string, List<KeyValuePair<double, double>>> sentencesD, string imputesave)
{
    using (var stream = File.CreateText(imputesave))
    {
        // Assumes sentencesA/sentencesD hold samples for every key in sentencesV.
        foreach (string stem in sentencesV.Keys)
        {
            // Weighted means of all sentence-level estimates for this stem
            double vmean = WeightedStatistics.Mean(sentencesV[stem]), amean = WeightedStatistics.Mean(sentencesA[stem]), dmean = WeightedStatistics.Mean(sentencesD[stem]);
            ClippedGaussianDistribution valence = new ClippedGaussianDistribution(vmean, WeightedStatistics.Variance(sentencesV[stem], vmean, true), 0, 1);
            ClippedGaussianDistribution arousal = new ClippedGaussianDistribution(amean, WeightedStatistics.Variance(sentencesA[stem], amean, true), 0, 1);
            ClippedGaussianDistribution dominance = new ClippedGaussianDistribution(dmean, WeightedStatistics.Variance(sentencesD[stem], dmean, true), 0, 1);
            inputed[stem] = new ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance);
            // Persisted layout: stem,vMean,vVar,aMean,aVar,dMean,dVar
            stream.WriteLine(stem + "," + valence.InternalMean + "," + valence.InternalVariance + "," + arousal.InternalMean + "," + arousal.InternalVariance + "," + dominance.InternalMean + "," + dominance.InternalVariance);
        }
    }
}