private static double GetChi2Stat(Dictionary<FType, StatItem> col1Stats, Dictionary<FType, StatItem> col2Stats, Dictionary<TupleData, StatItem> commonStats, int rowscount)
{
    double chi2 = 0;
    foreach (var k1 in col1Stats.Keys)
    {
        foreach (var k2 in col2Stats.Keys)
        {
            // build the value pair
            var t = new TupleData(new List<object> { k1, k2 });

            // number of occurrences of the pair (may be zero)
            int pn = 0;
            if (commonStats.ContainsKey(t))
            {
                pn = commonStats[t].Count;
            }

            // marginal probabilities of the two values
            double p1 = col1Stats[k1].ItemProb; // probability of the first value
            double p2 = col2Stats[k2].ItemProb; // probability of the second value

            // chi-square statistic
            double chidiff = (pn - rowscount * p1 * p2);
            chi2 += (chidiff * chidiff) / (rowscount * p1 * p2);
        }
    }
    return chi2;
}
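// The loop above accumulates the standard Pearson chi-square statistic for an independence
// test of the two discretized features:
//
//     chi2 = sum over (k1, k2) of (n[k1,k2] - N * p1(k1) * p2(k2))^2 / (N * p1(k1) * p2(k2))
//
// A minimal, hypothetical usage sketch (the dictionaries are assumed to have been filled by
// the caller, as the Main method further below does):
//
//     double chi2 = GetChi2Stat(col1Stats, col2Stats, commonStats, loader.TotalDataLines);
//     double chi2max = Util.InvChi2CDF((col1Stats.Count - 1) * (col2Stats.Count - 1), 0.95);
//     bool dependent = chi2 > chi2max; // reject independence at the 95% confidence level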
private string GetSanitizedDataType(TupleData tuple)
{
    string dataType = (tuple["DATA_TYPE"] as string)?.Trim();
    if (dataType != null)
    {
        dataType = Regex.Replace(dataType, @"\(\d+\)", "");
    }
    return dataType;
}
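// Hypothetical example of the sanitizer's effect (assuming the tuple comes from an
// INFORMATION_SCHEMA-style metadata query): a DATA_TYPE value such as "NUMBER(10)" or
// "VARCHAR2(255)" is trimmed and its "(n)" size suffix removed, yielding "NUMBER" and
// "VARCHAR2", so the type name can be mapped regardless of the declared size, e.g.
//
//     Debug.Assert(Regex.Replace("NUMBER(10)", @"\(\d+\)", "") == "NUMBER");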
private async Task AddPrimaryKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableName = tuple["TABLE_NAME"] as string;
        string columnName = tuple["COLUMN_NAME"] as string;
        string keyName = $"PK_{tableName}";
        int keyIndex = int.Parse(tuple["ORDINAL_POSITION"]?.ToString());
        builder.AddKey(null, tableName, columnName, keyName, keyIndex);
    }
}
private async Task AddPrimaryKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = tuple["table_schema"] as string;
        string tableName = tuple["table_name"] as string;
        string columnName = tuple["column_name"] as string;
        string keyName = tuple["constraint_name"] as string;
        int keyIndex = int.Parse(tuple["ordinal_position"]?.ToString());
        builder.AddKey(tableSchema, tableName, columnName, keyName, keyIndex);
    }
}
public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableName = tuple["tbl_name"] as string;
        string columnName = tuple["from"] as string;
        string uniqueName = $"pk_{tuple["table"]}";
        string foreignName = $"fk_{tableName}_{tuple["table"]}_{tuple["id"]}";
        int keyIndex = (int)(long)tuple["seq"] + 1;
        builder.AddReference(null, tableName, columnName, foreignName, uniqueName, keyIndex);
    }
}
private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableName = tuple["TABLE_NAME"] as string;
        string columnName = tuple["COLUMN_NAME"] as string;
        string typeName = tuple["DATA_TYPE"] as string;
        bool isNullable = (tuple["IS_NULLABLE"] as string == "YES");
        bool isIdentity = ((string)tuple["EXTRA"]).Contains("auto_increment");
        builder.AddColumn(null, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity);
    }
}
public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableName = tuple["TABLE_NAME"] as string;
        string columnName = tuple["COLUMN_NAME"] as string;
        string uniqueName = $"PK_{tuple["REFERENCED_TABLE_NAME"]}";
        string foreignName = tuple["CONSTRAINT_NAME"] as string;
        int keyIndex = int.Parse(tuple["ORDINAL_POSITION"]?.ToString());
        builder.AddReference(null, tableName, columnName, foreignName, uniqueName, keyIndex);
    }
}
private static TupleData CreateValueTuple(string[] cval, DataRow<double> row)
{
    var vals = new List<object>(2);
    for (int i = 0; i < cval.Length; i++)
    {
        int cidx = _loader.RowIdxByColumn[cval[i]];
        double dval = row.Coeffs[cidx];
        vals.Add(dval);
    }
    var vtuple = new TupleData(vals);
    return vtuple;
}
public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = tuple["table_schema"] as string;
        string tableName = tuple["table_name"] as string;
        string columnName = tuple["column_name"] as string;
        string uniqueName = tuple["unique_constraint_name"] as string;
        string foreignName = tuple["constraint_name"] as string;
        int keyIndex = int.Parse(tuple["ordinal_position"]?.ToString());
        builder.AddReference(tableSchema, tableName, columnName, foreignName, uniqueName, keyIndex);
    }
}
public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = null;
        string tableName = tuple["TABLE_NAME"] as string;
        string columnName = tuple["COLUMN_NAME"] as string;
        string uniqueName = tuple["UNIQUE_CONSTRAINT_NAME"] as string;
        string foreignName = tuple["CONSTRAINT_NAME"] as string;
        int keyIndex = int.Parse(tuple["POSITION"]?.ToString());
        builder.AddReference(tableSchema, tableName, columnName, foreignName, uniqueName, keyIndex);
    }
}
private async Task AddTablesAndColumnAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = null;
        string tableName = (tuple["TABLE_NAME"] as string).Trim();
        string columnName = tuple["COLUMN_NAME"] as string;
        string typeName = this.GetSanitizedDataType(tuple);
        bool isNullable = (tuple["NULLABLE"] as string == "Y");
        bool isIdentity = (tuple["IDENTITY_COLUMN"] as string == "YES");
        bool ignoreTable = false;
        builder.AddColumn(tableSchema, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity, ignoreTable: ignoreTable);
    }
}
private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = tuple["TABLE_SCHEMA"] as string;
        string tableName = tuple["TABLE_NAME"] as string;
        string columnName = tuple["COLUMN_NAME"] as string;
        string typeName = tuple["DATA_TYPE"] as string;
        bool isNullable = (tuple["IS_NULLABLE"] as string == "YES");
        bool isIdentity = ((int?)tuple["IS_IDENTITY"] == 1);
        bool ignoreTable = this.IsIgnoredTable(tableSchema, tableName);
        builder.AddColumn(tableSchema, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity, ignoreTable: ignoreTable);
    }
}
private async Task AddTablesAndColumnAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string tableSchema = tuple["table_schema"] as string;
        string tableName = tuple["table_name"] as string;
        string columnName = tuple["column_name"] as string;
        string typeName = tuple["data_type"] as string;
        bool isNullable = (tuple["is_nullable"] as string == "YES");
        bool isIdentity = (tuple["is_identity"] as string == "YES" || tuple["serial_seq"] != null);
        bool ignoreTable = false;
        builder.AddColumn(tableSchema, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity, ignoreTable: ignoreTable);
    }
}
private string GetNormalizedTypeName(TupleData tuple)
{
    if (tuple["type"] is string typeName)
    {
        int sizeIndex = typeName.IndexOf('(');
        if (sizeIndex == -1)
        {
            return typeName;
        }
        return typeName.Remove(sizeIndex);
    }
    return null;
}
private static string CreateHeader(string[] cols, int n)
{
    var sb = new StringBuilder();
    var iter = new CombinationIterator(cols, n);
    sb.Append(_loader.IdName);
    while (iter.MoveNext())
    {
        var ftuple = new TupleData(iter.Current);
        sb.Append(";" + ftuple);
    }
    sb.Append(";" + _loader.TargetName);
    return sb.ToString();
}
private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
{
    foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
    {
        string sqlDef = tuple["sql"] as string;
        string tableName = tuple["tbl_name"] as string;
        string columnName = tuple["name"] as string;
        string typeName = this.GetNormalizedTypeName(tuple);
        int keyIndex = (int)(long)tuple["pk"];
        bool isAutoIncrement = keyIndex > 0 && ((long)tuple["autoincr"] > 0 || this.HasAutoIncrementInSqlDefinition(columnName, sqlDef));
        bool isNullable = (keyIndex == 0 && (long)tuple["notnull"] == 0);
        bool ignoreTable = this.IsIgnoredTable(tableName);
        builder.AddColumn(null, tableName, columnName, typeName, isNullable, isIdentity: isAutoIncrement, ignoreTable: ignoreTable);
        if (keyIndex > 0)
        {
            builder.AddKey(null, tableName, columnName, "pk_" + tableName, keyIndex);
        }
    }
}
static void Main(string[] args)
{
    if (args.Length <= 1 || args.Length >= 5)
    {
        Logger.Log("usage: program.exe <datafile.csv> <full/short> [target_name [factor=1.0]]");
        return;
    }

    string filename = args[0];
    string stype = args[1].ToLower();
    string targetname = args.Length >= 3 ? args[2] : null;
    // scaling factor used to discretize (categorize) the features
    double factor = double.Parse(args.Length >= 4 ? args[3].Replace(',', '.') : "1", CultureInfo.InvariantCulture);

    if (stype != "full" && stype != "short")
    {
        Logger.Log("type can be only 'full' or 'short'");
        return;
    }
    if (stype == "short" && targetname == null)
    {
        Logger.Log("you must specify target_name in short mode");
        return;
    }

    Logger.Log("datafile = " + filename);
    Logger.Log("type = " + stype);
    Logger.Log("target_name = " + targetname);
    Logger.Log("factor = " + factor.ToString("F04"));

    if (!File.Exists(filename))
    {
        Logger.Log("file " + filename + " not found");
        return;
    }

    // load the data
    var loader = targetname != null ? new DataLoader<FType>(targetname) : new DataLoader<FType>();
    //loader.MaxRowsLoaded = 10000;
    if (targetname != null)
    {
        loader.RemoveSkipColumn(targetname);
    }
    loader.Load(filename);
    var cols = loader.FileIdxByColumn.Keys.ToArray();

    // output file
    string statname = filename + "_stats.csv";
    // if part of the data has already been processed, find out which part so it is not computed again
    var counted = LoadCountedData(statname);

    // per-feature statistics computed so far
    var factorStatDict = new Dictionary<string, FactorStat<FType>>();

    // start the computation
    using (var sw = new StreamWriter(new FileStream(statname, counted.Count > 0 ? FileMode.Append : FileMode.Create, FileAccess.Write), Encoding.UTF8))
    {
        if (counted.Count == 0)
        {
            sw.WriteLine("Factor1;Factor2;src_cnt1;src_cnt2;mod_cnt1;mod_cnt2;src_chi2;src_chi2max;src_chi2coeff;mod_chi2;mod_chi2max;mod_chi2coeff;corr;corrabs;inf_val");
        }

        for (int i = 0; i < cols.Length - 1; i++)
        {
            for (int j = i + 1; j < cols.Length; j++)
            {
                var col1 = cols[i]; // first feature
                var col2 = cols[j]; // second feature

                if (stype == "short")
                {
                    if (targetname != null)
                    {
                        if (col1 != loader.TargetName && col2 != loader.TargetName)
                        {
                            continue;
                        }
                    }
                }
                if (counted.ContainsKey(col1) && counted[col1].ContainsKey(col2))
                {
                    continue;
                }

                int col1idx = loader.RowIdxByColumn[col1];
                int col2idx = loader.RowIdxByColumn[col2];

                // have the statistics for these features been computed already?
                bool stat1Exist = factorStatDict.ContainsKey(col1);
                bool stat2Exist = factorStatDict.ContainsKey(col2);

                // per-feature statistics objects
                var col1Stats = stat1Exist ? factorStatDict[col1].ModifiedStat : new Dictionary<FType, StatItem>();
                var col2Stats = stat2Exist ? factorStatDict[col2].ModifiedStat : new Dictionary<FType, StatItem>();
                var scol1Stats = stat1Exist ? factorStatDict[col1].SourceStat : new Dictionary<FType, StatItem>();
                var scol2Stats = stat2Exist ? factorStatDict[col2].SourceStat : new Dictionary<FType, StatItem>();
                var f1stat = stat1Exist ? factorStatDict[col1] : new FactorStat<FType>();
                var f2stat = stat2Exist ? factorStatDict[col2] : new FactorStat<FType>();

                // statistics for feature pairs
                var commonStats = new Dictionary<TupleData, StatItem>();  // modified values
                var scommonStats = new Dictionary<TupleData, StatItem>(); // original values

                // mean, variance and correlation of the two features
                var colStats = PairStat<FType>.GetPairStat(loader, col1, col2);

                int rowscount = loader.TotalDataLines; // total rows
                int allTargets = 0;                    // total target rows

                // collect aggregate statistics over all rows
                foreach (var row in loader.Rows)
                {
                    // original feature values
                    FType fval1 = row.Values[col1idx];
                    FType fval2 = row.Values[col2idx];
                    // modified (discretized) feature values
                    FType val1 = (long)(Math.Round((fval1 - colStats.F1Avg) / colStats.F1Stddev * factor));
                    FType val2 = (long)(Math.Round((fval2 - colStats.F2Avg) / colStats.F2Stddev * factor));

                    if (!stat1Exist) // may already have been computed
                    {
                        if (!col1Stats.ContainsKey(val1))
                        {
                            col1Stats.Add(val1, new StatItem());
                        }
                        var stat1 = col1Stats[val1];
                        stat1.Count++; // occurrence statistics of the first feature's values (modified)
                        stat1.Targets += row.Target > 0 ? 1 : 0;

                        if (!scol1Stats.ContainsKey(fval1))
                        {
                            scol1Stats.Add(fval1, new StatItem());
                        }
                        var sstat1 = scol1Stats[fval1];
                        sstat1.Count++; // occurrence statistics of the first feature's values (original)
                        sstat1.Targets += row.Target > 0 ? 1 : 0;
                    }

                    if (!stat2Exist) // may already have been computed
                    {
                        if (!col2Stats.ContainsKey(val2))
                        {
                            col2Stats.Add(val2, new StatItem());
                        }
                        var stat2 = col2Stats[val2];
                        stat2.Count++; // occurrence statistics of the second feature's values (modified)
                        stat2.Targets += row.Target > 0 ? 1 : 0;

                        if (!scol2Stats.ContainsKey(fval2))
                        {
                            scol2Stats.Add(fval2, new StatItem());
                        }
                        var sstat2 = scol2Stats[fval2];
                        sstat2.Count++; // occurrence statistics of the second feature's values (original)
                        sstat2.Targets += row.Target > 0 ? 1 : 0;
                    }

                    allTargets += row.Target > 0 ? 1 : 0;

                    // occurrence statistics for feature pairs (modified values)
                    var tuple = new TupleData(new List<object> { val1, val2 });
                    if (!commonStats.ContainsKey(tuple))
                    {
                        commonStats.Add(tuple, new StatItem());
                    }
                    var stat = commonStats[tuple];
                    stat.Count++; // feature pairs

                    // occurrence statistics for feature pairs (original values)
                    var stuple = new TupleData(new List<object> { fval1, fval2 });
                    if (!scommonStats.ContainsKey(stuple))
                    {
                        scommonStats.Add(stuple, new StatItem());
                    }
                    var fstat = scommonStats[stuple];
                    fstat.Count++; // feature pairs
                }

                // save the computed per-feature statistics
                if (!stat1Exist)
                {
                    f1stat.ModifiedStat = col1Stats;
                    f1stat.SourceStat = scol1Stats;
                    f1stat.ModifiedCount = col1Stats.Count;
                    f1stat.SourceCount = scol1Stats.Count;
                }
                if (!stat2Exist)
                {
                    f2stat.ModifiedStat = col2Stats;
                    f2stat.SourceStat = scol2Stats;
                    f2stat.ModifiedCount = col2Stats.Count;
                    f2stat.SourceCount = scol2Stats.Count;
                }

                // next, compute the occurrence probabilities of the feature values
                if (!stat1Exist)
                {
                    foreach (var v in col1Stats.Values)
                    {
                        // probability of seeing this value of the first feature
                        v.ItemProb = v.Count / (FType)rowscount;
                    }
                    foreach (var v in scol1Stats.Values)
                    {
                        // probability of seeing this value of the first feature
                        v.ItemProb = v.Count / (FType)rowscount;
                    }
                }
                if (!stat2Exist)
                {
                    foreach (var v in col2Stats.Values)
                    {
                        // probability of seeing this value of the second feature
                        v.ItemProb = v.Count / (FType)rowscount;
                    }
                    foreach (var v in scol2Stats.Values)
                    {
                        // probability of seeing this value of the second feature
                        v.ItemProb = v.Count / (FType)rowscount;
                    }
                }
                foreach (var v in commonStats.Values)
                {
                    // probability of seeing this pair
                    v.ItemProb = v.Count / (FType)rowscount;
                }
                foreach (var v in scommonStats.Values)
                {
                    // probability of seeing this pair
                    v.ItemProb = v.Count / (FType)rowscount;
                }

                double chi2 = 0;  // chi-square over the modified features
                double schi2 = 0; // chi-square over the original features

                // compute the statistics over the modified features
                chi2 = GetChi2Stat(col1Stats, col2Stats, commonStats, rowscount);
                // compute the statistics over the original features
                schi2 = GetChi2Stat(scol1Stats, scol2Stats, scommonStats, rowscount);

                int cnt = (f1stat.ModifiedCount - 1) * (f2stat.ModifiedCount - 1);
                int scnt = (f1stat.SourceCount - 1) * (f2stat.SourceCount - 1);
                double chi2max = Util.InvChi2CDF(cnt, 0.95);
                double schi2max = Util.InvChi2CDF(scnt, 0.95);
                double chifactor = chi2 / chi2max;
                double schifactor = schi2 / schi2max;

                // information value
                double iv = 0;
                if (col1 == loader.TargetName || col2 == loader.TargetName)
                {
                    if (col1 == loader.TargetName)
                    {
                        iv = GetInvormationValue(f2stat, allTargets, rowscount);
                    }
                    else
                    {
                        iv = GetInvormationValue(f1stat, allTargets, rowscount);
                    }
                }

                sw.WriteLine("{0};{1};{2};{3};{4};{5};{6};{7};{8};{9};{10};{11};{12};{13};{14}",
                    col1, col2,
                    f1stat.SourceCount, f2stat.SourceCount,
                    f1stat.ModifiedCount, f2stat.ModifiedCount,
                    schi2.ToString("F09", CultureInfo.InvariantCulture), schi2max, schifactor,
                    chi2.ToString("F09", CultureInfo.InvariantCulture), chi2max, chifactor,
                    colStats.Correlation.ToString(),
                    Math.Abs(Convert.ToDecimal(colStats.Correlation)).ToString(),
                    iv.ToString("F09", CultureInfo.InvariantCulture));
                sw.Flush();
                Logger.Log(col1 + "," + col2);
            }
        }
        sw.Close();
    }
}
async IAsyncEnumerator<PgOutputReplicationMessage> StartReplicationInternal(CancellationToken cancellationToken)
{
    var stream = _connection.StartLogicalReplication(
        _slot, cancellationToken, _walLocation, _options.GetOptionPairs(), bypassingStream: true);
    var buf = _connection.Connector!.ReadBuffer;

    await foreach (var xLogData in stream.WithCancellation(cancellationToken))
    {
        await buf.EnsureAsync(1);
        var messageCode = (BackendReplicationMessageCode)buf.ReadByte();
        switch (messageCode)
        {
            case BackendReplicationMessageCode.Begin:
            {
                await buf.EnsureAsync(20);
                yield return _beginMessage.Populate(
                    xLogData.WalStart,
                    xLogData.WalEnd,
                    xLogData.ServerClock,
                    new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                    TimestampHandler.FromPostgresTimestamp(buf.ReadInt64()),
                    buf.ReadUInt32());
                continue;
            }
            case BackendReplicationMessageCode.Commit:
            {
                await buf.EnsureAsync(25);
                yield return _commitMessage.Populate(
                    xLogData.WalStart,
                    xLogData.WalEnd,
                    xLogData.ServerClock,
                    buf.ReadByte(),
                    new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                    new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                    TimestampHandler.FromPostgresTimestamp(buf.ReadInt64()));
                continue;
            }
            case BackendReplicationMessageCode.Origin:
            {
                await buf.EnsureAsync(9);
                yield return _originMessage.Populate(
                    xLogData.WalStart,
                    xLogData.WalEnd,
                    xLogData.ServerClock,
                    new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                    await buf.ReadNullTerminatedString(async: true, cancellationToken));
                continue;
            }
            case BackendReplicationMessageCode.Relation:
            {
                await buf.EnsureAsync(6);
                var relationId = buf.ReadUInt32();
                var ns = await buf.ReadNullTerminatedString(async: true, cancellationToken);
                var relationName = await buf.ReadNullTerminatedString(async: true, cancellationToken);
                await buf.EnsureAsync(3);
                var relationReplicaIdentitySetting = (char)buf.ReadByte();
                var numColumns = buf.ReadUInt16();
                if (numColumns > _relationalMessageColumns.Length)
                {
                    _relationalMessageColumns = new RelationMessage.Column[numColumns];
                }
                for (var i = 0; i < numColumns; i++)
                {
                    await buf.EnsureAsync(2);
                    var flags = buf.ReadByte();
                    var columnName = await buf.ReadNullTerminatedString(async: true, cancellationToken);
                    await buf.EnsureAsync(8);
                    var dateTypeId = buf.ReadUInt32();
                    var typeModifier = buf.ReadInt32();
                    _relationalMessageColumns[i] = new RelationMessage.Column(flags, columnName, dateTypeId, typeModifier);
                }
                yield return _relationMessage.Populate(
                    xLogData.WalStart,
                    xLogData.WalEnd,
                    xLogData.ServerClock,
                    relationId,
                    ns,
                    relationName,
                    relationReplicaIdentitySetting,
                    new ReadOnlyMemory<RelationMessage.Column>(_relationalMessageColumns, 0, numColumns));
                continue;
            }
            case BackendReplicationMessageCode.Type:
            {
                await buf.EnsureAsync(5);
                var typeId = buf.ReadUInt32();
                var ns = await buf.ReadNullTerminatedString(async: true, cancellationToken);
                var name = await buf.ReadNullTerminatedString(async: true, cancellationToken);
                yield return _typeMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, typeId, ns, name);
                continue;
            }
            case BackendReplicationMessageCode.Insert:
            {
                await buf.EnsureAsync(7);
                var relationId = buf.ReadUInt32();
                var tupleDataType = (TupleType)buf.ReadByte();
                Debug.Assert(tupleDataType == TupleType.NewTuple);
                var numColumns = buf.ReadUInt16();
                var newRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);
                yield return _insertMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId, newRow);
                continue;
            }
            case BackendReplicationMessageCode.Update:
            {
                await buf.EnsureAsync(7);
                var relationId = buf.ReadUInt32();
                var tupleType = (TupleType)buf.ReadByte();
                var numColumns = buf.ReadUInt16();
                switch (tupleType)
                {
                    case TupleType.Key:
                        var keyRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);
                        await buf.EnsureAsync(3);
                        tupleType = (TupleType)buf.ReadByte();
                        Debug.Assert(tupleType == TupleType.NewTuple);
                        numColumns = buf.ReadUInt16();
                        var newRow = await ReadTupleDataAsync(ref _tupleDataArray2, numColumns);
                        yield return _indexUpdateMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId, newRow, keyRow);
                        continue;
                    case TupleType.OldTuple:
                        var oldRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);
                        await buf.EnsureAsync(3);
                        tupleType = (TupleType)buf.ReadByte();
                        Debug.Assert(tupleType == TupleType.NewTuple);
                        numColumns = buf.ReadUInt16();
                        newRow = await ReadTupleDataAsync(ref _tupleDataArray2, numColumns);
                        yield return _fullUpdateMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId, newRow, oldRow);
                        continue;
                    case TupleType.NewTuple:
                        newRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);
                        yield return _updateMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId, newRow);
                        continue;
                    default:
                        throw new NotSupportedException($"The tuple type '{tupleType}' is not supported.");
                }
            }
            case BackendReplicationMessageCode.Delete:
            {
                await buf.EnsureAsync(7);
                var relationId = buf.ReadUInt32();
                var tupleDataType = (TupleType)buf.ReadByte();
                var numColumns = buf.ReadUInt16();
                switch (tupleDataType)
                {
                    case TupleType.Key:
                        yield return _keyDeleteMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId,
                            await ReadTupleDataAsync(ref _tupleDataArray1, numColumns));
                        continue;
                    case TupleType.OldTuple:
                        yield return _fullDeleteMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId,
                            await ReadTupleDataAsync(ref _tupleDataArray1, numColumns));
                        continue;
                    default:
                        throw new NotSupportedException($"The tuple type '{tupleDataType}' is not supported.");
                }
            }
            case BackendReplicationMessageCode.Truncate:
            {
                await buf.EnsureAsync(9);
                // Don't dare to truncate more than 2147483647 tables at once!
                var numRels = checked((int)buf.ReadUInt32());
                var truncateOptions = (TruncateOptions)buf.ReadByte();
                var relationIds = new uint[numRels];
                await buf.EnsureAsync(checked(numRels * 4));
                for (var i = 0; i < numRels; i++)
                {
                    relationIds[i] = buf.ReadUInt32();
                }
                yield return _truncateMessage.Populate(
                    xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, truncateOptions, relationIds);
                continue;
            }
            default:
                throw new NotSupportedException(
                    $"Invalid message code {messageCode} in Logical Replication Protocol.");
        }
    }

    // We never get here - the above is an endless loop that terminates only with a cancellation exception

    ValueTask<ReadOnlyMemory<TupleData>> ReadTupleDataAsync(ref TupleData[] array, ushort numberOfColumns)
    {
        if (array.Length < numberOfColumns)
        {
            array = new TupleData[numberOfColumns];
        }
        var nonRefArray = array;
        return ReadTupleDataAsync2();

        async ValueTask<ReadOnlyMemory<TupleData>> ReadTupleDataAsync2()
        {
            for (var i = 0; i < numberOfColumns; i++)
            {
                await buf.EnsureAsync(1);
                var subMessageKind = (TupleDataKind)buf.ReadByte();
                switch (subMessageKind)
                {
                    case TupleDataKind.Null:
                    case TupleDataKind.UnchangedToastedValue:
                        nonRefArray[i] = new TupleData(subMessageKind);
                        continue;
                    case TupleDataKind.TextValue:
                        await buf.EnsureAsync(4);
                        var len = buf.ReadInt32();
                        await buf.EnsureAsync(len);
                        nonRefArray![i] = new TupleData(buf.ReadString(len));
                        continue;
static void Main(string[] args)
{
    if (args.Length != 4)
    {
        Logger.Log("usage: program.exe <train.csv> <conf.csv> <id> <target_name>");
        return;
    }

    string dataPath = args[0];
    string confPath = args[1];
    string id = args[2];
    string target = args[3];

    Logger.Log("data: " + dataPath);
    Logger.Log("conf : " + confPath);
    Logger.Log("id : " + id);
    Logger.Log("target : " + target);

    try
    {
        var fmgr = new FactorManager();
        fmgr.Load(confPath, target);
        fmgr.TargDep = 10;
        fmgr.FactorDep = 100;
        fmgr.SelectFactors();
        var cols = fmgr.VisibleFactors.ToArray();

        //_loader.MaxRowsLoaded = 10000;
        _loader.AddTargetColumn(target);
        _loader.AddIdColumn(id);
        _loader.CollectDistrStat = true;
        _loader.Load(dataPath);

        var statDict = new Dictionary<TupleData, Dictionary<TupleData, StatItem>>();

        // collecting stats
        int idx = 0;
        int n = 4;
        var iter = new CombinationIterator(cols, n);
        while (iter.MoveNext())
        {
            idx++;
            var cval = iter.Current;
            var ftuple = new TupleData(cval);
            statDict.Add(ftuple, new Dictionary<TupleData, StatItem>());

            foreach (var row in _loader.Rows)
            {
                var vtuple = CreateValueTuple(cval, row);
                if (!statDict[ftuple].ContainsKey(vtuple))
                {
                    statDict[ftuple].Add(vtuple, new StatItem());
                }
                if (row.Target <= 1)
                {
                    statDict[ftuple][vtuple].Count++;
                    statDict[ftuple][vtuple].Targets += (int)row.Target;
                }
            }

            foreach (var t in statDict[ftuple].Keys)
            {
                statDict[ftuple][t].TargetProb = statDict[ftuple][t].Targets / (double)statDict[ftuple][t].Count;
            }
            Logger.Log(ftuple + " done;");
        }

        // creating modified file
        using (var sw = new StreamWriter(new FileStream(dataPath + "_cat.csv", FileMode.Create, FileAccess.Write)))
        {
            idx = 0;
            sw.WriteLine(CreateHeader(cols, n));
            sw.Flush();

            double defProb = (double)_loader.TargetStat[1] / (_loader.TargetStat[1] + _loader.TargetStat[0]);

            foreach (var row in _loader.Rows)
            {
                idx++;
                var sb = new StringBuilder();
                iter = new CombinationIterator(cols, n);
                sb.Append(row.Id);
                while (iter.MoveNext())
                {
                    var cval = iter.Current;
                    var ftuple = new TupleData(cval);
                    var t = CreateValueTuple(cval, row);
                    double prob = statDict[ftuple].ContainsKey(t) ? statDict[ftuple][t].TargetProb : defProb;
                    sb.Append(";" + prob.ToString("F05"));
                }
                sb.Append(";" + row.Target);
                sw.WriteLine(sb);

                if (idx % 12345 == 0)
                {
                    Logger.Log(idx + " lines written;");
                    sw.Flush();
                }
            }
            Logger.Log(idx + " lines written; done;");
        }
    }
    catch (Exception e)
    {
        Logger.Log(e);
    }
}