Пример #1
0
        /// <summary>
        /// Computes Pearson's chi-square statistic for two features from their
        /// per-value statistics and the statistics of the observed value pairs.
        /// </summary>
        /// <param name="col1Stats">Per-value statistics of the first feature; <c>ItemProb</c> of each entry is read.</param>
        /// <param name="col2Stats">Per-value statistics of the second feature; <c>ItemProb</c> of each entry is read.</param>
        /// <param name="commonStats">Statistics keyed by (value1, value2) pairs; a pair that is absent counts as zero observations.</param>
        /// <param name="rowscount">Total number of rows, used to turn probabilities into expected counts.</param>
        /// <returns>The sum of (observed - expected)^2 / expected over all value combinations.</returns>
        private static double GetChi2Stat(Dictionary <FType, StatItem> col1Stats,
                                          Dictionary <FType, StatItem> col2Stats,
                                          Dictionary <TupleData, StatItem> commonStats,
                                          int rowscount)
        {
            double chi2 = 0;

            // Iterate key/value pairs so the per-value statistics do not have to be
            // looked up again through the indexer for every cell.
            foreach (var entry1 in col1Stats)
            {
                // rowscount * p1 is invariant for the inner loop; the multiplication
                // order (rowscount * p1) * p2 is kept as in the original expression.
                double rowExpected = rowscount * entry1.Value.ItemProb;

                foreach (var entry2 in col2Stats)
                {
                    // expected number of pairs if the features were independent
                    double expected = rowExpected * entry2.Value.ItemProb;

                    // A cell with zero expected count would yield 0/0 (NaN) or x/0
                    // (Infinity) and poison the whole sum, so it contributes nothing.
                    if (expected == 0)
                    {
                        continue;
                    }

                    // build the pair key
                    var pair = new TupleData(new List <object> {
                        entry1.Key, entry2.Key
                    });

                    // observed number of pairs (may be zero when the pair never occurred)
                    int      observed = 0;
                    StatItem pairStat;
                    if (commonStats.TryGetValue(pair, out pairStat))
                    {
                        observed = pairStat.Count;
                    }

                    // chi-square contribution of this cell
                    double diff = observed - expected;
                    chi2 += (diff * diff) / expected;
                }
            }

            return(chi2);
        }
Пример #2
0
        /// <summary>
        /// Reads the <c>DATA_TYPE</c> value of a row, trims it and removes every
        /// parenthesised run of digits (for example <c>(6)</c>) from it.
        /// </summary>
        /// <param name="tuple">Row whose <c>DATA_TYPE</c> value is read.</param>
        /// <returns>The cleaned type name, or <c>null</c> when the value is not a string.</returns>
        private string GetSanitizedDataType(TupleData tuple)
        {
            string rawType = tuple["DATA_TYPE"] as string;

            // Nothing to sanitize when the column is missing or not a string.
            if (rawType == null)
            {
                return null;
            }

            return Regex.Replace(rawType.Trim(), @"\(\d+\)", "");
        }
Пример #3
0
        /// <summary>
        /// Calls <c>builder.AddKey</c> once for every row produced by
        /// <paramref name="command"/>, naming the key <c>PK_</c> plus the table name.
        /// </summary>
        /// <param name="builder">Model builder that receives the keys.</param>
        /// <param name="command">Command whose rows supply TABLE_NAME, COLUMN_NAME and ORDINAL_POSITION.</param>
        private async Task AddPrimaryKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string table  = row["TABLE_NAME"] as string;
                string column = row["COLUMN_NAME"] as string;

                // The key name is derived from the table name rather than read from the row.
                string pkName = "PK_" + table;

                // The ordinal is parsed from its string form; a null value makes int.Parse throw.
                object ordinal  = row["ORDINAL_POSITION"];
                int    position = int.Parse(ordinal == null ? null : ordinal.ToString());

                builder.AddKey(null, table, column, pkName, position);
            }
        }
Пример #4
0
        /// <summary>
        /// Calls <c>builder.AddKey</c> once for every row produced by
        /// <paramref name="command"/>, using the schema, table, column and
        /// constraint name read from the row.
        /// </summary>
        /// <param name="builder">Model builder that receives the keys.</param>
        /// <param name="command">Command whose rows supply table_schema, table_name, column_name, constraint_name and ordinal_position.</param>
        private async Task AddPrimaryKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string schema     = row["table_schema"] as string;
                string table      = row["table_name"] as string;
                string column     = row["column_name"] as string;
                string constraint = row["constraint_name"] as string;

                // The ordinal is parsed from its string form; a null value makes int.Parse throw.
                object ordinal  = row["ordinal_position"];
                int    position = int.Parse(ordinal == null ? null : ordinal.ToString());

                builder.AddKey(schema, table, column, constraint, position);
            }
        }
Пример #5
0
        /// <summary>
        /// Calls <c>builder.AddReference</c> once for every row produced by
        /// <paramref name="command"/>. The referenced key name is built as
        /// <c>pk_</c> + "table" and the foreign key name as
        /// <c>fk_{tbl_name}_{table}_{id}</c>.
        /// </summary>
        /// <param name="builder">Model builder that receives the references.</param>
        /// <param name="command">Command whose rows supply tbl_name, from, table, id and seq.</param>
        public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string childTable     = row["tbl_name"] as string;
                string childColumn    = row["from"] as string;
                string principalKey   = $"pk_{row["table"]}";
                string constraintName = $"fk_{childTable}_{row["table"]}_{row["id"]}";

                // "seq" is unboxed as a long; the index passed on is seq + 1.
                long sequence = (long)row["seq"];

                builder.AddReference(null, childTable, childColumn, constraintName, principalKey, (int)sequence + 1);
            }
        }
Пример #6
0
        /// <summary>
        /// Calls <c>builder.AddColumn</c> once for every row produced by
        /// <paramref name="command"/>.
        /// </summary>
        /// <param name="builder">Model builder that receives the columns.</param>
        /// <param name="command">Command whose rows supply TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE and EXTRA.</param>
        private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
        {
            foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
            {
                string tableName  = tuple["TABLE_NAME"] as string;
                string columnName = tuple["COLUMN_NAME"] as string;
                string typeName   = tuple["DATA_TYPE"] as string;
                bool   isNullable = (tuple["IS_NULLABLE"] as string == "YES");

                // EXTRA is read with the same safe cast as the other columns: a
                // missing or non-string value means "not an identity column"
                // instead of an InvalidCastException / NullReferenceException.
                string extra      = tuple["EXTRA"] as string;
                bool   isIdentity = extra != null && extra.Contains("auto_increment");

                builder.AddColumn(null, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity);
            }
        }
Пример #7
0
        /// <summary>
        /// Calls <c>builder.AddReference</c> once for every row produced by
        /// <paramref name="command"/>. The referenced key name is built as
        /// <c>PK_</c> + REFERENCED_TABLE_NAME.
        /// </summary>
        /// <param name="builder">Model builder that receives the references.</param>
        /// <param name="command">Command whose rows supply TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, CONSTRAINT_NAME and ORDINAL_POSITION.</param>
        public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string childTable   = row["TABLE_NAME"] as string;
                string childColumn  = row["COLUMN_NAME"] as string;
                string principalKey = $"PK_{row["REFERENCED_TABLE_NAME"]}";
                string constraint   = row["CONSTRAINT_NAME"] as string;

                // The ordinal is parsed from its string form; a null value makes int.Parse throw.
                object ordinal  = row["ORDINAL_POSITION"];
                int    position = int.Parse(ordinal == null ? null : ordinal.ToString());

                builder.AddReference(null, childTable, childColumn, constraint, principalKey, position);
            }
        }
Пример #8
0
        /// <summary>
        /// Builds a <c>TupleData</c> from the coefficients of <paramref name="row"/>
        /// that belong to the given columns, in the order the columns are listed.
        /// </summary>
        /// <param name="cval">Column names; each is resolved to a row index through <c>_loader.RowIdxByColumn</c>.</param>
        /// <param name="row">Data row whose <c>Coeffs</c> are read.</param>
        /// <returns>A tuple holding one value per entry of <paramref name="cval"/>.</returns>
        private static TupleData CreateValueTuple(string[] cval, DataRow <double> row)
        {
            // Size the list for the actual number of columns instead of a fixed 2.
            var vals = new List <object>(cval.Length);

            foreach (string column in cval)
            {
                int    cidx = _loader.RowIdxByColumn[column];
                double dval = row.Coeffs[cidx];
                vals.Add(dval);
            }

            return(new TupleData(vals));
        }
Пример #9
0
        /// <summary>
        /// Calls <c>builder.AddReference</c> once for every row produced by
        /// <paramref name="command"/>, using the schema, table, column and both
        /// constraint names read from the row.
        /// </summary>
        /// <param name="builder">Model builder that receives the references.</param>
        /// <param name="command">Command whose rows supply table_schema, table_name, column_name, unique_constraint_name, constraint_name and ordinal_position.</param>
        public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string schema       = row["table_schema"] as string;
                string childTable   = row["table_name"] as string;
                string childColumn  = row["column_name"] as string;
                string principalKey = row["unique_constraint_name"] as string;
                string constraint   = row["constraint_name"] as string;

                // The ordinal is parsed from its string form; a null value makes int.Parse throw.
                object ordinal  = row["ordinal_position"];
                int    position = int.Parse(ordinal == null ? null : ordinal.ToString());

                builder.AddReference(schema, childTable, childColumn, constraint, principalKey, position);
            }
        }
Пример #10
0
        /// <summary>
        /// Calls <c>builder.AddReference</c> once for every row produced by
        /// <paramref name="command"/>; the schema argument is always <c>null</c>.
        /// </summary>
        /// <param name="builder">Model builder that receives the references.</param>
        /// <param name="command">Command whose rows supply TABLE_NAME, COLUMN_NAME, UNIQUE_CONSTRAINT_NAME, CONSTRAINT_NAME and POSITION.</param>
        public async Task AddForeignKeysAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string childTable   = row["TABLE_NAME"] as string;
                string childColumn  = row["COLUMN_NAME"] as string;
                string principalKey = row["UNIQUE_CONSTRAINT_NAME"] as string;
                string constraint   = row["CONSTRAINT_NAME"] as string;

                // The position is parsed from its string form; a null value makes int.Parse throw.
                object ordinal  = row["POSITION"];
                int    position = int.Parse(ordinal == null ? null : ordinal.ToString());

                // No schema is passed for these rows.
                builder.AddReference(null, childTable, childColumn, constraint, principalKey, position);
            }
        }
Пример #11
0
        /// <summary>
        /// Calls <c>builder.AddColumn</c> once for every row produced by
        /// <paramref name="command"/>; the schema is always <c>null</c> and no
        /// table is marked as ignored.
        /// </summary>
        /// <param name="builder">Model builder that receives the columns.</param>
        /// <param name="command">Command whose rows supply TABLE_NAME, COLUMN_NAME, DATA_TYPE, NULLABLE and IDENTITY_COLUMN.</param>
        private async Task AddTablesAndColumnAsync(ModelBuilder builder, DbCommand command)
        {
            foreach (TupleData tuple in await TupleData.FromDbCommandAsync(command))
            {
                string tableSchema = null;

                // Null-conditional Trim: a missing or non-string TABLE_NAME yields
                // null (as for the other columns) instead of a NullReferenceException.
                string tableName   = (tuple["TABLE_NAME"] as string)?.Trim();
                string columnName  = tuple["COLUMN_NAME"] as string;
                string typeName    = this.GetSanitizedDataType(tuple);
                bool   isNullable  = (tuple["NULLABLE"] as string == "Y");
                bool   isIdentity  = (tuple["IDENTITY_COLUMN"] as string == "YES");
                bool   ignoreTable = false;

                builder.AddColumn(tableSchema, tableName, columnName, typeName, isNullable: isNullable, isIdentity: isIdentity, ignoreTable: ignoreTable);
            }
        }
        /// <summary>
        /// Calls <c>builder.AddColumn</c> once for every row produced by
        /// <paramref name="command"/>, flagging tables for which
        /// <c>IsIgnoredTable</c> returns true.
        /// </summary>
        /// <param name="builder">Model builder that receives the columns.</param>
        /// <param name="command">Command whose rows supply TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE and IS_IDENTITY.</param>
        private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string schema   = row["TABLE_SCHEMA"] as string;
                string table    = row["TABLE_NAME"] as string;
                string column   = row["COLUMN_NAME"] as string;
                string dataType = row["DATA_TYPE"] as string;
                bool   nullable = string.Equals(row["IS_NULLABLE"] as string, "YES");

                // IS_IDENTITY is unboxed as a nullable int; only the value 1 marks an identity column.
                int? identityFlag = (int?)row["IS_IDENTITY"];
                bool identity     = identityFlag == 1;

                bool ignored = this.IsIgnoredTable(schema, table);

                builder.AddColumn(schema, table, column, dataType, isNullable: nullable, isIdentity: identity, ignoreTable: ignored);
            }
        }
Пример #13
0
        /// <summary>
        /// Calls <c>builder.AddColumn</c> once for every row produced by
        /// <paramref name="command"/>. A column counts as identity when
        /// is_identity equals "YES" or serial_seq is not null.
        /// </summary>
        /// <param name="builder">Model builder that receives the columns.</param>
        /// <param name="command">Command whose rows supply table_schema, table_name, column_name, data_type, is_nullable, is_identity and serial_seq.</param>
        private async Task AddTablesAndColumnAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string schema   = row["table_schema"] as string;
                string table    = row["table_name"] as string;
                string column   = row["column_name"] as string;
                string dataType = row["data_type"] as string;
                bool   nullable = string.Equals(row["is_nullable"] as string, "YES");

                // serial_seq is only consulted when is_identity is not "YES".
                bool identity = true;
                if (row["is_identity"] as string != "YES")
                {
                    identity = row["serial_seq"] != null;
                }

                builder.AddColumn(schema, table, column, dataType, isNullable: nullable, isIdentity: identity, ignoreTable: false);
            }
        }
Пример #14
0
        /// <summary>
        /// Returns the "type" value of a row with everything from the first
        /// opening parenthesis onwards removed.
        /// </summary>
        /// <param name="tuple">Row whose "type" value is read.</param>
        /// <returns>The shortened type name, the unchanged name when it has no '(', or <c>null</c> when the value is not a string.</returns>
        private string GetNormalizedTypeName(TupleData tuple)
        {
            string declaredType = tuple["type"] as string;

            if (declaredType == null)
            {
                return null;
            }

            // Keep only the part in front of the first '(' when there is one.
            int cut = declaredType.IndexOf('(');

            return cut < 0 ? declaredType : declaredType.Substring(0, cut);
        }
Пример #15
0
        /// <summary>
        /// Builds a ';'-separated header line: <c>_loader.IdName</c> first, then one
        /// entry per item produced by a <c>CombinationIterator</c> over
        /// <paramref name="cols"/> and <paramref name="n"/>, then <c>_loader.TargetName</c>.
        /// </summary>
        /// <param name="cols">Column names passed to the combination iterator.</param>
        /// <param name="n">Second argument of the combination iterator.</param>
        /// <returns>The assembled header string.</returns>
        private static string CreateHeader(string[] cols, int n)
        {
            var header       = new StringBuilder();
            var combinations = new CombinationIterator(cols, n);

            header.Append(_loader.IdName);

            // One header cell per combination, each rendered through TupleData's string form.
            for (bool more = combinations.MoveNext(); more; more = combinations.MoveNext())
            {
                header.Append(";" + new TupleData(combinations.Current));
            }

            return header.Append(";" + _loader.TargetName).ToString();
        }
Пример #16
0
        /// <summary>
        /// Calls <c>builder.AddColumn</c> once for every row produced by
        /// <paramref name="command"/> and, when the row's "pk" value is positive,
        /// also calls <c>builder.AddKey</c> with the name <c>pk_</c> + table name.
        /// </summary>
        /// <param name="builder">Model builder that receives the columns and keys.</param>
        /// <param name="command">Command whose rows supply sql, tbl_name, name, type, pk, autoincr and notnull.</param>
        private async Task AddTablesAndColumnsAsync(ModelBuilder builder, DbCommand command)
        {
            var rows = await TupleData.FromDbCommandAsync(command);

            foreach (TupleData row in rows)
            {
                string createSql = row["sql"] as string;
                string table     = row["tbl_name"] as string;
                string column    = row["name"] as string;
                string dataType  = this.GetNormalizedTypeName(row);
                int    pkOrdinal = (int)(long)row["pk"];
                bool   partOfKey = pkOrdinal > 0;

                // Only key columns can be auto-increment: either the "autoincr" value is
                // positive or the helper finds it in the table's SQL definition.
                bool autoIncrement = partOfKey &&
                                     ((long)row["autoincr"] > 0 || this.HasAutoIncrementInSqlDefinition(column, createSql));

                // A column is nullable only when it is not part of the key and "notnull" is 0.
                bool nullable = pkOrdinal == 0 && (long)row["notnull"] == 0;
                bool ignored  = this.IsIgnoredTable(table);

                builder.AddColumn(null, table, column, dataType, nullable, isIdentity: autoIncrement, ignoreTable: ignored);

                if (partOfKey)
                {
                    builder.AddKey(null, table, column, "pk_" + table, pkOrdinal);
                }
            }
        }
Пример #17
0
        /// <summary>
        /// Entry point. Validates the command-line arguments, loads the data file,
        /// and for every pair of columns appends one line of statistics
        /// (value counts, chi-square for source and modified values, correlation
        /// and information value) to the file named filename + "_stats.csv".
        /// </summary>
        /// <param name="args">
        /// 2 to 4 arguments: data file path, mode ("full" or "short"),
        /// optional target column name, optional factor (defaults to 1).
        /// </param>
        static void Main(string[] args)
        {
            // Fewer than 2 or more than 4 arguments: print usage and stop.
            if (args.Length <= 1 || args.Length >= 5)
            {
                Logger.Log("usage: program.exe <datafile.csv> <full/short> [target_name [factor=1.0]]");
                return;
            }

            string filename   = args[0];
            string stype      = args[1].ToLower();
            string targetname = args.Length >= 3 ? args[2] : null;

            // scaling multiplier used when turning feature values into categories;
            // a decimal comma is accepted by replacing it with a dot before parsing
            double factor = double.Parse(args.Length >= 4 ? args[3].Replace(',', '.') : "1", CultureInfo.InvariantCulture);

            if (stype != "full" && stype != "short")
            {
                Logger.Log("type can be only 'full' or 'short'");
                return;
            }

            // NOTE(review): the message below says "sort mode" although the checked
            // mode is "short" — presumably a typo in the message; confirm.
            if (stype == "short" && targetname == null)
            {
                Logger.Log("you must specify target_name in sort mode");
                return;
            }

            Logger.Log("datafile = " + filename);
            Logger.Log("type = " + stype);
            Logger.Log("target_name = " + targetname);
            Logger.Log("factor = " + factor.ToString("F04"));

            if (!File.Exists(filename))
            {
                Logger.Log("file " + filename + " not found");
                return;
            }

            // load the data
            var loader = targetname != null?(new DataLoader <FType>(targetname)) : new DataLoader <FType>();

            //loader.MaxRowsLoaded = 10000;
            if (targetname != null)
            {
                loader.RemoveSkipColumn(targetname);
            }
            loader.Load(filename);
            var cols = loader.FileIdxByColumn.Keys.ToArray();

            // output file
            string statname = filename + "_stats.csv";

            // if part of the data has already been computed, find out which pairs,
            // so that they are not computed again
            var counted = LoadCountedData(statname);

            // per-feature statistics that have already been computed
            // NOTE(review): this local dictionary is only read (ContainsKey / indexer)
            // in this method and never written to, so stat1Exist / stat2Exist below
            // are always false as the code stands — confirm whether caching was intended.
            var factorStatDict = new Dictionary <string, FactorStat <FType> >();

            // start the computation; the file is appended to when earlier results
            // exist and created otherwise
            using (var sw = new StreamWriter(new FileStream(statname, counted.Count > 0 ? FileMode.Append : FileMode.Create, FileAccess.Write),
                                             Encoding.UTF8))
            {
                // the header line is written only for a fresh file
                if (counted.Count == 0)
                {
                    sw.WriteLine("Factor1;Factor2;src_cnt1;src_cnt2;mod_cnt1;mod_cnt2;src_chi2;src_chi2max;src_chi2coeff;mod_chi2;mod_chi2max;mod_chi2coeff;corr;corrabs;inf_val");
                }

                // visit every unordered pair of columns (i < j)
                for (int i = 0; i < cols.Length - 1; i++)
                {
                    for (int j = i + 1; j < cols.Length; j++)
                    {
                        var col1 = cols[i]; // first feature
                        var col2 = cols[j]; // second feature

                        // in "short" mode only pairs that include the target column are processed
                        if (stype == "short")
                        {
                            if (targetname != null)
                            {
                                if (col1 != loader.TargetName && col2 != loader.TargetName)
                                {
                                    continue;
                                }
                            }
                        }

                        // skip pairs that are already present in the existing stats file
                        if (counted.ContainsKey(col1) && counted[col1].ContainsKey(col2))
                        {
                            continue;
                        }

                        int col1idx = loader.RowIdxByColumn[col1];
                        int col2idx = loader.RowIdxByColumn[col2];

                        // whether the statistics have already been computed
                        bool stat1Exist = factorStatDict.ContainsKey(col1);
                        bool stat2Exist = factorStatDict.ContainsKey(col2);

                        // per-feature statistics objects (modified and source values)
                        var col1Stats  = stat1Exist ? factorStatDict[col1].ModifiedStat : new Dictionary <FType, StatItem>();
                        var col2Stats  = stat2Exist ? factorStatDict[col2].ModifiedStat : new Dictionary <FType, StatItem>();
                        var scol1Stats = stat1Exist ? factorStatDict[col1].SourceStat : new Dictionary <FType, StatItem>();
                        var scol2Stats = stat2Exist ? factorStatDict[col2].SourceStat : new Dictionary <FType, StatItem>();

                        var f1stat = stat1Exist ? factorStatDict[col1] : new FactorStat <FType>();
                        var f2stat = stat2Exist ? factorStatDict[col2] : new FactorStat <FType>();

                        // statistics for pairs of feature values
                        var commonStats  = new Dictionary <TupleData, StatItem>(); // for modified values
                        var scommonStats = new Dictionary <TupleData, StatItem>(); // for source values

                        // pair statistics; F1Avg/F1Stddev, F2Avg/F2Stddev and Correlation are read below
                        var colStats = PairStat <FType> .GetPairStat(loader, col1, col2);

                        int rowscount  = loader.TotalDataLines; // total number of rows
                        int allTargets = 0;                     // number of rows with Target > 0

                        // gather the statistics over all rows
                        foreach (var row in loader.Rows)
                        {
                            // source feature values
                            FType fval1 = row.Values[col1idx];
                            FType fval2 = row.Values[col2idx];

                            // modified feature values: subtract the average, divide by the
                            // standard deviation, scale by factor, round and cast to long
                            // NOTE(review): no guard against a zero F1Stddev / F2Stddev is visible here.
                            FType val1 = (long)(Math.Round((fval1 - colStats.F1Avg) / colStats.F1Stddev * factor));
                            FType val2 = (long)(Math.Round((fval2 - colStats.F2Avg) / colStats.F2Stddev * factor));

                            if (!stat1Exist) // may already have been computed
                            {
                                if (!col1Stats.ContainsKey(val1))
                                {
                                    col1Stats.Add(val1, new StatItem());
                                }
                                var stat1 = col1Stats[val1];
                                stat1.Count++; // occurrence count of the first feature's value (modified)
                                stat1.Targets += row.Target > 0 ? 1 : 0;

                                if (!scol1Stats.ContainsKey(fval1))
                                {
                                    scol1Stats.Add(fval1, new StatItem());
                                }
                                var sstat1 = scol1Stats[fval1];
                                sstat1.Count++; // occurrence count of the first feature's value (source)
                                sstat1.Targets += row.Target > 0 ? 1 : 0;
                            }

                            if (!stat2Exist) // may already have been computed
                            {
                                if (!col2Stats.ContainsKey(val2))
                                {
                                    col2Stats.Add(val2, new StatItem());
                                }
                                var stat2 = col2Stats[val2];
                                stat2.Count++; // occurrence count of the second feature's value (modified)
                                stat2.Targets += row.Target > 0 ? 1 : 0;

                                if (!scol2Stats.ContainsKey(fval2))
                                {
                                    scol2Stats.Add(fval2, new StatItem());
                                }
                                var sstat2 = scol2Stats[fval2];
                                sstat2.Count++; // occurrence count of the second feature's value (source)
                                sstat2.Targets += row.Target > 0 ? 1 : 0;
                            }

                            allTargets += row.Target > 0 ? 1 : 0;

                            // occurrence statistics of value pairs (modified values)
                            var tuple = new TupleData(new List <object> {
                                val1, val2
                            });
                            if (!commonStats.ContainsKey(tuple))
                            {
                                commonStats.Add(tuple, new StatItem());
                            }
                            var stat = commonStats[tuple];
                            stat.Count++;  // number of such pairs

                            // occurrence statistics of value pairs (source values)
                            var stuple = new TupleData(new List <object> {
                                fval1, fval2
                            });
                            if (!scommonStats.ContainsKey(stuple))
                            {
                                scommonStats.Add(stuple, new StatItem());
                            }
                            var fstat = scommonStats[stuple];
                            fstat.Count++;  // number of such pairs
                        }

                        // store the computed statistics on the per-feature objects
                        if (!stat1Exist)
                        {
                            f1stat.ModifiedStat  = col1Stats;
                            f1stat.SourceStat    = scol1Stats;
                            f1stat.ModifiedCount = col1Stats.Count;
                            f1stat.SourceCount   = scol1Stats.Count;
                        }
                        if (!stat2Exist)
                        {
                            f2stat.ModifiedStat  = col2Stats;
                            f2stat.SourceStat    = scol2Stats;
                            f2stat.ModifiedCount = col2Stats.Count;
                            f2stat.SourceCount   = scol2Stats.Count;
                        }


                        // next, compute the occurrence probabilities (count / total rows)
                        if (!stat1Exist)
                        {
                            foreach (var v in col1Stats.Values)
                            {
                                // probability of a value of the first feature (modified)
                                v.ItemProb = v.Count / (FType)rowscount;
                            }

                            foreach (var v in scol1Stats.Values)
                            {
                                // probability of a value of the first feature (source)
                                v.ItemProb = v.Count / (FType)rowscount;
                            }
                        }

                        if (!stat2Exist)
                        {
                            foreach (var v in col2Stats.Values)
                            {
                                // probability of a value of the second feature (modified)
                                v.ItemProb = v.Count / (FType)rowscount;
                            }

                            foreach (var v in scol2Stats.Values)
                            {
                                // probability of a value of the second feature (source)
                                v.ItemProb = v.Count / (FType)rowscount;
                            }
                        }

                        foreach (var v in commonStats.Values)
                        {
                            // probability of a pair (modified values)
                            v.ItemProb = v.Count / (FType)rowscount;
                        }

                        foreach (var v in scommonStats.Values)
                        {
                            // probability of a pair (source values)
                            v.ItemProb = v.Count / (FType)rowscount;
                        }


                        double chi2  = 0; // chi-square for the modified values
                        double schi2 = 0; // chi-square for the source values

                        // compute the statistic for the modified values
                        chi2 = GetChi2Stat(col1Stats, col2Stats, commonStats, rowscount);

                        // compute the statistic for the source values
                        schi2 = GetChi2Stat(scol1Stats, scol2Stats, scommonStats, rowscount);

                        // (distinct values of feature 1 - 1) * (distinct values of feature 2 - 1),
                        // passed to Util.InvChi2CDF together with 0.95
                        int cnt  = (f1stat.ModifiedCount - 1) * (f2stat.ModifiedCount - 1);
                        int scnt = (f1stat.SourceCount - 1) * (f2stat.SourceCount - 1);

                        // ratio of each statistic to the value returned by Util.InvChi2CDF
                        double chi2max    = Util.InvChi2CDF(cnt, 0.95);
                        double schi2max   = Util.InvChi2CDF(scnt, 0.95);
                        double chifactor  = chi2 / chi2max;
                        double schifactor = schi2 / schi2max;


                        // information value: computed only when one column of the pair is the
                        // target, using the statistics of the other column; otherwise stays 0
                        double iv = 0;
                        if (col1 == loader.TargetName || col2 == loader.TargetName)
                        {
                            if (col1 == loader.TargetName)
                            {
                                iv = GetInvormationValue(f2stat, allTargets, rowscount);
                            }
                            else
                            {
                                iv = GetInvormationValue(f1stat, allTargets, rowscount);
                            }
                        }

                        // one output line per pair, in the column order of the header above
                        // NOTE(review): schi2, chi2 and iv are formatted with InvariantCulture,
                        // while schi2max, schifactor, chi2max, chifactor and both correlation
                        // fields are formatted without an explicit culture — confirm that the
                        // mixed formatting is intended.
                        sw.WriteLine("{0};{1};{2};{3};{4};{5};{6};{7};{8};{9};{10};{11};{12};{13};{14}",
                                     col1,
                                     col2,
                                     f1stat.SourceCount,
                                     f2stat.SourceCount,
                                     f1stat.ModifiedCount,
                                     f2stat.ModifiedCount,
                                     schi2.ToString("F09", CultureInfo.InvariantCulture),
                                     schi2max,
                                     schifactor,
                                     chi2.ToString("F09", CultureInfo.InvariantCulture),
                                     chi2max,
                                     chifactor,
                                     colStats.Correlation.ToString(),
                                     Math.Abs(Convert.ToDecimal(colStats.Correlation)).ToString(),
                                     iv.ToString("F09", CultureInfo.InvariantCulture)
                                     );
                        // flush after every pair so finished pairs are on disk before the next one starts
                        sw.Flush();

                        Logger.Log(col1 + "," + col2);
                    }
                }

                // explicit close; the enclosing using statement disposes the writer as well
                sw.Close();
            }
        }
Пример #18
0
        async IAsyncEnumerator <PgOutputReplicationMessage> StartReplicationInternal(CancellationToken cancellationToken)
        {
            var stream = _connection.StartLogicalReplication(
                _slot, cancellationToken, _walLocation, _options.GetOptionPairs(), bypassingStream: true);
            var buf = _connection.Connector !.ReadBuffer;

            await foreach (var xLogData in stream.WithCancellation(cancellationToken))
            {
                await buf.EnsureAsync(1);

                var messageCode = (BackendReplicationMessageCode)buf.ReadByte();
                switch (messageCode)
                {
                case BackendReplicationMessageCode.Begin:
                {
                    await buf.EnsureAsync(20);

                    yield return(_beginMessage.Populate(
                                     xLogData.WalStart,
                                     xLogData.WalEnd,
                                     xLogData.ServerClock,
                                     new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                                     TimestampHandler.FromPostgresTimestamp(buf.ReadInt64()),
                                     buf.ReadUInt32()
                                     ));

                    continue;
                }

                case BackendReplicationMessageCode.Commit:
                {
                    await buf.EnsureAsync(25);

                    yield return(_commitMessage.Populate(
                                     xLogData.WalStart,
                                     xLogData.WalEnd,
                                     xLogData.ServerClock,
                                     buf.ReadByte(),
                                     new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                                     new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                                     TimestampHandler.FromPostgresTimestamp(buf.ReadInt64())
                                     ));

                    continue;
                }

                case BackendReplicationMessageCode.Origin:
                {
                    await buf.EnsureAsync(9);

                    yield return(_originMessage.Populate(
                                     xLogData.WalStart,
                                     xLogData.WalEnd,
                                     xLogData.ServerClock,
                                     new NpgsqlLogSequenceNumber(buf.ReadUInt64()),
                                     await buf.ReadNullTerminatedString(async: true, cancellationToken)));

                    continue;
                }

                case BackendReplicationMessageCode.Relation:
                {
                    await buf.EnsureAsync(6);

                    var relationId = buf.ReadUInt32();
                    var ns         = await buf.ReadNullTerminatedString(async : true, cancellationToken);

                    var relationName = await buf.ReadNullTerminatedString(async : true, cancellationToken);

                    await buf.EnsureAsync(3);

                    var relationReplicaIdentitySetting = (char)buf.ReadByte();
                    var numColumns = buf.ReadUInt16();
                    if (numColumns > _relationalMessageColumns.Length)
                    {
                        _relationalMessageColumns = new RelationMessage.Column[numColumns];
                    }
                    for (var i = 0; i < numColumns; i++)
                    {
                        await buf.EnsureAsync(2);

                        var flags      = buf.ReadByte();
                        var columnName = await buf.ReadNullTerminatedString(async : true, cancellationToken);

                        await buf.EnsureAsync(8);

                        var dateTypeId   = buf.ReadUInt32();
                        var typeModifier = buf.ReadInt32();
                        _relationalMessageColumns[i] = new RelationMessage.Column(flags, columnName, dateTypeId, typeModifier);
                    }

                    yield return(_relationMessage.Populate(
                                     xLogData.WalStart,
                                     xLogData.WalEnd,
                                     xLogData.ServerClock,
                                     relationId,
                                     ns,
                                     relationName,
                                     relationReplicaIdentitySetting,
                                     new ReadOnlyMemory <RelationMessage.Column>(_relationalMessageColumns, 0, numColumns)
                                     ));

                    continue;
                }

                case BackendReplicationMessageCode.Type:
                {
                    await buf.EnsureAsync(5);

                    var typeId = buf.ReadUInt32();
                    var ns     = await buf.ReadNullTerminatedString(async : true, cancellationToken);

                    var name = await buf.ReadNullTerminatedString(async : true, cancellationToken);

                    yield return(_typeMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, typeId, ns, name));

                    continue;
                }

                case BackendReplicationMessageCode.Insert:
                {
                    await buf.EnsureAsync(7);

                    var relationId    = buf.ReadUInt32();
                    var tupleDataType = (TupleType)buf.ReadByte();
                    Debug.Assert(tupleDataType == TupleType.NewTuple);
                    var numColumns = buf.ReadUInt16();
                    var newRow     = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);

                    yield return(_insertMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, relationId, newRow));

                    continue;
                }

                case BackendReplicationMessageCode.Update:
                {
                    await buf.EnsureAsync(7);

                    var relationId = buf.ReadUInt32();
                    var tupleType  = (TupleType)buf.ReadByte();
                    var numColumns = buf.ReadUInt16();
                    switch (tupleType)
                    {
                    case TupleType.Key:
                        var keyRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);

                        await buf.EnsureAsync(3);

                        tupleType = (TupleType)buf.ReadByte();
                        Debug.Assert(tupleType == TupleType.NewTuple);
                        numColumns = buf.ReadUInt16();
                        var newRow = await ReadTupleDataAsync(ref _tupleDataArray2, numColumns);

                        yield return(_indexUpdateMessage.Populate(xLogData.WalStart, xLogData.WalEnd,
                                                                  xLogData.ServerClock, relationId, newRow, keyRow));

                        continue;

                    case TupleType.OldTuple:
                        var oldRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);

                        await buf.EnsureAsync(3);

                        tupleType = (TupleType)buf.ReadByte();
                        Debug.Assert(tupleType == TupleType.NewTuple);
                        numColumns = buf.ReadUInt16();
                        newRow     = await ReadTupleDataAsync(ref _tupleDataArray2, numColumns);

                        yield return(_fullUpdateMessage.Populate(xLogData.WalStart, xLogData.WalEnd,
                                                                 xLogData.ServerClock, relationId, newRow, oldRow));

                        continue;

                    case TupleType.NewTuple:
                        newRow = await ReadTupleDataAsync(ref _tupleDataArray1, numColumns);

                        yield return(_updateMessage.Populate(xLogData.WalStart, xLogData.WalEnd,
                                                             xLogData.ServerClock, relationId, newRow));

                        continue;

                    default:
                        throw new NotSupportedException($"The tuple type '{tupleType}' is not supported.");
                    }
                }

                case BackendReplicationMessageCode.Delete:
                {
                    await buf.EnsureAsync(7);

                    var relationId    = buf.ReadUInt32();
                    var tupleDataType = (TupleType)buf.ReadByte();
                    var numColumns    = buf.ReadUInt16();
                    switch (tupleDataType)
                    {
                    case TupleType.Key:
                        yield return(_keyDeleteMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock,
                                                                relationId, await ReadTupleDataAsync(ref _tupleDataArray1, numColumns)));

                        continue;

                    case TupleType.OldTuple:
                        yield return(_fullDeleteMessage.Populate(xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock,
                                                                 relationId, await ReadTupleDataAsync(ref _tupleDataArray1, numColumns)));

                        continue;

                    default:
                        throw new NotSupportedException($"The tuple type '{tupleDataType}' is not supported.");
                    }
                }

                case BackendReplicationMessageCode.Truncate:
                {
                    await buf.EnsureAsync(9);

                    // Don't dare to truncate more than 2147483647 tables at once!
                    var numRels         = checked ((int)buf.ReadUInt32());
                    var truncateOptions = (TruncateOptions)buf.ReadByte();
                    var relationIds     = new uint[numRels];
                    await buf.EnsureAsync(checked (numRels * 4));

                    for (var i = 0; i < numRels; i++)
                    {
                        relationIds[i] = buf.ReadUInt32();
                    }

                    yield return(_truncateMessage.Populate(
                                     xLogData.WalStart, xLogData.WalEnd, xLogData.ServerClock, truncateOptions, relationIds));

                    continue;
                }

                default:
                    throw new NotSupportedException(
                              $"Invalid message code {messageCode} in Logical Replication Protocol.");
                }
            }

            // We never get here - the above is an endless loop that terminates only with a cancellation exception

            ValueTask <ReadOnlyMemory <TupleData> > ReadTupleDataAsync(ref TupleData[] array, ushort numberOfColumns)
            {
                if (array.Length < numberOfColumns)
                {
                    array = new TupleData[numberOfColumns];
                }
                var nonRefArray = array;

                return(ReadTupleDataAsync2());

                async ValueTask <ReadOnlyMemory <TupleData> > ReadTupleDataAsync2()
                {
                    for (var i = 0; i < numberOfColumns; i++)
                    {
                        await buf.EnsureAsync(1);

                        var subMessageKind = (TupleDataKind)buf.ReadByte();
                        switch (subMessageKind)
                        {
                        case TupleDataKind.Null:
                        case TupleDataKind.UnchangedToastedValue:
                            nonRefArray[i] = new TupleData(subMessageKind);
                            continue;

                        case TupleDataKind.TextValue:
                            await buf.EnsureAsync(4);

                            var len = buf.ReadInt32();
                            await buf.EnsureAsync(len);

                            nonRefArray ![i] = new TupleData(buf.ReadString(len));
                            continue;
Пример #19
0
        /// <summary>
        /// Program entry point. Loads the factor configuration and the training data, collects
        /// per-value-tuple target statistics for every item produced by
        /// <c>CombinationIterator(cols, 4)</c>, and writes a derived file named
        /// <c>&lt;train.csv&gt;_cat.csv</c> next to the input.
        /// </summary>
        /// <remarks>
        /// Each output line has the form <c>id;prob_1;...;prob_k;target</c>, where every
        /// <c>prob_i</c> is the observed target rate of the row's value tuple for the i-th
        /// combination. Only rows with <c>Target &lt;= 1</c> contribute to the statistics. When a
        /// value tuple has no contributing rows, the overall target rate
        /// (<c>TargetStat[1] / (TargetStat[1] + TargetStat[0])</c>) is written instead.
        /// Any exception raised during processing is logged and swallowed.
        /// </remarks>
        /// <param name="args">
        /// Exactly four values: training data path, configuration path, id column name and
        /// target column name. Any other count prints the usage line and returns.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                Logger.Log("usage: program.exe <train.csv> <conf.csv> <id> <target_name>");
                return;
            }

            string dataPath = args[0];
            string confPath = args[1];
            string id       = args[2];
            string target   = args[3];

            Logger.Log("data: " + dataPath);
            Logger.Log("conf : " + confPath);
            Logger.Log("id : " + id);
            Logger.Log("target : " + target);

            try
            {
                var fmgr = new FactorManager();
                fmgr.Load(confPath, target);
                fmgr.TargDep   = 10;
                fmgr.FactorDep = 100;
                fmgr.SelectFactors();
                var cols = fmgr.VisibleFactors.ToArray();

                //_loader.MaxRowsLoaded = 10000;
                _loader.AddTargetColumn(target);
                _loader.AddIdColumn(id);
                _loader.CollectDistrStat = true;
                _loader.Load(dataPath);

                // factor tuple -> (value tuple -> accumulated statistics)
                var statDict = new Dictionary <TupleData, Dictionary <TupleData, StatItem> >();

                // collecting stats
                int n    = 4; // size passed to CombinationIterator (presumably the combination length)
                var iter = new CombinationIterator(cols, n);
                while (iter.MoveNext())
                {
                    var cval   = iter.Current;
                    var ftuple = new TupleData(cval);

                    // Keep a direct reference so the inner loops do a single lookup per row.
                    var valueStats = new Dictionary <TupleData, StatItem>();
                    statDict.Add(ftuple, valueStats);

                    foreach (var row in _loader.Rows)
                    {
                        var vtuple = CreateValueTuple(cval, row);

                        StatItem rowStat;
                        if (!valueStats.TryGetValue(vtuple, out rowStat))
                        {
                            rowStat = new StatItem();
                            valueStats.Add(vtuple, rowStat);
                        }

                        // Rows with Target > 1 register the value tuple but do not contribute.
                        if (row.Target <= 1)
                        {
                            rowStat.Count++;
                            rowStat.Targets += (int)row.Target;
                        }
                    }

                    foreach (var item in valueStats.Values)
                    {
                        // A tuple seen only on non-contributing rows has Count == 0; dividing
                        // would store NaN, so its TargetProb is left untouched and the writer
                        // below substitutes the default probability.
                        if (item.Count > 0)
                        {
                            item.TargetProb = item.Targets / (double)item.Count;
                        }
                    }

                    Logger.Log(ftuple + " done;");
                }

                // creating modified file
                using (var sw = new StreamWriter(new FileStream(dataPath + "_cat.csv", FileMode.Create, FileAccess.Write)))
                {
                    int idx = 0;
                    sw.WriteLine(CreateHeader(cols, n));
                    sw.Flush();

                    // Overall target rate, used when a value tuple has no usable statistics.
                    double defProb = (double)_loader.TargetStat[1] / (_loader.TargetStat[1] + _loader.TargetStat[0]);

                    foreach (var row in _loader.Rows)
                    {
                        idx++;

                        var sb = new StringBuilder();
                        iter = new CombinationIterator(cols, n);
                        sb.Append(row.Id);

                        while (iter.MoveNext())
                        {
                            var cval   = iter.Current;
                            var ftuple = new TupleData(cval);
                            var t      = CreateValueTuple(cval, row);

                            StatItem stat;
                            bool hasStat = statDict[ftuple].TryGetValue(t, out stat) && stat.Count > 0;
                            double prob  = hasStat ? stat.TargetProb : defProb;

                            sb.Append(";" + prob.ToString("F05"));
                        }
                        sb.Append(";" + row.Target);
                        sw.WriteLine(sb);

                        // Periodic progress report and flush.
                        if (idx % 12345 == 0)
                        {
                            Logger.Log(idx + " lines writed;");
                            sw.Flush();
                        }
                    }
                    Logger.Log(idx + " lines writed; done;");
                }
            }
            catch (Exception e)
            {
                Logger.Log(e);
            }
        }