//internal static List<(int id, string name, List<string> groups)> alphabets = new List<(int id, string name, List<string> groups)>() //{ // (0, /*program.string_debug*/($@"Normal",new List<string>(){ // /*program.string_debug*/($@"A", /*program.string_debug*/($@"R", /*program.string_debug*/($@"N", /*program.string_debug*/($@"D", /*program.string_debug*/($@"C", /*program.string_debug*/($@"Q", /*program.string_debug*/($@"E", /*program.string_debug*/($@"G", /*program.string_debug*/($@"H", /*program.string_debug*/($@"I", /*program.string_debug*/($@"L", /*program.string_debug*/($@"K", /*program.string_debug*/($@"M", /*program.string_debug*/($@"F", /*program.string_debug*/($@"P", /*program.string_debug*/($@"S", /*program.string_debug*/($@"T", /*program.string_debug*/($@"W", /*program.string_debug*/($@"Y", /*program.string_debug*/($@"V" // }), // (1, /*program.string_debug*/($@"Physicochemical",new List<string>(){ // /*program.string_debug*/($@"AVFPMILW", /*program.string_debug*/($@"DE", /*program.string_debug*/($@"RK", /*program.string_debug*/($@"STYHCNGQ" // }), // (2, /*program.string_debug*/($@"Hydrophobicity",new List<string>(){ // /*program.string_debug*/($@"AGILPV", /*program.string_debug*/($@"FYW", /*program.string_debug*/($@"DENQRHSTK", /*program.string_debug*/($@"CM" // }), // (3, /*program.string_debug*/($@"UniProtKb",new List<string>(){ // /*program.string_debug*/($@"LAGVIP", /*program.string_debug*/($@"DE", /*program.string_debug*/($@"ST", /*program.string_debug*/($@"RKH", /*program.string_debug*/($@"FYW", /*program.string_debug*/($@"NQ", /*program.string_debug*/($@"CM" // }), // (4, /*program.string_debug*/($@"PdbSum",new List<string>(){ // /*program.string_debug*/($@"HKR", /*program.string_debug*/($@"DE", /*program.string_debug*/($@"STNQ", /*program.string_debug*/($@"AVLIM", /*program.string_debug*/($@"FYW", /*program.string_debug*/($@"PG", /*program.string_debug*/($@"C" // }), // (5, /*program.string_debug*/($@"Venn",new List<string>(){ // 
// /*program.string_debug*/($@"ILV", /*program.string_debug*/($@"TS", /*program.string_debug*/($@"AGCS", /*program.string_debug*/($@"VPAGCSTDN", /*program.string_debug*/($@"NQ", /*program.string_debug*/($@"HKR", /*program.string_debug*/($@"DE", /*program.string_debug*/($@"CSTNQDEHKRYW", /*program.string_debug*/($@"ILVAMFGP", /*program.string_debug*/($@"DEHKR", /*program.string_debug*/($@"ILVACTMFYWHK", /*program.string_debug*/($@"FYWH", /*program.string_debug*/($@"MC" // }) //};

/// <summary>
/// Builds a single vector from the <c>score</c> of every entry in <paramref name="pssm"/>,
/// optionally normalising it and/or transforming it via <c>distances</c> or <c>intervals</c>.
/// </summary>
/// <param name="pssm">PSSM entries to read scores from. May be null or empty.</param>
/// <param name="pssm_value_type">
/// Selects the post-processing step: <c>distances</c> calls <c>distances(x, normalise_all)</c>,
/// <c>intervals</c> calls <c>intervals(x, normalise_all)</c>; any other value leaves the scores as they are.
/// </param>
/// <param name="normalise_all">
/// When true, the scores are passed through <c>normalise_array</c> first, and the flag is also
/// forwarded to the <c>distances</c>/<c>intervals</c> helper.
/// </param>
/// <returns>
/// The resulting vector. A null or empty <paramref name="pssm"/> starts from the single-element
/// placeholder <c>{ 0 }</c> instead of an empty array.
/// </returns>
internal static double[] pssm_to_vector1(List<info_blast_pssm_entry> pssm, enum_pssm_value_type pssm_value_type, bool normalise_all = false)
{
    // A missing/empty PSSM is represented by a one-element zero vector. Otherwise the projection of a
    // non-empty list is itself non-empty, so no further null/empty guard is needed at this point.
    var x = (pssm == null || pssm.Count == 0)
        ? new[] { 0d }
        : pssm.Select(a => a.score).ToArray();

    if (normalise_all)
    {
        x = normalise_array(x);
    }

    if (pssm_value_type == enum_pssm_value_type.distances)
    {
        x = distances(x, normalise_all);
    }
    else if (pssm_value_type == enum_pssm_value_type.intervals)
    {
        x = intervals(x, normalise_all);
    }

    return x;
}
internal static List <(string alphabet, List <(string col_aa, int lag, double[] values)> x)> pssm_to_vector20col_DT(List <info_blast_pssm_entry> pssm, int max_lag, enum_pssm_value_type pssm_value_type, bool normalise_col = false, bool normalise_all = false, CancellationTokenSource cts = null) { const string method_name = nameof(pssm_to_vector20col_DT); // join all cols with same amino acid, calc average //var cols = pssm.Select(a => a.position_aa).Distinct().ToList(); //var cols = /*program.string_debug*/($@"ARNDCQEGHILKMFPSTWYV".ToCharArray(); //pssm.Select(a => a.position_aa).Distinct().ToList(); using var i_cts = new CancellationTokenSource(); if (cts == null) { cts = i_cts; } var result = new List <(string alphabet, List <(string col_aa, int lag, double[] values)> x)>(); var tasks = new List <Task <(string name, List <(string col_aa, int lag, double[] values)> vector)> >(); var tasks_start_time = DateTime.Now; foreach (var l_alphabet in feature_calcs.aa_alphabets) { var alphabet = l_alphabet; for (var l_lag = 1; l_lag <= max_lag; l_lag++) { var lag = l_lag; Task <(string name, List <(string col_aa, int lag, double[] values)> vector)> task = Task.Run(() => { if (cts != null && cts.IsCancellationRequested) { return(default);
/// <summary>
/// For every alphabet in <c>feature_calcs.aa_alphabets</c>, builds one score vector per amino-acid
/// group: the scores of all PSSM entries whose <c>position_aa</c> occurs in the group's
/// <c>group_amino_acids</c> string (ordinal comparison).
/// </summary>
/// <param name="pssm">PSSM entries to read scores from. May be null or empty.</param>
/// <param name="pssm_value_type">
/// Selects the per-group post-processing step: <c>distances</c> or <c>intervals</c> call the helper
/// of the same name; any other value leaves the scores as they are.
/// </param>
/// <param name="normalise_col">
/// When true, each group's scores are passed through <c>normalise_array</c> individually; the flag
/// is also forwarded to the <c>distances</c>/<c>intervals</c> helper.
/// </param>
/// <param name="normalise_all">
/// When true, every group vector of an alphabet is re-normalised with
/// <c>normalise_array(values, min, max)</c> using the minimum and maximum taken over all of that
/// alphabet's group vectors.
/// </param>
/// <returns>
/// One <c>(alphabet name, group vectors)</c> pair per alphabet. A group with no matching scores is
/// represented by the single-element placeholder <c>{ 0 }</c>, never by null or an empty array.
/// </returns>
internal static List<(string alphabet, List<(string col_aa, double[] values)> x)> pssm_to_vector20col(List<info_blast_pssm_entry> pssm, enum_pssm_value_type pssm_value_type, bool normalise_col = false, bool normalise_all = false)
{
    // Substitutes the one-element zero placeholder for a null or empty array.
    double[] zero_if_empty(double[] v) => (v == null || v.Length == 0) ? new[] { 0d } : v;

    var result = new List<(string alphabet, List<(string col_aa, double[] values)> x)>();

    foreach (var alphabet in feature_calcs.aa_alphabets)
    {
        var vector = new List<(string col_aa, double[] values)>();

        foreach (var c1 in alphabet.groups)
        {
            // Scores of every entry whose amino acid belongs to this group.
            double[] x = (pssm == null || pssm.Count == 0)
                ? new[] { 0d }
                : pssm.Where(a => c1.group_amino_acids.Contains(a.position_aa, StringComparison.Ordinal)).Select(a => a.score).ToArray();
            x = zero_if_empty(x);

            if (normalise_col)
            {
                x = normalise_array(x);
            }
            x = zero_if_empty(x);

            if (pssm_value_type == enum_pssm_value_type.distances)
            {
                x = distances(x, normalise_col);
            }
            else if (pssm_value_type == enum_pssm_value_type.intervals)
            {
                x = intervals(x, normalise_col);
            }
            x = zero_if_empty(x);

            vector.Add((c1.group_amino_acids, x));
        }

        // Skip when the alphabet produced no groups: Min()/Max() throw on an empty sequence.
        if (normalise_all && vector.Count > 0)
        {
            var values = vector.SelectMany(a => a.values).ToList();
            var min = values.Min();
            var max = values.Max();

            // The list elements are value tuples (structs), so List.ForEach would only modify a copy.
            // Write the normalised array back through the indexer so the stored element is replaced.
            for (var i = 0; i < vector.Count; i++)
            {
                vector[i] = (vector[i].col_aa, normalise_array(vector[i].values, min, max));
            }
        }

        result.Add((alphabet.name, vector));
    }

    return result;
}