/// <summary>
/// Compares two mnemonic sequences through their n-gram sets and stores the
/// resulting metrics in <c>this.similarity</c>.
/// </summary>
/// <param name="mnemonics1">First mnemonic sequence.</param>
/// <param name="mnemonics2">Second mnemonic sequence.</param>
/// <param name="n">
/// N-gram size handed to <c>Ngram</c>; also used as a divisor when normalising
/// the assignment total.
/// </param>
public void ngramset_edit_distance(List <string> mnemonics1, List <string> mnemonics2, int n)
{
    Ngram ngram1 = new Ngram(mnemonics1, n);
    Ngram ngram2 = new Ngram(mnemonics2, n);

    // The normalisation uses the size of the FIRST n-gram set only.
    int set_size = ngram1.ngramSet.Count;
    // Disabled alternative, kept for reference:
    //int reduced_set_size = set_size - Math.Max(mnemonics1.FindIndex(delegate (string data) { return data == ""; }), mnemonics2.FindIndex(delegate (string data) { return data == ""; }));

    // Build the pairwise cost matrix and let the Hungarian algorithm pick the assignment.
    int[,] matrix = ngram_matrix(ngram1.ngramSet, ngram2.ngramSet);
    var hungarian = new HungarianAlgorithm(matrix);
    int[] hungarian_indexes = hungarian.Run();

    // set_size and n are both int, so without the cast an integer-returning getTotal
    // would make this an integer division and silently drop the fractional part.
    // NOTE(review): getTotal's return type is not visible here; the cast is a no-op
    // if it already returns double - confirm.
    double ngram_edit_distance = (double)getTotal(matrix, hungarian_indexes) / set_size / n;

    double edit_distance = calc_edit_distance(mnemonics1, mnemonics2);
    double slope1_ratio = calculate_slope1_ratio(hungarian_indexes);
    double index_similarity2 = calculate_continuous_equal_slope(hungarian_indexes, 2);
    double index_similarity3 = calculate_continuous_equal_slope(hungarian_indexes, 3);

    this.similarity = new ngram_similarity(ngram_edit_distance, edit_distance, slope1_ratio, index_similarity2, index_similarity3);
}
/// <summary>
/// Builds the <c>set1.Count x set2.Count</c> matrix of pairwise costs between the
/// n-grams of two sets, where each cost is a fixed base value minus the
/// (thresholded) LCS value reported by <c>Ngram.lcs</c>.
/// </summary>
/// <param name="set1">
/// First n-gram set (matrix rows). Must contain at least one n-gram: the length
/// of <c>set1[0]</c> is read to derive the LCS threshold, so an empty list throws.
/// </param>
/// <param name="set2">Second n-gram set (matrix columns).</param>
/// <returns>The cost matrix, indexed as <c>[set1 index, set2 index]</c>.</returns>
private int[,] ngram_matrix(List <List <string> > set1, List <List <string> > set2)
{
    // Use the List<T>.Count property once instead of calling LINQ Count() on
    // every loop-condition check.
    int rows = set1.Count;
    int cols = set2.Count;

    // Integer halving then doubling: equals rows when rows is even, rows - 1 when odd.
    int standard = rows / 2;
    int base_cost = 2 * standard;

    int[,] matrix = new int[rows, cols];
    Ngram ngram = new Ngram();

    // LCS values below 70% of the first n-gram's length are treated as no match.
    double min_lcs = set1[0].Count * 0.7;

    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            int lcs = ngram.lcs(set1[i], set2[j]);
            if (lcs < min_lcs)
            {
                lcs = 0;
            }
            matrix[i, j] = base_cost - lcs;
        }
    }

    return matrix;
}
/// <summary>
/// Creates an analyzer with a fresh <c>Ngram</c> helper and a new, not-yet-started
/// <c>Stopwatch</c>.
/// </summary>
/// <param name="output_file_name">
/// Accepted for callers' sake; this constructor body does not read it.
/// </param>
public SimilarityAnalyzer(string output_file_name)
{
    this.ngram = new Ngram();
    this.timer = new Stopwatch();
}
/// <summary>
/// Creates an identificator and initialises its <c>ngram</c> member with a
/// default-constructed <c>Ngram</c>.
/// </summary>
public PackerIdentificator()
{
    this.ngram = new Ngram();
}