public string predictAccentsWithMultiMatches(string sentence, int nResults, bool getWeight = true)
{
    LinkedHashMap<string, double> output = new LinkedHashMap<string, double>();
    string @in = Utils.normaliseString(sentence);
    string lowercaseIn = @in.ToLower();
    string[] words = ("0 " + lowercaseIn + " 0").Split(' ');

    Graph graph = new VariableGraph();
    Dictionary<int, string> idxWordMap = new Dictionary<int, string>();
    int index = 0;
    int[] numberP = new int[words.Length];
    string[,] possibleChange = new string[words.Length, maxp];
    int[,] indices = new int[words.Length, maxp];
    int nVertex = 0;
    index = buildGraph(words, graph, idxWordMap, index, numberP, possibleChange, indices, nVertex);

    // Yen's algorithm for the k shortest paths through the word graph.
    YenTopKShortestPathsAlg yenAlg = new YenTopKShortestPathsAlg(graph);
    List<Accent.KShortestPaths.Model.Path> shortest_paths_list =
        yenAlg.get_shortest_paths(graph.get_vertex(0), graph.get_vertex(index - 1), nResults);

    foreach (Accent.KShortestPaths.Model.Path path in shortest_paths_list)
    {
        List<BaseVertex> pathVertex = path.get_vertices();
        string text = "";
        for (int i = 1; i < pathVertex.Count - 1; i++)
        {
            BaseVertex vertex = pathVertex[i];
            text += idxWordMap[vertex.get_id()] + " ";

            // Post-correction of frequent mis-accented phrases. string.Replace returns
            // a new string, so the result must be assigned back to `text`.
            if (text.Contains("đầm dáng"))
            {
                text = text.Replace("đầm dáng", "đảm đang");
            }
            if (text.Contains("chào bán"))
            {
                text = Regex.Replace(text, "chào bán", "chào bạn");
            }
            if (text.Contains("bị đầu tay"))
            {
                text = Regex.Replace(text, "bị đầu tay", "bị đau tay");
            }
            if (text.Contains("tay tôi bị đầu"))
            {
                text = Regex.Replace(text, "tay tôi bị đầu", "tay tôi bị đau");
            }
        }
        output.Add(processOutput(@in, text.Trim()), path.get_weight());
    }

    // Do not attach the measured weights to the accented results.
    if (!getWeight)
    {
        return output.ToString2();
    }
    return output.ToString();
}
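// --- Usage sketch (illustrative, not part of the source listing) ---
// Only the method signature above is taken from this code; the enclosing class name
// `AccentPredictor` and its construction are assumptions.
//
//     var predictor = new AccentPredictor();
//     // Ask for the 3 best accented readings of an unaccented sentence, ranked by
//     // the weight of their path through the word graph.
//     string ranked = predictor.predictAccentsWithMultiMatches("tay toi bi dau", 3, getWeight: true);
//     Console.WriteLine(ranked);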
/// <summary>
/// Return the weight associated with the input edge.
/// </summary>
/// <param name="source"> the source vertex </param>
/// <param name="sink"> the sink vertex </param>
/// <returns> the edge weight, or Graph.DISCONNECTED if the edge or either endpoint has been removed </returns>
public override double get_edge_weight(BaseVertex source, BaseVertex sink)
{
    int source_id = source.get_id();
    int sink_id = sink.get_id();

    if (_rem_vertex_id_set.Contains(source_id) ||
        _rem_vertex_id_set.Contains(sink_id) ||
        _rem_edge_set.Contains(new Pair<int, int>(source_id, sink_id)))
    {
        return Graph.DISCONNECTED;
    }
    return base.get_edge_weight(source, sink);
}
/// <summary>
/// Get the set of vertices preceding the input vertex.
/// </summary>
/// <param name="vertex"> the vertex whose predecessors are requested </param>
/// <returns> the predecessors that survive the vertex/edge removal filters </returns>
public override HashSet<BaseVertex> get_precedent_vertices(BaseVertex vertex)
{
    HashSet<BaseVertex> ret_set = new HashSet<BaseVertex>();
    if (!_rem_vertex_id_set.Contains(vertex.get_id()))
    {
        int ending_vertex_id = vertex.get_id();
        HashSet<BaseVertex> pre_vertex_set = base.get_precedent_vertices(vertex);
        foreach (BaseVertex cur_vertex in pre_vertex_set)
        {
            int starting_vertex_id = cur_vertex.get_id();
            if (_rem_vertex_id_set.Contains(starting_vertex_id) ||
                _rem_edge_set.Contains(new Pair<int, int>(starting_vertex_id, ending_vertex_id)))
            {
                continue;
            }
            // Keep predecessors that are still visible in the graph.
            ret_set.Add(cur_vertex);
        }
    }
    return ret_set;
}
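// Sketch of the removal-overlay idea behind the two overrides above: removed vertices
// and edges are recorded in side sets and filtered out at query time, so the base
// graph is never mutated and can be restored cheaply. The type and member names below
// (RemovalOverlaySketch, IsEdgeVisible) are illustrative and not part of the library.
public sealed class RemovalOverlaySketch
{
    private readonly HashSet<int> _removedVertexIds = new HashSet<int>();
    private readonly HashSet<(int From, int To)> _removedEdges = new HashSet<(int From, int To)>();

    public void RemoveVertex(int id) => _removedVertexIds.Add(id);
    public void RemoveEdge(int from, int to) => _removedEdges.Add((from, to));

    // An edge is visible only if neither endpoint nor the edge itself has been removed,
    // mirroring the checks in get_edge_weight and get_precedent_vertices above.
    public bool IsEdgeVisible(int from, int to)
    {
        return !_removedVertexIds.Contains(from)
            && !_removedVertexIds.Contains(to)
            && !_removedEdges.Contains((from, to));
    }
}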
/* (non-Javadoc)
 * @see edu.asu.emit.qyan.alg.model.abstracts.BaseGraph#get_edge_weight(edu.asu.emit.qyan.alg.model.abstracts.BaseVertex, edu.asu.emit.qyan.alg.model.abstracts.BaseVertex)
 */
public virtual double get_edge_weight(BaseVertex source, BaseVertex sink)
{
    Pair<int, int> key = new Pair<int, int>(source.get_id(), sink.get_id());
    // A missing entry means the two vertices are not connected.
    return _vertex_pair_weight_index.ContainsKey(key)
        ? _vertex_pair_weight_index[key]
        : DISCONNECTED;
}
/* (non-Javadoc)
 * @see edu.asu.emit.qyan.alg.model.abstracts.BaseGraph#get_precedent_vertices(edu.asu.emit.qyan.alg.model.abstracts.BaseVertex)
 */
public virtual HashSet<BaseVertex> get_precedent_vertices(BaseVertex vertex)
{
    // Fan-in index: vertex id -> set of vertices with an edge into that vertex.
    return _fanin_vertices_index.ContainsKey(vertex.get_id())
        ? _fanin_vertices_index[vertex.get_id()]
        : new HashSet<BaseVertex>();
}
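// Sketch of how the two indices queried above are typically filled when an edge
// (source -> sink, weight) is inserted. Only the field names _vertex_pair_weight_index
// and _fanin_vertices_index come from this listing; the method name add_edge_sketch,
// its modifiers, and its exact placement are assumptions.
protected virtual void add_edge_sketch(BaseVertex source, BaseVertex sink, double weight)
{
    // Forward lookup used by get_edge_weight: (source id, sink id) -> weight.
    _vertex_pair_weight_index[new Pair<int, int>(source.get_id(), sink.get_id())] = weight;

    // Reverse (fan-in) lookup used by get_precedent_vertices: sink id -> {predecessors}.
    if (!_fanin_vertices_index.ContainsKey(sink.get_id()))
    {
        _fanin_vertices_index.Add(sink.get_id(), new HashSet<BaseVertex>());
    }
    _fanin_vertices_index[sink.get_id()].Add(source);
}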
public Path next()
{
    // 3.1 prepare for removing vertices and arcs
    Path cur_path = _path_candidates.poll();
    _result_list.Add(cur_path);
    BaseVertex cur_derivation = _path_derivation_vertex_index[cur_path];

    // Recompute the hash code with the same formula Java uses; relying on
    // List.GetHashCode() here made the algorithm produce wrong results.
    int cur_path_hash = get_hashcode(cur_path.get_vertices().GetRange(0, cur_path.get_vertices().IndexOf(cur_derivation)));
    int count = _result_list.Count;

    // 3.2 remove the vertices and arcs in the graph
    for (int i = 0; i < count - 1; ++i)
    {
        Path cur_result_path = _result_list[i];
        int cur_dev_vertex_id = cur_result_path.get_vertices().IndexOf(cur_derivation);
        if (cur_dev_vertex_id < 0)
        {
            continue;
        }

        // Note: this condition makes sure that all candidates are considered;
        // the algorithm as stated in the paper removes some candidates by mistake.
        int path_hash = get_hashcode(cur_result_path.get_vertices().GetRange(0, cur_dev_vertex_id));
        if (path_hash != cur_path_hash)
        {
            continue;
        }

        BaseVertex cur_succ_vertex = cur_result_path.get_vertices()[cur_dev_vertex_id + 1];
        _graph.remove_edge(new Pair<int, int>(cur_derivation.get_id(), cur_succ_vertex.get_id()));
    }

    int path_length = cur_path.get_vertices().Count;
    List<BaseVertex> cur_path_vertex_list = cur_path.get_vertices();
    for (int i = 0; i < path_length - 1; ++i)
    {
        _graph.remove_vertex(cur_path_vertex_list[i].get_id());
        _graph.remove_edge(new Pair<int, int>(cur_path_vertex_list[i].get_id(), cur_path_vertex_list[i + 1].get_id()));
    }

    // 3.3 calculate the shortest tree rooted at the target vertex in the graph
    DijkstraShortestPathAlg reverse_tree = new DijkstraShortestPathAlg(_graph);
    reverse_tree.get_shortest_path_flower(_target_vertex);

    // 3.4 recover the deleted vertices, update the cost and identify the new candidate results
    bool is_done = false;
    for (int i = path_length - 2; i >= 0 && !is_done; --i)
    {
        // 3.4.1 get the vertex to be recovered
        BaseVertex cur_recover_vertex = cur_path_vertex_list[i];
        _graph.recover_removed_vertex(cur_recover_vertex.get_id());

        // 3.4.2 check if we should stop continuing in the next iteration
        if (cur_recover_vertex.get_id() == cur_derivation.get_id())
        {
            is_done = true;
        }

        // 3.4.3 calculate cost using forward star form
        Path sub_path = reverse_tree.update_cost_forward(cur_recover_vertex);

        // 3.4.4 get one candidate result if possible
        if (sub_path != null)
        {
            ++_generated_path_num;

            // 3.4.4.1 get the prefix from the concerned path
            double cost = 0;
            List<BaseVertex> pre_path_list = new List<BaseVertex>();
            reverse_tree.correct_cost_backward(cur_recover_vertex);

            for (int j = 0; j < path_length; ++j)
            {
                BaseVertex cur_vertex = cur_path_vertex_list[j];
                if (cur_vertex.get_id() == cur_recover_vertex.get_id())
                {
                    break;
                }
                cost += _graph.get_edge_weight_of_graph(cur_path_vertex_list[j], cur_path_vertex_list[j + 1]);
                pre_path_list.Add(cur_vertex);
            }
            pre_path_list.AddRange(sub_path.get_vertices());

            // 3.4.4.2 compose a candidate
            sub_path.set_weight(cost + sub_path.get_weight());
            sub_path.get_vertices().Clear();
            foreach (BaseVertex cur_vertex in pre_path_list)
            {
                sub_path.get_vertices().Add(cur_vertex);
            }

            // 3.4.4.3 put it in the candidate pool if new
            if (!_path_derivation_vertex_index.ContainsKey(sub_path))
            {
                _path_candidates.add(sub_path);
                _path_derivation_vertex_index[sub_path] = cur_recover_vertex;
            }
        }

        // 3.4.5 restore the edge
        BaseVertex succ_vertex = cur_path_vertex_list[i + 1];
        _graph.recover_removed_edge(new Pair<int, int>(cur_recover_vertex.get_id(), succ_vertex.get_id()));

        // 3.4.6 update cost if necessary
        double cost_1 = _graph.get_edge_weight(cur_recover_vertex, succ_vertex)
                        + reverse_tree.get_start_vertex_distance_index()[succ_vertex];
        if (reverse_tree.get_start_vertex_distance_index()[cur_recover_vertex] > cost_1)
        {
            reverse_tree.get_start_vertex_distance_index()[cur_recover_vertex] = cost_1;
            reverse_tree.get_predecessor_index()[cur_recover_vertex] = succ_vertex;
            reverse_tree.correct_cost_backward(cur_recover_vertex);
        }
    }

    // 3.5 restore everything
    _graph.recover_removed_edges();
    _graph.recover_removed_vertices();

    return cur_path;
}
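// Sketch of how the iteration above is consumed. get_shortest_paths presumably keeps
// taking the next() candidate until k paths are produced or the candidate pool runs
// dry; the call below mirrors the one in predictAccentsWithMultiMatches. The wrapper
// name top_k_paths_sketch is illustrative only.
public static List<Accent.KShortestPaths.Model.Path> top_k_paths_sketch(Graph graph, int sourceId, int sinkId, int k)
{
    YenTopKShortestPathsAlg yenAlg = new YenTopKShortestPathsAlg(graph);
    return yenAlg.get_shortest_paths(graph.get_vertex(sourceId), graph.get_vertex(sinkId), k);
}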