/// <summary>
/// Segments a sentence into words and pairs the words with their semantic sequence.
/// </summary>
/// <param name="sentence">The text to segment.</param>
/// <returns>
/// A tuple whose first item is the array returned by <c>DicSplit.GetInstance().Do</c>
/// and whose second item is the result of <c>GetSemanticSeq</c> for that array.
/// </returns>
public Tuple<string[], string[]> SplitToArray(string sentence)
{
    // Segmentation is delegated to the shared DicSplit instance.
    string[] segments = DicSplit.GetInstance().Do(sentence);
    string[] semantics = GetSemanticSeq(segments);

    return new Tuple<string[], string[]>(segments, semantics);
}
/// <summary>
/// Returns the cached <c>DicSplit</c> held in <c>Split</c>, populating it on first use.
/// </summary>
/// <remarks>
/// When nothing is cached yet, the instance is deserialized from <c>Init\DicSplit.ser</c>
/// if that file exists; otherwise a new <c>DicSplit</c> is constructed and serialized
/// to the same path.
/// NOTE(review): no synchronization is visible in this method - confirm whether
/// concurrent first calls are possible.
/// </remarks>
/// <returns>The value of <c>Split</c> after the steps above.</returns>
public static DicSplit GetInstance()
{
    // Fast path: something is already cached.
    if (Split != null)
    {
        return Split;
    }

    string cachePath = "Init\\DicSplit.ser";
    if (File.Exists(cachePath))
    {
        Split = Text.Serializer.DeserializeFromFile<DicSplit>(cachePath);
    }
    else
    {
        // No serialized copy on disk: build a fresh instance and persist it.
        Split = new DicSplit();
        Text.Serializer.SerializeToFile(Split, cachePath);
    }

    return Split;
}
/// <summary>
/// Segments a sentence and joins the resulting words into a single string.
/// </summary>
/// <param name="sentence">The text to segment.</param>
/// <param name="delimeter">The separator placed between adjacent words.</param>
/// <returns>
/// The words returned by <c>DicSplit.GetInstance().Do</c>, concatenated with
/// <paramref name="delimeter"/> between them.
/// </returns>
public string Split(string sentence, string delimeter)
{
    return string.Join(delimeter, DicSplit.GetInstance().Do(sentence));
}
/// <summary>
/// Segments a sentence into an array of words.
/// </summary>
/// <param name="sentence">The text to segment.</param>
/// <returns>The array returned by <c>DicSplit.GetInstance().Do</c> for the sentence.</returns>
/// <remarks>
/// This method previously contained a per-character word-state path computation
/// placed after an unconditional <c>return</c>. That code could never execute
/// (compiler warning CS0162) and has been removed; the observable behavior is unchanged.
/// </remarks>
public string[] SplitToStrings(string sentence)
{
    return DicSplit.GetInstance().Do(sentence);
}