/// <summary>
/// Collects a run of "unknown" characters (for example Japanese, Arabic or Korean
/// letters) into a single word and appends it to the current sub-sentence as a noun.
/// </summary>
/// <param name="ch">The character that starts the run; it is always part of the word.</param>
private void flexUnknowChar(char ch)
{
    var name = ch.ToString();

    // True only when the most recently read character was NOT appended to the word
    // and therefore has to be handed back to the scanner.
    bool needsRewind = false;

    while (hasNext)
    {
        var b = next();
        var text = b.ToString();

        // The run ends at the first character accepted by Is_ACN or Is_Mark.
        if (Regex_Helper.Is_ACN(text) || Regex_Helper.Is_Mark(text))
        {
            needsRewind = true;
            break;
        }

        name += b;
    }

    // Rewind only after a terminator was read. When the input simply ran out, the
    // last character read is already part of the word and must not be re-scanned.
    // NOTE(review): assumes previous() undoes exactly one next() - confirm.
    if (needsRewind)
    {
        previous();
    }

    var w = new _WordInnfo();
    w.Name = name;
    w.MaxType = WordType.Noun;
    _susentence.Words.Add(w);
}
/// <summary>
/// Reverse-segmentation helper: reads the run of letters and digits that starts at
/// <paramref name="i"/> and appends it to <paramref name="ls"/> as a single word.
/// </summary>
/// <param name="ls">Word list that receives the reverse-segmentation result.</param>
/// <param name="temp">The character sequence being reverse-segmented.</param>
/// <param name="i">
/// Current scan position. On return it holds the index of the last character that
/// was consumed into the word.
/// </param>
private void reflexNumberAlpha(List<_WordInnfo> ls, ref string temp, ref int i)
{
    var temps = temp[i].ToString();

    // Advance j over every following character that is a number or a letter.
    int j = i + 1;
    while (j < temp.Length)
    {
        var text = temp[j].ToString();
        if (!Regex_Helper.Is_Number(text) && !Regex_Helper.Is_Alpha(text))
        {
            break;
        }

        temps += temp[j];
        j++;
    }

    var w = new _WordInnfo();
    w.Name = temps;
    Lexicer.SetAlphaNumberType(w);
    ls.Add(w);

    // j is the first index that was not consumed (or temp.Length), so the last
    // consumed index is always j - 1. Deriving it this way also covers the case
    // where the run stops exactly on the final character, which must stay unread.
    i = j - 1;
}
/// <summary>
/// Cuts a letter/number token starting with <paramref name="ch"/> and appends it to
/// the current sub-sentence. Non-Chinese marks are kept inside the token so that
/// things like website addresses and math expressions stay in one piece.
/// </summary>
/// <param name="ch">The character that starts the token; it is always part of it.</param>
private void flexNumberAlpha(char ch)
{
    var name = ch.ToString();

    // True only when the most recently read character was NOT appended to the token
    // and therefore has to be handed back to the scanner.
    bool needsRewind = false;

    while (_currentPos < _context.Length - 1)
    {
        var b = next();
        var text = b.ToString();

        if (Regex_Helper.Is_AN(text))
        {
            name += b;
            continue;
        }

        // Website address / math expression: keep the mark, unless it is a Chinese
        // mark, which ends the token.
        if (Regex_Helper.Is_Mark(text) && !Regex_Helper.Is_Chinese_Mark(text))
        {
            name += b;
            continue;
        }

        needsRewind = true;
        break;
    }

    var w = new _WordInnfo()
    {
        Name = name,
    };

    // Set the word type of the fragment: it may be a number, or something else
    // (math expression, website address, ...).
    SetAlphaNumberType(w);

    // Rewind only after a terminator was read. When the input simply ran out, the
    // last character read is already part of the token and must not be re-scanned.
    // NOTE(review): assumes previous() undoes exactly one next() - confirm.
    if (needsRewind)
    {
        previous();
    }

    _susentence.Words.Add(w);
}
/// <summary>
/// Reverse-segmentation helper: reads the run of unknown characters that starts at
/// <paramref name="i"/> and appends it to <paramref name="ls"/> as a single noun.
/// </summary>
/// <param name="ls">Word list that receives the reverse-segmentation result.</param>
/// <param name="temp">The character sequence being reverse-segmented.</param>
/// <param name="i">
/// Current scan position. On return it holds the index of the last character that
/// was consumed into the word.
/// </param>
private void reflexUnknowChars(List<_WordInnfo> ls, ref string temp, ref int i)
{
    var temps = temp[i].ToString();

    // Advance j until a character accepted by Is_ACN or Is_Mark is found.
    int j = i + 1;
    while (j < temp.Length)
    {
        var text = temp[j].ToString();
        if (Regex_Helper.Is_ACN(text) || Regex_Helper.Is_Mark(text))
        {
            break;
        }

        temps += temp[j];
        j++;
    }

    var _w = new _WordInnfo();
    _w.Name = temps;
    _w.MaxType = WordType.Noun;
    ls.Add(_w);

    // j is the first index that was not consumed (or temp.Length), so the last
    // consumed index is always j - 1. Deriving it this way also covers the case
    // where the run stops exactly on the final character, which must stay unread.
    i = j - 1;
}
/// <summary>
/// Tells whether any key in the word's TypeInfo satisfies the given predicate.
/// </summary>
/// <param name="w">The word whose TypeInfo entries are examined.</param>
/// <param name="c">Predicate applied to each entry's key.</param>
/// <returns>true as soon as one key matches; false when none does.</returns>
private static bool contains(_WordInnfo w, Contains c)
{
    bool found = false;
    foreach (var entry in w.TypeInfo)
    {
        if (!c(entry.Key))
        {
            continue;
        }

        // Stop at the first match; later entries are not examined.
        found = true;
        break;
    }

    return found;
}
/// <summary>
/// Collects every TypeInfo entry of the word whose key satisfies the given predicate.
/// </summary>
/// <param name="w">The word whose TypeInfo entries are examined.</param>
/// <param name="s">Predicate applied to each entry's key.</param>
/// <returns>A new list with the matching entries, in enumeration order; empty when none match.</returns>
private static List<KeyValuePair<WordType, int>> get(_WordInnfo w, Contains s)
{
    var matches = new List<KeyValuePair<WordType, int>>();
    foreach (var entry in w.TypeInfo)
    {
        if (!s(entry.Key))
        {
            continue;
        }

        matches.Add(entry);
    }

    return matches;
}
/// <summary>
/// Sub-sentence end handler: appends a closing mark word, records the end position,
/// optionally runs the segmentation result check and raises SubSenceFinish.
/// </summary>
/// <param name="ch">The character that ended the sub-sentence.</param>
private void subSentenceEnd(char ch)
{
    // Decide which mark closes the sub-sentence: keep the terminating character when
    // it is a sub-sentence or sentence end mark; otherwise (for example when the
    // sub-sentence ended with a space) a Chinese full stop is added instead.
    bool isEndMark = MarkHelper.IsSubSentenceEndMark(ch) || MarkHelper.IsSentenceEndMark(ch);

    var mark = new _WordInnfo();
    mark.Name = isEndMark ? ch.ToString() : '。'.ToString();
    mark.MaxType = WordType.Mark;

    _susentence.Words.Add(mark);
    _susentence.Position.End = _currentPos;
    _isSubsentenceStarted = false;

    // Run the segmentation result check only when the configuration asks for it.
    if (_config.IsReflex)
    {
        _checker.Check(_susentence.Words);
    }

    // Publish the "sub-sentence parsing finished" event to any subscribers.
    if (SubSenceFinish != null)
    {
        SubSenceFinish(_paragraph);
    }
}
/// <summary>
/// Handles one token while reading a name: any token other than ')' is appended to
/// the pending text; ')' finishes the name, starts a new word with it and moves the
/// parser to the PreType state.
/// </summary>
private void name()
{
    if (_token != ')')
    {
        // Still inside the name: keep accumulating.
        _temp += _token;
        return;
    }

    _w = new _WordInnfo();
    // The accumulated text is reversed first when _reverseName is set.
    _w.Name = _reverseName ? StringHelper.Reverse(_temp) : _temp;

    _temp = string.Empty;
    _state = DicParserState.PreType;
}
/// <summary>
/// Determines the word type of a letter/number fragment and stores it in MaxType.
/// It is public and static because the reflexier uses it as well.
/// </summary>
/// <param name="w">The word to classify; its Name is inspected and its MaxType is set.</param>
public static void SetAlphaNumberType(_WordInnfo w)
{
    // Number of characters in the name accepted by Regex_Helper.Is_Concrete_Number.
    var numberChars = StringHelper.Count(w.Name, (x) => Regex_Helper.Is_Concrete_Number(x.ToString()));
    int length = w.Name.Length;

    // A concrete number is either made up entirely of such characters, or has exactly
    // one other character while the name contains a '.'.
    bool isConcreteNumber = numberChars == length
        || (numberChars == length - 1 && w.Name.Contains('.'.ToString()));

    w.MaxType = isConcreteNumber ? WordType.NumberConcrete : WordType.NounAlphaNumberMark;
}
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsAdjectiveAdverb().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetAdjectiveAdverb(this _WordInnfo w)
{
    return get(w, type => type.IsAdjectiveAdverb());
}
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsPreposition().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetPreposition(this _WordInnfo w)
{
    return get(w, type => type.IsPreposition());
}
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsNumber().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetNumber(this _WordInnfo w)
{
    return get(w, type => type.IsNumber());
}
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsConjunction().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetConjunction(this _WordInnfo w)
{
    return get(w, type => type.IsConjunction());
}
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsAuxliary().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsAuxiliary(this _WordInnfo w) => contains(w, type => type.IsAuxliary());
/// <summary>
/// Gets all verb entries: the word's TypeInfo entries whose type satisfies IsVerb().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetVerb(this _WordInnfo w)
{
    return get(w, type => type.IsVerb());
}
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsAdjectiveAdverb().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsAdjectiveAdverb(this _WordInnfo w) => contains(w, type => type.IsAdjectiveAdverb());
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsConnective().
/// </summary>
/// <remarks>
/// NOTE(review): the method is named "Collective" but the predicate used is
/// IsConnective() - confirm which of the two is intended.
/// </remarks>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsCollective(this _WordInnfo w) => contains(w, type => type.IsConnective());
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsNoun().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsNoun(this _WordInnfo w) => contains(w, type => type.IsNoun());
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsPreposition().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsPreposition(this _WordInnfo w) => contains(w, type => type.IsPreposition());
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsClassification().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetClassifier(this _WordInnfo w)
{
    return get(w, type => type.IsClassification());
}
/// <summary>
/// Tells whether any of the word's TypeInfo types satisfies IsConjunction().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>true when at least one type matches; otherwise false.</returns>
public static bool ContainsConjunction(this _WordInnfo w) => contains(w, type => type.IsConjunction());
/// <summary>
/// Creates a vertex whose Content is the given word.
/// </summary>
/// <param name="w">The word stored in this vertex.</param>
public Vertex(_WordInnfo w)
{
    this.Content = w;
}
/// <summary>
/// Gets the word's TypeInfo entries whose type satisfies IsAuxliary().
/// </summary>
/// <param name="w">The word to inspect.</param>
/// <returns>A list of the matching entries; empty when there are none.</returns>
public static List<KeyValuePair<WordType, int>> GetAuxliary(this _WordInnfo w)
{
    return get(w, type => type.IsAuxliary());
}