Example #1
0
        /// <summary>
        /// Cuts a run of unknown characters (for example Japanese kana,
        /// Arabic or Korean letters) and appends it to the current
        /// sub-sentence as a single noun.
        /// </summary>
        /// <param name="ch">First character of the run.</param>
        private void flexUnknowChar(char ch)
        {
            var  text     = ch.ToString();
            bool advanced = false;

            while (hasNext)
            {
                var c = next();
                advanced = true;

                // The run stops at the first character accepted by Is_ACN or Is_Mark.
                var s = c.ToString();
                if (Regex_Helper.Is_ACN(s) || Regex_Helper.Is_Mark(s))
                {
                    break;
                }
                text += c;
            }

            // Step back once whenever next() was called at least once.
            // NOTE(review): this also steps back when the loop ended because
            // hasNext became false (no terminator was read) - confirm intended.
            if (advanced)
            {
                previous();
            }

            var word = new _WordInnfo();
            word.Name    = text;
            word.MaxType = WordType.Noun;
            _susentence.Words.Add(word);
        }
Example #2
0
        /// <summary>
        /// Reverse-segmentation helper: reads the run of letters and digits that
        /// starts at <paramref name="i"/> and appends it to <paramref name="ls"/>
        /// as one word whose type is set by Lexicer.SetAlphaNumberType.
        /// </summary>
        /// <param name="ls">Word list that receives the reverse-segmentation result.</param>
        /// <param name="temp">Character sequence being reverse-segmented; it is only read here.</param>
        /// <param name="i">
        /// Current scan position. On entry: index of the first character of the run.
        /// On return: index of the last character that was consumed into the word.
        /// </param>
        private void reflexNumberAlpha(List <_WordInnfo> ls, ref string temp, ref int i)
        {
            // temp[i] always belongs to the word; extend the (exclusive) end while
            // the following characters are digits or letters.
            int end = i + 1;
            while (end < temp.Length)
            {
                var c = temp[end].ToString();
                if (!Regex_Helper.Is_Number(c) && !Regex_Helper.Is_Alpha(c))
                {
                    break;
                }
                end++;
            }

            var w = new _WordInnfo();
            w.Name = temp.Substring(i, end - i);
            Lexicer.SetAlphaNumberType(w);
            ls.Add(w);

            // Always leave i on the last consumed character. The previous version
            // left i ON the terminating character when that character was the last
            // one of temp, which is indistinguishable from "consumed up to the end";
            // a caller that advances i after this call (presumably a for loop -
            // TODO confirm) would then skip that final character.
            i = end - 1;
        }
Example #3
0
        /// <summary>
        /// Cuts a run of letters and digits (possibly with embedded marks) and
        /// appends it to the current sub-sentence as one word.
        /// </summary>
        /// <param name="ch">First character of the run.</param>
        private void flexNumberAlpha(char ch)
        {
            var text = ch.ToString();

            // Becomes true as soon as the loop body runs; in that case the
            // position is stepped back by one character after the scan.
            bool advanced = false;

            while (_currentPos < _context.Length - 1)
            {
                advanced = true;
                var c = next();
                var s = c.ToString();

                if (!Regex_Helper.Is_AN(s))
                {
                    // Marks stay inside the run (web addresses, math expressions)
                    // unless they are Chinese marks; Chinese marks and every
                    // other character end the run.
                    if (!Regex_Helper.Is_Mark(s) || Regex_Helper.Is_Chinese_Mark(s))
                    {
                        break;
                    }
                }
                text += c;
            }

            var word = new _WordInnfo();
            word.Name = text;

            // Set the type of the fragment: it may be a numeral or something
            // else (math expression, web address, ...).
            SetAlphaNumberType(word);

            if (advanced)
            {
                previous();
            }

            word.Name = text;
            _susentence.Words.Add(word);
        }
Example #4
0
        /// <summary>
        /// Reverse-segmentation helper: reads the run of unknown characters that
        /// starts at <paramref name="i"/> and appends it to <paramref name="ls"/>
        /// as one noun.
        /// </summary>
        /// <param name="ls">Word list that receives the reverse-segmentation result.</param>
        /// <param name="temp">Character sequence being reverse-segmented; it is only read here.</param>
        /// <param name="i">
        /// Current scan position. On entry: index of the first character of the run.
        /// On return: index of the last character that was consumed into the word.
        /// </param>
        private void reflexUnknowChars(List <_WordInnfo> ls, ref string temp, ref int i)
        {
            // temp[i] always belongs to the word; extend the (exclusive) end until
            // a character accepted by Is_ACN or Is_Mark is found.
            int end = i + 1;
            while (end < temp.Length)
            {
                var c = temp[end].ToString();
                if (Regex_Helper.Is_ACN(c) || Regex_Helper.Is_Mark(c))
                {
                    break;
                }
                end++;
            }

            var _w = new _WordInnfo();
            _w.Name    = temp.Substring(i, end - i);
            _w.MaxType = WordType.Noun;
            ls.Add(_w);

            // Always leave i on the last consumed character. The previous version
            // left i ON the terminating character when that character was the last
            // one of temp, which is indistinguishable from "consumed up to the end";
            // a caller that advances i after this call (presumably a for loop -
            // TODO confirm) would then skip that final character.
            i = end - 1;
        }
Example #5
0
 /// <summary>
 /// Tells whether at least one key of w.TypeInfo satisfies the predicate.
 /// </summary>
 /// <param name="w">Word whose TypeInfo entries are inspected.</param>
 /// <param name="c">Predicate applied to each entry key.</param>
 /// <returns>true as soon as one key matches; false when none does.</returns>
 private static bool contains(_WordInnfo w, Contains c)
 {
     bool found = false;

     foreach (var entry in w.TypeInfo)
     {
         if (!c(entry.Key))
         {
             continue;
         }

         // The first match is enough; stop enumerating.
         found = true;
         break;
     }
     return found;
 }
Example #6
0
        /// <summary>
        /// Collects the entries of w.TypeInfo whose key satisfies the predicate.
        /// </summary>
        /// <param name="w">Word whose TypeInfo entries are filtered.</param>
        /// <param name="s">Predicate applied to each entry key.</param>
        /// <returns>A new list with the matching entries, in enumeration order.</returns>
        private static List <KeyValuePair <WordType, int> > get(_WordInnfo w, Contains s)
        {
            var matches = new List <KeyValuePair <WordType, int> >();

            foreach (var entry in w.TypeInfo)
            {
                if (!s(entry.Key))
                {
                    continue;
                }
                matches.Add(entry);
            }
            return matches;
        }
Example #7
0
        /// <summary>
        /// Handles the end of a sub-sentence: appends the closing mark word,
        /// records the end position, optionally runs the checker on the words
        /// and raises SubSenceFinish.
        /// </summary>
        /// <param name="ch">Character at which the sub-sentence ended.</param>
        private void subSentenceEnd(char ch)
        {
            // Decide which mark closes the sub-sentence: ch itself when it is a
            // sub-sentence or sentence end mark, otherwise (for instance when the
            // sub-sentence ended with a space) a full stop is added.
            var mark = new _WordInnfo();

            bool isEndMark = MarkHelper.IsSubSentenceEndMark(ch) || MarkHelper.IsSentenceEndMark(ch);
            mark.Name    = isEndMark ? ch.ToString() : '。'.ToString();
            mark.MaxType = WordType.Mark;
            _susentence.Words.Add(mark);

            _susentence.Position.End = _currentPos;
            _isSubsentenceStarted    = false;

            // Run the segmentation-result check only when configured to do so.
            if (_config.IsReflex)
            {
                _checker.Check(_susentence.Words);
            }

            // Publish the "sub-sentence finished" event when a handler is attached.
            if (SubSenceFinish == null)
            {
                return;
            }
            SubSenceFinish(_paragraph);
        }
Example #8
0
        /// <summary>
        /// Processes the current token while a word name is being read: tokens
        /// are accumulated in _temp until ')' is seen, at which point a new word
        /// is created from the accumulated text and the state becomes PreType.
        /// </summary>
        private void name()
        {
            if (_token != ')')
            {
                // Still inside the name: keep collecting.
                _temp += _token;
                return;
            }

            // ')' closes the name; the text is reversed when _reverseName is set.
            _w      = new _WordInnfo();
            _w.Name = _reverseName ? StringHelper.Reverse(_temp) : _temp;

            _temp  = string.Empty;
            _state = DicParserState.PreType;
        }
Example #9
0
        /// <summary>
        /// Decides the word type of a letter/digit fragment and stores it in
        /// w.MaxType. Public and static because the reverse segmenter uses it too.
        /// </summary>
        /// <param name="w">Word whose Name is inspected and whose MaxType is set.</param>
        public static void SetAlphaNumberType(_WordInnfo w)
        {
            var digits = StringHelper.Count(w.Name, (x) => Regex_Helper.Is_Concrete_Number(x.ToString()));

            // A concrete number is either made of concrete-number characters only,
            // or has exactly one other character while the name contains a '.'.
            bool isConcreteNumber =
                digits == w.Name.Length ||
                (digits == w.Name.Length - 1 && w.Name.Contains("."));

            w.MaxType = isConcreteNumber
                ? WordType.NumberConcrete
                : WordType.NounAlphaNumberMark;
        }
Example #10
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsAdjectiveAdverb().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetAdjectiveAdverb(this _WordInnfo w)
 {
     return get(w, type => type.IsAdjectiveAdverb());
 }
Example #11
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsPreposition().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetPreposition(this _WordInnfo w)
 {
     return get(w, type => type.IsPreposition());
 }
Example #12
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsNumber().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetNumber(this _WordInnfo w)
 {
     return get(w, type => type.IsNumber());
 }
Example #13
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsConjunction().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetConjunction(this _WordInnfo w)
 {
     return get(w, type => type.IsConjunction());
 }
Example #14
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsAuxliary().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsAuxiliary(this _WordInnfo w)
 => contains(w, type => type.IsAuxliary());
Example #15
0
 /// <summary>
 /// Gets all type entries of the word whose type satisfies IsVerb().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetVerb(this _WordInnfo w)
 {
     return get(w, type => type.IsVerb());
 }
Example #16
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsAdjectiveAdverb().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsAdjectiveAdverb(this _WordInnfo w)
 => contains(w, type => type.IsAdjectiveAdverb());
Example #17
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsConnective().
 /// </summary>
 /// <remarks>
 /// NOTE(review): the method is named "Collective" but the predicate tested
 /// is IsConnective() - confirm which of the two is intended.
 /// </remarks>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsCollective(this _WordInnfo w)
 => contains(w, type => type.IsConnective());
Example #18
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsNoun().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsNoun(this _WordInnfo w)
 => contains(w, type => type.IsNoun());
Example #19
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsPreposition().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsPreposition(this _WordInnfo w)
 => contains(w, type => type.IsPreposition());
Example #20
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsClassification().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetClassifier(this _WordInnfo w)
 {
     return get(w, type => type.IsClassification());
 }
Example #21
0
 /// <summary>
 /// Tells whether any type of the word satisfies IsConjunction().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The result of contains() for that predicate.</returns>
 public static bool ContainsConjunction(this _WordInnfo w)
 => contains(w, type => type.IsConjunction());
Example #22
0
 /// <summary>
 /// Creates a vertex whose Content is the given word.
 /// </summary>
 /// <param name="w">Word stored in Content.</param>
 public Vertex(_WordInnfo w)
 {
     this.Content = w;
 }
Example #23
0
 /// <summary>
 /// Gets the type entries of the word whose type satisfies IsAuxliary().
 /// </summary>
 /// <param name="w">Word to inspect.</param>
 /// <returns>The list produced by get() for that predicate.</returns>
 public static List <KeyValuePair <WordType, int> > GetAuxliary(this _WordInnfo w)
 {
     return get(w, type => type.IsAuxliary());
 }