/// <summary>
/// Registers this module's bindings: program settings, SMTP client, JSON
/// serializer settings, segmentation match options and the infrastructure
/// interfaces.
/// </summary>
public override void Load() {
            Bind<ProgramSettings>()
                .ToConstant(ProgramSettings.Current)
                .InSingletonScope();
            Bind<SmtpClient>().ToSelf();

            // Serializer settings: camel-case property names, UTC date handling.
            Bind<JsonSerializerSettings>()
                .ToSelf()
                .WithPropertyValue("ContractResolver", new CamelCasePropertyNamesContractResolver())
                .WithPropertyValue("DateTimeZoneHandling", DateTimeZoneHandling.Utc);

            // Ninject cannot inject fields, so the options instance is built by hand.
            MatchOptions segmentOptions = new MatchOptions {
                ChineseNameIdentify = true,
                EnglishMultiDimensionality = true,
                TraditionalChineseEnabled = true
            };
            Bind<MatchOptions>()
                .ToConstant(segmentOptions)
                .InSingletonScope();

            // Infrastructure interfaces.
            Bind<IWebDownload>()
                .To<WebDownload>()
                .InSingletonScope();

            Bind<IAppParser>()
                .To<AppParser>()
                .WithConstructorArgument("truncateLimit", 200);

            // Two named indexer bindings that differ only in the "rebuild" constructor argument.
            Bind<IAppIndexer>()
                .To<LuceneIndexer>()
                .Named("Rebuild")
                .WithConstructorArgument("rebuild", true);
            Bind<IAppIndexer>()
                .To<LuceneIndexer>()
                .Named("Update")
                .WithConstructorArgument("rebuild", false);

            Bind<IUpdateNotifier>().To<UpdateNotifier>();
        }
Exemple #2
0
        /// <summary>
        /// Creates a new <see cref="MatchOptions"/> and copies into it the value of
        /// every field returned by <c>GetType().GetFields()</c> for this instance.
        /// </summary>
        /// <returns>The newly created copy.</returns>
        public MatchOptions Clone()
        {
            MatchOptions copy = new MatchOptions();
            FieldInfo[] fields = this.GetType().GetFields();

            for (int index = 0; index < fields.Length; index++)
            {
                FieldInfo field = fields[index];
                field.SetValue(copy, field.GetValue(this));
            }

            return copy;
        }
Exemple #3
0
        /// <summary>
        /// Stores the supplied collaborators and picks the set implementation
        /// used for <c>output</c> based on <c>settings.Debug</c>.
        /// </summary>
        /// <param name="download">Downloader stored on the instance.</param>
        /// <param name="serializerSettings">JSON serializer settings stored on the instance.</param>
        /// <param name="truncateLimit">Truncation limit stored on the instance.</param>
        /// <param name="segmentMatchOptions">Segmentation match options stored on the instance.</param>
        /// <param name="settings">Program settings; its <c>Debug</c> flag selects the output set type.</param>
        public AppParser(IWebDownload download, JsonSerializerSettings serializerSettings,
            int truncateLimit, MatchOptions segmentMatchOptions, ProgramSettings settings) {
            this.settings = settings;
            this.segmentMatchOptions = segmentMatchOptions;
            this.truncateLimit = truncateLimit;
            this.serializerSettings = serializerSettings;
            this.download = download;

            // In debug mode the ids must be sorted so that Fiddler's Auto Responder
            // can intercept the requests reliably; otherwise a plain hash set is used.
            if (!settings.Debug) {
                this.output = new HashSet<int>();
            }
            else {
                this.output = new SortedSet<int>();
            }
        }
        /// <summary>
        /// Reads the whole input text, segments it with a <c>Segment</c> instance
        /// and stores the resulting words in <c>_WordList</c>.
        /// </summary>
        /// <param name="input">Reader supplying the text; also handed to the base constructor.</param>
        /// <param name="options">Match options forwarded to <c>DoSegment</c>.</param>
        /// <param name="parameters">Match parameters forwarded to <c>DoSegment</c>.</param>
        public PanGuTokenizer(System.IO.TextReader input, MatchOptions options, MatchParameter parameters)
            : base(input)
        {
            lock (_LockObj)
            {
                Init();
            }

            _InputText = base.input.ReadToEnd();

            // Fallback: when ReadToEnd yielded nothing, pull the text in 1024-char chunks.
            if (string.IsNullOrEmpty(_InputText))
            {
                char[] chunk = new char[1024];
                int charsRead = base.input.Read(chunk, 0, chunk.Length);
                StringBuilder collected = new StringBuilder(chunk.Length);

                // The first chunk is read through base.input, later ones through the
                // constructor argument, exactly as the code has always done.
                for (; charsRead > 0; charsRead = input.Read(chunk, 0, chunk.Length))
                {
                    collected.Append(chunk, 0, charsRead);
                }

                if (collected.Length > 0)
                {
                    _InputText = collected.ToString();
                }
            }

            // Nothing to segment: expose an empty word list.
            if (string.IsNullOrEmpty(_InputText))
            {
                _WordList = new WordInfo[0];
                return;
            }

            global::PanGu.Segment segmenter = new Segment();
            ICollection<WordInfo> segmented = segmenter.DoSegment(_InputText, options, parameters);
            _WordList = new WordInfo[segmented.Count];
            segmented.CopyTo(_WordList, 0);
        }
Exemple #5
0
        /// <summary>
        /// Runs the segmentation pipeline on <paramref name="text"/>:
        /// <c>PreSegment</c>, optional stop-word filtering and
        /// <c>ProcessAfterSegment</c>, all while holding the dictionary lock in
        /// shared mode.
        /// </summary>
        /// <param name="text">Text to segment; null or empty yields an empty list.</param>
        /// <param name="options">
        /// Match options stored on this instance; when null,
        /// <c>Setting.PanGuSettings.Config.MatchOptions</c> is used.
        /// </param>
        /// <param name="parameters">
        /// Match parameters stored on this instance; when null,
        /// <c>Setting.PanGuSettings.Config.Parameters</c> is used.
        /// </param>
        /// <returns>The collection of words produced for <paramref name="text"/>.</returns>
        public ICollection <WordInfo> DoSegment(string text, Match.MatchOptions options, Match.MatchParameter parameters)
        {
            if (string.IsNullOrEmpty(text))
            {
                return(new SuperLinkedList <WordInfo>());
            }

            // Take the lock BEFORE entering the try block: if Enter itself throws,
            // the finally clause must not call Leave on a lock that was never acquired.
            Dict.DictionaryLoader.Lock.Enter(PanGu.Framework.Lock.Mode.Share);

            try
            {
                _Options    = options;
                _Parameters = parameters;

                // Init runs before the null fallbacks below, as in the original ordering.
                Init();

                if (_Options == null)
                {
                    _Options = Setting.PanGuSettings.Config.MatchOptions;
                }

                if (_Parameters == null)
                {
                    _Parameters = Setting.PanGuSettings.Config.Parameters;
                }

                SuperLinkedList <WordInfo> result = PreSegment(text);

                if (_Options.FilterStopWords)
                {
                    FilterStopWord(result);
                }

                ProcessAfterSegment(text, result);

                return(result);
            }
            finally
            {
                Dict.DictionaryLoader.Lock.Leave();
            }
        }
 /// <summary>
 /// Creates an analyzer that keeps the given match options and parameters
 /// in its <c>_options</c> and <c>_parameters</c> fields.
 /// </summary>
 /// <param name="options">Match options stored on the instance.</param>
 /// <param name="parameters">Match parameters stored on the instance.</param>
 public PanGuAnalyzer(MatchOptions options, MatchParameter parameters)
 {
     // The parameterless base constructor is invoked implicitly.
     _parameters = parameters;
     _options = options;
 }
Exemple #7
0
        /// <summary>
        /// Builds the word list for <paramref name="orginalText"/> from the candidate
        /// positions in <paramref name="positionLenArr"/>: collects up to TopRecord
        /// combinations from the leaf nodes, merges them, and then inserts the words
        /// returned by <c>GetUnknowWords</c> at their positions.
        /// </summary>
        /// <param name="positionLenArr">Candidate position/length entries, passed on to <c>GetLeafNodeArray</c>.</param>
        /// <param name="orginalText">The text being segmented.</param>
        /// <param name="count">
        /// Passed on to <c>GetLeafNodeArray</c>; when 0 the text is returned without dictionary matching.
        /// NOTE(review): presumably the number of valid entries in <paramref name="positionLenArr"/> - confirm.
        /// </param>
        /// <returns>A linked list of <c>WordInfo</c> items for the text.</returns>
        public SuperLinkedList <WordInfo> Match(PanGu.Dict.PositionLength[] positionLenArr, string orginalText, int count)
        {
            // Fall back to the configured defaults when no options/parameters were set.
            if (_Options == null)
            {
                _Options = Setting.PanGuSettings.Config.MatchOptions;
            }

            if (_Parameters == null)
            {
                _Parameters = Setting.PanGuSettings.Config.Parameters;
            }

            // One mask entry per character; filled in below (2 = covered by a
            // multi-character word, 1 = covered by a single-character word).
            int[] masks      = new int[orginalText.Length];
            int   redundancy = _Parameters.Redundancy;

            SuperLinkedList <WordInfo> result = new SuperLinkedList <WordInfo>();

            // No candidates at all: return either the whole text as one word or
            // one word per character, depending on UnknownWordIdentify.
            if (count == 0)
            {
                if (_Options.UnknownWordIdentify)
                {
                    WordInfo wi = new WordInfo();
                    wi.Word     = orginalText;
                    wi.Position = 0;
                    wi.WordType = WordType.None;
                    wi.Rank     = 1;
                    result.AddFirst(wi);
                    return(result);
                }
                else
                {
                    int position = 0;
                    foreach (char c in orginalText)
                    {
                        WordInfo wi = new WordInfo();
                        wi.Word     = c.ToString();
                        wi.Position = position++;
                        wi.WordType = WordType.None;
                        wi.Rank     = 1;
                        result.AddLast(wi);
                    }

                    return(result);
                }
            }

            Node[] leafNodeArray = GetLeafNodeArray(positionLenArr, orginalText.Length, count);

            // The two statements below are the old algorithm, which does not use isolated-point splitting
            //Node[] leafNodeArray = GetLeafNodeArrayCore(positionLenArr, orginalText.Length, count);
            //Framework.QuickSort<Node>.TopSort(leafNodeArray,
            //    _LeafNodeList.Count, (int)Math.Min(TopRecord, _LeafNodeList.Count), new NodeComparer());

            // j counts the leaf nodes consumed so far; it tracks the foreach index.
            int j = 0;

            // Take the first TopRecord word sequences
            foreach (Node node in leafNodeArray)
            {
                // A null entry ends the usable part of the array.
                if (leafNodeArray[j] == null)
                {
                    break;
                }

                if (j >= TopRecord || j >= leafNodeArray.Length)
                {
                    break;
                }

                Dict.PositionLength[] comb = new PanGu.Dict.PositionLength[node.AboveCount];

                // Walk from the leaf up through Parent links, filling comb from back to front.
                int  i   = node.AboveCount - 1;
                Node cur = node;

                while (i >= 0)
                {
                    comb[i] = cur.PositionLength;
                    cur     = cur.Parent;
                    i--;
                }

                _AllCombinations.Add(comb);

                j++;
            }

            //Force single word
            // (adds a combination in which every character is its own length-1 entry)
            if (_Options.ForceSingleWord)
            {
                Dict.PositionLength[] comb = new PanGu.Dict.PositionLength[orginalText.Length];

                for (int i = 0; i < comb.Length; i++)
                {
                    PanGu.Dict.PositionLength pl = new PanGu.Dict.PositionLength(i, 1, new WordAttribute(orginalText[i].ToString(), POS.POS_UNK, 0));
                    pl.Level = 3;
                    comb[i]  = pl;
                }

                _AllCombinations.Add(comb);
            }

            if (_AllCombinations.Count > 0)
            {
                ICollection <Dict.PositionLength> positionCollection = MergeAllCombinations(redundancy);

                foreach (Dict.PositionLength pl in positionCollection)
                //for (int i = 0; i < _AllCombinations[0].Length; i++)
                {
                    //result.AddLast(new WordInfo(_AllCombinations[0][i], orginalText));
                    result.AddLast(new WordInfo(pl, orginalText, _Parameters));
                    // Record which characters are covered: 2 for every character of a
                    // multi-character entry, 1 for a single-character entry.
                    if (pl.Length > 1)
                    {
                        for (int k = pl.Position;
                             k < pl.Position + pl.Length; k++)
                        {
                            masks[k] = 2;
                        }
                    }
                    else
                    {
                        masks[pl.Position] = 1;
                    }
                }
            }

            #region 合并未登录词

            bool            needRemoveSingleWord;
            List <WordInfo> unknownWords = GetUnknowWords(masks, orginalText, out needRemoveSingleWord);

            //Merge them into the corresponding positions of the result sequence
            if (unknownWords.Count > 0)
            {
                SuperLinkedListNode <WordInfo> cur = result.First;

                if (needRemoveSingleWord && !_Options.ForceSingleWord)
                {
                    //Remove the single-character words that need to be removed
                    // NOTE(review): mask value 11 is never assigned in this method;
                    // presumably GetUnknowWords writes it into masks - confirm.

                    while (cur != null)
                    {
                        if (cur.Value.Word.Length == 1)
                        {
                            if (masks[cur.Value.Position] == 11)
                            {
                                SuperLinkedListNode <WordInfo> removeItem = cur;

                                // Advance before removing so the traversal stays valid.
                                cur = cur.Next;

                                result.Remove(removeItem);

                                continue;
                            }
                        }

                        cur = cur.Next;
                    }
                }

                cur = result.First;

                // j is reused here as the index of the next unknown word to insert.
                j = 0;

                while (cur != null)
                {
                    // Insert the unknown word in front of the first result node at or
                    // beyond its position; cur is not advanced, so several unknown
                    // words can be placed before the same node.
                    if (cur.Value.Position >= unknownWords[j].Position)
                    {
                        result.AddBefore(cur, unknownWords[j]);
                        j++;
                        if (j >= unknownWords.Count)
                        {
                            break;
                        }
                    }

                    if (cur.Value.Position < unknownWords[j].Position)
                    {
                        cur = cur.Next;
                    }
                }

                // Unknown words positioned after every existing node go to the end.
                while (j < unknownWords.Count)
                {
                    result.AddLast(unknownWords[j]);
                    j++;
                }
            }


            #endregion



            return(result);
        }
        /// <summary>
        /// Returns a fresh <see cref="MatchOptions"/> whose fields, as enumerated by
        /// <c>GetType().GetFields()</c> on this instance, carry this instance's values.
        /// </summary>
        /// <returns>The populated copy.</returns>
        public MatchOptions Clone()
        {
            MatchOptions duplicate = new MatchOptions();
            FieldInfo[] members = this.GetType().GetFields();
            int position = 0;

            while (position < members.Length)
            {
                FieldInfo member = members[position];
                member.SetValue(duplicate, member.GetValue(this));
                position++;
            }

            return duplicate;
        }
Exemple #9
0
 /// <summary>
 /// Convenience overload: segments <paramref name="text"/> with the given
 /// options, passing null for the match parameters.
 /// </summary>
 /// <param name="text">Text to segment.</param>
 /// <param name="options">Match options forwarded to the three-argument overload.</param>
 /// <returns>The result of the three-argument <c>DoSegment</c> call.</returns>
 public ICollection <WordInfo> DoSegment(string text, Match.MatchOptions options)
 {
     Match.MatchParameter noParameters = null;

     return DoSegment(text, options, noParameters);
 }
Exemple #10
0
        /// <summary>
        /// Runs the segmentation pipeline on <paramref name="text"/>:
        /// <c>PreSegment</c>, optional stop-word filtering and
        /// <c>ProcessAfterSegment</c>, all while holding the dictionary lock in
        /// shared mode.
        /// </summary>
        /// <param name="text">Text to segment; null or empty yields an empty list.</param>
        /// <param name="options">
        /// Match options stored on this instance; when null,
        /// <c>Setting.PanGuSettings.Config.MatchOptions</c> is used.
        /// </param>
        /// <param name="parameters">
        /// Match parameters stored on this instance; when null,
        /// <c>Setting.PanGuSettings.Config.Parameters</c> is used.
        /// </param>
        /// <returns>The collection of words produced for <paramref name="text"/>.</returns>
        public ICollection<WordInfo> DoSegment(string text, Match.MatchOptions options, Match.MatchParameter parameters)
        {
            if (string.IsNullOrEmpty(text))
            {
                return new SuperLinkedList<WordInfo>();
            }

            // Take the lock BEFORE entering the try block: if Enter itself throws,
            // the finally clause must not call Leave on a lock that was never acquired.
            Dict.DictionaryLoader.Lock.Enter(PanGu.Framework.Lock.Mode.Share);

            try
            {
                _Options = options;
                _Parameters = parameters;

                // Init runs before the null fallbacks below, as in the original ordering.
                Init();

                if (_Options == null)
                {
                    _Options = Setting.PanGuSettings.Config.MatchOptions;
                }

                if (_Parameters == null)
                {
                    _Parameters = Setting.PanGuSettings.Config.Parameters;
                }

                SuperLinkedList<WordInfo> result = PreSegment(text);

                if (_Options.FilterStopWords)
                {
                    FilterStopWord(result);
                }

                ProcessAfterSegment(result);

                return result;
            }
            finally
            {
                Dict.DictionaryLoader.Lock.Leave();
            }
        }
 /// <summary>
 /// Delegates to the three-argument constructor and then records
 /// <paramref name="originalResult"/> in <c>_OriginalResult</c>.
 /// </summary>
 /// <param name="input">Reader supplying the text to tokenize.</param>
 /// <param name="originalResult">Flag stored in <c>_OriginalResult</c>.</param>
 /// <param name="options">Match options forwarded to the three-argument constructor.</param>
 /// <param name="parameters">Match parameters forwarded to the three-argument constructor.</param>
 public PanGuTokenizer(
     System.IO.TextReader input,
     bool originalResult,
     MatchOptions options,
     MatchParameter parameters)
     : this(input, options, parameters)
 {
     // Assigned after the chained constructor has already read and segmented the input.
     _OriginalResult = originalResult;
 }