Ejemplo n.º 1
0
 /// <summary>
 /// Reads the text file at <c>setting.Uri</c> and builds one <see cref="WordAttribute"/> per line.
 /// </summary>
 /// <remarks>
 /// Every line is split on single spaces. The first field always becomes the word. Only when a
 /// line has exactly three fields are the second and third read as the frequency and the numeric
 /// part-of-speech code.
 /// </remarks>
 /// <returns>The parsed nodes, in file order.</returns>
 public List<IDataNode> Load()
 {
     List<IDataNode> result = new List<IDataNode>();

     using (StreamReader reader = new StreamReader(setting.Uri))
     {
         string current;
         while ((current = reader.ReadLine()) != null)
         {
             string[] fields = current.Split(' ');

             WordAttribute entry = new WordAttribute();
             entry.Word = fields[0];

             if (fields.Length == 3)
             {
                 entry.Frequency = Double.Parse(fields[1]);
                 entry.POS = (BluePrint.SegmentFramework.POSType)Convert.ToInt32(fields[2]);
             }

             result.Add(entry);
         }
     }

     return result;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Initializes the instance from a position, a length and the associated word attribute.
 /// <c>Level</c> starts at 0.
 /// </summary>
 /// <param name="position">Value stored in <c>Position</c>.</param>
 /// <param name="length">Value stored in <c>Length</c>.</param>
 /// <param name="wordAttr">Value stored in <c>WordAttr</c>.</param>
 public PositionLength(int position, int length, WordAttribute wordAttr)
 {
     Position = position;
     Length = length;
     WordAttr = wordAttr;
     Level = 0;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Decides, from the mask state at <paramref name="index"/>, whether the character at that
        /// position counts as a known single word.
        /// </summary>
        /// <param name="masks">Per-character state values; only <c>masks[index]</c> is read.</param>
        /// <param name="index">Position examined in both <paramref name="masks"/> and <paramref name="orginalText"/>.</param>
        /// <param name="orginalText">Text whose character at <paramref name="index"/> is looked up in the dictionary.</param>
        /// <returns>
        /// <c>false</c> for state 2; for state 1, <c>false</c> when unknown-word identification is
        /// disabled or the character's part of speech intersects <c>SingleWordMask</c>;
        /// <c>true</c> in every other case.
        /// </returns>
        private bool IsKnownSingleWord(int[] masks, int index, string orginalText)
        {
            switch (masks[index])
            {
                case 2:
                    return false;

                case 1:
                    if (!_Options.UnknownWordIdentify)
                    {
                        return false;
                    }

                    // If the single character is a conjunction, particle, preposition or pronoun
                    WordAttribute attr = _WordDict.GetWordAttr(orginalText[index].ToString());

                    return attr == null || (attr.Pos & SingleWordMask) == 0;

                default:
                    return true;
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Stores the supplied position, length and word attribute, and resets <c>Level</c> to 0.
 /// </summary>
 /// <param name="position">Assigned to <c>Position</c>.</param>
 /// <param name="length">Assigned to <c>Length</c>.</param>
 /// <param name="wordAttr">Assigned to <c>WordAttr</c>.</param>
 public PositionLength(int position, int length, WordAttribute wordAttr)
 {
     Position = position;
     Length   = length;
     WordAttr = wordAttr;
     Level    = 0;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Imports words from a text file whose entries are separated by CRLF.
        /// </summary>
        /// <remarks>
        /// A line of the form <c>word|0xHEX|frequency</c> is inserted directly. Any other line, or
        /// a three-field line whose parsing or insertion throws, is offered to the user through a
        /// <see cref="FormBatchInsert"/> dialog. Once a dialog has been confirmed with
        /// <c>AllUse</c> set, the remaining words reuse that dialog's frequency and part of speech
        /// without prompting again.
        /// </remarks>
        /// <param name="fileName">Path of the file to import.</param>
        /// <param name="encoder">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
        private void BatchInsert(String fileName, String encoder)
        {
            String content = PanGu.Framework.File.ReadFileToString(fileName, Encoding.GetEncoding(encoder));

            String[] words = PanGu.Framework.Regex.Split(content, @"\r\n");

            bool          allUse  = false;
            WordAttribute lstWord = null;

            foreach (String word in words)
            {
                if (word == null || word.Trim() == "")
                {
                    continue;
                }

                string[] strs = word.Split(new char[] { '|' });

                if (strs.Length == 3)
                {
                    try
                    {
                        // The middle field is expected to carry a two-character prefix (e.g. "0x")
                        // in front of the hexadecimal part-of-speech value.
                        POS pos = (POS)int.Parse(strs[1].Substring(2, strs[1].Length - 2),
                                                 System.Globalization.NumberStyles.HexNumber);
                        double frequency = double.Parse(strs[2]);
                        string w         = strs[0].Trim();
                        _WordDict.InsertWord(w, frequency, pos);
                        continue;
                    }
                    catch
                    {
                        // Deliberate best effort: a line that merely looks like the structured
                        // format falls through and is handled as a plain word below.
                    }
                }

                if (allUse && lstWord != null)
                {
                    // The user chose "use for all": reuse the last confirmed attributes.
                    lstWord.Word = word.Trim();
                    _WordDict.InsertWord(lstWord.Word, lstWord.Frequency, lstWord.Pos);
                    continue;
                }

                // A form shown with ShowDialog is not disposed when it closes, so dispose it
                // explicitly; it is also only created when it is actually going to be shown.
                using (FormBatchInsert frmBatchInsert = new FormBatchInsert())
                {
                    frmBatchInsert.Word.Word = word.Trim();

                    if (frmBatchInsert.ShowDialog() == DialogResult.OK)
                    {
                        lstWord = frmBatchInsert.Word;
                        allUse  = frmBatchInsert.AllUse;
                        _WordDict.InsertWord(lstWord.Word, lstWord.Frequency, lstWord.Pos);
                    }
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Keeps the Update/Insert/Delete buttons and the frequency and part-of-speech editors in
        /// sync with the word currently typed into <c>textBoxWord</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void textBoxWord_TextChanged(object sender, EventArgs e)
        {
            String typed = textBoxWord.Text.Trim();

            // Nothing typed: all three actions are unavailable and the editors stay as they are.
            if (typed == "")
            {
                buttonUpdate.Enabled = false;
                buttonInsert.Enabled = false;
                buttonDelete.Enabled = false;
                return;
            }

            WordAttribute found  = _WordDict.GetWordAttr(typed);
            bool          exists = found != null;

            // An existing word can be updated or deleted; a new one can only be inserted.
            buttonUpdate.Enabled = exists;
            buttonInsert.Enabled = !exists;
            buttonDelete.Enabled = exists;

            if (exists)
            {
                numericUpDownFrequency.Value = (decimal)found.Frequency;
                posCtrl.Pos = (int)found.Pos;
            }
            else
            {
                numericUpDownFrequency.Value = 0;
                posCtrl.Pos = 0;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads a dictionary from a UTF-8 text file with one <c>word|0xHEX|frequency</c> entry per line.
        /// </summary>
        /// <remarks>
        /// Lines that do not split into exactly three '|'-separated fields are ignored. The first
        /// two characters of the middle field are skipped and the rest is parsed as a hexadecimal
        /// part-of-speech value.
        /// </remarks>
        /// <param name="fileName">Path of the text dictionary.</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the parsed entries.</returns>
        private WordDictionaryFile LoadFromTextFile(String fileName)
        {
            WordDictionaryFile result = new WordDictionaryFile();
            result.Dicts = new List<WordAttribute>();

            using (StreamReader reader = new StreamReader(fileName, Encoding.UTF8))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] fields = line.Split('|');
                    if (fields.Length != 3)
                    {
                        continue;
                    }

                    string word      = fields[0].Trim();
                    string hexDigits = fields[1].Substring(2);
                    POS    pos       = (POS)int.Parse(hexDigits, System.Globalization.NumberStyles.HexNumber);
                    double frequency = double.Parse(fields[2]);

                    result.Dicts.Add(new WordAttribute(word, pos, frequency));
                }
            }

            return result;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates the <c>Word</c> table and fills it from the embedded <c>dictionary.csv</c> resource.
        /// </summary>
        /// <remarks>
        /// For each CSV row, every non-empty flag column sets the matching <see cref="WordAttribute"/>
        /// bit, and the "Conjunktion" column is parsed as the group number (0 when it does not
        /// parse). Each word returned by <c>ExtractWord</c> for the row is inserted with those
        /// attributes; words of class <c>Subject</c> additionally get the <c>Object</c> attribute.
        /// </remarks>
        public Dictionary()
        {
            dictionary.Create<Word>();

            Stream source = ResourceAccessor.GetResource<Stream>("NightlyCode.StreamRC.Gangolf.Dictionary.dictionary.csv");
            DataTable data = DataTable.ReadCSV(source, ';', true);

            // Parallel tables: CSV column name -> attribute flag, checked in this order.
            string[] flagColumns =
            {
                "Insultive", "Romantic", "Product", "Tool",
                "Producer", "Color", "Political", "Descriptive"
            };
            WordAttribute[] flagValues =
            {
                WordAttribute.Insultive, WordAttribute.Romantic, WordAttribute.Product, WordAttribute.Tool,
                WordAttribute.Producer, WordAttribute.Color, WordAttribute.Political, WordAttribute.Descriptive
            };

            for (int row = 0; row < data.RowCount; ++row)
            {
                WordAttribute attributes = WordAttribute.None;
                for (int i = 0; i < flagColumns.Length; i++)
                {
                    if (!string.IsNullOrEmpty(data.TryGetValue<string>(row, flagColumns[i])))
                    {
                        attributes |= flagValues[i];
                    }
                }

                int group;
                int.TryParse(data.TryGetValue<string>(row, "Conjunktion"), out group);

                foreach (Tuple<string, WordClass> word in ExtractWord(data, row))
                {
                    WordAttribute termAttributes = word.Item2 == WordClass.Subject
                        ? attributes | WordAttribute.Object
                        : attributes;

                    dictionary.Insert<Word>()
                        .Columns(w => w.Text, w => w.Class, w => w.Attributes, w => w.Group)
                        .Values(word.Item1, word.Item2, termAttributes, group)
                        .Execute();
                }
            }
        }
 /// <summary>
 /// Initializes the instance from a position, a length, the associated word attribute and an
 /// optional word class. <c>Level</c> starts at 0.
 /// </summary>
 /// <param name="position">Assigned to <c>Position</c>.</param>
 /// <param name="length">Assigned to <c>Length</c>.</param>
 /// <param name="wordAttr">Assigned to <c>WordAttr</c>.</param>
 /// <param name="wordClass">Assigned to <c>WordClass</c>; defaults to <c>WordClass.Default</c>.</param>
 public PositionLength(int position, int length, WordAttribute wordAttr, WordClass wordClass = WordClass.Default)
 {
     Position = position;
     Length = length;
     WordAttr = wordAttr;
     Level = 0;
     WordClass = wordClass;
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates a phrase definition from its attribute, forms, synonyms, opposites, definition
 /// text and abstract.
 /// </summary>
 /// <param name="attribute">Assigned to <c>Attribute</c>.</param>
 /// <param name="forms">Assigned to <c>Forms</c>.</param>
 /// <param name="synonyms">Assigned to <c>Synonyms</c>.</param>
 /// <param name="opposites">Assigned to <c>Opposites</c>.</param>
 /// <param name="definition">Assigned to <c>Definition</c>.</param>
 /// <param name="abs">Assigned to <c>Abstract</c>.</param>
 public PhraseDefinition(WordAttribute attribute, List <Phrase> forms, List <Phrase> synonyms, List <Phrase> opposites, string definition, string abs)
 {
     this.Attribute = attribute;
     this.Forms = forms;
     this.Synonyms = synonyms;
     this.Opposites = opposites;
     this.Definition = definition;
     this.Abstract = abs;
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Looks up the attributes stored for a keyword.
        /// </summary>
        /// <param name="keywords">Keyword to look up.</param>
        /// <returns>
        /// The result of <c>wordDict.GetWordAttr</c> for the keyword; <c>null</c> when that lookup
        /// yields <c>null</c>.
        /// </returns>
        public WordAttribute GetWordAttr(string keywords)
        {
            return wordDict.GetWordAttr(keywords);
        }
        /// <summary>
        /// Loads a binary dictionary from <paramref name="fileName"/> when that file exists, and
        /// otherwise from the stream returned by <c>GetStreamFromResources</c> for its file name.
        /// </summary>
        /// <remarks>
        /// Layout read here: a 32-byte version header (UTF-8, cut at the first NUL), followed by
        /// records of the form [int32 length][UTF-8 word][int32 part of speech][double frequency],
        /// where <c>length</c> counts the word bytes plus the two trailing numeric fields.
        /// The stream is closed when the method leaves, including when an exception is thrown.
        /// </remarks>
        /// <param name="fileName">Path of the dictionary file.</param>
        /// <param name="verNumStr">Receives the result of matching the header against "Pan Gu Segment V(.+)".</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the decoded records.</returns>
        /// <exception cref="InvalidDataException">A record declares a length too small to hold its numeric fields.</exception>
        /// <exception cref="EndOfStreamException">The stream ends in the middle of a record.</exception>
        private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
        {
            // Bytes taken by the part-of-speech and frequency fields at the end of every record.
            const int trailerSize = sizeof(int) + sizeof(double);

            WordDictionaryFile dictFile = new WordDictionaryFile();

            dictFile.Dicts = new List <WordAttribute>();

            Stream stream;

            if (File.Exists(fileName))
            {
                stream = new FileStream(fileName, FileMode.Open, FileAccess.Read);
            }
            else
            {
                stream = GetStreamFromResources(Path.GetFileName(fileName));
            }

            // Disposing the reader closes the underlying stream, also on the exception path.
            // ReadBytes keeps reading until the requested count or end of stream is reached,
            // unlike a single Stream.Read call, which may legally return fewer bytes.
            using (BinaryReader reader = new BinaryReader(stream))
            {
                byte[] version = reader.ReadBytes(32);

                String ver = Encoding.UTF8.GetString(version, 0, version.Length);

                int zeroPosition = ver.IndexOf('\0');

                if (zeroPosition >= 0)
                {
                    ver = ver.Substring(0, zeroPosition);
                }

                verNumStr = Framework.Regex.GetMatch(ver, "Pan Gu Segment V(.+)", true);

                while (stream.Position < stream.Length)
                {
                    byte[] buf = reader.ReadBytes(sizeof(int));

                    if (buf.Length < sizeof(int))
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record length prefix.");
                    }

                    int length = BitConverter.ToInt32(buf, 0);

                    if (length < trailerSize)
                    {
                        throw new InvalidDataException("Dictionary record length " + length + " is too small.");
                    }

                    buf = reader.ReadBytes(length);

                    if (buf.Length < length)
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record.");
                    }

                    string word      = Encoding.UTF8.GetString(buf, 0, length - trailerSize);
                    POS    pos       = (POS)BitConverter.ToInt32(buf, length - trailerSize);
                    double frequency = BitConverter.ToDouble(buf, length - sizeof(double));

                    WordAttribute dict = new WordAttribute(word, pos, frequency);

                    // Return value intentionally unused, as in the original code.
                    string.Intern(dict.Word);

                    dictFile.Dicts.Add(dict);
                }
            }

            return(dictFile);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Looks up a keyword and reports its frequency and part of speech.
        /// </summary>
        /// <param name="keywords">Keyword to look up.</param>
        /// <param name="pos">Receives the part of speech as an integer, or -1 when the keyword is not found.</param>
        /// <returns>The keyword's frequency, or 0 when the keyword is not found.</returns>
        public double GetWordAttr(string keywords, out int pos)
        {
            pos = -1;

            WordAttribute match = wordDict.GetWordAttr(keywords);
            if (match == null)
            {
                return 0;
            }

            pos = (int)match.Pos;
            return match.Frequency;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Adds a new keyword to the dictionary.
        /// </summary>
        /// <param name="keywords">Keyword to add.</param>
        /// <param name="number">Word frequency.</param>
        /// <param name="pos">Part of speech, as the integer value of <c>POS</c>.</param>
        /// <returns>
        /// <c>false</c> when the keyword is null or empty, or is already present in the dictionary;
        /// <c>true</c> after the keyword has been inserted.
        /// </returns>
        public bool InsertWord(string keywords, double number, int pos)
        {
            bool rejected = string.IsNullOrEmpty(keywords)
                            || wordDict.GetWordAttr(keywords) != null;

            if (rejected)
            {
                return false;
            }

            wordDict.InsertWord(keywords, number, (POS)pos);
            return true;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Creates a node wrapping a new attribute object chosen by name.
        /// </summary>
        /// <param name="nodeName">Either <c>ValueAttribute.NAME</c> or <c>WordAttribute.NAME</c>.</param>
        /// <returns>A node wrapping the new attribute instance, or <c>null</c> for any other name.</returns>
        public Node <INode> GetNode(string nodeName)
        {
            INode created;

            if (nodeName == ValueAttribute.NAME)
            {
                created = new ValueAttribute() as INode;
            }
            else if (nodeName == WordAttribute.NAME)
            {
                created = new WordAttribute() as INode;
            }
            else
            {
                return null;
            }

            return new Node <INode>(created);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Loads a binary dictionary from the stream opened through
        /// <c>GlobalAccess.DirectoryService.OpenFile</c>.
        /// </summary>
        /// <remarks>
        /// The first 32 bytes (the header) are skipped. After that, records of the form
        /// [int32 length][UTF-8 word][int32 part of speech][double frequency] are read until the
        /// stream ends or a record whose declared length leaves no room for a word is met; such a
        /// record ends the load, as it did before this rewrite.
        /// </remarks>
        /// <param name="fileName">Name of the dictionary file to open.</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the decoded records.</returns>
        /// <exception cref="EndOfStreamException">The stream ends in the middle of a record.</exception>
        private async Task <WordDictionaryFile> LoadFromBinFile(String fileName)
        {
            // Bytes taken by the part-of-speech and frequency fields at the end of every record.
            const int trailerSize = sizeof(int) + sizeof(double);

            InitConvertDictionary();
            WordDictionaryFile dictFile = new WordDictionaryFile();

            dictFile.Dicts = new List <WordAttribute>();

            using (Stream fs = await GlobalAccess.DirectoryService.OpenFile(fileName, FileModeEnum.Open))
            using (BinaryReader reader = new BinaryReader(fs))
            {
                // Skip the 32-byte header; its content is not used by this loader.
                reader.ReadBytes(32);

                // Stream.CanRead stays true for a readable stream, so it cannot end the loop;
                // the end of the data is detected from the reads themselves.
                while (true)
                {
                    // ReadBytes keeps reading until the requested count or end of stream.
                    byte[] buf = reader.ReadBytes(sizeof(int));

                    if (buf.Length == 0)
                    {
                        break; // clean end of stream
                    }

                    if (buf.Length < sizeof(int))
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record length prefix.");
                    }

                    int length = BitConverter.ToInt32(buf, 0);

                    buf = reader.ReadBytes(length);

                    if (length - trailerSize <= 0)
                    {
                        break;
                    }

                    if (buf.Length < length)
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record.");
                    }

                    string word      = Encoding.UTF8.GetString(buf, 0, length - trailerSize);
                    POS    pos       = (POS)BitConverter.ToInt32(buf, length - trailerSize);
                    double frequency = BitConverter.ToDouble(buf, length - sizeof(double));

                    dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
                }
            }

            return(dictFile);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Imports words from a text file whose entries are separated by CRLF, asking the user for
        /// each word's frequency and part of speech.
        /// </summary>
        /// <remarks>
        /// Blank entries are skipped. Each word is offered through a <see cref="FormBatchInsert"/>
        /// dialog; a cancelled dialog skips that word. Once a dialog has been confirmed with
        /// <c>AllUse</c> set, the remaining words reuse that dialog's frequency and part of speech
        /// without prompting again.
        /// </remarks>
        /// <param name="fileName">Path of the file to import.</param>
        /// <param name="encoder">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
        private void BatchInsert(String fileName, String encoder)
        {
            String content = PanGu.Framework.File.ReadFileToString(fileName, Encoding.GetEncoding(encoder));

            String[] words = PanGu.Framework.Regex.Split(content, @"\r\n");

            bool          allUse  = false;
            WordAttribute lstWord = null;

            foreach (String word in words)
            {
                if (word == null || word.Trim() == "")
                {
                    continue;
                }

                if (allUse && lstWord != null)
                {
                    // The user chose "use for all": reuse the last confirmed attributes.
                    lstWord.Word = word.Trim();
                    _WordDict.InsertWord(lstWord.Word, lstWord.Frequency, lstWord.Pos);
                    continue;
                }

                // A form shown with ShowDialog is not disposed when it closes, so dispose it
                // explicitly; it is also only created when it is actually going to be shown.
                using (FormBatchInsert frmBatchInsert = new FormBatchInsert())
                {
                    frmBatchInsert.Word.Word = word.Trim();

                    if (frmBatchInsert.ShowDialog() == DialogResult.OK)
                    {
                        lstWord = frmBatchInsert.Word;
                        allUse  = frmBatchInsert.AllUse;
                        _WordDict.InsertWord(lstWord.Word, lstWord.Frequency, lstWord.Pos);
                    }
                }
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Loads a dictionary from a UTF-8 text stream opened through
        /// <c>GlobalAccess.DirectoryService.OpenFile</c>, one <c>word|POS|frequency</c> entry per line.
        /// </summary>
        /// <remarks>
        /// Lines that do not split into exactly three '|'-separated fields are ignored. The middle
        /// field is converted with <c>Enum.Parse</c> against <c>POS</c>.
        /// </remarks>
        /// <param name="fileName">Name of the dictionary file to open.</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the parsed entries.</returns>
        private async Task <WordDictionaryFile> LoadFromTextFile(String fileName)
        {
            InitConvertDictionary();

            // Constructed once; the original built a second instance and discarded the first.
            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List <WordAttribute>();

            using (Stream s = await GlobalAccess.DirectoryService.OpenFile(fileName, FileModeEnum.Open))
            using (StreamReader sr = new StreamReader(s, Encoding.UTF8))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] strs = line.Split(new char[] { '|' });

                    if (strs.Length != 3)
                    {
                        continue;
                    }

                    string word      = strs[0].Trim();
                    POS    pos       = (POS)Enum.Parse(typeof(POS), strs[1]);
                    double frequency = double.Parse(strs[2]);

                    dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
                }
            }

            return(dictFile);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Loads a binary dictionary file.
        /// </summary>
        /// <remarks>
        /// Layout read here: a 32-byte version header (UTF-8, cut at the first NUL), followed by
        /// records of the form [int32 length][UTF-8 word][int32 part of speech][double frequency],
        /// where <c>length</c> counts the word bytes plus the two trailing numeric fields.
        /// The file is closed when the method leaves, including when an exception is thrown.
        /// </remarks>
        /// <param name="fileName">Path of the dictionary file.</param>
        /// <param name="verNumStr">Receives the first match of <c>verRegex</c> in the header, or <c>null</c> when there is none.</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the decoded records.</returns>
        /// <exception cref="InvalidDataException">A record declares a length too small to hold its numeric fields.</exception>
        /// <exception cref="EndOfStreamException">The file ends in the middle of a record.</exception>
        private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
        {
            // Bytes taken by the part-of-speech and frequency fields at the end of every record.
            const int trailerSize = sizeof(int) + sizeof(double);

            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List<IDataNode>();

            // The using blocks close the file on the exception path too. ReadBytes keeps reading
            // until the requested count or end of stream is reached, unlike a single
            // Stream.Read call, which may legally return fewer bytes.
            using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            using (BinaryReader reader = new BinaryReader(fs))
            {
                byte[] version = reader.ReadBytes(32);

                String ver = Encoding.UTF8.GetString(version, 0, version.Length);

                int zeroPosition = ver.IndexOf('\0');
                if (zeroPosition >= 0)
                {
                    ver = ver.Substring(0, zeroPosition);
                }

                var matches = verRegex.Matches(ver);
                verNumStr = matches.Count > 0 ? matches[0].Value : null;

                while (fs.Position < fs.Length)
                {
                    byte[] buf = reader.ReadBytes(sizeof(int));
                    if (buf.Length < sizeof(int))
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record length prefix.");
                    }

                    int length = BitConverter.ToInt32(buf, 0);
                    if (length < trailerSize)
                    {
                        throw new InvalidDataException("Dictionary record length " + length + " is too small.");
                    }

                    buf = reader.ReadBytes(length);
                    if (buf.Length < length)
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record.");
                    }

                    string word = Encoding.UTF8.GetString(buf, 0, length - trailerSize);
                    POSType pos = (POSType)BitConverter.ToInt32(buf, length - trailerSize);
                    double frequency = BitConverter.ToDouble(buf, length - sizeof(double));

                    WordAttribute dict = new WordAttribute(word, pos, frequency);

                    // Return value intentionally unused, as in the original code.
                    string.Intern(dict.Word);

                    dictFile.Dicts.Add(dict);
                }
            }

            return dictFile;
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Adds a word to the in-memory indexes unless an entry with the same lower-cased key exists.
        /// </summary>
        /// <remarks>
        /// One-character words go to <c>_FirstCharDict</c> (keyed by the character), two-character
        /// words to <c>_DoubleCharDict</c> (keyed by both characters packed into a <c>uint</c>),
        /// and longer words to <c>_WordDict</c> (keyed by the lower-cased word). For the longer
        /// words, <c>_TripleCharDict</c> maps the first three characters to an array of the word
        /// lengths seen for that prefix; a 0 entry marks an unused slot. Lengths are stored as
        /// <c>byte</c>, so lengths above 255 are truncated by the cast.
        /// The call is a no-op when <c>_WordDict</c> is null or <paramref name="word"/> is null or
        /// empty. Previously an empty word was added to <c>_WordDict</c> and then failed on
        /// <c>key[0]</c>, and a null word threw <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <param name="word">Word to insert; stored with its original casing.</param>
        /// <param name="frequency">Frequency stored with the word.</param>
        /// <param name="pos">Part of speech stored with the word.</param>
        public void InsertWord(String word, double frequency, POS pos)
        {
            if (_WordDict == null || string.IsNullOrEmpty(word))
            {
                return;
            }

            string key = word.ToLower();

            if (key.Length == 1)
            {
                if (!_FirstCharDict.ContainsKey(key[0]) && !_WordDict.ContainsKey(key))
                {
                    _FirstCharDict.Add(key[0], new WordAttribute(word, pos, frequency));
                }

                return;
            }

            if (key.Length == 2)
            {
                uint doubleChar = ((uint)key[0] * 65536) + key[1];

                if (!_DoubleCharDict.ContainsKey(doubleChar) && !_WordDict.ContainsKey(key))
                {
                    _DoubleCharDict.Add(doubleChar, new WordAttribute(word, pos, frequency));
                }

                return;
            }

            if (_WordDict.ContainsKey(key))
            {
                return;
            }

            // From here on the key has at least three characters.
            _WordDict.Add(key, new WordAttribute(word, pos, frequency));

            // Same packing expression as the lookup side uses, so both agree on the prefix key.
            long tripleChar = ((long)key[0]) * 0x100000000 + (uint)(key[1] * 65536) + key[2];

            byte[] wordLenArray;
            if (!_TripleCharDict.TryGetValue(tripleChar, out wordLenArray))
            {
                wordLenArray    = new byte[4];
                wordLenArray[0] = (byte)key.Length;

                _TripleCharDict.Add(tripleChar, wordLenArray);
                return;
            }

            for (int i = 0; i < wordLenArray.Length; i++)
            {
                if (wordLenArray[i] == key.Length)
                {
                    return; // this length is already registered for the prefix
                }

                if (wordLenArray[i] == 0)
                {
                    wordLenArray[i] = (byte)key.Length; // first free slot
                    return;
                }
            }

            // Every slot is taken: double the array and put the new length in the first new slot.
            byte[] grown = new byte[wordLenArray.Length * 2];

            wordLenArray.CopyTo(grown, 0);
            grown[wordLenArray.Length] = (byte)key.Length;

            _TripleCharDict[tripleChar] = grown;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Collects, for every character position of <paramref name="text"/>, the dictionary words
        /// (and optionally the Chinese-name candidates) that start at that position.
        /// </summary>
        /// <remarks>
        /// For each position the candidates are added in this order: names from
        /// <c>ChineseName.Match</c>, the one-character word, the two-character word, then longer
        /// words whose lengths are registered in <c>_TripleCharDict</c> for the three-character
        /// prefix. A longer word equal to one of the name candidates at the same position is not
        /// added a second time.
        /// </remarks>
        /// <param name="text">Text to scan; <c>null</c> or empty yields an empty result.</param>
        /// <param name="chineseNameIdentify">
        /// When true, candidates from <c>ChineseName.Match</c> are added as <c>POS.POS_A_NR</c>
        /// entries with frequency 0.
        /// </param>
        /// <returns>All matches found, in scan order.</returns>
        public Framework.AppendList <PositionLength> GetAllMatchs(string text, bool chineseNameIdentify)
        {
            Framework.AppendList <PositionLength> result = new PanGu.Framework.AppendList <PositionLength>();

            // The original guard was "text == null && text == \"\"", which can never be true, so
            // null or empty input fell through and crashed on text[0].
            if (string.IsNullOrEmpty(text))
            {
                return(result);
            }

            string keyText = text;

            // Lookup keys are lower-cased only when the first character is ASCII.
            // NOTE(review): text that starts with a non-ASCII character but contains upper-case
            // letters later is therefore matched with its original casing — confirm this is intended.
            if (text[0] < 128)
            {
                keyText = keyText.ToLower();
            }

            for (int i = 0; i < text.Length; i++)
            {
                List <string> chsNames = null;

                if (chineseNameIdentify)
                {
                    chsNames = ChineseName.Match(text, i);

                    if (chsNames != null)
                    {
                        foreach (string name in chsNames)
                        {
                            WordAttribute nameAttr = new WordAttribute(name, POS.POS_A_NR, 0);

                            result.Add(new PositionLength(i, name.Length, nameAttr));
                        }
                    }
                }

                // One-character word starting here.
                WordAttribute fwa;
                if (_FirstCharDict.TryGetValue(keyText[i], out fwa))
                {
                    result.Add(new PositionLength(i, 1, fwa));
                }

                // Two-character word starting here.
                if (i < keyText.Length - 1)
                {
                    uint doubleChar = ((uint)keyText[i] * 65536) + keyText[i + 1];

                    if (_DoubleCharDict.TryGetValue(doubleChar, out fwa))
                    {
                        result.Add(new PositionLength(i, 2, fwa));
                    }
                }

                // Fewer than three characters left: no longer word can start here.
                if (i >= keyText.Length - 2)
                {
                    continue;
                }

                long tripleChar = ((long)keyText[i]) * 0x100000000 + (uint)(keyText[i + 1] * 65536) + keyText[i + 2];

                byte[] lenList;
                if (!_TripleCharDict.TryGetValue(tripleChar, out lenList))
                {
                    continue;
                }

                foreach (byte len in lenList)
                {
                    if (len == 0)
                    {
                        break; // 0 marks the first unused slot; nothing follows it
                    }

                    if (i + len > keyText.Length)
                    {
                        continue;
                    }

                    WordAttribute wa;

                    if (!_WordDict.TryGetValue(keyText.Substring(i, len), out wa))
                    {
                        continue;
                    }

                    // Already reported above as a Chinese-name candidate at this position.
                    if (chsNames != null && chsNames.Contains(wa.Word))
                    {
                        continue;
                    }

                    result.Add(new PositionLength(i, len, wa));
                }
            }

            return(result);
        }
Ejemplo n.º 22
0
 /// <summary>
 /// Builds a <see cref="PhraseDefinition"/> from the given parts and appends it to <c>Definitions</c>.
 /// </summary>
 /// <remarks>
 /// Different definitions may share the same <see cref="WordAttribute"/>, e.g. different
 /// meanings/senses of the same noun.
 /// </remarks>
 public void AddDefinition(WordAttribute attribute, List <Phrase> forms, List <Phrase> synonyms, List <Phrase> opposites, string definition, string abs)
 {
     PhraseDefinition entry = new PhraseDefinition(attribute, forms, synonyms, opposites, definition, abs);

     Definitions.Add(entry);
 }
Ejemplo n.º 23
0
        /// <summary>
        /// Tries to merge the node at <paramref name="current"/> with the run of Symbol/English
        /// nodes that follows it into a single word, provided the merged text is in
        /// <c>_WordDictionary</c>.
        /// </summary>
        /// <param name="orginalText">Text from which the merged word is cut by position.</param>
        /// <param name="wordInfoList">List that is edited in place when a merge happens.</param>
        /// <param name="current">
        /// Node where the merge starts. On a successful merge it is advanced to the first node
        /// after the merged run (or <c>null</c> when the run reached the end of the list); it is
        /// left unchanged when the method returns <c>false</c>.
        /// </param>
        /// <returns><c>true</c> when the nodes were merged; otherwise <c>false</c>.</returns>
        private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList <WordInfo> wordInfoList, ref SuperLinkedListNode <WordInfo> current)
        {
            SuperLinkedListNode <WordInfo> cur = current;

            cur = cur.Next;

            // End offset (exclusive) of the last Symbol/English node in the run; -1 = no such node.
            int last = -1;

            // Walk forward over the consecutive Symbol/English nodes that follow 'current'.
            while (cur != null)
            {
                if (cur.Value.WordType == WordType.Symbol || cur.Value.WordType == WordType.English)
                {
                    last = cur.Value.Position + cur.Value.Word.Length;
                    cur  = cur.Next;
                }
                else
                {
                    break;
                }
            }


            if (last >= 0)
            {
                int first = current.Value.Position;

                string newWord = orginalText.Substring(first, last - first);

                WordAttribute wa = _WordDictionary.GetWordAttr(newWord);

                // The merged text is not a dictionary word: leave the list untouched.
                if (wa == null)
                {
                    return(false);
                }

                // Remove every node of the run, from 'current' up to (not including) 'cur'.
                // This also advances the caller's 'current' to 'cur'.
                while (current != cur)
                {
                    SuperLinkedListNode <WordInfo> removeItem = current;
                    current = current.Next;
                    wordInfoList.Remove(removeItem);
                }

                WordInfo newWordInfo = new WordInfo(new Dict.PositionLength(first, last - first,
                                                                            wa), orginalText, _Parameters);

                newWordInfo.WordType = WordType.English;
                newWordInfo.Rank     = _Parameters.EnglishRank;

                if (_Options.EnglishSegment)
                {
                    string lowerWord = newWordInfo.Word.ToLower();

                    // When lower-casing changes the word, the original-case form is inserted as
                    // well, ahead of the lower-case form that is inserted further below.
                    if (lowerWord != newWordInfo.Word)
                    {
                        if (current == null)
                        {
                            wordInfoList.AddLast(newWordInfo);
                        }
                        else
                        {
                            wordInfoList.AddBefore(current, newWordInfo);
                        }
                    }

                    // Replace the pending entry by its lower-case form, ranked with EnglishLowerRank.
                    newWordInfo = new WordInfo(lowerWord, newWordInfo.Position, newWordInfo.Pos, newWordInfo.Frequency, _Parameters.EnglishLowerRank, newWordInfo.WordType,
                                               newWordInfo.OriginalWordType);
                }
                else if (_Options.IgnoreCapital)
                {
                    newWordInfo.Word = newWordInfo.Word.ToLower();
                }

                // Insert the merged word where the removed run used to be.
                if (current == null)
                {
                    wordInfoList.AddLast(newWordInfo);
                }
                else
                {
                    wordInfoList.AddBefore(current, newWordInfo);
                }

                return(true);
            }


            return(false);
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Loads a binary dictionary file.
        /// </summary>
        /// <remarks>
        /// Layout read here: a 32-byte version header (UTF-8, cut at the first NUL), followed by
        /// records of the form [int32 length][UTF-8 word][int32 part of speech][double frequency],
        /// where <c>length</c> counts the word bytes plus the two trailing numeric fields.
        /// The file is closed when the method leaves, including when an exception is thrown.
        /// </remarks>
        /// <param name="fileName">Path of the dictionary file.</param>
        /// <param name="verNumStr">Receives the result of matching the header against "Pan Gu Segment V(.+)".</param>
        /// <returns>A dictionary file object whose <c>Dicts</c> list holds the decoded records.</returns>
        /// <exception cref="InvalidDataException">A record declares a length too small to hold its numeric fields.</exception>
        /// <exception cref="EndOfStreamException">The file ends in the middle of a record.</exception>
        private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
        {
            // Bytes taken by the part-of-speech and frequency fields at the end of every record.
            const int trailerSize = sizeof(int) + sizeof(double);

            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List<WordAttribute>();

            // The using blocks close the file on the exception path too. ReadBytes keeps reading
            // until the requested count or end of stream is reached, unlike a single
            // Stream.Read call, which may legally return fewer bytes.
            using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            using (BinaryReader reader = new BinaryReader(fs))
            {
                byte[] version = reader.ReadBytes(32);

                String ver = Encoding.UTF8.GetString(version, 0, version.Length);

                int zeroPosition = ver.IndexOf('\0');
                if (zeroPosition >= 0)
                {
                    ver = ver.Substring(0, zeroPosition);
                }

                verNumStr = Framework.Regex.GetMatch(ver, "Pan Gu Segment V(.+)", true);

                while (fs.Position < fs.Length)
                {
                    byte[] buf = reader.ReadBytes(sizeof(int));
                    if (buf.Length < sizeof(int))
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record length prefix.");
                    }

                    int length = BitConverter.ToInt32(buf, 0);
                    if (length < trailerSize)
                    {
                        throw new InvalidDataException("Dictionary record length " + length + " is too small.");
                    }

                    buf = reader.ReadBytes(length);
                    if (buf.Length < length)
                    {
                        throw new EndOfStreamException("Dictionary file ends inside a record.");
                    }

                    string word = Encoding.UTF8.GetString(buf, 0, length - trailerSize);
                    POS pos = (POS)BitConverter.ToInt32(buf, length - trailerSize);
                    double frequency = BitConverter.ToDouble(buf, length - sizeof(double));

                    WordAttribute dict = new WordAttribute(word, pos, frequency);

                    // Return value intentionally unused, as in the original code.
                    string.Intern(dict.Word);

                    dictFile.Dicts.Add(dict);
                }
            }

            return dictFile;
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Builds a random insult: a chain of adjectives, an optional verb (preceded by an
        /// insultive noun and a hyphen when the verb itself is not insultive), a descriptive
        /// noun fused with a subject noun and, occasionally, a trailing postposition.
        /// </summary>
        /// <returns>The assembled insult text.</returns>
        public string CreateInsult()
        {
            StringBuilder insult = new StringBuilder();
            HashSet<long> takenIds = new HashSet<long>();
            long[] excluded;

            // Attributes that, once used by one adjective, are kept out of the following adjectives.
            WordAttribute[] exclusiveAttributes = { WordAttribute.Color, WordAttribute.Political };
            WordAttribute blockedAttributes = WordAttribute.None;

            // Probability of continuing the adjective chain; squared after every adjective.
            double continueChance = 0.36;

            Word adjective = dictionary.GetRandomWord(w => (w.Class & WordClass.Adjective) == WordClass.Adjective);

            do
            {
                foreach (WordAttribute exclusive in exclusiveAttributes)
                {
                    if (adjective.Attributes.HasFlag(exclusive))
                    {
                        blockedAttributes |= exclusive;
                    }
                }

                insult.Append(adjective);
                insult.Append(' ');

                takenIds.Add(adjective.ID);
                excluded = takenIds.ToArray();

                // Next candidate: an adjective not used yet and free of the blocked attributes.
                adjective = dictionary.GetRandomWord(w => (w.Class & WordClass.Adjective) == WordClass.Adjective &&
                                                     (w.Attributes & blockedAttributes) == WordAttribute.None &&
                                                     !excluded.Contains(w.ID));
                continueChance *= continueChance;
            }
            while (RNG.XORShift64.NextDouble() < continueChance);

            // The candidate drawn last is always appended as the final adjective.
            insult.Append(adjective);
            insult.Append(' ');
            takenIds.Clear();

            Word noun;

            bool addVerb = RNG.XORShift64.NextDouble() < 0.3;
            if (addVerb)
            {
                Word verb = dictionary.GetRandomWord(w => (w.Class & WordClass.Verb) == WordClass.Verb);

                bool verbIsInsultive = verb.Attributes.HasFlag(WordAttribute.Insultive);
                if (!verbIsInsultive)
                {
                    // Prefix the verb with an insultive noun, joined by a hyphen.
                    noun = dictionary.GetRandomWord(w => w.Class == WordClass.Noun && (w.Attributes & WordAttribute.Insultive) == WordAttribute.Insultive);
                    insult.Append(noun);
                    insult.Append('-');
                    takenIds.Add(noun.ID);
                }

                insult.Append(verb);
                insult.Append(' ');
            }

            if (takenIds.Count == 0)
            {
                noun = dictionary.GetRandomWord(w => (w.Class & (WordClass.Noun | WordClass.Subject)) != WordClass.None && (w.Attributes & WordAttribute.Object) != WordAttribute.None);
            }
            else
            {
                // Do not pick the noun that was already used as the verb prefix.
                excluded = takenIds.ToArray();
                noun     = dictionary.GetRandomWord(w => (w.Class & (WordClass.Noun | WordClass.Subject)) != WordClass.None && (w.Attributes & WordAttribute.Object) != WordAttribute.None && w.ID != excluded[0]);
            }

            takenIds.Add(noun.ID);

            // Attributes the descriptive noun has to carry, derived from the chosen noun.
            WordAttribute required = WordAttribute.Descriptive;

            if (noun.Attributes.HasFlag(WordAttribute.Product))
            {
                required |= WordAttribute.Producer;
            }

            bool nounIsInsultive = noun.Attributes.HasFlag(WordAttribute.Insultive);
            if (!nounIsInsultive)
            {
                required |= WordAttribute.Insultive;
            }

            excluded = takenIds.ToArray();
            Word descriptive = dictionary.GetRandomWord(w => (w.Class & WordClass.Noun) != WordClass.None && (w.Attributes & required) == required && !excluded.Contains(w.ID));

            insult.Append($"{descriptive.Text}{noun.Text}");

            // The random draw only happens for plain nouns that belong to a group.
            if (noun.Class == WordClass.Noun && noun.Group > 0)
            {
                if (RNG.XORShift64.NextFloat() < 0.07)
                {
                    Word postposition = dictionary.GetRandomWord(w => (w.Class & WordClass.Postposition) == WordClass.Postposition && (w.Group & noun.Group) != 0);
                    if (postposition != null)
                    {
                        insult.Append(postposition);
                    }
                }
            }

            return insult.ToString();
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Collects, for every position of <paramref name="text"/>, all dictionary entries
        /// (and optionally all Chinese names) that start at that position.
        /// </summary>
        /// <param name="text">Text to scan. A null or empty string produces an empty result.</param>
        /// <param name="chineseNameIdentify">
        /// When true, every name returned by ChineseName.Match for a position is added as a
        /// POS.POS_A_NR entry with frequency 0, and dictionary words equal to one of those names
        /// are not added a second time.
        /// </param>
        /// <returns>The matches found, each with its start position, length and word attribute.</returns>
        public Framework.AppendList<PositionLength> GetAllMatchs(string text, bool chineseNameIdentify)
        {
            Framework.AppendList<PositionLength> result = new PanGu.Framework.AppendList<PositionLength>();

            // This guard used to read "text == null && text == \"\"", which can never be true,
            // so null and empty input fell through and failed on text[0] below.
            if (string.IsNullOrEmpty(text))
            {
                return result;
            }

            string keyText = text;

            // Lookup keys are lower-cased only when the text starts with a character below 128.
            if (text[0] < 128)
            {
                keyText = keyText.ToLower();
            }

            for (int i = 0; i < keyText.Length; i++)
            {
                List<string> chsNames = null;

                if (chineseNameIdentify)
                {
                    // Names are matched against the original text, not the lower-cased key text.
                    chsNames = ChineseName.Match(text, i);

                    if (chsNames != null)
                    {
                        foreach (string name in chsNames)
                        {
                            WordAttribute nameAttr = new WordAttribute(name, POS.POS_A_NR, 0);

                            result.Add(new PositionLength(i, name.Length, nameAttr));
                        }
                    }
                }

                // One-character words.
                WordAttribute fwa;
                if (_FirstCharDict.TryGetValue(keyText[i], out fwa))
                {
                    result.Add(new PositionLength(i, 1, fwa));
                }

                // Two-character words, keyed by both UTF-16 code units packed into a uint.
                if (i < keyText.Length - 1)
                {
                    uint doubleChar = ((uint)keyText[i] * 65536) + keyText[i + 1];

                    if (_DoubleCharDict.TryGetValue(doubleChar, out fwa))
                    {
                        result.Add(new PositionLength(i, 2, fwa));
                    }
                }

                // Fewer than three characters left: no longer word can start here.
                if (i >= keyText.Length - 2)
                {
                    continue;
                }

                // Words of three or more characters are indexed by their first three code units.
                // The middle term is computed in uint so it cannot overflow int for characters
                // >= 0x8000; the resulting key value is unchanged.
                long tripleChar = ((long)keyText[i] * 0x100000000) + ((uint)keyText[i + 1] * 65536) + keyText[i + 2];

                byte[] lenList;
                if (!_TripleCharDict.TryGetValue(tripleChar, out lenList))
                {
                    continue;
                }

                foreach (byte len in lenList)
                {
                    // A zero entry ends the scan of this length list.
                    if (len == 0)
                    {
                        break;
                    }

                    if (i + len > keyText.Length)
                    {
                        continue;
                    }

                    string key = keyText.Substring(i, len);

                    WordAttribute wordAttr;
                    if (!_WordDict.TryGetValue(key, out wordAttr))
                    {
                        continue;
                    }

                    // Skip dictionary words already reported above as a Chinese name.
                    if (chsNames != null && chsNames.Contains(wordAttr.Word))
                    {
                        continue;
                    }

                    result.Add(new PositionLength(i, len, wordAttr));
                }
            }

            return result;
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Loads a UTF-8 text dictionary. Each usable line consists of three fields separated by
        /// '|': the word, the part-of-speech value as a hexadecimal number preceded by a
        /// two-character prefix, and the frequency. Lines with a different field count are skipped.
        /// </summary>
        /// <param name="fileName">Path of the text dictionary file.</param>
        /// <returns>A dictionary file object whose Dicts list holds one WordAttribute per usable line.</returns>
        /// <exception cref="FormatException">A part-of-speech or frequency field is not a valid number.</exception>
        private WordDictionaryFile LoadFromTextFile(String fileName)
        {
            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List<IDataNode>();

            using (StreamReader sr = new StreamReader(fileName, Encoding.UTF8))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] strs = line.Split(new char[] { '|' });

                    if (strs.Length != 3)
                    {
                        continue;
                    }

                    string word = strs[0].Trim();

                    // Trim before dropping the two-character prefix (presumably "0x" - confirm
                    // against the file writer) so padding around the field cannot shift the offset.
                    string posField = strs[1].Trim();
                    POSType pos = (POSType)int.Parse(posField.Substring(2),
                                                     System.Globalization.NumberStyles.HexNumber,
                                                     System.Globalization.CultureInfo.InvariantCulture);

                    // The file is machine-readable data, so the frequency is parsed with the
                    // invariant culture instead of whatever culture the current thread uses.
                    // NOTE(review): assumes the writer also emits '.' as decimal separator - confirm.
                    double frequency = double.Parse(strs[2], System.Globalization.CultureInfo.InvariantCulture);

                    dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
                }
            }

            return dictFile;
        }
Ejemplo n.º 28
0
        // Match Functions
        /// <summary>
        /// Matches the beginning of <paramref name="content"/> against this element and returns the recognized part.
        /// Elements match only as far as they can recognize; the input does not have to be consumed completely.
        /// Whether the element is optional (bOptional) must be checked by the caller.
        /// </summary>
        /// <param name="content">String to be matched from its beginning.</param>
        /// <param name="vocabulary">Vocabulary passed to the phrase, variety-word, tag, category and sub-pattern lookups.</param>
        /// <param name="consumed">Number of characters of <paramref name="content"/> consumed by the match; 0 when nothing matched.</param>
        /// <returns>An element instance if a match was found, otherwise null. The caller might also want to remove trailing spaces.</returns>
        /// <exception cref="Exception">The element is a WordAttribute element whose key is "any"; that case must be handled by the caller.</exception>
        /// <remarks>
        /// By design this method does not account for English spacing: <paramref name="consumed"/> does not
        /// count spaces that follow the match, so callers must handle them.
        /// </remarks>
        public PatternElementInstance MatchElement(string content, VocabularyManager vocabulary, out int consumed)
        {
            switch (Type)
            {
            case PatternElementType.SpecificWord:
                // Start with the number of leading white-space characters, then add the key length on a match
                consumed = content.IndexOf(content.TrimStart());
                if (content.TrimStart().IndexOf(Key.ToLower()) == 0)
                {
                    consumed += Key.Length;
                    return(new PatternElementInstance(Type, Key));
                }
                break;

            case PatternElementType.VarietyWord:
                consumed = content.IndexOf(content.TrimStart());
                if (vocabulary.IsPraseVaryingFormOrSynonymUndetermined(content.TrimStart(), Key, ref consumed) == true)
                {
                    return(new PatternElementInstance(Type, content.Substring(0, consumed)));
                }
                break;

            case PatternElementType.WordAttribute:
                // We do not explicitly trim for this; leading white space is handled below
                if (Key == "any")
                {
                    throw new Exception("Any should be handled outside");
                }
                else
                {
                    // Get attributes to match; attributes are guaranteed to be valid at load time
                    // A leading '*' means: keep consuming phrases for as long as they match
                    bool          bInfinite  = (Key.ElementAt(0) == '*');
                    string[]      attributes = Key.Split(new char[] { '+', '*' });
                    WordAttribute attribute  = 0;
                    foreach (string a in attributes)
                    {
                        if (string.IsNullOrWhiteSpace(a))
                        {
                            continue;
                        }
                        attribute |= (WordAttribute)Enum.Parse(typeof(WordAttribute), a);
                    }
                    consumed = 0;
                    // The input must be recognizable, so it is going to be a phrase of some kind
                    Phrase phrase = vocabulary.GetPhrase(content);     // <Improvement> Could we be matching the shortest attribute? // <Warning> GetPhrase() trimmed, so phrase.Length might not equal actual consumed characters
                    while (phrase != null)
                    {
                        // Try to match against the attributes
                        if ((phrase.Attribute & attribute) == attribute || attribute == WordAttribute.any)
                        {
                            // Locate the phrase in the part of content that has not been consumed yet; there
                            // may be spaces in front of it. The search has to start at `consumed`: the phrase
                            // was looked up in content.Substring(consumed), so searching from the start of
                            // content and adding the result to `consumed` counted the already consumed prefix
                            // twice for every phrase after the first one.
                            int phraseStart = content.IndexOf(phrase.Key, consumed);
                            if (phraseStart < 0)
                            {
                                // The phrase key does not literally occur in the remaining text; stop here
                                // rather than compute a meaningless offset.
                                break;
                            }
                            consumed = phraseStart + phrase.Key.Length;
                            phrase   = vocabulary.GetPhrase(content.Substring(consumed));  // Continue with next phrase
                        }
                        else
                        {
                            phrase = null;
                        }
                        if (!bInfinite)
                        {
                            break;
                        }
                    }
                    if (consumed != 0)
                    {
                        return(new PatternElementInstance(Type, content.Substring(0, consumed).Trim()));       // Return that many elements as one single phrase (which by itself may not exist in the library)
                        // <Development> This can be utilized by action handlers for learning new expressions e.g. "big shinny red juicy" apple
                    }
                }
                break;

            case PatternElementType.SubPattern:
                consumed = 0;
                PatternInstance subPatternInstance = SubPattern.Match(content, vocabulary, ref consumed, false);
                if (subPatternInstance != null)
                {
                    return(new PatternElementInstance(Type, subPatternInstance));
                }
                break;

            case PatternElementType.Choice:
                // Emit a successful choice at the first match // <Improvement> A more accurate way would be to match all options and use the longest match, e.g. Courtesy Interrupt
                PatternElementInstance ChoiceInstance = null;
                foreach (PatternElement choiceElement in Choices)
                {
                    ChoiceInstance = choiceElement.MatchElement(content, vocabulary, out consumed);
                    if (ChoiceInstance != null)
                    {
                        return(new PatternElementInstance(Type, ChoiceInstance.ElementValue));
                    }
                }
                // Valid if we have at least one and only one choice
                break;

            case PatternElementType.Tag:
                consumed = content.IndexOf(content.TrimStart());
                string tagValue = MatchTag(Key, content.TrimStart(), vocabulary);
                if (tagValue != null)
                {
                    consumed += tagValue.Length;
                    return(new PatternElementInstance(Type, tagValue));
                }
                break;

            case PatternElementType.CategoryInclude:
            {
                consumed = content.IndexOf(content.TrimStart());
                string match = MatchCategory(Key, true, content.TrimStart(), vocabulary);
                if (match != null)
                {
                    consumed += match.Length;
                    return(new PatternElementInstance(Type, match));
                }
            }
            break;

            case PatternElementType.CategoryExclude:
            {
                consumed = content.IndexOf(content.TrimStart());
                string match = MatchCategory(Key, false, content.TrimStart(), vocabulary);
                if (match != null)
                {
                    consumed += match.Length;
                    return(new PatternElementInstance(Type, match));
                }
            }
            break;

            case PatternElementType.Punctuation:
                // Punctuation must appear at the very start of content; leading white space is not skipped
                if (content.IndexOf(Key) == 0)
                {
                    consumed = Key.Length;
                    return(new PatternElementInstance(Type, Key));
                }
                break;

            case PatternElementType.UnknownPhrase:
                // Try to extract the unknown part from the known
                string unknownString = vocabulary.GetUnknownPhrase(content);
                if (unknownString != null)      // Commit only if we find no match
                {
                    consumed = unknownString.Length;
                    return(new PatternElementInstance(Type, unknownString));
                }
                break;

            default:
                break;
            }
            // No match: report that nothing was consumed
            consumed = 0;
            return(null);
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Adds a word to the in-memory dictionary unless an entry with the same lower-cased key
        /// already exists.
        /// </summary>
        /// <remarks>
        /// One-character words go to _FirstCharDict, two-character words to _DoubleCharDict and
        /// longer words to _WordDict; for the latter the word length is also registered in
        /// _TripleCharDict under the key built from the first three characters.
        /// The call is ignored when the dictionary is not loaded (_WordDict is null) or when
        /// <paramref name="word"/> is null or empty.
        /// </remarks>
        /// <param name="word">Word to insert; it is stored as given, the lookup key is its lower-cased form.</param>
        /// <param name="frequency">Frequency stored with the word.</param>
        /// <param name="pos">Part of speech stored with the word.</param>
        public void InsertWord(String word, double frequency, POS pos)
        {
            if (_WordDict == null)
            {
                return;
            }

            // An empty word used to be added to _WordDict and then fail on key[0], leaving a stray
            // "" entry behind; a null word failed on ToLower(). Both are ignored now.
            if (string.IsNullOrEmpty(word))
            {
                return;
            }

            string key = word.ToLower();

            if (_WordDict.ContainsKey(key))
            {
                return;
            }

            if (key.Length == 1)
            {
                if (!_FirstCharDict.ContainsKey(key[0]))
                {
                    _FirstCharDict.Add(key[0], new WordAttribute(word, pos, frequency));
                }

                return;
            }

            if (key.Length == 2)
            {
                // Both UTF-16 code units packed into one uint.
                uint doubleChar = ((uint)key[0] * 65536) + key[1];

                if (!_DoubleCharDict.ContainsKey(doubleChar))
                {
                    _DoubleCharDict.Add(doubleChar, new WordAttribute(word, pos, frequency));
                }

                return;
            }

            // From here on the key has at least three characters.
            _WordDict.Add(key, new WordAttribute(word, pos, frequency));

            // The middle term is computed in uint so it cannot overflow int for characters
            // >= 0x8000; the resulting key value is unchanged.
            long tripleChar = ((long)key[0] * 0x100000000) + ((uint)key[1] * 65536) + key[2];

            // NOTE(review): lengths are stored as bytes, so a key longer than 255 characters is
            // registered with a truncated length - confirm such words cannot occur.
            byte[] wordLenArray;
            if (!_TripleCharDict.TryGetValue(tripleChar, out wordLenArray))
            {
                wordLenArray = new byte[4];
                wordLenArray[0] = (byte)key.Length;

                _TripleCharDict.Add(tripleChar, wordLenArray);
                return;
            }

            // Slots are filled front to back; a zero marks the first free slot.
            for (int i = 0; i < wordLenArray.Length; i++)
            {
                byte len = wordLenArray[i];

                if (len == key.Length)
                {
                    // This length is already registered.
                    return;
                }

                if (len == 0)
                {
                    wordLenArray[i] = (byte)key.Length;
                    return;
                }
            }

            // No free slot left: double the array and put the new length in the first added slot.
            byte[] grown = new byte[wordLenArray.Length * 2];

            wordLenArray.CopyTo(grown, 0);
            grown[wordLenArray.Length] = (byte)key.Length;

            _TripleCharDict[tripleChar] = grown;
        }