Beispiel #1
0
        /// <summary>
        /// Writes a trained data set to an XML file.
        /// </summary>
        /// <param name="dataset">Data set to serialize.</param>
        /// <param name="filename">Path of the XML file to write.</param>
        /// <returns>
        /// <c>true</c> when the file exists after saving; <c>false</c> when <paramref name="dataset"/> is
        /// null, has no word dictionary, contains no unique words, or when <paramref name="filename"/>
        /// is null or empty.
        /// </returns>
        /// <remarks>
        /// Side effect: every word's internal dictionary is reordered, and the data set's word dictionary
        /// is replaced by a copy ordered by descending <c>TotalWordsSeen</c>.
        /// Exceptions thrown by <c>XDocument.Save</c> (for example I/O failures) are not caught here.
        /// </remarks>
        public static bool SerializeToXml(TrainedDataSet dataset, string filename)
        {
            if (dataset == null || dataset._wordDictionary == null || dataset._wordDictionary.UniqueWordCount < 1)
            {
                return false;
            }

            // Reject an unusable path up front, before the data set is reordered below, so an invalid
            // call reports failure like the other guards instead of throwing from XDocument.Save.
            if (string.IsNullOrEmpty(filename))
            {
                return false;
            }

            // Sort every Word's internal dictionary
            foreach (Word word in dataset._wordDictionary.Words)
            {
                word.OrderInternalDictionary();
            }

            // Rebuild the word dictionary so the most frequently seen words come first
            dataset._wordDictionary =
                new WordDictionary(
                    dataset._wordDictionary._internalDictionary
                        .OrderByDescending(kvp => kvp.Value.TotalWordsSeen)
                        .ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
                );

            // Layout: root -> total words processed, then one element per word holding its value,
            // its total count and the list of (next word, count) pairs.
            XDocument doc = new XDocument(
                new XElement(XmlElementNames.RootNode,
                    new XElement(XmlElementNames.TotalWordsProcessedNode, dataset._wordDictionary.TotalWordsProcessed),
                    dataset._wordDictionary.Words.Select(word =>
                        new XElement(XmlElementNames.WordNode,
                            new XElement(XmlElementNames.ValueNode, word.Value),
                            new XElement(XmlElementNames.DictionarySizeNode, word.TotalWordsSeen),
                            new XElement(XmlElementNames.DictionaryNode,
                                word._nextWordDictionary._internalDictionary.Select(kvp =>
                                    new XElement(XmlElementNames.KeyValuePairNode,
                                        new XElement(XmlElementNames.KeyNode, kvp.Key.Value),
                                        new XElement(XmlElementNames.ValueNode, kvp.Value)
                                    )
                                )
                            )
                        )
                    )
                )
            );

            // "new XDocument(...)" never yields null, so the document can be saved unconditionally.
            doc.Save(filename, SaveOptions.None);
            return File.Exists(filename);
        }
Beispiel #2
0
        /// <summary>
        /// Loads a trained data set from an XML file.
        /// </summary>
        /// <param name="filename">Path of the XML file to read.</param>
        /// <returns>
        /// The reconstructed data set, or a new <see cref="TrainedDataSet"/> when the file does not exist,
        /// the root element or the total-words element is missing, or the root has no word elements.
        /// </returns>
        /// <remarks>
        /// Malformed entries are skipped instead of aborting the load: word elements without a value or
        /// count element, repeated words, key/value pairs with a missing key or value element, pairs that
        /// reference an unknown word, and repeated pairs for the same word (the first occurrence wins).
        /// Exceptions thrown by <c>File.ReadAllText</c> or <c>XDocument.Parse</c> are not caught here.
        /// </remarks>
        public static TrainedDataSet DeserializeFromXml(string filename)
        {
            if (!File.Exists(filename))
            {
                return new TrainedDataSet();
            }

            // XDocument.Parse either returns a document or throws, so no null check is needed.
            XDocument doc = XDocument.Parse(File.ReadAllText(filename), LoadOptions.None);

            XElement rootNode = doc.XPathSelectElement(XmlElementNames.RootNode);
            if (rootNode == null)
            {
                return new TrainedDataSet();
            }

            // The total-words element is only required to be present; its value is not applied to the result.
            // NOTE(review): presumably WordDictionary recomputes its totals from the words - confirm.
            if (rootNode.XPathSelectElement(XmlElementNames.TotalWordsProcessedNode) == null)
            {
                return new TrainedDataSet();
            }

            List<XElement> wordNodes = rootNode.XPathSelectElements(XmlElementNames.WordNode).ToList();
            if (wordNodes.Count < 1)
            {
                return new TrainedDataSet();
            }

            Dictionary<string, Word> dictionary = new Dictionary<string, Word>();

            // Pass 1: create a Word for every word element, so that pass 2 can link words to each other
            // regardless of the order in which they appear in the file.
            foreach (XElement wordNode in wordNodes)
            {
                XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
                XElement countNode = wordNode.XPathSelectElement(XmlElementNames.DictionarySizeNode);

                // The count element must exist for the word to be accepted, but its value is not restored.
                if (textNode == null || countNode == null)
                {
                    continue;
                }

                string text = textNode.Value;

                // A repeated word would make Dictionary.Add throw; keep the first occurrence.
                if (dictionary.ContainsKey(text))
                {
                    continue;
                }

                dictionary.Add(text, new Word(text));
            }

            // Next-word keys already stored per word, used to skip repeated pairs instead of letting
            // the Add call below throw.
            Dictionary<string, HashSet<string>> seenNextWords = new Dictionary<string, HashSet<string>>();

            // Pass 2: populate each word's next-word dictionary.
            foreach (XElement wordNode in wordNodes)
            {
                XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
                XElement dictNode = wordNode.XPathSelectElement(XmlElementNames.DictionaryNode);

                if (textNode == null || dictNode == null)
                {
                    continue;
                }

                string text = textNode.Value;

                Word word;
                if (!dictionary.TryGetValue(text, out word))
                {
                    continue;
                }

                HashSet<string> seenKeys;
                if (!seenNextWords.TryGetValue(text, out seenKeys))
                {
                    seenKeys = new HashSet<string>();
                    seenNextWords.Add(text, seenKeys);
                }

                foreach (XElement kvpNode in dictNode.XPathSelectElements(XmlElementNames.KeyValuePairNode))
                {
                    XElement keyNode = kvpNode.XPathSelectElement(XmlElementNames.KeyNode);
                    XElement valueNode = kvpNode.XPathSelectElement(XmlElementNames.ValueNode);

                    // A pair lacking either element cannot be restored; skip it rather than dereference null.
                    if (keyNode == null || valueNode == null)
                    {
                        continue;
                    }

                    string keyText = keyNode.Value;

                    Word keyWord;
                    if (!dictionary.TryGetValue(keyText, out keyWord))
                    {
                        continue;
                    }

                    // HashSet.Add returns false when the key was already stored for this word.
                    if (!seenKeys.Add(keyText))
                    {
                        continue;
                    }

                    // The file is machine-written, so parse with the invariant culture.
                    // An unparsable count is stored as 0.
                    int valueInt;
                    if (!int.TryParse(
                            valueNode.Value,
                            System.Globalization.NumberStyles.Integer,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out valueInt))
                    {
                        valueInt = 0;
                    }

                    word._nextWordDictionary._internalDictionary.Add(keyWord, valueInt);
                }
            }

            return new TrainedDataSet(new WordDictionary(dictionary));
        }
        /// <summary>
        /// Loads a trained data set from an XML file.
        /// </summary>
        /// <param name="filename">Path of the XML file to read.</param>
        /// <returns>
        /// The reconstructed data set, or a new <see cref="TrainedDataSet"/> when the file does not exist,
        /// the root element or the total-words element is missing, or the root has no word elements.
        /// </returns>
        /// <remarks>
        /// Malformed entries are skipped instead of aborting the load: word elements without a value or
        /// count element, repeated words, key/value pairs with a missing key or value element, pairs that
        /// reference an unknown word, and repeated pairs for the same word (the first occurrence wins).
        /// Exceptions thrown by <c>File.ReadAllText</c> or <c>XDocument.Parse</c> are not caught here.
        /// </remarks>
        public static TrainedDataSet DeserializeFromXml(string filename)
        {
            if (!File.Exists(filename))
            {
                return new TrainedDataSet();
            }

            // XDocument.Parse either returns a document or throws, so no null check is needed.
            XDocument doc = XDocument.Parse(File.ReadAllText(filename), LoadOptions.None);

            XElement rootNode = doc.XPathSelectElement(XmlElementNames.RootNode);
            if (rootNode == null)
            {
                return new TrainedDataSet();
            }

            // The total-words element is only required to be present; its value is not applied to the result.
            // NOTE(review): presumably WordDictionary recomputes its totals from the words - confirm.
            if (rootNode.XPathSelectElement(XmlElementNames.TotalWordsProcessedNode) == null)
            {
                return new TrainedDataSet();
            }

            List<XElement> wordNodes = rootNode.XPathSelectElements(XmlElementNames.WordNode).ToList();
            if (wordNodes.Count < 1)
            {
                return new TrainedDataSet();
            }

            Dictionary<string, Word> dictionary = new Dictionary<string, Word>();

            // Pass 1: create a Word for every word element, so that pass 2 can link words to each other
            // regardless of the order in which they appear in the file.
            foreach (XElement wordNode in wordNodes)
            {
                XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
                XElement countNode = wordNode.XPathSelectElement(XmlElementNames.DictionarySizeNode);

                // The count element must exist for the word to be accepted, but its value is not restored.
                if (textNode == null || countNode == null)
                {
                    continue;
                }

                string text = textNode.Value;

                // A repeated word would make Dictionary.Add throw; keep the first occurrence.
                if (dictionary.ContainsKey(text))
                {
                    continue;
                }

                dictionary.Add(text, new Word(text));
            }

            // Next-word keys already stored per word, used to skip repeated pairs instead of letting
            // the Add call below throw.
            Dictionary<string, HashSet<string>> seenNextWords = new Dictionary<string, HashSet<string>>();

            // Pass 2: populate each word's next-word dictionary.
            foreach (XElement wordNode in wordNodes)
            {
                XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
                XElement dictNode = wordNode.XPathSelectElement(XmlElementNames.DictionaryNode);

                if (textNode == null || dictNode == null)
                {
                    continue;
                }

                string text = textNode.Value;

                Word word;
                if (!dictionary.TryGetValue(text, out word))
                {
                    continue;
                }

                HashSet<string> seenKeys;
                if (!seenNextWords.TryGetValue(text, out seenKeys))
                {
                    seenKeys = new HashSet<string>();
                    seenNextWords.Add(text, seenKeys);
                }

                foreach (XElement kvpNode in dictNode.XPathSelectElements(XmlElementNames.KeyValuePairNode))
                {
                    XElement keyNode = kvpNode.XPathSelectElement(XmlElementNames.KeyNode);
                    XElement valueNode = kvpNode.XPathSelectElement(XmlElementNames.ValueNode);

                    // A pair lacking either element cannot be restored; skip it rather than dereference null.
                    if (keyNode == null || valueNode == null)
                    {
                        continue;
                    }

                    string keyText = keyNode.Value;

                    Word keyWord;
                    if (!dictionary.TryGetValue(keyText, out keyWord))
                    {
                        continue;
                    }

                    // HashSet.Add returns false when the key was already stored for this word.
                    if (!seenKeys.Add(keyText))
                    {
                        continue;
                    }

                    // The file is machine-written, so parse with the invariant culture.
                    // An unparsable count is stored as 0.
                    int valueInt;
                    if (!int.TryParse(
                            valueNode.Value,
                            System.Globalization.NumberStyles.Integer,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out valueInt))
                    {
                        valueInt = 0;
                    }

                    word._nextWordDictionary._internalDictionary.Add(keyWord, valueInt);
                }
            }

            return new TrainedDataSet(new WordDictionary(dictionary));
        }
        /// <summary>
        /// Writes a trained data set to an XML file.
        /// </summary>
        /// <param name="dataset">Data set to serialize.</param>
        /// <param name="filename">Path of the XML file to write.</param>
        /// <returns>
        /// <c>true</c> when the file exists after saving; <c>false</c> when <paramref name="dataset"/> is
        /// null, has no word dictionary, contains no unique words, or when <paramref name="filename"/>
        /// is null or empty.
        /// </returns>
        /// <remarks>
        /// Side effect: every word's internal dictionary is reordered, and the data set's word dictionary
        /// is replaced by a copy ordered by descending <c>TotalWordsSeen</c>.
        /// Exceptions thrown by <c>XDocument.Save</c> (for example I/O failures) are not caught here.
        /// </remarks>
        public static bool SerializeToXml(TrainedDataSet dataset, string filename)
        {
            if (dataset == null || dataset._wordDictionary == null || dataset._wordDictionary.UniqueWordCount < 1)
            {
                return false;
            }

            // Reject an unusable path up front, before the data set is reordered below, so an invalid
            // call reports failure like the other guards instead of throwing from XDocument.Save.
            if (string.IsNullOrEmpty(filename))
            {
                return false;
            }

            // Sort every Word's internal dictionary
            foreach (Word word in dataset._wordDictionary.Words)
            {
                word.OrderInternalDictionary();
            }

            // Rebuild the word dictionary so the most frequently seen words come first
            dataset._wordDictionary =
                new WordDictionary(
                    dataset._wordDictionary._internalDictionary
                        .OrderByDescending(kvp => kvp.Value.TotalWordsSeen)
                        .ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
                );

            // Layout: root -> total words processed, then one element per word holding its value,
            // its total count and the list of (next word, count) pairs.
            XDocument doc = new XDocument(
                new XElement(XmlElementNames.RootNode,
                    new XElement(XmlElementNames.TotalWordsProcessedNode, dataset._wordDictionary.TotalWordsProcessed),
                    dataset._wordDictionary.Words.Select(word =>
                        new XElement(XmlElementNames.WordNode,
                            new XElement(XmlElementNames.ValueNode, word.Value),
                            new XElement(XmlElementNames.DictionarySizeNode, word.TotalWordsSeen),
                            new XElement(XmlElementNames.DictionaryNode,
                                word._nextWordDictionary._internalDictionary.Select(kvp =>
                                    new XElement(XmlElementNames.KeyValuePairNode,
                                        new XElement(XmlElementNames.KeyNode, kvp.Key.Value),
                                        new XElement(XmlElementNames.ValueNode, kvp.Value)
                                    )
                                )
                            )
                        )
                    )
                )
            );

            // "new XDocument(...)" never yields null, so the document can be saved unconditionally.
            doc.Save(filename, SaveOptions.None);
            return File.Exists(filename);
        }
 /// <summary>
 /// Replaces the current data set with a new <see cref="TrainedDataSet"/> and then calls
 /// OnDataSetLoaded(), but only when AskIfSaveFirst() returns true.
 /// </summary>
 private void NewDataSet()
 {
     // NOTE(review): presumably AskIfSaveFirst() returns false when the user cancels - confirm.
     if (!AskIfSaveFirst())
     {
         return;
     }

     dataSet = new TrainedDataSet();
     OnDataSetLoaded();
 }
 /// <summary>
 /// Creates the form: runs InitializeComponent(), assigns a new <see cref="TrainedDataSet"/>
 /// to dataSet, and sets IsDatasetDirty to false.
 /// </summary>
 public MainForm()
 {
     InitializeComponent();
     dataSet = new TrainedDataSet();
     IsDatasetDirty = false;
 }