/// <summary>
/// Writes <paramref name="dataset"/> to <paramref name="filename"/> as an XML document.
/// </summary>
/// <remarks>
/// Side effects on <paramref name="dataset"/>: <c>OrderInternalDictionary()</c> is called on
/// every word, and <c>dataset._wordDictionary</c> is replaced by a new <c>WordDictionary</c>
/// whose entries were inserted in descending <c>TotalWordsSeen</c> order.
/// </remarks>
/// <param name="dataset">The data set to persist.</param>
/// <param name="filename">Path of the XML file to create or overwrite.</param>
/// <returns>
/// <c>false</c> when <paramref name="filename"/> is null or empty, when
/// <paramref name="dataset"/> or its word dictionary is null, or when the dictionary holds
/// no unique words; otherwise the result of <c>File.Exists(filename)</c> after saving.
/// </returns>
public static bool SerializeToXml(TrainedDataSet dataset, string filename)
{
    // Reject an unusable target path up front, before the dataset is reordered below.
    if (string.IsNullOrEmpty(filename))
    {
        return false;
    }

    if (dataset == null || dataset._wordDictionary == null || dataset._wordDictionary.UniqueWordCount < 1)
    {
        return false;
    }

    // Sort every Word's internal dictionary.
    foreach (Word word in dataset._wordDictionary.Words)
    {
        word.OrderInternalDictionary();
    }

    // Rebuild the word dictionary with the most frequently seen words inserted first.
    // NOTE(review): this relies on Dictionary enumerating in insertion order, which the
    // framework documentation does not guarantee - confirm this is acceptable.
    dataset._wordDictionary = new WordDictionary(
        dataset._wordDictionary._internalDictionary
            .OrderByDescending(kvp => kvp.Value.TotalWordsSeen)
            .ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
    );

    // Root -> total words processed, then one element per word; each word element
    // carries its value, its total count and its key/value pairs of following words.
    XDocument doc = new XDocument(
        new XElement(XmlElementNames.RootNode,
            new XElement(XmlElementNames.TotalWordsProcessedNode, dataset._wordDictionary.TotalWordsProcessed),
            dataset._wordDictionary.Words.Select(entry =>
                new XElement(XmlElementNames.WordNode,
                    new XElement(XmlElementNames.ValueNode, entry.Value),
                    new XElement(XmlElementNames.DictionarySizeNode, entry.TotalWordsSeen),
                    new XElement(XmlElementNames.DictionaryNode,
                        entry._nextWordDictionary._internalDictionary.Select(kvp =>
                            new XElement(XmlElementNames.KeyValuePairNode,
                                new XElement(XmlElementNames.KeyNode, kvp.Key.Value),
                                new XElement(XmlElementNames.ValueNode, kvp.Value)
                            )
                        )
                    )
                )
            )
        )
    );

    doc.Save(filename, SaveOptions.None);
    return File.Exists(filename);
}
/// <summary>
/// Rebuilds a <see cref="TrainedDataSet"/> from an XML file that uses the element names
/// defined in <c>XmlElementNames</c>.
/// </summary>
/// <param name="filename">Path of the XML file to read.</param>
/// <returns>
/// An empty <see cref="TrainedDataSet"/> when the file does not exist, or when the root
/// element, the total-words element or all word elements are missing; otherwise a data set
/// wrapping the words found in the file.
/// </returns>
/// <exception cref="System.Xml.XmlException">The file content is not well-formed XML.</exception>
/// <exception cref="ArgumentException">The same word value appears in more than one word element.</exception>
public static TrainedDataSet DeserializeFromXml(string filename)
{
    if (!File.Exists(filename))
    {
        return new TrainedDataSet();
    }

    XDocument doc = XDocument.Parse(File.ReadAllText(filename), LoadOptions.None);

    XElement rootNode = doc.XPathSelectElement(XmlElementNames.RootNode);
    if (rootNode == null)
    {
        return new TrainedDataSet();
    }

    // The total-words value itself is not used here, but a file without the element
    // is still treated as invalid.
    if (rootNode.XPathSelectElement(XmlElementNames.TotalWordsProcessedNode) == null)
    {
        return new TrainedDataSet();
    }

    List<XElement> wordNodes = rootNode.XPathSelectElements(XmlElementNames.WordNode).ToList();
    if (wordNodes.Count < 1)
    {
        return new TrainedDataSet();
    }

    Dictionary<string, Word> dictionary = new Dictionary<string, Word>();

    // Pass 1: create a Word for every word element, so that pass 2 can resolve
    // references to words that appear later in the file.
    foreach (XElement wordNode in wordNodes)
    {
        XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
        XElement countNode = wordNode.XPathSelectElement(XmlElementNames.DictionarySizeNode);

        // Word elements lacking either the value or the size child are skipped.
        if (textNode == null || countNode == null)
        {
            continue;
        }

        string text = textNode.Value;
        dictionary.Add(text, new Word(text));
    }

    // Pass 2: fill each word's next-word dictionary from its key/value pair elements.
    foreach (XElement wordNode in wordNodes)
    {
        XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
        XElement dictNode = wordNode.XPathSelectElement(XmlElementNames.DictionaryNode);
        if (textNode == null || dictNode == null)
        {
            continue;
        }

        Word word;
        if (!dictionary.TryGetValue(textNode.Value, out word))
        {
            continue;
        }

        foreach (XElement kvpNode in dictNode.XPathSelectElements(XmlElementNames.KeyValuePairNode))
        {
            XElement keyNode = kvpNode.XPathSelectElement(XmlElementNames.KeyNode);
            XElement valueNode = kvpNode.XPathSelectElement(XmlElementNames.ValueNode);

            // Skip malformed pairs instead of failing with a NullReferenceException.
            if (keyNode == null || valueNode == null)
            {
                continue;
            }

            // Pairs that point at a word not declared in the file are ignored.
            Word keyWord;
            if (!dictionary.TryGetValue(keyNode.Value, out keyWord))
            {
                continue;
            }

            // An unparsable count is stored as 0 (TryParse zeroes its out argument on failure).
            int valueInt;
            int.TryParse(valueNode.Value, out valueInt);

            word._nextWordDictionary._internalDictionary.Add(keyWord, valueInt);
        }
    }

    return new TrainedDataSet(new WordDictionary(dictionary));
}
/// <summary>
/// Loads a <see cref="TrainedDataSet"/> from an XML file whose elements are named by the
/// constants in <c>XmlElementNames</c>.
/// </summary>
/// <param name="filename">Path of the XML file to read.</param>
/// <returns>
/// An empty <see cref="TrainedDataSet"/> when the file does not exist, or when the root
/// element, the total-words element or all word elements are missing; otherwise a data set
/// wrapping the words found in the file.
/// </returns>
/// <exception cref="System.Xml.XmlException">The file content is not well-formed XML.</exception>
/// <exception cref="ArgumentException">The same word value appears in more than one word element.</exception>
public static TrainedDataSet DeserializeFromXml(string filename)
{
    if (!File.Exists(filename))
    {
        return new TrainedDataSet();
    }

    XDocument doc = XDocument.Parse(File.ReadAllText(filename), LoadOptions.None);

    XElement rootNode = doc.XPathSelectElement(XmlElementNames.RootNode);
    if (rootNode == null)
    {
        return new TrainedDataSet();
    }

    // Only the presence of the total-words element matters here; its value is not used.
    XElement totalWordsNode = rootNode.XPathSelectElement(XmlElementNames.TotalWordsProcessedNode);
    if (totalWordsNode == null)
    {
        return new TrainedDataSet();
    }

    List<XElement> wordNodes = rootNode.XPathSelectElements(XmlElementNames.WordNode).ToList();
    if (wordNodes.Count < 1)
    {
        return new TrainedDataSet();
    }

    Dictionary<string, Word> dictionary = new Dictionary<string, Word>();

    // First pass: one Word per word element, created before any next-word dictionary
    // is populated so that every referenced word can be looked up in the second pass.
    foreach (XElement wordNode in wordNodes)
    {
        XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
        XElement countNode = wordNode.XPathSelectElement(XmlElementNames.DictionarySizeNode);

        // A word element must carry both its value and its size child to be accepted.
        if (textNode == null || countNode == null)
        {
            continue;
        }

        string text = textNode.Value;
        dictionary.Add(text, new Word(text));
    }

    // Second pass: populate each word's next-word dictionary.
    foreach (XElement wordNode in wordNodes)
    {
        XElement textNode = wordNode.XPathSelectElement(XmlElementNames.ValueNode);
        XElement dictNode = wordNode.XPathSelectElement(XmlElementNames.DictionaryNode);
        if (textNode == null || dictNode == null)
        {
            continue;
        }

        Word word;
        if (!dictionary.TryGetValue(textNode.Value, out word))
        {
            continue;
        }

        foreach (XElement kvpNode in dictNode.XPathSelectElements(XmlElementNames.KeyValuePairNode))
        {
            XElement keyNode = kvpNode.XPathSelectElement(XmlElementNames.KeyNode);
            XElement valueNode = kvpNode.XPathSelectElement(XmlElementNames.ValueNode);

            // A pair missing its key or value child is skipped rather than dereferenced.
            if (keyNode == null || valueNode == null)
            {
                continue;
            }

            // References to words that were never declared in the file are dropped.
            Word keyWord;
            if (!dictionary.TryGetValue(keyNode.Value, out keyWord))
            {
                continue;
            }

            // TryParse leaves 0 in the out argument when the text is not a valid integer.
            int valueInt;
            int.TryParse(valueNode.Value, out valueInt);

            word._nextWordDictionary._internalDictionary.Add(keyWord, valueInt);
        }
    }

    return new TrainedDataSet(new WordDictionary(dictionary));
}
/// <summary>
/// Saves <paramref name="dataset"/> as an XML document at <paramref name="filename"/>.
/// </summary>
/// <remarks>
/// Side effects on <paramref name="dataset"/>: <c>OrderInternalDictionary()</c> is called on
/// every word, and <c>dataset._wordDictionary</c> is replaced by a new <c>WordDictionary</c>
/// whose entries were inserted in descending <c>TotalWordsSeen</c> order.
/// </remarks>
/// <param name="dataset">The data set to persist.</param>
/// <param name="filename">Path of the XML file to create or overwrite.</param>
/// <returns>
/// <c>false</c> when <paramref name="filename"/> is null or empty, when
/// <paramref name="dataset"/> or its word dictionary is null, or when the dictionary holds
/// no unique words; otherwise the result of <c>File.Exists(filename)</c> after saving.
/// </returns>
public static bool SerializeToXml(TrainedDataSet dataset, string filename)
{
    // A missing target path is reported as failure before the dataset is touched.
    if (string.IsNullOrEmpty(filename))
    {
        return false;
    }

    if (dataset == null || dataset._wordDictionary == null || dataset._wordDictionary.UniqueWordCount < 1)
    {
        return false;
    }

    // Sort every Word's internal dictionary.
    foreach (Word word in dataset._wordDictionary.Words)
    {
        word.OrderInternalDictionary();
    }

    // Rebuild the word dictionary with the most frequently seen words inserted first.
    // NOTE(review): this relies on Dictionary enumerating in insertion order, which the
    // framework documentation does not guarantee - confirm this is acceptable.
    dataset._wordDictionary = new WordDictionary(
        dataset._wordDictionary._internalDictionary
            .OrderByDescending(kvp => kvp.Value.TotalWordsSeen)
            .ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
    );

    // Document layout: root element, total words processed, then one element per word
    // holding its value, its total count and its key/value pairs of following words.
    XDocument doc = new XDocument(
        new XElement(XmlElementNames.RootNode,
            new XElement(XmlElementNames.TotalWordsProcessedNode, dataset._wordDictionary.TotalWordsProcessed),
            dataset._wordDictionary.Words.Select(entry =>
                new XElement(XmlElementNames.WordNode,
                    new XElement(XmlElementNames.ValueNode, entry.Value),
                    new XElement(XmlElementNames.DictionarySizeNode, entry.TotalWordsSeen),
                    new XElement(XmlElementNames.DictionaryNode,
                        entry._nextWordDictionary._internalDictionary.Select(kvp =>
                            new XElement(XmlElementNames.KeyValuePairNode,
                                new XElement(XmlElementNames.KeyNode, kvp.Key.Value),
                                new XElement(XmlElementNames.ValueNode, kvp.Value)
                            )
                        )
                    )
                )
            )
        )
    );

    doc.Save(filename, SaveOptions.None);
    return File.Exists(filename);
}
/// <summary>
/// Replaces the current data set with an empty <c>TrainedDataSet</c> and then calls
/// <c>OnDataSetLoaded()</c>, but only when <c>AskIfSaveFirst()</c> returns <c>true</c>.
/// </summary>
private void NewDataSet()
{
    // Leave the current data set untouched unless AskIfSaveFirst() gives the go-ahead.
    if (!AskIfSaveFirst())
    {
        return;
    }

    dataSet = new TrainedDataSet();
    OnDataSetLoaded();
}
/// <summary>
/// Creates the form: calls <c>InitializeComponent()</c>, starts with an empty
/// <c>TrainedDataSet</c>, and sets <c>IsDatasetDirty</c> to <c>false</c>.
/// </summary>
// NOTE(review): InitializeComponent() is presumably the designer-generated control setup - confirm.
public MainForm() { InitializeComponent(); dataSet = new TrainedDataSet(); IsDatasetDirty = false; }