/// <summary>
/// Builds the in-memory .idx structure (<c>_indexByWord</c>) from the generic dictionary.
///
/// StarDict 2.4.2 doesn't support a synonyms file (.syn),
/// nor does it support several identical words pointing to different definitions.
/// This is supported in 2.4.8 but not in 2.4.2.
/// So, here we 'merge' the meanings of the same word together.
///
/// eg:
/// fan (a supporter), fan (stuff that creates air), Fan (cant remember what it means)
/// Instead of having 3 entries in the IDX that point to 3 different definitions,
/// we create 1 entry 'fan' and we put all the meanings together.
///
/// As a result we will have something like:
/// 1. fan (supporter)
/// ...
/// 2. fan (stuff for air)
/// ...
/// 3. Fan (another meaning)
///
/// Every alternate word of a meaning also gets its own entry, which references
/// the entry of its main word and shares that word's list of meanings.
/// </summary>
private async Task CreateIdxInMemory()
{
    Messaging.Send(MessageLevel.Info, "Creating .idx in memory ...");

    await Task.Run(() =>
    {
        // Built locally and published at the end, so the field never exposes a half-built index
        SortedDictionary<string, IdxStructure> index =
            new SortedDictionary<string, IdxStructure>(new AsciiComparer());

        // Group meanings for the same words (homonyms become 1 word with multiple meanings)
        Dictionary<string, List<Meaning>> meaningsForWord = _genericDictionary.AllWords.Keys
            .GroupBy(m => m.Word, StringComparer.InvariantCultureIgnoreCase)
            .ToDictionary(
                g => g.Key,
                g => g.OrderBy(m => m.Id).ToList(),
                StringComparer.InvariantCultureIgnoreCase);

        foreach (KeyValuePair<string, List<Meaning>> meaningByWord in meaningsForWord)
        {
            string mainWord = meaningByWord.Key;
            List<Meaning> meanings = meaningByWord.Value;

            IdxStructure mainWordStructure = new IdxStructure
            {
                ParentWord = null,
                Meanings = meanings
            };

            // The word may already be present as an alternate word of a previous word.
            // Assigning through the indexer adds it, or replaces that entry, so a word
            // that has its own definition always ends up being a main word.
            index[mainWord] = mainWordStructure;

            // Add all of the alternate words so they point to the definitions of their 'main word'
            foreach (string synonym in meanings.SelectMany(m => m.AlternateWords))
            {
                // Already known (as a main word or as an alternate of another word): keep that entry
                if (index.ContainsKey(synonym))
                {
                    continue;
                }

                // Add alternate word to .idx specifying the parent word
                index.Add(synonym, new IdxStructure
                {
                    ParentWord = mainWordStructure,
                    Meanings = meanings
                });
            }
        }

        _indexByWord = index;
    });

    Messaging.Send(MessageLevel.Info, $"Done, {_indexByWord.Count} word entries ready.");
}
/// <summary>
/// Writes the .dict file and records, in every entry of the in-memory index
/// (<c>_indexByWord</c>), where its definitions start and how long they are.
///
/// Only main words (entries without a parent word) have their meanings written;
/// alternate words then reuse the position and length of their parent word,
/// so the definitions of a word are stored only once in the file.
/// </summary>
/// <param name="targetDictFilePath">Path of the .dict file to create.</param>
private async Task WriteDict(string targetDictFilePath)
{
    Messaging.Send(MessageLevel.Info, "Writing .dict ...");

    int wordsToWrite = _indexByWord.Count;
    long dictSize = 0;

    await Task.Run(() =>
    {
        int wordsProcessed = 0;

        using (FileStream dictStream = new FileStream(targetDictFilePath, FileMode.Create))
        using (BinaryWriter dictWriter = new BinaryWriter(dictStream, Encoding.UTF8))
        {
            foreach (IdxStructure idxStructure in _indexByWord.Values)
            {
                // Alternate words share the definitions of their parent word: nothing to write for them
                if (idxStructure.ParentWord == null)
                {
                    List<Meaning> meanings = idxStructure.Meanings;

                    // Update position in .idx (Convert.ToUInt32 throws if the offset does not fit in 32 bits)
                    uint definitionPositionBegin = Convert.ToUInt32(dictWriter.BaseStream.Position);
                    idxStructure.DefinitionPosition = definitionPositionBegin;

                    // Write the word's meanings one after another
                    for (int index = 0; index < meanings.Count; index++)
                    {
                        Meaning meaning = meanings[index];

                        // Visually separate meanings
                        WriteWordMeaningHtmlHeader(dictWriter, meaning, index + 1, meanings.Count);

                        // A meaning can have multiple definitions, re-order them.
                        // Duplicates are removed first: Distinct() does not promise to keep an ordering.
                        IEnumerable<Definition> orderedDefinitions = _genericDictionary.AllWords[meaning]
                            .Distinct()
                            .OrderBy(d => d.MeaningId)
                            .ThenBy(d => d.Position);

                        // Write definitions for this meaning one after the other
                        foreach (Definition definition in orderedDefinitions)
                        {
                            string tweakedHtml = TweakHtml(definition.DefinitionHtml);
                            dictWriter.Write(Encoding.UTF8.GetBytes(tweakedHtml));
                        }
                    }

                    // Update length in .idx
                    uint definitionPositionEnd = Convert.ToUInt32(dictWriter.BaseStream.Position);
                    idxStructure.DefinitionLength = definitionPositionEnd - definitionPositionBegin;
                }

                // Notify progression (alternate words count too, wordsToWrite covers the whole index)
                wordsProcessed++;
                if (wordsProcessed % 2000 == 0)
                {
                    int percent = (int)(wordsProcessed * 100.0 / wordsToWrite);
                    Messaging.Send(MessageLevel.Info, $"Writing ... {percent:000}%");
                }
            }

            dictSize = dictStream.Position;
        }

        // Every main word now has its final position/length: use the same ones for its alternate words
        foreach (IdxStructure idxStructure in _indexByWord.Values)
        {
            IdxStructure parentWord = idxStructure.ParentWord;
            if (parentWord != null)
            {
                idxStructure.DefinitionPosition = parentWord.DefinitionPosition;
                idxStructure.DefinitionLength = parentWord.DefinitionLength;
            }
        }
    });

    Messaging.Send(MessageLevel.Info, $"Done, .dict written ({dictSize} bytes)");
}