예제 #1
0
        /// <summary>
        /// StarDict 2.4.2 doesn't support the synonyms file (.syn),
        /// nor does it support multiple identical words pointing to different definitions.
        /// This is supported in 2.4.8 but not in 2.4.2.
        /// So, here we 'merge' the meanings of the same word together.
        ///
        /// e.g.:
        /// fan (a supporter), fan (stuff that creates air), Fan (can't remember what it means)
        /// Instead of having 3 entries in the IDX that point to 3 different definitions,
        /// we create 1 entry 'fan' and we put all the meanings together.
        ///
        /// As a result we will have something like:
        /// 1. fan (supporter)
        ///     ...
        /// 2. fan (stuff for air)
        ///     ...
        /// 3. Fan (another meaning)
        /// </summary>
        private async Task CreateIdxInMemory()
        {
            Messaging.Send(MessageLevel.Info, "Creating .idx in memory ...");

            // Build the index off the calling thread; Task.Run creates and starts the task in one step.
            await Task.Run(() =>
            {
                _indexByWord = new SortedDictionary <string, IdxStructure>(new AsciiComparer());

                // Group meanings for the same words (homonyms become 1 word with multiple meanings).
                // Grouping is case-insensitive, and the meanings of a word are ordered by their Id.
                Dictionary <string, List <Meaning> > meaningsForWord =
                    _genericDictionary.AllWords.Keys
                    .GroupBy(m => m.Word, StringComparer.InvariantCultureIgnoreCase)
                    .ToDictionary(g => g.Key, g => g.OrderBy(m => m.Id).ToList(), StringComparer.InvariantCultureIgnoreCase);

                foreach (var meaningByWord in meaningsForWord)
                {
                    string mainWord         = meaningByWord.Key;
                    List <Meaning> meanings = meaningByWord.Value;

                    IdxStructure mainWordStructure = new IdxStructure
                    {
                        ParentWord = null,
                        Meanings   = meanings
                    };

                    // The indexer adds the main word when it is new, and overwrites the entry when
                    // the word was already registered as a synonym of another word: a word that has
                    // its own definition must stay a main word.
                    _indexByWord[mainWord] = mainWordStructure;

                    // Reference the 'main word' for each synonym
                    foreach (string synonym in meanings.SelectMany(m => m.AlternateWords))
                    {
                        // First registration wins: never replace an existing main word or
                        // a synonym that already points to another main word.
                        if (_indexByWord.ContainsKey(synonym))
                        {
                            continue;
                        }

                        // Add alternate word to .idx specifying the parent word
                        _indexByWord.Add(synonym, new IdxStructure
                        {
                            ParentWord = mainWordStructure,
                            Meanings   = meanings
                        });
                    }
                }
            });

            Messaging.Send(MessageLevel.Info, $"Done, {_indexByWord.Count} word entries ready.");
        }
예제 #2
0
        /// <summary>
        /// Writes the definitions of every main word of the in-memory index to the .dict file
        /// and records, on each index entry, the position and length of its definitions.
        /// Alternate words (entries that have a parent word) are not written a second time:
        /// they receive the position and length of their parent word.
        /// </summary>
        /// <param name="targetDictFilePath">Path of the .dict file to create (FileMode.Create: an existing file is overwritten).</param>
        private async Task WriteDict(string targetDictFilePath)
        {
            Messaging.Send(MessageLevel.Info, "Writing .dict ...");
            int  wordsToWrite = _indexByWord.Count;
            long dictSize     = 0;

            // Run the export off the calling thread; Task.Run creates and starts the task in one step.
            await Task.Run(() =>
            {
                int wordsProcessed = 0;

                using (FileStream dictStream = new FileStream(targetDictFilePath, FileMode.Create))
                using (BinaryWriter dictWriter = new BinaryWriter(dictStream, Encoding.UTF8))
                {
                    // Enumerate key/value pairs so that no extra lookup per word is needed
                    foreach (KeyValuePair <string, IdxStructure> indexEntry in _indexByWord)
                    {
                        IdxStructure idxStructure = indexEntry.Value;

                        // Only main words own a definition block. Alternate words share the block
                        // of their parent word, so writing their meanings again would only leave
                        // unreferenced duplicate bytes in the .dict file.
                        if (idxStructure.ParentWord == null)
                        {
                            List <Meaning> meanings = idxStructure.Meanings;

                            // Update position in .idx
                            // (Convert.ToUInt32 throws OverflowException if the position does not fit in 32 bits)
                            uint definitionPositionBegin    = Convert.ToUInt32(dictWriter.BaseStream.Position);
                            idxStructure.DefinitionPosition = definitionPositionBegin;

                            // Write the word's meanings one after another
                            for (int index = 0; index < meanings.Count; index++)
                            {
                                Meaning meaning = meanings[index];

                                // Visually separate meanings
                                WriteWordMeaningHtmlHeader(dictWriter, meaning, index + 1, meanings.Count);

                                // A meaning can have multiple definitions, re-order them
                                List <Definition> orderedDefinitions =
                                    _genericDictionary.AllWords[meaning]
                                    .OrderBy(d => d.MeaningId)
                                    .ThenBy(d => d.Position)
                                    .Distinct()
                                    .ToList();

                                // Write definitions for this meaning one after the other
                                foreach (Definition definition in orderedDefinitions)
                                {
                                    string tweakedHtml = TweakHtml(definition.DefinitionHtml);
                                    dictWriter.Write(Encoding.UTF8.GetBytes(tweakedHtml));
                                }
                            }

                            // Update length in .idx
                            uint definitionPositionEnd    = Convert.ToUInt32(dictWriter.BaseStream.Position);
                            idxStructure.DefinitionLength = definitionPositionEnd - definitionPositionBegin;

                            // Propagate position/length to the alternate words of this main word.
                            // An alternate word always copies the values of its own parent, which may be
                            // another main word; it is refreshed again when that parent is written.
                            foreach (string alternateWord in meanings.SelectMany(m => m.AlternateWords).Distinct())
                            {
                                IdxStructure alternateWordIdxStructure = _indexByWord[alternateWord];
                                IdxStructure parentWord                = alternateWordIdxStructure.ParentWord;

                                if (parentWord != null)
                                {
                                    alternateWordIdxStructure.DefinitionPosition = parentWord.DefinitionPosition;
                                    alternateWordIdxStructure.DefinitionLength   = parentWord.DefinitionLength;
                                }
                            }
                        }

                        // Notify progression (counted per index entry, main and alternate words alike,
                        // so the percentage stays relative to wordsToWrite)
                        wordsProcessed++;
                        if (wordsProcessed % 2000 == 0)
                        {
                            int percent = (int)(wordsProcessed * 100.0 / wordsToWrite);
                            Messaging.Send(MessageLevel.Info, $"Writing ... {percent:000}%");
                        }
                    }

                    dictSize = dictStream.Position;
                }
            });

            Messaging.Send(MessageLevel.Info, $"Done, .dict written ({dictSize} bytes)");
        }