예제 #1
0
        /// <summary>
        /// Reads the engine's pending error text and clears it on the native side.
        /// </summary>
        /// <returns>
        /// The error text, or an empty string when the engine reports an error length of zero
        /// (in which case the native error state is left untouched).
        /// </returns>
        public string GetLastError()
        {
            if (!IsLinux)
            {
                // StringBuilder-based variant of the native error API.
                var charCount = GrammarApi.sol_GetErrorLen(_engine);
                if (charCount == 0)
                {
                    return "";
                }

                // One extra slot beyond the reported length, as in the original sizing.
                var text = new StringBuilder(charCount + 1);
                GrammarApi.sol_GetError(_engine, text, charCount);
                GrammarApi.sol_ClearError(_engine);

                return text.ToString();
            }

            // Byte-buffer variant: the bytes are decoded as UTF-8 below.
            var byteCount = GrammarApi.sol_GetErrorLen8(_engine);
            if (byteCount == 0)
            {
                return "";
            }

            var raw = new byte[byteCount];
            GrammarApi.sol_GetError8(_engine, raw, byteCount);
            GrammarApi.sol_ClearError(_engine);

            return Encoding.UTF8.GetString(raw);
        }
예제 #2
0
        /// <summary>
        /// Asks the native engine for a noun form and returns it as a managed string.
        /// </summary>
        /// <param name="id">Identifier forwarded to <c>sol_GetNounForm</c>.</param>
        /// <param name="number">Grammatical number code forwarded to the native call.</param>
        /// <param name="case">Grammatical case code forwarded to the native call.</param>
        /// <returns>The text the native call wrote into the output buffer.</returns>
        public string GetNounForm(int id, int number, int @case)
        {
            var engine = GetEngineHandle();

            // The buffer is filled by native code, so it must be pre-sized: a default
            // StringBuilder only has room for 16 characters and longer forms would overrun it.
            // Size it from the engine's maximum lexem length, never below the old default.
            var capacity = Math.Max(16, GrammarApi.sol_MaxLexemLen(engine) + 1);
            var sb       = new StringBuilder(capacity);

            GrammarApi.sol_GetNounForm(engine, id, number, @case, sb);
            return sb.ToString();
        }
예제 #3
0
        /// <summary>
        ///     Split the string into words and return the list of these words.
        ///     Language-specific rules are used to process dots, hyphens etc.
        /// </summary>
        /// <remarks>
        /// Works only on pre-segmented sentences.
        /// </remarks>
        /// <param name="text">Sentence to tokenize.</param>
        /// <param name="language">Language whose identifier is passed to the native tokenizer.</param>
        /// <returns>
        /// The tokens in the order reported by the native list, or <c>null</c> when the
        /// native tokenizer returns no handle.
        /// </returns>
        public string[] TokenizeSentence(string text, Languages language)
        {
            var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);

            if (hTokens == IntPtr.Zero)
            {
                // Existing contract: no handle means no result.
                return null;
            }

            try
            {
                var ntoken = GrammarApi.sol_CountStrings(hTokens);
                var tokens = new string[ntoken];

                // One buffer is reused for every token; it is sized for the longest lexem.
                var buffer = new StringBuilder(GrammarApi.sol_MaxLexemLen(_engine) + 1);
                for (var i = 0; i < ntoken; ++i)
                {
                    buffer.Length = 0;
                    GrammarApi.sol_GetStringW(hTokens, i, buffer);
                    tokens[i] = buffer.ToString();
                }

                return tokens;
            }
            finally
            {
                // Release the native list even if copying the tokens throws.
                GrammarApi.sol_DeleteStrings(hTokens);
            }
        }
예제 #4
0
        /// <summary>
        ///     Split the string into words and return the list of these words
        ///     in a single string separated with specified character.
        ///     Language-specific rules are used to process dots, hyphens etc.
        /// </summary>
        /// <param name="text">Text to tokenize.</param>
        /// <param name="language">Language whose identifier is passed to the native tokenizer.</param>
        /// <param name="separator">Character placed between consecutive tokens.</param>
        /// <returns>
        /// The tokens joined by <paramref name="separator"/> (no trailing separator), or an
        /// empty string when the tokenizer returns no handle or an empty list.
        /// </returns>
        public string TokenizeWithSeparator(string text, Languages language, char separator = '|')
        {
            var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);

            if (hTokens == IntPtr.Zero)
            {
                return string.Empty;
            }

            try
            {
                int ntoken = GrammarApi.sol_CountStrings(hTokens);

                if (ntoken <= 0)
                {
                    // Nothing to join. Previously this path tried to trim a separator
                    // from an empty builder and threw.
                    return string.Empty;
                }

                var result = new StringBuilder(text.Length);
                var buffer = new StringBuilder(GrammarApi.sol_MaxLexemLen(_engine) + 1);

                for (var i = 0; i < ntoken; ++i)
                {
                    // Separator goes between tokens, so there is nothing to trim afterwards.
                    if (i > 0)
                    {
                        result.Append(separator);
                    }

                    buffer.Length = 0;
                    GrammarApi.sol_GetStringW(hTokens, i, buffer);
                    result.Append(buffer.ToString());
                }

                return result.ToString();
            }
            finally
            {
                // Release the native list on every exit path, including exceptions.
                GrammarApi.sol_DeleteStrings(hTokens);
            }
        }
예제 #5
0
        /// <summary>
        /// Runs the native morphology analysis on a phrase and wraps the returned pack handle.
        /// </summary>
        /// <param name="phrase">Phrase to analyze.</param>
        /// <param name="language">Language whose identifier is passed to the native call.</param>
        /// <param name="flags">Morphology flags forwarded to the native call.</param>
        /// <param name="constraints">Constraints value forwarded to the native call.</param>
        /// <returns>An <see cref="AnalysisResults"/> built from this engine and the native handle.</returns>
        public AnalysisResults AnalyzeMorphology(string phrase, Languages language, MorphologyFlags flags, int constraints) =>
            new AnalysisResults(
                this,
                GrammarApi.sol_MorphologyAnalysis(_engine, phrase, flags, 0, constraints, (int)language));
예제 #6
0
        /// <summary>
        /// Runs the native syntax analysis on a phrase and wraps the returned pack handle.
        /// </summary>
        /// <param name="phrase">Phrase to analyze.</param>
        /// <param name="language">Language whose identifier is passed to the native call.</param>
        /// <param name="morphFlags">Morphology flags forwarded to the native call.</param>
        /// <param name="syntaxFlags">Syntax flags forwarded to the native call.</param>
        /// <param name="constraints">Constraints value forwarded to the native call.</param>
        /// <returns>An <see cref="AnalysisResults"/> built from this engine and the native handle.</returns>
        public AnalysisResults AnalyzeSyntax(string phrase, Languages language, MorphologyFlags morphFlags, SyntaxFlags syntaxFlags, int constraints) =>
            new AnalysisResults(
                this,
                GrammarApi.sol_SyntaxAnalysis(_engine, phrase, morphFlags, syntaxFlags, constraints, (int)language));
        /// <summary>
        /// Copies the text of a native tree node into a managed string.
        /// </summary>
        /// <param name="hNode">Native node handle.</param>
        /// <returns>The text the native call wrote into the buffer.</returns>
        private static string GetNodeContents(IntPtr hNode)
        {
            // Fixed-size output buffer handed to the native call.
            const int bufferCapacity = 32;

            var contents = new StringBuilder(bufferCapacity);
            GrammarApi.sol_GetNodeContents(hNode, contents);

            return contents.ToString();
        }
예제 #8
0
        /// <summary>
        /// Reads the next sentence.
        /// </summary>
        /// <returns>
        /// Sentence or null if reached the end of file. An empty string is returned when
        /// the native broker reports a zero-length sentence.
        /// </returns>
        /// <exception cref="ObjectDisposedException">The segmenter has already been disposed.</exception>
        public string ReadSentence()
        {
            if (_disposed)
            {
                // The single-string constructor treats its argument as the object name,
                // not the message; use the (objectName, message) overload instead.
                throw new ObjectDisposedException(GetType().Name, "Segmenter disposed.");
            }

            int len = GrammarApi.sol_FetchSentence(_hObject);

            if (len < 0)
            {
                // A negative length signals that nothing more can be read.
                CanRead = false;
                return null;
            }

            if (len == 0)
            {
                return string.Empty;
            }

            // Same buffer sizing as before: reported length plus two spare slots.
            var b = new StringBuilder(len + 2);

            GrammarApi.sol_GetFetchedSentence(_hObject, b);
            return b.ToString();
        }
예제 #9
0
        /// <summary>
        /// Generates word forms for a dictionary entry from a set of coordinate/state pairs.
        /// </summary>
        /// <param name="entryId">Entry identifier forwarded to the native call.</param>
        /// <param name="coordId">Coordinate ids; one pair is built per element.</param>
        /// <param name="stateId">State ids, matched to <paramref name="coordId"/> by index.</param>
        /// <returns>The generated forms; empty when the native call returns no handle.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="coordId"/> or <paramref name="stateId"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="stateId"/> has fewer elements than <paramref name="coordId"/>.
        /// </exception>
        public List <string> GenerateWordforms(int entryId, List <int> coordId, List <int> stateId)
        {
            if (coordId == null)
            {
                throw new ArgumentNullException(nameof(coordId));
            }

            if (stateId == null)
            {
                throw new ArgumentNullException(nameof(stateId));
            }

            var npairs = coordId.Count;

            // Extra states are ignored as before; missing ones are rejected up front
            // rather than failing part-way through building the array.
            if (stateId.Count < npairs)
            {
                throw new ArgumentException("A state is required for every coordinate.", nameof(stateId));
            }

            // Flatten into [coord0, state0, coord1, state1, ...] for the native call.
            var pairs = new int[npairs * 2];

            for (int i = 0, j = 0; i < npairs; ++i)
            {
                pairs[j++] = coordId[i];
                pairs[j++] = stateId[i];
            }

            var res  = new List <string>();
            var hStr = GrammarApi.sol_GenerateWordforms(_engine, entryId, npairs, pairs);

            if (hStr == IntPtr.Zero)
            {
                return res;
            }

            try
            {
                var nstr = GrammarApi.sol_CountStrings(hStr);
                for (var k = 0; k < nstr; ++k)
                {
                    res.Add(GrammarApi.sol_GetStringFX(hStr, k));
                }
            }
            finally
            {
                // Release the native list even if copying a string throws.
                GrammarApi.sol_DeleteStrings(hStr);
            }

            return res;
        }
예제 #10
0
        /// <summary>
        /// Asks the native engine for a verb form and returns it as a managed string.
        /// </summary>
        /// <param name="id">Identifier forwarded to <c>sol_GetVerbForm</c>.</param>
        /// <param name="number">Grammatical number code forwarded to the native call.</param>
        /// <param name="gender">Grammatical gender code forwarded to the native call.</param>
        /// <param name="tense">Tense code forwarded to the native call.</param>
        /// <param name="person">Person code forwarded to the native call.</param>
        /// <returns>The text the native call wrote into the output buffer.</returns>
        public string GetVerbForm(int id, int number, int gender, int tense, int person)
        {
            var engine = GetEngineHandle();

            // The buffer is filled by native code, so it must be pre-sized: a default
            // StringBuilder only has room for 16 characters and longer forms would overrun it.
            // Size it from the engine's maximum lexem length, never below the old default.
            var capacity = Math.Max(16, GrammarApi.sol_MaxLexemLen(engine) + 1);
            var sb       = new StringBuilder(capacity);

            GrammarApi.sol_GetVerbForm(engine, id, number, gender, tense, person, sb);
            return sb.ToString();
        }
예제 #11
0
 /// <summary>
 /// Deletes the native projection list if one is still held, then clears the handle.
 /// </summary>
 /// <param name="disposing">Not consulted; the native list is released either way.</param>
 protected virtual void Dispose(bool disposing)
 {
     if (_hList == IntPtr.Zero)
     {
         // Nothing held (or already released).
         return;
     }

     GrammarApi.sol_DeleteProjections(_hList);
     _hList = IntPtr.Zero;
 }
예제 #12
0
        /// <summary>
        /// Creates a view over one item of a native projection list and resolves its entry.
        /// </summary>
        /// <param name="engine">Engine used to resolve the entry.</param>
        /// <param name="hList">Native projection list handle.</param>
        /// <param name="idx">Index of this projection within the list.</param>
        internal WordProjection(GrammarEngine engine, IntPtr hList, int idx)
        {
            _engine = engine;
            _hList  = hList;
            _idx    = idx;

            // Look up the entry id for this list position, then resolve it through the engine.
            var entryId = GrammarApi.sol_GetIEntry(hList, idx);
            Entry = engine.GetEntry(entryId);
        }
예제 #13
0
 //[SecurityPermission(SecurityAction.Demand, UnmanagedCode = true)]
 /// <summary>
 /// Deletes the native links-info list if one is still held, then clears the handle.
 /// </summary>
 /// <param name="disposing">Not consulted; the native list is released either way.</param>
 protected virtual void Dispose(bool disposing)
 {
     if (_hList == IntPtr.Zero)
     {
         // Nothing held (or already released).
         return;
     }

     GrammarApi.sol_DeleteLinksInfo(_hEngine, _hList);
     _hList = IntPtr.Zero;
 }
예제 #14
0
 /// <summary>
 /// Marks the object as disposed and deletes the native sentence broker. Does nothing
 /// if the object is already marked as disposed.
 /// </summary>
 /// <param name="disposing">Not consulted by this implementation.</param>
 protected virtual void Dispose(bool disposing)
 {
     if (_disposed)
     {
         return;
     }

     // The flag is set, followed by a full memory barrier, before the native object is deleted.
     _disposed = true;
     Interlocked.MemoryBarrier();

     GrammarApi.sol_DeleteSentenceBroker(_hObject);
 }
예제 #15
0
        /// <summary>
        /// Opens a text file through the native sentence broker and wraps it in a segmenter.
        /// </summary>
        /// <param name="filePath">Path of the file to read.</param>
        /// <param name="isUtf8">
        /// When true the encoding name "utf-8" is passed to the native call, otherwise "unicode".
        /// </param>
        /// <param name="language">Language whose identifier is passed to the native call.</param>
        /// <returns>A segmenter wrapping the native broker handle.</returns>
        /// <exception cref="InvalidOperationException">The native call returned no handle.</exception>
        public TextFileSegmenter CreateTextFileSegmenter(string filePath, bool isUtf8, Languages language)
        {
            string encodingName;
            if (isUtf8)
            {
                encodingName = "utf-8";
            }
            else
            {
                encodingName = "unicode";
            }

            var hBroker = GrammarApi.sol_CreateSentenceBroker(GetEngineHandle(), filePath, encodingName, (int)language);

            if (hBroker != IntPtr.Zero)
            {
                return new TextFileSegmenter(hBroker);
            }

            throw new InvalidOperationException("Failed to create the segmenter!");
        }
예제 #16
0
        /// <summary>
        /// Deletes the native grammar engine if one is still held and clears the handle,
        /// so that calling this method again does nothing.
        /// </summary>
        /// <param name="disposing">Not consulted; the native engine is released either way.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (_engine != IntPtr.Zero)
            {
                GrammarApi.sol_DeleteGrammarEngine(_engine);

                // Clear the handle so a second Dispose (or a later finalizer pass)
                // cannot delete the same native engine twice.
                _engine = IntPtr.Zero;
            }

            //if (_lemmatizer != IntPtr.Zero)
            //{
            //    GrammarApi.sol_DeleteLemmatizator(_lemmatizer);
            //}
        }
        //[SecurityPermission(SecurityAction.Demand, UnmanagedCode = true)]
        /// <summary>
        /// Deletes the native result pack once, under the instance lock.
        /// </summary>
        /// <param name="disposing">
        /// When false the method returns without releasing anything (existing behavior, preserved).
        /// </param>
        protected virtual void Dispose(bool disposing)
        {
            if (!disposing)
            {
                return;
            }

            lock (_locker)
            {
                // Check the flag inside the lock: with the check only outside, two callers
                // could both pass it and delete the same native pack twice.
                if (_disposed)
                {
                    return;
                }

                GrammarApi.sol_DeleteResPack(_hPack);
                _disposed = true;
            }
        }
예제 #18
0
        /// <summary>
        /// Returns the name of a dictionary entry.
        /// </summary>
        /// <param name="idEntry">Entry identifier forwarded to the native call.</param>
        /// <returns>The name the native call wrote into the output buffer.</returns>
        public string GetEntryName(int idEntry)
        {
            if (IsLinux)
            {
                // Byte-buffer variant; the helper below converts the bytes to a string.
                var buf8 = GetLexemBuffer8();
                GrammarApi.sol_GetEntryName8(_engine, idEntry, buf8);
                return(Utf8ToString(buf8));
            }

            // The buffer used to be a fixed 32 characters, on the grounds that the dictionaries
            // currently contain no longer words. Size it from the engine's maximum lexem length
            // instead, never below the historical 32, so a longer name cannot overrun it.
            var capacity = Math.Max(32, GrammarApi.sol_MaxLexemLen(_engine) + 1);
            var b        = new StringBuilder(capacity);

            GrammarApi.sol_GetEntryName(_engine, idEntry, b);
            return(b.ToString());
        }
예제 #19
0
        /// <summary>
        /// Returns the name of a grammatical class (part of speech).
        /// </summary>
        /// <param name="partOfSpeechId">Class identifier forwarded to the native call.</param>
        /// <returns>The name the native call wrote into the output buffer.</returns>
        public string GetClassName(int partOfSpeechId)
        {
            if (!IsLinux)
            {
                // StringBuilder-based variant with a fixed 32-character buffer.
                var name = new StringBuilder(32);
                GrammarApi.sol_GetClassName(_engine, partOfSpeechId, name);

                return name.ToString();
            }

            // Byte-buffer variant; the helper converts the bytes to a string.
            var utf8Name = GetLexemBuffer8();
            GrammarApi.sol_GetClassName8(_engine, partOfSpeechId, utf8Name);

            return Utf8ToString(utf8Name);
        }
예제 #20
0
        //#region Lemmatization

        ///// <summary>
        ///// Lemmatize sentence. By default expects tokens to be separated by '|'.
        ///// </summary>
        ///// <param name="sentence">Sentence to lemmatize.</param>
        ///// <param name="separator">Token separator.</param>
        ///// <returns>Lemmatized tokens.</returns>
        //public string[] LemmatizeSentence(string sentence, char separator = '|')
        //{
        //    if (string.IsNullOrEmpty(sentence))
        //    {
        //        return new string[0];
        //    }

        //    var lemResult = GrammarApi.sol_LemmatizePhraseW(_lemmatizer, sentence, 0, separator);
        //    if (lemResult == IntPtr.Zero)
        //    {
        //        return new string[0];
        //    }

        //    int lemmaCnt = GrammarApi.sol_CountLemmas(lemResult);
        //    var result = new string[lemmaCnt];
        //    var buffer = new StringBuilder(120);
        //    for (int i = 0; i < lemmaCnt; i++)
        //    {
        //        GrammarApi.sol_GetLemmaStringW(lemResult, i, buffer, 120);
        //        result[i] = buffer.ToString();
        //        buffer.Clear();
        //    }

        //    GrammarApi.sol_DeleteLemmas(lemResult);

        //    return result;
        //}

        //#endregion

        #region Misc

        /// <summary>
        /// Asks the native engine to normalize the phrase represented by an analysis result.
        /// </summary>
        /// <param name="linkages">Analysis result whose native handle is passed to the engine.</param>
        /// <returns>The normalized text, or an empty string when the native call returns no pointer.</returns>
        public string NormalizePhrase(AnalysisResults linkages)
        {
            var pText = GrammarApi.sol_NormalizePhraseW(_engine, linkages.GetHandle());

            if (pText != IntPtr.Zero)
            {
                // Copy the native string into managed memory, then hand the buffer back to the engine.
                var normalized = Marshal.PtrToStringUni(pText);
                GrammarApi.sol_Free(_engine, pText);

                return normalized;
            }

            return "";
        }
예제 #21
0
        /// <summary>
        /// Builds a managed node for a native tree node, resolving its entry and source text
        /// and recursively constructing a node for each of its leafs.
        /// </summary>
        /// <param name="gren">Engine used to resolve the entry; also passed to child nodes.</param>
        /// <param name="hNode">Native node handle.</param>
        public SyntaxTreeNode(GrammarEngine gren, IntPtr hNode)
        {
            _gren  = gren;
            _hNode = hNode;

            var entryId = GrammarApi.sol_GetNodeIEntry(gren.GetEngineHandle(), hNode);
            Entry      = gren.GetEntry(entryId);
            SourceWord = GetNodeContents(hNode);

            int leafCount = GrammarApi.sol_CountLeafs(hNode);
            Leafs = new SyntaxTreeNode[leafCount];

            // Each child is wrapped the same way, so the whole subtree is materialized here.
            var k = 0;
            while (k < leafCount)
            {
                Leafs[k] = new SyntaxTreeNode(gren, GrammarApi.sol_GetLeaf(hNode, k));
                k++;
            }
        }
예제 #22
0
        /// <summary>
        /// Collects the coordinate/state pairs of this node, with their names resolved
        /// through the engine.
        /// </summary>
        /// <returns>One <see cref="CoordPair"/> per pair reported by the native node.</returns>
        private CoordPair[] GetPairs()
        {
            int pairCount = GrammarApi.sol_GetNodePairsCount(_hNode);
            var pairs     = new CoordPair[pairCount];

            for (int k = 0; k < pairCount; k++)
            {
                int coordId = GrammarApi.sol_GetNodePairCoord(_hNode, k);
                int stateId = GrammarApi.sol_GetNodePairState(_hNode, k);

                var coordName = _gren.GetCoordName(coordId);
                var stateName = _gren.GetCoordStateName(coordId, stateId);

                pairs[k] = new CoordPair(coordId, stateId, coordName, stateName);
            }

            return pairs;
        }
예제 #23
0
        /// <summary>
        /// Collects the coordinate/state pairs of this projection, with their names resolved
        /// through the engine.
        /// </summary>
        /// <returns>One <see cref="CoordPair"/> per pair reported for this projection.</returns>
        private CoordPair[] GetPairs()
        {
            int pairCount = GrammarApi.sol_GetProjCoordCount(_engine.GetEngineHandle(), _hList, _idx);
            var pairs     = new CoordPair[pairCount];

            for (int k = 0; k < pairCount; k++)
            {
                int coord = GrammarApi.sol_GetProjCoordId(_engine.GetEngineHandle(), _hList, _idx, k);
                int state = GrammarApi.sol_GetProjStateId(_engine.GetEngineHandle(), _hList, _idx, k);

                var coordName = _engine.GetCoordName(coord);
                var stateName = _engine.GetCoordStateName(coord, state);

                pairs[k] = new CoordPair(coord, state, coordName, stateName);
            }

            return pairs;
        }
예제 #24
0
        /// <summary>
        /// Validates the dictionary location, loads the dictionary into the native engine
        /// and marks the engine as initialized.
        /// </summary>
        /// <param name="dictionaryPath">
        /// Path handed to the native loader. Its directory must exist and contain "dictionary.xml".
        /// </param>
        /// <remarks>
        /// Failures are reported through <c>WarnAndThrow</c>. Loading of the lemmatizer
        /// database ("lemmatizer.db") is currently disabled.
        /// </remarks>
        public void LoadDictionary(string dictionaryPath)
        {
            _log.Info($"Loading dictionary from {dictionaryPath}");

            string directory = Path.GetDirectoryName(dictionaryPath);
            bool   hasFolder = !string.IsNullOrEmpty(directory) && Directory.Exists(directory);

            if (!hasFolder)
            {
                WarnAndThrow("Dictionary directory not found!");
            }

            string dicPath = Path.Combine(directory, "dictionary.xml");

            if (!File.Exists(dicPath))
            {
                WarnAndThrow("Dictionary file not found!");
            }

            // Pick the byte-path or wide-string loader via the platform dispatcher.
            var status = LinuxHandler(() => GrammarApi.sol_LoadDictionary8(_engine, GetUtf8Bytes(dictionaryPath)),
                                      () => GrammarApi.sol_LoadDictionaryW(_engine, dictionaryPath));

            // Any value other than 1 is treated as a load failure.
            if (status != 1)
            {
                var err = GetLastError();
                WarnAndThrow($"Failed to load dictionary from {dicPath}. {err}");
            }

            Initialized = true;

            _log.Info("Loaded dictionary.");
        }
        /// <summary>
        /// Wraps a native result pack and builds a managed node for each root it exposes.
        /// </summary>
        /// <param name="gren">Engine passed to every created <see cref="SyntaxTreeNode"/>.</param>
        /// <param name="hPack">Native result pack handle.</param>
        /// <param name="preserveMarkers">
        /// When false (default) the first and last roots are skipped (presumably the
        /// sentence boundary markers — confirm); when true every root is kept.
        /// </param>
        public AnalysisResults(GrammarEngine gren, IntPtr hPack, bool preserveMarkers = false)
        {
            _hPack = hPack;

            // The second argument 0 is passed to both root calls, as before.
            int n = GrammarApi.sol_CountRoots(_hPack, 0);

            int offset = preserveMarkers ? 0 : 1;

            // Guard the case where skipping the first and last roots leaves nothing
            // (for example a single root): the array size would otherwise go negative and throw.
            if (n <= offset * 2)
            {
                _nodes = new SyntaxTreeNode[0];
                return;
            }

            _nodes = new SyntaxTreeNode[n - offset * 2];
            for (int i = offset; i < n - offset; i++)
            {
                _nodes[i - offset] = new SyntaxTreeNode(gren, GrammarApi.sol_GetRoot(_hPack, 0, i));
            }
        }
예제 #26
0
        /// <summary>
        /// Lists the ids linked to a phrase by links of the given type.
        /// </summary>
        /// <param name="idPhrase">Phrase identifier forwarded to the native call.</param>
        /// <param name="linkType">Link type forwarded to the native call.</param>
        /// <returns>
        /// The second key of every link in the native list; empty when no list is returned.
        /// </returns>
        public List <int> GetPhrasalLinks(int idPhrase, int linkType)
        {
            var linked = new List <int>();

            var hLinks = GrammarApi.sol_ListLinksTxt(_engine, idPhrase, linkType, 1);

            if (hLinks == IntPtr.Zero)
            {
                return linked;
            }

            var count = GrammarApi.sol_LinksInfoCount(_engine, hLinks);
            for (var k = 0; k < count; k++)
            {
                linked.Add(GrammarApi.sol_LinksInfoEKey2(_engine, hLinks, k));
            }

            GrammarApi.sol_DeleteLinksInfo(_engine, hLinks);

            return linked;
        }
예제 #27
0
        /// <summary>
        /// Splits an in-memory text into sentences using the native sentence broker.
        /// The broker is always created with <c>Languages.RUSSIAN_LANGUAGE</c>.
        /// </summary>
        /// <param name="input">Text to split.</param>
        /// <returns>
        /// The non-empty sentences in the order fetched; empty when the broker cannot be created.
        /// </returns>
        public List <string> SplitSentences(string input)
        {
            var result = new List <string>();
            var broker = GrammarApi.sol_CreateSentenceBrokerMemW(_engine, input, (int)Languages.RUSSIAN_LANGUAGE);

            if (broker == IntPtr.Zero)
            {
                // Never pass a null handle to the fetch/delete calls. Returning an empty list
                // matches the other list-returning methods when the native side gives no handle.
                return result;
            }

            try
            {
                int len;

                // A negative length ends the loop; zero-length sentences are skipped.
                while ((len = GrammarApi.sol_FetchSentence(broker)) >= 0)
                {
                    if (len > 0)
                    {
                        var b = new StringBuilder(len + 2);
                        GrammarApi.sol_GetFetchedSentence(broker, b);
                        result.Add(b.ToString());
                    }
                }
            }
            finally
            {
                // Release the native broker even if reading a sentence throws.
                GrammarApi.sol_DeleteSentenceBroker(broker);
            }

            return result;
        }
예제 #28
0
 /// <summary>
 /// Returns the tags text of the link at the given position in the native list.
 /// </summary>
 /// <param name="index">Position of the link within the list.</param>
 public string GetTags(int index) =>
     GrammarApi.sol_LinksInfoTagsTxtFX(_hEngine, _hList, index);
예제 #29
0
 /// <summary>
 /// Returns the id of the link at the given position in the native list.
 /// </summary>
 /// <param name="index">Position of the link within the list.</param>
 public int GetLinkId(int index) =>
     GrammarApi.sol_LinksInfoID(_hEngine, _hList, index);
예제 #30
0
 /// <summary>
 /// Returns the second key of the link at the given position in the native list.
 /// </summary>
 /// <param name="index">Position of the link within the list.</param>
 public int GetEntry2(int index) =>
     GrammarApi.sol_LinksInfoEKey2(_hEngine, _hList, index);