public string GetLastError()
{
    if (IsLinux)
    {
        var len = GrammarApi.sol_GetErrorLen8(_engine);
        if (len == 0)
        {
            return "";
        }

        var errUtf8 = new byte[len];
        GrammarApi.sol_GetError8(_engine, errUtf8, len);
        GrammarApi.sol_ClearError(_engine);
        return Encoding.UTF8.GetString(errUtf8);
    }
    else
    {
        var len = GrammarApi.sol_GetErrorLen(_engine);
        if (len == 0)
        {
            return "";
        }

        var b = new StringBuilder(len + 1);
        GrammarApi.sol_GetError(_engine, b, len);
        GrammarApi.sol_ClearError(_engine);
        return b.ToString();
    }
}
public string GetNounForm(int id, int number, int @case)
{
    var sb = new StringBuilder();
    GrammarApi.sol_GetNounForm(GetEngineHandle(), id, number, @case, sb);
    return sb.ToString();
}
/// <summary>
/// Splits the string into words and returns the list of these words.
/// Language-specific rules are used to process dots, hyphens etc.
/// </summary>
/// <remarks>
/// Works only on pre-segmented sentences.
/// </remarks>
public string[] TokenizeSentence(string text, Languages language)
{
    var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);
    string[] tokens = null;
    var maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
    if (hTokens != IntPtr.Zero)
    {
        var ntoken = GrammarApi.sol_CountStrings(hTokens);
        tokens = new string[ntoken];
        var buffer = new StringBuilder(maxWordLen);
        for (var i = 0; i < ntoken; ++i)
        {
            buffer.Length = 0;
            GrammarApi.sol_GetStringW(hTokens, i, buffer);
            tokens[i] = buffer.ToString();
        }

        GrammarApi.sol_DeleteStrings(hTokens);
    }

    return tokens;
}
/// <summary>
/// Splits the string into words and returns these words in a single string
/// separated with the specified character.
/// Language-specific rules are used to process dots, hyphens etc.
/// </summary>
public string TokenizeWithSeparator(string text, Languages language, char separator = '|')
{
    var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);
    if (hTokens == IntPtr.Zero)
    {
        return string.Empty;
    }

    var result = new StringBuilder(text.Length);
    int maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
    int ntoken = GrammarApi.sol_CountStrings(hTokens);
    var buffer = new StringBuilder(maxWordLen);
    for (var i = 0; i < ntoken; ++i)
    {
        buffer.Length = 0;
        GrammarApi.sol_GetStringW(hTokens, i, buffer);
        result.Append(buffer.ToString()).Append(separator);
    }

    // Drop the trailing separator; guard against an empty token list.
    if (result.Length > 0)
    {
        result.Length--;
    }

    GrammarApi.sol_DeleteStrings(hTokens);
    return result.ToString();
}
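// Example (illustrative sketch, not part of the original source): tokenizing a
// pre-segmented sentence with an already initialized GrammarEngine instance
// (see LoadDictionary below). The sample sentence is arbitrary.
//
//     string[] words = engine.TokenizeSentence("Кошка спит на диване.", Languages.RUSSIAN_LANGUAGE);
//     string joined = engine.TokenizeWithSeparator("Кошка спит на диване.", Languages.RUSSIAN_LANGUAGE, '|');
//     // joined looks like "Кошка|спит|на|диване|." (exact tokenization depends on the dictionary)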
public AnalysisResults AnalyzeMorphology(string phrase, Languages language, MorphologyFlags flags, int constraints)
{
    var hPack = GrammarApi.sol_MorphologyAnalysis(_engine, phrase, flags, 0, constraints, (int)language);
    var res = new AnalysisResults(this, hPack);
    return res;
}
public AnalysisResults AnalyzeSyntax(string phrase, Languages language, MorphologyFlags morphFlags, SyntaxFlags syntaxFlags, int constraints)
{
    var hPack = GrammarApi.sol_SyntaxAnalysis(_engine, phrase, morphFlags, syntaxFlags, constraints, (int)language);
    var res = new AnalysisResults(this, hPack);
    return res;
}
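// Example (illustrative sketch, not part of the original source): running
// morphological and syntactic analysis. Flag values are left at default(...)
// because the concrete enum members are not shown here, and the constraints
// argument is simply set to 0 in this sketch.
//
//     var morphology = engine.AnalyzeMorphology("Кошка спит.", Languages.RUSSIAN_LANGUAGE,
//                                               default(MorphologyFlags), 0);
//     var syntax = engine.AnalyzeSyntax("Кошка спит.", Languages.RUSSIAN_LANGUAGE,
//                                       default(MorphologyFlags), default(SyntaxFlags), 0);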
private static string GetNodeContents(IntPtr hNode)
{
    var b = new StringBuilder(32);
    GrammarApi.sol_GetNodeContents(hNode, b);
    return b.ToString();
}
/// <summary>
/// Reads the next sentence.
/// </summary>
/// <returns>Sentence, or null if the end of the file has been reached.</returns>
public string ReadSentence()
{
    if (_disposed)
    {
        throw new ObjectDisposedException("Segmenter disposed.");
    }

    int len;
    if ((len = GrammarApi.sol_FetchSentence(_hObject)) < 0)
    {
        CanRead = false;
        return null;
    }

    if (len == 0)
    {
        return string.Empty;
    }

    var b = new StringBuilder(len + 2);
    GrammarApi.sol_GetFetchedSentence(_hObject, b);
    return b.ToString();
}
public List<string> GenerateWordforms(int entryId, List<int> coordId, List<int> stateId)
{
    var npairs = coordId.Count;
    var pairs = new int[npairs * 2];
    for (int i = 0, j = 0; i < npairs; ++i)
    {
        pairs[j++] = coordId[i];
        pairs[j++] = stateId[i];
    }

    var res = new List<string>();
    var hStr = GrammarApi.sol_GenerateWordforms(_engine, entryId, npairs, pairs);
    if (hStr != IntPtr.Zero)
    {
        var nstr = GrammarApi.sol_CountStrings(hStr);
        for (var k = 0; k < nstr; ++k)
        {
            res.Add(GrammarApi.sol_GetStringFX(hStr, k));
        }

        GrammarApi.sol_DeleteStrings(hStr);
    }

    return res;
}
public string GetVerbForm(int id, int number, int gender, int tense, int person)
{
    var sb = new StringBuilder();
    GrammarApi.sol_GetVerbForm(GetEngineHandle(), id, number, gender, tense, person, sb);
    return sb.ToString();
}
protected virtual void Dispose(bool disposing)
{
    if (_hList != IntPtr.Zero)
    {
        GrammarApi.sol_DeleteProjections(_hList);
        _hList = IntPtr.Zero;
    }
}
internal WordProjection(GrammarEngine engine, IntPtr hList, int idx)
{
    _engine = engine;
    _hList = hList;
    _idx = idx;
    Entry = _engine.GetEntry(GrammarApi.sol_GetIEntry(_hList, _idx));
}
//[SecurityPermission(SecurityAction.Demand, UnmanagedCode = true)]
protected virtual void Dispose(bool disposing)
{
    if (_hList != IntPtr.Zero)
    {
        GrammarApi.sol_DeleteLinksInfo(_hEngine, _hList);
        _hList = IntPtr.Zero;
    }
}
/// <summary>
/// Dispose.
/// </summary>
protected virtual void Dispose(bool disposing)
{
    if (!_disposed)
    {
        _disposed = true;
        Interlocked.MemoryBarrier();
        GrammarApi.sol_DeleteSentenceBroker(_hObject);
    }
}
/// <summary>
/// Creates a segmenter that reads sentences from a text file.
/// </summary>
/// <param name="filePath">File path.</param>
/// <param name="isUtf8">Indicates whether the file is UTF-8 encoded. Unicode is assumed otherwise.</param>
/// <param name="language">Language to use.</param>
/// <returns>Text file segmenter.</returns>
public TextFileSegmenter CreateTextFileSegmenter(string filePath, bool isUtf8, Languages language)
{
    var h = GrammarApi.sol_CreateSentenceBroker(GetEngineHandle(), filePath, isUtf8 ? "utf-8" : "unicode", (int)language);
    if (h == IntPtr.Zero)
    {
        throw new InvalidOperationException("Failed to create the segmenter!");
    }

    return new TextFileSegmenter(h);
}
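// Example (illustrative sketch, not part of the original source): reading a file
// sentence by sentence. The path is a placeholder, and TextFileSegmenter is
// assumed to expose the standard IDisposable pattern on top of its Dispose(bool).
//
//     using (var segmenter = engine.CreateTextFileSegmenter("corpus.txt", true, Languages.RUSSIAN_LANGUAGE))
//     {
//         string sentence;
//         while ((sentence = segmenter.ReadSentence()) != null)
//         {
//             Console.WriteLine(sentence);
//         }
//     }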
protected virtual void Dispose(bool disposing)
{
    if (_engine != IntPtr.Zero)
    {
        GrammarApi.sol_DeleteGrammarEngine(_engine);
        // Reset the handle so a repeated Dispose call does not free it twice.
        _engine = IntPtr.Zero;
    }

    //if (_lemmatizer != IntPtr.Zero)
    //{
    //    GrammarApi.sol_DeleteLemmatizator(_lemmatizer);
    //}
}
//[SecurityPermission(SecurityAction.Demand, UnmanagedCode = true)]
protected virtual void Dispose(bool disposing)
{
    if (!disposing || _disposed)
    {
        return;
    }

    lock (_locker)
    {
        GrammarApi.sol_DeleteResPack(_hPack);
        _disposed = true;
    }
}
public string GetEntryName(int idEntry)
{
    if (IsLinux)
    {
        var buf8 = GetLexemBuffer8();
        GrammarApi.sol_GetEntryName8(_engine, idEntry, buf8);
        return Utf8ToString(buf8);
    }

    // The magic constant 32 works because the dictionaries currently contain no words longer than 32 characters.
    var b = new StringBuilder(32);
    GrammarApi.sol_GetEntryName(_engine, idEntry, b);
    return b.ToString();
}
public string GetClassName(int partOfSpeechId)
{
    if (IsLinux)
    {
        var buf8 = GetLexemBuffer8();
        GrammarApi.sol_GetClassName8(_engine, partOfSpeechId, buf8);
        return Utf8ToString(buf8);
    }

    var b = new StringBuilder(32);
    GrammarApi.sol_GetClassName(_engine, partOfSpeechId, b);
    return b.ToString();
}
//#region Lemmatization
///// <summary>
///// Lemmatize sentence. By default expects tokens to be separated by '|'.
///// </summary>
///// <param name="sentence">Sentence to lemmatize.</param>
///// <param name="separator">Token separator.</param>
///// <returns>Lemmatized tokens.</returns>
//public string[] LemmatizeSentence(string sentence, char separator = '|')
//{
//    if (string.IsNullOrEmpty(sentence))
//    {
//        return new string[0];
//    }
//    var lemResult = GrammarApi.sol_LemmatizePhraseW(_lemmatizer, sentence, 0, separator);
//    if (lemResult == IntPtr.Zero)
//    {
//        return new string[0];
//    }
//    int lemmaCnt = GrammarApi.sol_CountLemmas(lemResult);
//    var result = new string[lemmaCnt];
//    var buffer = new StringBuilder(120);
//    for (int i = 0; i < lemmaCnt; i++)
//    {
//        GrammarApi.sol_GetLemmaStringW(lemResult, i, buffer, 120);
//        result[i] = buffer.ToString();
//        buffer.Clear();
//    }
//    GrammarApi.sol_DeleteLemmas(lemResult);
//    return result;
//}
//#endregion

#region Misc

public string NormalizePhrase(AnalysisResults linkages)
{
    var wchar_ptr = GrammarApi.sol_NormalizePhraseW(_engine, linkages.GetHandle());
    if (wchar_ptr == IntPtr.Zero)
    {
        return "";
    }

    var res = Marshal.PtrToStringUni(wchar_ptr);
    GrammarApi.sol_Free(_engine, wchar_ptr);
    return res;
}
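// Example (illustrative sketch, not part of the original source): bringing an
// analyzed phrase to its normal form, reusing the AnalysisResults produced by
// AnalyzeSyntax in the sketch above.
//
//     string normalized = engine.NormalizePhrase(syntax);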
public SyntaxTreeNode(GrammarEngine gren, IntPtr hNode)
{
    _gren = gren;
    _hNode = hNode;
    Entry = gren.GetEntry(GrammarApi.sol_GetNodeIEntry(_gren.GetEngineHandle(), _hNode));
    SourceWord = GetNodeContents(_hNode);

    int nleaf = GrammarApi.sol_CountLeafs(_hNode);
    Leafs = new SyntaxTreeNode[nleaf];
    for (int i = 0; i < nleaf; ++i)
    {
        Leafs[i] = new SyntaxTreeNode(_gren, GrammarApi.sol_GetLeaf(_hNode, i));
    }
}
private CoordPair[] GetPairs()
{
    int n = GrammarApi.sol_GetNodePairsCount(_hNode);
    var res = new CoordPair[n];
    for (int i = 0; i < n; ++i)
    {
        int coord = GrammarApi.sol_GetNodePairCoord(_hNode, i);
        int state = GrammarApi.sol_GetNodePairState(_hNode, i);
        res[i] = new CoordPair(coord, state, _gren.GetCoordName(coord), _gren.GetCoordStateName(coord, state));
    }

    return res;
}
private CoordPair[] GetPairs()
{
    int cnt = GrammarApi.sol_GetProjCoordCount(_engine.GetEngineHandle(), _hList, _idx);
    var res = new CoordPair[cnt];
    for (int i = 0; i < cnt; ++i)
    {
        int coordId = GrammarApi.sol_GetProjCoordId(_engine.GetEngineHandle(), _hList, _idx, i);
        int stateId = GrammarApi.sol_GetProjStateId(_engine.GetEngineHandle(), _hList, _idx, i);
        res[i] = new CoordPair(coordId, stateId, _engine.GetCoordName(coordId), _engine.GetCoordStateName(coordId, stateId));
    }

    return res;
}
public void LoadDictionary(string dictionaryPath)
{
    _log.Info($"Loading dictionary from {dictionaryPath}");
    string dir = Path.GetDirectoryName(dictionaryPath);
    if (string.IsNullOrEmpty(dir) || !Directory.Exists(dir))
    {
        WarnAndThrow("Dictionary directory not found!");
    }

    string dicPath = Path.Combine(dir, "dictionary.xml");
    if (!File.Exists(dicPath))
    {
        WarnAndThrow("Dictionary file not found!");
    }

    //string lemPath = Path.Combine(dir, "lemmatizer.db");
    //if (!File.Exists(lemPath))
    //{
    //    throw new InvalidOperationException("Lemmatizer database not found!");
    //}

    var result = LinuxHandler(
        () => GrammarApi.sol_LoadDictionary8(_engine, GetUtf8Bytes(dictionaryPath)),
        () => GrammarApi.sol_LoadDictionaryW(_engine, dictionaryPath));
    if (result != 1)
    {
        var err = GetLastError();
        WarnAndThrow($"Failed to load dictionary from {dictionaryPath}. {err}");
    }

    //_lemmatizer = GrammarApi.sol_LoadLemmatizatorW(lemPath, LemmatizerFlags.Default);
    //if (_lemmatizer == IntPtr.Zero)
    //{
    //    var err = GetLastError();
    //    throw new InvalidOperationException($"Failed to load dictionary from {dicPath}. {err}");
    //}

    Initialized = true;
    _log.Info("Loaded dictionary.");
}
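// Example (illustrative sketch, not part of the original source): initializing
// the engine. The parameterless constructor and the dictionary path are
// assumptions made for the example; LoadDictionary throws (via WarnAndThrow)
// when the dictionary cannot be loaded.
//
//     var engine = new GrammarEngine();
//     engine.LoadDictionary(@"C:\solarix\dictionary.xml");
//     // engine.Initialized is now true.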
public AnalysisResults(GrammarEngine gren, IntPtr hPack, bool preserveMarkers = false)
{
    _hPack = hPack;
    int n = GrammarApi.sol_CountRoots(_hPack, 0);
    if (n == 0)
    {
        _nodes = new SyntaxTreeNode[0];
        return;
    }

    // Unless markers are preserved, skip the first and the last root nodes (the sentence markers).
    int offset = preserveMarkers ? 0 : 1;
    _nodes = new SyntaxTreeNode[n - offset * 2];
    for (int i = offset; i < n - offset; i++)
    {
        _nodes[i - offset] = new SyntaxTreeNode(gren, GrammarApi.sol_GetRoot(_hPack, 0, i));
    }
}
public List<int> GetPhrasalLinks(int idPhrase, int linkType)
{
    var res = new List<int>();
    var hList = GrammarApi.sol_ListLinksTxt(_engine, idPhrase, linkType, 1);
    if (hList != IntPtr.Zero)
    {
        var n = GrammarApi.sol_LinksInfoCount(_engine, hList);
        for (var i = 0; i < n; ++i)
        {
            var idPhrase2 = GrammarApi.sol_LinksInfoEKey2(_engine, hList, i);
            res.Add(idPhrase2);
        }

        GrammarApi.sol_DeleteLinksInfo(_engine, hList);
    }

    return res;
}
public List<string> SplitSentences(string input)
{
    var result = new List<string>();
    var broker = GrammarApi.sol_CreateSentenceBrokerMemW(_engine, input, (int)Languages.RUSSIAN_LANGUAGE);
    if (broker == IntPtr.Zero)
    {
        // The native call failed; return an empty list instead of fetching from a null handle.
        return result;
    }

    int len;
    while ((len = GrammarApi.sol_FetchSentence(broker)) >= 0)
    {
        if (len > 0)
        {
            var b = new StringBuilder(len + 2);
            GrammarApi.sol_GetFetchedSentence(broker, b);
            result.Add(b.ToString());
        }
    }

    GrammarApi.sol_DeleteSentenceBroker(broker);
    return result;
}
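// Example (illustrative sketch, not part of the original source): splitting raw
// Russian text held in memory into sentences.
//
//     List<string> sentences = engine.SplitSentences("Мама мыла раму. Кошка спит.");
//     // Expected: two sentences; exact segmentation depends on the dictionary.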
public string GetTags(int index)
{
    return GrammarApi.sol_LinksInfoTagsTxtFX(_hEngine, _hList, index);
}

public int GetLinkId(int index)
{
    return GrammarApi.sol_LinksInfoID(_hEngine, _hList, index);
}

public int GetEntry2(int index)
{
    return GrammarApi.sol_LinksInfoEKey2(_hEngine, _hList, index);
}