/// <summary>
/// Builds a diagnostic text for this instance: the base, the value returned by
/// <c>GetNormalForm()</c>, the part of speech and the comma-separated morpho-form endings.
/// </summary>
/// <returns>
/// Text shaped as <c>[base: '…', normal-form: '…', pos: '…', {morpho-form-endings: '…'}]</c>.
/// </returns>
public override string ToString()
{
    const string format = "[base: '{0}', normal-form: '{1}', pos: '{2}', {{morpho-form-endings: '{3}'}}]";

    var sb = new StringBuilder();
    // Track the position explicitly: testing 'sb.Length' would silently drop the
    // separator after a leading ending whose text is empty.
    var isFirst = true;
    foreach (var morphoFormEnding in MorphoFormEndings)
    {
        if (isFirst)
        {
            isFirst = false;
        }
        else
        {
            sb.Append(", ");
        }
        sb.Append(StringsHelper.ToString(morphoFormEnding));
    }

    var _base = StringsHelper.ToString(Base);
    var normalForm = GetNormalForm();

    return string.Format(format, _base, normalForm, PartOfSpeech, sb.ToString());
}
/// <summary>
/// Converts a dictionary entry into an <see cref="MModelRecord"/>: the pointer key becomes
/// the n-gram text and the bucket value is wrapped in a <see cref="WeighByLanguageEnumerator"/>.
/// </summary>
/// <param name="pair">Entry whose key is converted with <c>StringsHelper.ToString</c>.</param>
/// <returns>The record built from <paramref name="pair"/>.</returns>
private static MModelRecord ToModelRecord(this KeyValuePair<IntPtr, BucketValue> pair)
{
    var ngram = StringsHelper.ToString(pair.Key);
    var weights = new WeighByLanguageEnumerator(pair.Value);

    return new MModelRecord() { Ngram = ngram, WeighByLanguages = weights };
}
/// <summary>
/// Builds a <see cref="ModelRecord"/> from a native pointer: the n-gram text is read with
/// <c>StringsHelper.ToString</c>, the probability with <c>ToProbability</c>.
/// </summary>
/// <param name="baseIntPtr">Pointer the n-gram text is read from.</param>
/// <param name="m">Receives the created record.</param>
private static void ToModelRecord(IntPtr baseIntPtr, out ModelRecord m)
{
    var ngram = StringsHelper.ToString(baseIntPtr);
    // second argument is the n-gram length plus one (presumably skips the terminator - confirm)
    var probability = ToProbability(baseIntPtr, ngram.Length + 1);

    m = new ModelRecord() { Ngram = ngram, Probability = probability };
}
/// <summary>
/// Builds a <see cref="ModelRecord"/> from a native pointer: the n-gram text is read with
/// <c>StringsHelper.ToString</c>, the probability with <c>ToProbability</c>.
/// </summary>
/// <param name="baseIntPtr">Pointer the n-gram text is read from.</param>
/// <returns>The created record.</returns>
public static ModelRecord ToModelRecord(IntPtr baseIntPtr)
{
    var ngram = StringsHelper.ToString(baseIntPtr);
    // second argument is the n-gram length plus one (presumably skips the terminator - confirm)
    var probability = ToProbability(baseIntPtr, ngram.Length + 1);

    return new ModelRecord() { Ngram = ngram, Probability = probability };
}
/// <summary>
/// Renders the keys of <paramref name="array"/> as text, one key per line.
/// </summary>
/// <param name="array">Tuples whose <c>Key</c> is converted with <c>StringsHelper.ToString</c>.</param>
/// <returns>All keys, each followed by <see cref="Environment.NewLine"/>.</returns>
private static string ToString(SortedListIntPtrKey<Pair[]>.Tuple[] array)
{
    var buffer = new System.Text.StringBuilder();
    for (var i = 0; i < array.Length; i++)
    {
        buffer.AppendLine(StringsHelper.ToString(array[i].Key));
    }
    return buffer.ToString();
}
/// <summary>
/// Stores a morpho-type in the morpho-types dictionary.
/// A name that is already present is reported through the model-loading error callback
/// and the existing entry is left untouched.
/// </summary>
/// <param name="morphoTypePair">Name and morpho-type to store.</param>
private void AddMorphoType2Dictionary(ref MorphoTypeNative_pair_t morphoTypePair)
{
    if (!_MorphoTypesDictionary.ContainsKey(morphoTypePair.Name))
    {
        _MorphoTypesDictionary.Add(morphoTypePair.Name, morphoTypePair.MorphoType);
        return;
    }

    // duplicates are reported, not thrown
    _ModelLoadingErrorCallback("Duplicated morpho-type", StringsHelper.ToString(morphoTypePair.Name));
}
/// <summary>
/// Lazily enumerates every entry of the dictionary as an <see cref="MModelRecord"/>.
/// </summary>
/// <returns>
/// One record per entry: the key converted with <c>StringsHelper.ToString</c> and the value
/// converted with <c>ToArrayOfWeighByLanguage</c>.
/// </returns>
public IEnumerable<MModelRecord> GetAllRecords()
{
    foreach (var entry in _Dictionary)
    {
        var record = new MModelRecord()
        {
            Ngram = StringsHelper.ToString(entry.Key),
            WeighByLanguages = ToArrayOfWeighByLanguage(entry.Value),
        };
        yield return record;
    }
}
/// <summary>
/// Creates a morpho-type from a line.
/// </summary>
/// <param name="lineBase">Start of the line; the part before the first COMMA is parsed as the part of speech.</param>
/// <param name="lineLength">Length of the line, used to size the copied morpho-type name.</param>
/// <returns>
/// The name/morpho-type pair, or <c>null</c> when COMMA or the morpho-type marker is not found
/// or the part of speech is unknown (the latter is also reported through the error callback).
/// </returns>
private MorphoTypeNative_pair_t? CreateMorphoTypePair(char* lineBase, int lineLength)
{
    var commaIndex = IndexOf(lineBase, COMMA);
    if (commaIndex == -1)
    {
        return null;
    }

    var markerOffset = IndexAfter_MORPHO_TYPE(lineBase + commaIndex + 1);
    if (markerOffset == -1)
    {
        return null;
    }

    var partOfSpeech = default(PartOfSpeechEnum);
    if (!_EnumParserPartOfSpeech.TryParse(lineBase, commaIndex, ref partOfSpeech))
    {
        var pos = StringsHelper.ToString(lineBase, commaIndex);
        _ModelLoadingErrorCallback("Unknown part-of-speech: '" + pos + '\'', StringsHelper.ToString(lineBase));
        return null;
    }

    // the name starts right after the comma, the marker offset and one more char
    var nameStart = commaIndex + 1 + markerOffset + 1;
    IntPtr namePtr;
    AllocHGlobalAndCopy(lineBase + nameStart, lineLength - nameStart, out namePtr);

    return new MorphoTypeNative_pair_t()
    {
        Name = namePtr,
        MorphoType = new MorphoTypeNative(_PartOfSpeechList.GetPartOfSpeech(partOfSpeech)),
    };
}
/// <summary>
/// Renders the ending and its morpho-attribute pairs as <c>[ending, {pair,pair,…}]</c>.
/// </summary>
public override string ToString()
{
    var ending = StringsHelper.ToString(Ending);
    var attributes = string.Join(",", (IEnumerable<MorphoAttributePair>)MorphoAttributePairs);

    return "[" + ending + ", {" + attributes + "}]";
}
/// <summary>
/// Returns the <c>Length</c> characters starting at <c>BasePtr + StartIndex</c>,
/// wrapped in single quotes.
/// </summary>
public override string ToString()
{
    var text = StringsHelper.ToString(BasePtr + StartIndex, Length);
    return "'" + text + "'";
}
/// <summary>
/// Returns the text of <c>Length</c> characters beginning at <c>Start</c>.
/// </summary>
public override string ToString()
{
    var text = StringsHelper.ToString(Start, Length);
    return text;
}
/// <summary>
/// Renders the morpho-type name and the part of speech as
/// <c>MorphoTypeName: '…', PartOfSpeech: '…'</c>.
/// </summary>
public override string ToString()
{
    var morphoTypeName = StringsHelper.ToString(MorphoTypeName);
    var partOfSpeech = StringsHelper.ToString(PartOfSpeech);

    return "MorphoTypeName: '" + morphoTypeName + "', PartOfSpeech: '" + partOfSpeech + "'";
}
/// <summary>
/// Renders the ending and its morpho-attribute pairs as <c>[ending, {pair,pair,…}]</c>.
/// </summary>
public override string ToString()
{
    var ending = StringsHelper.ToString(Ending);
    var attributes = string.Join(",", MorphoAttributePairs);

    return "[" + ending + ", {" + attributes + "}]";
}
/// <summary>
/// Returns the text of <c>Length</c> characters beginning at <c>Start</c>.
/// </summary>
public override string ToString()
{
    return StringsHelper.ToString(Start, Length);
}
/// <summary>
/// Reads <c>ModelFilename</c> line by line and hands every "text TAB weight" record to
/// <paramref name="callbackAction"/>.
/// </summary>
/// <remarks>
/// Lines starting with '#' at the beginning of the file are skipped. For every following line
/// the part before the first TAB (trailing white-space trimmed) is converted to a string and
/// passed through <c>StringsHelper.ToUpperInvariantInPlace</c>; the part after the TAB
/// (leading white-space skipped) is parsed as a <see cref="float"/>.
/// </remarks>
/// <param name="callbackAction">
/// Invoked once per parsed line. The same <c>Pair</c> variable is passed by reference on every call.
/// </param>
/// <exception cref="InvalidDataException">
/// A line has no TAB, has an empty text part, or its weight cannot be parsed.
/// </exception>
private void LoadModelFilenameContent(LoadModelFilenameContentCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(ModelFilename))
    {
        var lineCount = 0;
        var text = default(string);
        var weight = default(float);
        var pair = new Pair() { Language = this.Language };

        #region [.read first line.]
        if (!emmf.MoveNext())
        {
            return;
        }
        #endregion

        #region [.skip beginning comments.]
        for ( ; ; )
        {
            if (*emmf.Current.Start != '#')
            {
                break;
            }

            // count the skipped comment so that line numbers in error messages
            // refer to lines of the file, not to data lines only
            lineCount++;

            if (!emmf.MoveNext())
            {
                return;
            }
        }
        #endregion

        #region [.read all lines.]
        for ( ; ; )
        {
            lineCount++;

            var ns = emmf.Current;

            #region [.first-value in string.]
            int startIndex_1 = 0;
            int finishIndex_2 = ns.Length - 1;

            //search '\t'
            int startIndex_2 = 0;
            int finishIndex_1 = 0;
            for ( ; ; )
            {
                if (ns.Start[finishIndex_1] == '\t')
                {
                    startIndex_2 = finishIndex_1 + 1;
                    finishIndex_1--;
                    break;
                }
                //not found '\t'
                if (finishIndex_2 <= ++finishIndex_1)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                }
            }

            // '\t' is the very first char => the text part is empty;
            // reject it here, before the trimming loop would read ns.Start[ -1 ]
            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            //skip ends white-spaces
            for ( ; ; )
            {
                if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                    (--finishIndex_1 <= startIndex_1))
                {
                    break;
                }
            }

            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }
            #endregion

            #region [.second-value in string.]
            //skip starts white-spaces
            for ( ; ; )
            {
                if (((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                    (finishIndex_2 <= ++startIndex_2))
                {
                    break;
                }
            }
            #endregion

            #region [.fill 'Pair_v1' & calling 'callbackAction()'.]
            var len = (finishIndex_2 - startIndex_2) + 1;
            text = StringsHelper.ToString(ns.Start + startIndex_2, len);

            if (!float.TryParse(text, NS, NFI, out weight))
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            len = (finishIndex_1 - startIndex_1) + 1;
            text = StringsHelper.ToString(ns.Start + startIndex_1, len);
            StringsHelper.ToUpperInvariantInPlace(text);

            pair.Text = text;
            pair.Weight = weight;

            callbackAction(ref pair);
            #endregion

            #region [.move to next line.]
            if (!emmf.MoveNext())
            {
                break;
            }
            #endregion
        }
        #endregion
    }
}
/// <summary>
/// Renders the row as <c>text, {w1; w2; …}</c>: the text read from <c>TextPtr</c> followed by
/// the weight classes joined with "; ".
/// </summary>
public override string ToString()
{
    var text = StringsHelper.ToString(TextPtr);
    var weights = string.Join("; ", WeightClasses);

    return text + ", {" + weights + "}";
}
/// <summary>
/// Reads <paramref name="modelFilename"/> line by line and hands every
/// "text TAB weight weight …" record to <paramref name="callbackAction"/>.
/// </summary>
/// <remarks>
/// Lines starting with '#' are skipped anywhere in the file. For every other line the part
/// before the first TAB (trailing white-space trimmed) is terminated with '\0' and upper-cased
/// in place inside the line buffer; the part after the TAB is split on white-space and every
/// token is parsed as a <see cref="float"/>. The number of weights on the first data line
/// becomes the expected count; later lines with a different count are reported with
/// <see cref="Debug.WriteLine(string)"/> and skipped.
/// </remarks>
/// <param name="modelFilename">File to read; also used in error messages.</param>
/// <param name="callbackAction">
/// Invoked once per accepted line. The same <c>ModelRow</c> variable and the same weights list
/// are reused for every call (the list is cleared after the callback returns).
/// </param>
/// <exception cref="InvalidDataException">
/// A line has no TAB, has an empty text part, a weight cannot be parsed, or the first data
/// line contains no weights.
/// </exception>
private static void LoadModelFilenameContentMMF(string modelFilename, LoadModelFilenameContentMMFCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(modelFilename))
    {
        var lineCount = 0;
        var text = default(string);
        var weight = default(float);
        var row = new ModelRow();
        var weightClasses = new List<float>(100);
        var weightClassesLen = -1;

        #region [.move to first line.]
        if (!emmf.MoveNext())
        {
            return;
        }
        #endregion

        #region [.skip beginning comments.]
        for ( ; ; )
        {
            if (*emmf.Current.Start != '#')
            {
                break;
            }

            // count only the comment lines actually skipped; the first data line is
            // counted by the reading loop below (counting it here too made every
            // reported line number one too high)
            lineCount++;

            if (!emmf.MoveNext())
            {
                return;
            }
        }
        #endregion

        #region [.read all lines.]
        for ( ; ; )
        {
            lineCount++;

            var ns = emmf.Current;

            #region [.skip comment.]
            if (*ns.Start == '#')
            {
                if (!emmf.MoveNext())
                {
                    break;
                }
                continue;
            }
            #endregion

            #region [.first-value in string.]
            int startIndex_1 = 0;
            int finishIndex_2 = ns.Length - 1;

            //search '\t'
            int startIndex_2 = 0;
            int finishIndex_1 = 0;
            for ( ; ; )
            {
                if (ns.Start[finishIndex_1] == TABULATION)
                {
                    startIndex_2 = finishIndex_1 + 1;
                    finishIndex_1--;
                    break;
                }
                //not found '\t'
                if (finishIndex_2 <= ++finishIndex_1)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                }
            }

            // '\t' is the very first char => the text part is empty;
            // reject it here, before the trimming loop would read ns.Start[ -1 ]
            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
            }

            //skip ends white-spaces
            for ( ; ; )
            {
                if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                    (--finishIndex_1 <= startIndex_1))
                {
                    break;
                }
            }

            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
            }
            #endregion

            #region [.second-value in string.]
            //tokenize weight-of-classes
            for ( ; startIndex_2 <= finishIndex_2; startIndex_2++)
            {
                //skip starts white-spaces
                if ((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                {
                    continue;
                }

                //search end of weight-value: stop on the first white-space or one past the last char
                var si = startIndex_2;
                while ((startIndex_2 <= finishIndex_2) &&
                       ((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace))
                {
                    startIndex_2++;
                }

                //try parse weight-value
                var len = (startIndex_2 - si);
                text = StringsHelper.ToString(ns.Start + si, len);
                if (!float.TryParse(text, NS, NFI, out weight))
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                }
                weightClasses.Add(weight);
            }
            #endregion

            #region [.fill 'ModelRow' & calling 'callbackAction()'.]
            if (weightClassesLen == -1)
            {
                if (weightClasses.Count == 0)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => classes weightes not found"));
                }
                weightClassesLen = weightClasses.Count;
            }
            else if (weightClassesLen != weightClasses.Count)
            {
                Debug.WriteLine(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => different count of classes weightes");

                // Drop the weights of the rejected line and advance to the next one.
                // A bare 'continue' would re-parse the very same line forever while
                // 'weightClasses' keeps growing.
                weightClasses.Clear();
                if (!emmf.MoveNext())
                {
                    break;
                }
                continue;
            }

            row.TextLength = (finishIndex_1 - startIndex_1) + 1;
            var textPtr = ns.Start + startIndex_1;
            // terminate the text in place (overwrites the char right after the text part)
            textPtr[row.TextLength] = '\0';
            StringsHelper.ToUpperInvariantInPlace(textPtr, row.TextLength);

            row.TextPtr = textPtr;
            row.WeightClasses = weightClasses;

            callbackAction(ref row);

            //clear weight-classes temp-buffer
            weightClasses.Clear();
            #endregion

            #region [.move to next line.]
            if (!emmf.MoveNext())
            {
                break;
            }
            #endregion
        }
        #endregion
    }
}
/// <summary>
/// Reads <c>ModelFilename</c> line by line and hands every "text TAB probability" record to
/// <paramref name="callbackAction"/>.
/// </summary>
/// <remarks>
/// Lines starting with '#' at the beginning of the file are skipped. For every following line
/// the part before the first TAB (trailing white-space trimmed) is terminated with '\0' and
/// upper-cased in place inside the line buffer; the part after the TAB (leading white-space
/// skipped) is parsed as a <see cref="double"/>.
/// </remarks>
/// <param name="callbackAction">
/// Invoked once per parsed line. The same <c>Pair</c> variable is passed by reference on every
/// call; its <c>TextPtr</c> points into the line buffer.
/// </param>
/// <exception cref="InvalidDataException">
/// A line has no TAB, has an empty text part, or its probability cannot be parsed.
/// </exception>
private void LoadModelFilenameContent(LoadModelFileContentCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(ModelFilename))
    {
        var lineCount = 0;
        var text = default(string);
        var probability = default(double);
        var pair = new Pair();

        #region [.read first line.]
        if (!emmf.MoveNext())
        {
            return;
        }
        #endregion

        #region [.skip beginning comments.]
        for ( ; ; )
        {
            if (*emmf.Current.Start != '#')
            {
                break;
            }

            // count the skipped comment so that line numbers in error messages
            // refer to lines of the file, not to data lines only
            lineCount++;

            if (!emmf.MoveNext())
            {
                return;
            }
        }
        #endregion

        #region [.read all lines.]
        for ( ; ; )
        {
            lineCount++;

            var ns = emmf.Current;

            #region [.first-value in string.]
            int startIndex_1 = 0;
            int finishIndex_2 = ns.Length - 1;

            //search '\t'
            int startIndex_2 = 0;
            int finishIndex_1 = 0;
            for ( ; ; )
            {
                if (ns.Start[finishIndex_1] == '\t')
                {
                    startIndex_2 = finishIndex_1 + 1;
                    finishIndex_1--;
                    break;
                }
                //not found '\t'
                if (finishIndex_2 <= ++finishIndex_1)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                }
            }

            // '\t' is the very first char => the text part is empty;
            // reject it here, before the trimming loop would read ns.Start[ -1 ]
            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            //skip ends white-spaces
            for ( ; ; )
            {
                if (((_CTM[ns.Start[finishIndex_1]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                    (--finishIndex_1 <= startIndex_1))
                {
                    break;
                }
            }

            if (finishIndex_1 < startIndex_1)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }
            #endregion

            #region [.second-value in string.]
            //skip starts white-spaces
            for ( ; ; )
            {
                if (((_CTM[ns.Start[startIndex_2]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace) ||
                    (finishIndex_2 <= ++startIndex_2))
                {
                    break;
                }
            }
            #endregion

            #region [.fill 'Pair_v2' & calling 'callbackAction()'.]
            var len = (finishIndex_2 - startIndex_2) + 1;
            text = StringsHelper.ToString(ns.Start + startIndex_2, len);

            if (!double.TryParse(text, NS, NFI, out probability))
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            pair.TextLength = (finishIndex_1 - startIndex_1) + 1;
            var textPtr = ns.Start + startIndex_1;
            // terminate the text in place (overwrites the char right after the text part)
            textPtr[pair.TextLength] = '\0';
            StringsHelper.ToUpperInvariantInPlace(textPtr, pair.TextLength);

            pair.TextPtr = (IntPtr)textPtr;
            pair.Probability = probability;

            callbackAction(ref pair);
            #endregion

            #region [.move to next line.]
            if (!emmf.MoveNext())
            {
                break;
            }
            #endregion
        }
        #endregion
    }
}
/// <summary>
/// Renders the pair as <c>'text': probability</c>, the text being read from <c>TextPtr</c>.
/// </summary>
public override string ToString()
{
    var text = StringsHelper.ToString(TextPtr);
    return string.Format("'{0}': {1}", text, Probability);
}
/// <summary>
/// Converts a dictionary entry into a <see cref="ModelRecord"/>: the pointer key becomes the
/// n-gram text, the value becomes the probability.
/// </summary>
/// <param name="p">Entry to convert.</param>
/// <returns>The record built from <paramref name="p"/>.</returns>
private static ModelRecord ToModelRecord(this KeyValuePair<IntPtr, double> p)
{
    var record = new ModelRecord()
    {
        Ngram = StringsHelper.ToString(p.Key),
        Probability = p.Value,
    };
    return record;
}
/// <summary>
/// Creates a morpho-form from a line.
/// </summary>
/// <remarks>
/// The part before COLON (up to the first white-space) is the ending; it is lower-cased into
/// '_ENDING_LOWER_BUFFER'. The part after COLON is scanned for letter/digit tokens, each of
/// which is parsed as a morpho-attribute. Unknown attributes are reported through the
/// model-loading error callback.
/// </remarks>
/// <param name="morphoType">Morpho-type whose attribute group is used to resolve attribute pairs.</param>
/// <param name="lineBase">Start of the '\0'-terminated line.</param>
/// <returns>
/// The morpho-form, or <c>null</c> when COLON is not found or lies at or beyond ENDING_BUFFER_SIZE.
/// </returns>
private MorphoFormNative? CreateMorphoForm(MorphoTypeNative morphoType, char* lineBase)
{
    #region find index-of-COLON & check on length
    var colonIndex = IndexOf(lineBase, COLON);
    if ((colonIndex == -1) || (ENDING_BUFFER_SIZE <= colonIndex))
    {
        _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
        return null;
    }
    #endregion

    #region fill '_ENDING_LOWER_BUFFER'
    var endingLength = 0;
    while (endingLength < colonIndex)
    {
        var c = lineBase[endingLength];
        if ((_CHARTYPE_MAP[c] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
        {
            break;
        }
        _ENDING_LOWER_BUFFER[endingLength] = _LOWER_INVARIANT_MAP[c];
        endingLength++;
    }
    _ENDING_LOWER_BUFFER[endingLength] = '\0';
    #endregion

    #region fill '_MorphoAttributePairs_Buffer'
    _MorphoAttributePairs_Buffer.Clear();

    for (var cursor = lineBase + colonIndex + 1; *cursor != '\0'; cursor++)
    {
        // a token starts with a letter; everything else is a separator
        if ((_CHARTYPE_MAP[*cursor] & CharType.IsLetter) != CharType.IsLetter)
        {
            continue;
        }

        // consume the run of letters and digits
        var tokenStart = cursor;
        while (*cursor != '\0')
        {
            var charType = _CHARTYPE_MAP[*cursor];
            if (((charType & CharType.IsLetter) != CharType.IsLetter) &&
                ((charType & CharType.IsDigit) != CharType.IsDigit))
            {
                break;
            }
            cursor++;
        }
        var tokenLength = (int)(cursor - tokenStart);

        if (tokenLength != 0)
        {
            var morphoAttribute = default(MorphoAttributeEnum);
            if (!_EnumParserMorphoAttribute.TryParse(tokenStart, tokenLength, ref morphoAttribute))
            {
                var attr = new string(tokenStart, 0, tokenLength);
                _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
            }
            else
            {
                var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                if (map.HasValue)
                {
                    _MorphoAttributePairs_Buffer.Add(map.Value);
                }
#if DEBUG
                else
                {
                    var attr = new string(tokenStart, 0, tokenLength);
                    _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                }
#endif
            }
        }

        // the token ran up to the terminator: stop before the loop increment steps past it
        if (*cursor == '\0')
        {
            break;
        }
    }
    #endregion

    #region Allocate native-memory for endingOfWord
    IntPtr endingPtr;
    IntPtr endingUpperPtr;
    if ((endingLength == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
    {
        // a lone UNDERLINE ending maps to the shared empty string
        endingPtr = _EMPTY_STRING;
        endingUpperPtr = _EMPTY_STRING;
    }
    else
    {
        #region ending-in-original-case
        // reuse an already stored ending, otherwise copy the buffer to native memory and store it
        // NOTE(review): the copy length is the COLON index, not the scanned ending length - confirm intended
        var lowerKey = new IntPtr(_ENDING_LOWER_BUFFER);
        if (!_EndingDictionary.TryGetValue(lowerKey, out IntPtr storedPtr))
        {
            AllocHGlobalAndCopy(_ENDING_LOWER_BUFFER, colonIndex, out storedPtr);
            _EndingDictionary.Add(storedPtr, storedPtr);
        }
        endingPtr = storedPtr;
        #endregion

        #region ending-in-upper-case
        StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);
        var upperKey = new IntPtr(_ENDING_UPPER_BUFFER);
        if (!_EndingDictionary.TryGetValue(upperKey, out storedPtr))
        {
            AllocHGlobalAndCopy(_ENDING_UPPER_BUFFER, colonIndex, out storedPtr);
            _EndingDictionary.Add(storedPtr, storedPtr);
        }
        endingUpperPtr = storedPtr;
        #endregion
    }
    #endregion

    return new MorphoFormNative((char*)endingPtr, (char*)endingUpperPtr, _MorphoAttributePairs_Buffer);
}
/// <summary>
/// Renders the upper-case ending and the morpho-attributes as <c>[ending, {attributes}]</c>.
/// </summary>
public override string ToString()
{
    var ending = StringsHelper.ToString(EndingUpper);
    var attributes = string.Join(",", MorphoAttributes);

    return "[" + ending + ", {" + attributes + "}]";
}
/// <summary>
/// Renders the pair as <c>text, weight, language</c>, the text being read from <c>TextPtr</c>.
/// </summary>
public override string ToString()
{
    var text = StringsHelper.ToString(TextPtr);
    return text + ", " + Weight + ", " + Language;
}