/// <summary>
/// Reads <paramref name="modelFilename"/> line by line through <c>EnumeratorMMF</c> and calls
/// <paramref name="callbackAction"/> once for every accepted data line.
/// </summary>
/// <remarks>
/// A data line is <c>text TAB weight [white-space weight ...]</c>. Lines whose first character is
/// '#' are skipped, both at the top of the file and between data lines.
/// The number of weights on the first data line becomes the expected count; a later line with a
/// different count is reported through <c>Debug.WriteLine</c> and skipped.
/// The text part is modified inside the line buffer returned by the enumerator: a '\0' is written
/// right after it and it is passed to <c>StringsHelper.ToUpperInvariantInPlace</c>.
/// The same <c>ModelRow</c> and the same weight list are reused for every line (the list is cleared
/// after the callback returns), so the callback has to copy whatever it wants to keep.
/// </remarks>
/// <param name="modelFilename">Path of the model file; also used in error messages.</param>
/// <param name="callbackAction">Receives the row filled from the current line.</param>
/// <exception cref="InvalidDataException">
/// A line has no TAB before its last character, has no text in front of the TAB, contains a weight
/// that cannot be parsed, or the first data line has no weights at all.
/// </exception>
private static void LoadModelFilenameContentMMF(string modelFilename, LoadModelFilenameContentMMFCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(modelFilename))
    {
        var lineCount = 0;
        var row = new ModelRow();
        var weightClasses = new List<float>(100);
        var weightClassesLen = -1;

        // Nothing to do for an empty file.
        if (!emmf.MoveNext())
        {
            return;
        }

        // Skip the leading comment lines. Only the comment lines are counted here: the first
        // data line is counted by the main loop, so that 'lineCount' is its real line number
        // (counting it in both places made every reported line number one too high).
        while (*emmf.Current.Start == '#')
        {
            lineCount++;
            if (!emmf.MoveNext())
            {
                return;
            }
        }

        // 'continue' inside a do/while jumps to the loop condition, so every path through the
        // body - including the skip paths - advances to the next line.
        do
        {
            lineCount++;
            var ns = emmf.Current;

            // Comment line between data lines.
            if (*ns.Start == '#')
            {
                continue;
            }

            var lastIndex = ns.Length - 1;

            // Locate the TAB that separates the text from the weights. A TAB in the last
            // position is rejected as well: the scan gives up once it reaches 'lastIndex'.
            var tabIndex = 0;
            while (ns.Start[tabIndex] != TABULATION)
            {
                if (lastIndex <= ++tabIndex)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                }
            }

            // The text occupies [0 .. textLast].
            var textLast = tabIndex - 1;

            // A TAB in the very first position leaves no text. Reject the line here instead of
            // letting the trim loop below read the character in front of the line buffer.
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
            }

            // Trim trailing white-space of the text.
            // NOTE: when the scan steps down onto index 0 it stops without testing that character.
            while ((_CTM[ns.Start[textLast]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
            {
                if (--textLast <= 0)
                {
                    break;
                }
            }
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
            }

            // Tokenize the weights: white-space separated numbers after the TAB.
            for (var pos = tabIndex + 1; pos <= lastIndex; pos++)
            {
                // Skip white-space in front of the token.
                if ((_CTM[ns.Start[pos]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                {
                    continue;
                }

                // Advance to the first white-space after the token, or to one past the line end.
                var tokenStart = pos;
                while ((pos <= lastIndex) && ((_CTM[ns.Start[pos]] & CharType.IsWhiteSpace) != CharType.IsWhiteSpace))
                {
                    pos++;
                }

                float weight;
                var text = StringsHelper.ToString(ns.Start + tokenStart, pos - tokenStart);
                if (!float.TryParse(text, NS, NFI, out weight))
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString())));
                }
                weightClasses.Add(weight);
            }

            if (weightClassesLen == -1)
            {
                // The first data line defines how many weights every line must carry.
                if (weightClasses.Count == 0)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => classes weightes not found"));
                }
                weightClassesLen = weightClasses.Count;
            }
            else if (weightClassesLen != weightClasses.Count)
            {
                Debug.WriteLine(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, modelFilename, lineCount, ns.ToString()) + " => different count of classes weightes");

                // Drop the weights of the rejected line and go on with the NEXT line. Without
                // clearing the buffer and advancing, the same line would be parsed again and
                // again, appending its weights to the buffer on every pass.
                weightClasses.Clear();
                continue;
            }

            // Terminate and upper-case the text in place, then hand the row to the caller.
            row.TextLength = textLast + 1;
            var textPtr = ns.Start;
            textPtr[row.TextLength] = '\0';
            StringsHelper.ToUpperInvariantInPlace(textPtr, row.TextLength);
            row.TextPtr = textPtr;
            row.WeightClasses = weightClasses;

            callbackAction(ref row);

            // The weight buffer is shared between lines.
            weightClasses.Clear();
        }
        while (emmf.MoveNext());
    }
}
/// <summary>
/// Reads the file named by <c>ModelFilename</c> line by line through <c>EnumeratorMMF</c> and calls
/// <paramref name="callbackAction"/> once for every data line.
/// </summary>
/// <remarks>
/// A data line is <c>text TAB weight</c>. Lines at the top of the file whose first character is
/// '#' are skipped; they are counted, so the line number in an error message is the position of
/// the line in the file.
/// The text is copied into a new string with <c>StringsHelper.ToString</c> and that copy is passed
/// to <c>StringsHelper.ToUpperInvariantInPlace</c>. The same <c>Pair</c> instance (with
/// <c>Language</c> preset from this object) is reused for every line.
/// </remarks>
/// <param name="callbackAction">Receives the pair filled from the current line.</param>
/// <exception cref="InvalidDataException">
/// A line has no TAB before its last character, has no text in front of the TAB, or its weight
/// cannot be parsed.
/// </exception>
private void LoadModelFilenameContent(LoadModelFilenameContentCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(ModelFilename))
    {
        var lineCount = 0;
        var pair = new Pair() { Language = this.Language };

        // Nothing to do for an empty file.
        if (!emmf.MoveNext())
        {
            return;
        }

        // Skip the leading comment lines, counting them so that the line numbers reported
        // below refer to the file and not to the first data line.
        while (*emmf.Current.Start == '#')
        {
            lineCount++;
            if (!emmf.MoveNext())
            {
                return;
            }
        }

        do
        {
            lineCount++;
            var ns = emmf.Current;
            var lastIndex = ns.Length - 1;

            // Locate the TAB that separates the text from the weight. A TAB in the last
            // position is rejected as well: the scan gives up once it reaches 'lastIndex'.
            var tabIndex = 0;
            while (ns.Start[tabIndex] != '\t')
            {
                if (lastIndex <= ++tabIndex)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                }
            }

            // The text occupies [0 .. textLast].
            var textLast = tabIndex - 1;

            // A TAB in the very first position leaves no text. Reject the line here instead of
            // letting the trim loop below read the character in front of the line buffer.
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Trim trailing white-space of the text.
            // NOTE: when the scan steps down onto index 0 it stops without testing that character.
            while ((_CTM[ns.Start[textLast]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
            {
                if (--textLast <= 0)
                {
                    break;
                }
            }
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Skip white-space between the TAB and the weight; the scan never moves past 'lastIndex'.
            var weightStart = tabIndex + 1;
            while ((_CTM[ns.Start[weightStart]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
            {
                if (lastIndex <= ++weightStart)
                {
                    break;
                }
            }

            // Everything from 'weightStart' to the end of the line is parsed as the weight.
            float weight;
            var text = StringsHelper.ToString(ns.Start + weightStart, (lastIndex - weightStart) + 1);
            if (!float.TryParse(text, NS, NFI, out weight))
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Copy the text out of the line buffer and upper-case the copy.
            text = StringsHelper.ToString(ns.Start, textLast + 1);
            StringsHelper.ToUpperInvariantInPlace(text);

            pair.Text = text;
            pair.Weight = weight;

            callbackAction(ref pair);
        }
        while (emmf.MoveNext());
    }
}
/// <summary>
/// Reads the file named by <c>ModelFilename</c> line by line through <c>EnumeratorMMF</c> and calls
/// <paramref name="callbackAction"/> once for every data line.
/// </summary>
/// <remarks>
/// A data line is <c>text TAB probability</c>. Lines at the top of the file whose first character
/// is '#' are skipped; they are counted, so the line number in an error message is the position
/// of the line in the file.
/// The text part is modified inside the line buffer returned by the enumerator: a '\0' is written
/// right after it and it is passed to <c>StringsHelper.ToUpperInvariantInPlace</c>.
/// <c>Pair.TextPtr</c> points into that buffer and the same <c>Pair</c> instance is reused for
/// every line, so the callback has to copy whatever it wants to keep.
/// </remarks>
/// <param name="callbackAction">Receives the pair filled from the current line.</param>
/// <exception cref="InvalidDataException">
/// A line has no TAB before its last character, has no text in front of the TAB, or its
/// probability cannot be parsed.
/// </exception>
private void LoadModelFilenameContent(LoadModelFileContentCallback callbackAction)
{
    using (var emmf = EnumeratorMMF.Create(ModelFilename))
    {
        var lineCount = 0;
        var pair = new Pair();

        // Nothing to do for an empty file.
        if (!emmf.MoveNext())
        {
            return;
        }

        // Skip the leading comment lines, counting them so that the line numbers reported
        // below refer to the file and not to the first data line.
        while (*emmf.Current.Start == '#')
        {
            lineCount++;
            if (!emmf.MoveNext())
            {
                return;
            }
        }

        do
        {
            lineCount++;
            var ns = emmf.Current;
            var lastIndex = ns.Length - 1;

            // Locate the TAB that separates the text from the probability. A TAB in the last
            // position is rejected as well: the scan gives up once it reaches 'lastIndex'.
            var tabIndex = 0;
            while (ns.Start[tabIndex] != '\t')
            {
                if (lastIndex <= ++tabIndex)
                {
                    throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
                }
            }

            // The text occupies [0 .. textLast].
            var textLast = tabIndex - 1;

            // A TAB in the very first position leaves no text. Reject the line here instead of
            // letting the trim loop below read the character in front of the line buffer.
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Trim trailing white-space of the text.
            // NOTE: when the scan steps down onto index 0 it stops without testing that character.
            while ((_CTM[ns.Start[textLast]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
            {
                if (--textLast <= 0)
                {
                    break;
                }
            }
            if (textLast < 0)
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Skip white-space between the TAB and the number; the scan never moves past 'lastIndex'.
            var valueStart = tabIndex + 1;
            while ((_CTM[ns.Start[valueStart]] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
            {
                if (lastIndex <= ++valueStart)
                {
                    break;
                }
            }

            // Everything from 'valueStart' to the end of the line is parsed as the probability.
            double probability;
            var text = StringsHelper.ToString(ns.Start + valueStart, (lastIndex - valueStart) + 1);
            if (!double.TryParse(text, NS, NFI, out probability))
            {
                throw (new InvalidDataException(string.Format(INVALIDDATAEXCEPTION_FORMAT_MESSAGE, ModelFilename, lineCount, ns.ToString())));
            }

            // Terminate and upper-case the text in place, then hand the pair to the caller.
            pair.TextLength = textLast + 1;
            var textPtr = ns.Start;
            textPtr[pair.TextLength] = '\0';
            StringsHelper.ToUpperInvariantInPlace(textPtr, pair.TextLength);
            pair.TextPtr = (IntPtr)textPtr;
            pair.Probability = probability;

            callbackAction(ref pair);
        }
        while (emmf.MoveNext());
    }
}