/// <summary>
/// Builds the "ensure tokens and types" delegate for a vector-valued source column.
/// When invoked, the delegate refreshes the token/type lists only if <c>_position</c>
/// is behind <c>_source.Position</c>; otherwise it does nothing.
/// </summary>
/// <returns>A delegate that brings the cached tokens up to date with the current source position.</returns>
private Action GetEnsureTokensAndTypesVecDel()
{
    Contracts.Assert(_groupInfo.SrcColType.IsVector);

    // State captured by the closure and reused across invocations.
    var configuredLang = _groupInfo.Lang;
    var langText = default(DvText);
    var buffer = default(VBuffer<DvText>);
    var fetchVector = _source.GetGetter<VBuffer<DvText>>(_groupInfo.SrcColIndex);
    GetSpansSimpleCallback onToken = AddTokenAndType;

    return () =>
    {
        Contracts.Assert(_source.Position >= 0);
        Contracts.Assert(_position <= _source.Position);

        // Tokens were already computed for the current source position.
        if (_position >= _source.Position)
            return;

        // Start from the configured language on every refresh; UpdateLanguage may adjust it.
        var effectiveLang = configuredLang;
        UpdateLanguage(ref effectiveLang, ref langText);

        fetchVector(ref buffer);
        ClearLists();

        // Tokenize the first buffer.Count entries of the values array, in order.
        int count = buffer.Count;
        var items = buffer.Values;
        for (int idx = 0; idx < count; idx++)
        {
            Tokenize(ref items[idx], effectiveLang, onToken);
        }

        CopyLists();
        _position = _source.Position;
    };
}
/// <summary>
/// Builds the "ensure tokens and types" delegate for a scalar (non-vector) source column.
/// When invoked, the delegate refreshes the token/type lists only if <c>_position</c>
/// is behind <c>_source.Position</c>; otherwise it does nothing.
/// </summary>
/// <returns>A delegate that brings the cached tokens up to date with the current source position.</returns>
private Action GetEnsureTokensAndTypesOneDel()
{
    Contracts.Assert(!_groupInfo.SrcColType.IsVector);

    // State captured by the closure and reused across invocations.
    var configuredLang = _groupInfo.Lang;
    var langText = default(DvText);
    var value = default(DvText);
    var fetchValue = _source.GetGetter<DvText>(_groupInfo.SrcColIndex);
    GetSpansSimpleCallback onToken = AddTokenAndType;

    return () =>
    {
        Contracts.Assert(_source.Position >= 0);
        Contracts.Assert(_position <= _source.Position);

        // Tokens were already computed for the current source position.
        if (_position >= _source.Position)
            return;

        // Start from the configured language on every refresh; UpdateLanguage may adjust it.
        var effectiveLang = configuredLang;
        UpdateLanguage(ref effectiveLang, ref langText);

        fetchValue(ref value);
        ClearLists();
        Tokenize(ref value, effectiveLang, onToken);
        CopyLists();

        _position = _source.Position;
    };
}
/// <summary>
/// Runs the tokenizer selected by <paramref name="lang"/> over the characters of
/// <paramref name="src"/>, reporting each span through <paramref name="addTokenAndType"/>.
/// Does nothing when <paramref name="src"/> has no characters.
/// </summary>
/// <param name="src">The text to tokenize.</param>
/// <param name="lang">Language whose integer value indexes into <c>Tokenizers</c>.</param>
/// <param name="addTokenAndType">Callback handed to the tokenizer's <c>GetSpansSimple</c>.</param>
private void Tokenize(ref DvText src, Language lang, GetSpansSimpleCallback addTokenAndType)
{
    // When types are tracked, the token and type lists must stay the same length.
    Contracts.Assert(_typesList == null || _tokensList.Count == _typesList.Count);

    if (!src.HasChars)
        return;

    // Fetch the backing string together with the [start, limit) window that src covers.
    int start;
    int limit;
    string underlying = src.GetRawUnderlyingBufferInfo(out start, out limit);

    var tokenizer = Tokenizers[(int)lang];
    int length = limit - start;
    tokenizer.GetSpansSimple(underlying, start, length, addTokenAndType);

    Contracts.Assert(_typesList == null || _tokensList.Count == _typesList.Count);
}