/// <summary>
/// Passes <paramref name="sourceText"/> to the native <c>morfeusz_analyse</c> routine and
/// collects the returned interpretations into a <see cref="Sentence"/>.
/// </summary>
/// <param name="sourceText">Text to analyse; it is re-encoded from UTF-16 using the <c>encoder</c> field before the native call.</param>
/// <returns>
/// A sentence holding one converted tag for every returned entry whose <c>Haslo</c> is not blank.
/// Entries are read until the first one whose <c>Forma</c> is null, empty or whitespace.
/// </returns>
public Sentence AnalizeText(string sourceText)
{
    var result = new Sentence();

    // Re-encode the managed (UTF-16) string into the byte encoding held by `encoder`.
    var utf16Bytes = Encoding.Unicode.GetBytes(sourceText);
    var nativeBytes = Encoding.Convert(Encoding.Unicode, encoder, utf16Bytes);

    // The native call hands back a pointer to consecutive InterpMorf records.
    var cursor = morfeusz_analyse(nativeBytes);
    var entrySize = Marshal.SizeOf(typeof(InterpMorf));

    while (true)
    {
        var entry = (InterpMorf)Marshal.PtrToStructure(cursor, typeof(InterpMorf));

        // TODO: this condition needs fixing - it is also satisfied by numbers and
        // symbols that carry no meaning. (A stricter check on Haslo/Interp being
        // non-null was considered here.)
        if (string.IsNullOrWhiteSpace(entry.Forma))
        {
            break;
        }

        // Step to the next record before processing the current one.
        cursor += entrySize;

        var form = EncodeCWord(entry.Forma);
        if (string.IsNullOrWhiteSpace(entry.Haslo))
        {
            // No Haslo for this entry: nothing is added to the sentence.
            continue;
        }

        var lemma = EncodeCWord(entry.Haslo);
        result.AddTaggedWords(converter.Convert(form, lemma, entry.Interp));
    }

    return result;
}
/// <summary>
/// Builds a <see cref="Vector"/> from the nouns of <paramref name="sentence"/>: each noun
/// contributes the value stored in <c>words</c> for the word of its first tag.
/// </summary>
/// <param name="sentence">Sentence whose noun-tagged words are converted.</param>
/// <returns>A vector with one <c>ushort</c> entry per noun, in the order the nouns are enumerated.</returns>
public Vector Convert(Sentence sentence)
{
    // GetNouns returns a lazily evaluated query; materialize it once so that sizing the
    // array and filling it do not each re-run the filter over the whole sentence.
    var nouns = GetNouns(sentence).ToList();
    var result = new ushort[nouns.Count];

    for (var i = 0; i < nouns.Count; i++)
    {
        var word = nouns[i].Tags.First().Word;

        // AddToBag is called before the lookup so that `words` has an entry for this word
        // (presumably it registers unseen words - confirm against AddToBag).
        AddToBag(word);
        result[i] = words[word];
    }

    return new Vector(result);
}
/// <summary>
/// Selects the words of <paramref name="sentence"/> that have at least one tag whose
/// part of speech is <see cref="PartOfSpeach.Noun"/>.
/// </summary>
/// <param name="sentence">Sentence whose <c>Words</c> are filtered.</param>
/// <returns>A lazily evaluated sequence of the matching words, in their original order.</returns>
private IEnumerable<TaggedWord> GetNouns(Sentence sentence)
{
    return from candidate in sentence.Words
           where candidate.Tags.Any(tag => tag.PartOfSpeach == PartOfSpeach.Noun)
           select candidate;
}