public void CryptDecryptWord() {
    var reader = new VReader(Consts.VOCAB_PATH);
    reader.UploadBinary();

    // Round-trip a word: take its vector, wrap it in a fresh Representation,
    // and ask the vocabulary for the nearest word to that vector.
    var original = reader.Vocab.GetRepresentationFor("Hello");
    var copy = new Representation(original.NumericVector);
    var nearestWord = reader.Vocab.Distance(copy, 1).FirstOrDefault()?.Representation.WordOrNull;
    Trace.WriteLine(nearestWord);
}
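// The round-trip above only recovers "Hello" if Vocab.Distance ranks candidates by
// vector similarity. A minimal sketch of that idea, assuming cosine similarity over
// the raw float vectors; CosineSimilarity is hypothetical and not part of VReader's API.
private static float CosineSimilarity(float[] a, float[] b) {
    float dot = 0f, normA = 0f, normB = 0f;
    for (int i = 0; i < a.Length; i++) {
        dot += a[i] * b[i];    // accumulate the dot product
        normA += a[i] * a[i];  // and both squared norms in one pass
        normB += b[i] * b[i];
    }
    // small epsilon guards against division by zero for all-zero vectors
    return dot / ((float)Math.Sqrt(normA) * (float)Math.Sqrt(normB) + 1e-8f);
}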
protected async Task<TrainSet> ScheduleProcessTwitter(TrainSet data) {
    var collection = await _twitterCollectionsStore.Get(data.SourceId);
    if (collection == null) {
        data.SetFailed("Cannot find source data");
        return data;
    }
    var sources = await _twitterSourcesStore.GetBy(x => x.CollectionId == collection.Id);
    if (sources == null || !sources.Any()) {
        data.SetFailed("Cannot find any twitter sources");
        return data;
    }
    var user = await _userStore.FindByIdAsync(collection.UserId);
    if (user == null) {
        data.SetFailed("Cannot find user data");
        return data;
    }
    var userTwitter = await _userSocialsStore.GetTwitter(user.Id);
    if (userTwitter == null) {
        data.SetFailed("No twitter access token");
        return data;
    }
    try {
        OAuthTwitter(userTwitter);
    } catch {
        data.SetFailed("Error with twitter connection");
        return data;
    }

    // Load twitter data. Clamp the requested counts: min >= 1, 100 <= max <= 10000.
    int min = Math.Max(1, data.MinCount);
    int max = Math.Min(10000, Math.Max(100, data.MaxCount));
    if (min > max) {
        // swap so that min <= max
        var tmp = min;
        min = max;
        max = tmp;
    }
    int perSource = (int)Math.Ceiling((double)max / sources.Count);

    var entity = new TrainSetModel();
    var rawData = new StringBuilder();
    int total = 0;
    var regex = new Regex("http[s]?://[A-Za-z0-9._-]*"); // strip links from tweet text

    foreach (var screen in sources) {
        long? lastId = null;
        int count = 0;
        var twitterUser = await UserAsync.GetUserFromId(screen.TwitterId);
        while (perSource > count) {
            var @params = new UserTimelineParameters {
                MaximumNumberOfTweetsToRetrieve = 50,
            };
            if (lastId.HasValue) {
                // page backwards: only request tweets older than the previous batch
                @params.MaxId = lastId.Value - 1;
            }
            var tweets = await TimelineAsync.GetUserTimeline(twitterUser, @params);
            if (tweets == null || !tweets.Any()) {
                break;
            }
            lastId = tweets.Min(x => x.Id);
            count += tweets.Count();
            foreach (var t in tweets) {
                rawData.Append(regex.Replace(t.FullText, string.Empty));
            }
        }
        total += count;
    }

    if (total < min) {
        data.SetFailed($"Not enough data available. Available: {total}. Minimum: {min}");
        return data;
    }

    // Split the raw text into sliding windows of InputWordsCount + 1 words:
    // the first InputWordsCount words become the input, the last word is the target.
    WordBag wb = WordBag.CreateToWords(rawData.ToString(), data.InputWordsCount + 1);
    _vReader.UploadBinary();
    var stringList = new List<Tuple<string[], string[]>>();
    var doubleList = new List<Tuple<double[], double[]>>();
    foreach (var s in wb.Read()) {
        var vectorList = new List<double[]>();
        var wordList = new List<string>();
        foreach (var ss in s) {
            var word = _vReader.Vocab.GetRepresentationOrNullFor(ss);
            if (word == null) {
                break;
            }
            vectorList.Add(word.NumericVector.Select(x => (double)x).ToArray());
            wordList.Add(ss);
        }
        // skip windows that contain out-of-vocabulary words
        if (vectorList.Count < s.Length) {
            continue;
        }
        var tmpVector = new List<double>();
        foreach (var i in vectorList.Take(data.InputWordsCount)) {
            tmpVector.AddRange(i);
        }
        doubleList.Add(new Tuple<double[], double[]>(tmpVector.ToArray(), vectorList.Last().ToArray()));
        stringList.Add(new Tuple<string[], string[]>(wordList.Take(wordList.Count - 1).ToArray(), new[] { wordList.Last() }));
    }
    entity.Data = doubleList.ToArray();
    entity.StringSource = stringList.ToArray();

    string dataString = JsonConvert.SerializeObject(entity);
    await _storageBlobClient.SetContainer(CONTAINER_NAME, true);
    var storageKey = await _storageBlobClient.WriteText(dataString);
    if (string.IsNullOrWhiteSpace(storageKey)) {
        data.SetFailed("Cannot upload train set to storage");
        return data;
    }
    data.StorageKey = storageKey;
    data.SetReady();
    data.ExamplesCount = entity.Data.Length;
    return data;
}
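// A minimal, self-contained sketch of the example packing used above: the window's
// first N word vectors are concatenated into one input array and the last word's
// vector becomes the target. PackWindow is hypothetical; names and sizes are
// illustrative, the real values come from data.InputWordsCount and the vocabulary.
private static Tuple<double[], double[]> PackWindow(List<double[]> window, int inputWords) {
    // e.g. 3 input words of 100 dimensions each -> Item1 has 300 doubles
    var input = window.Take(inputWords).SelectMany(v => v).ToArray();
    // the final word's vector is the prediction target -> Item2 has 100 doubles
    var target = window.Last();
    return new Tuple<double[], double[]>(input, target);
}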
public async Task NeuralWordsTest() {
    // get GAME OF THRONES text and strip the "Page N" artifacts left by extraction
    string regexText = string.Empty;
    try {
        var fullText = await File.ReadAllTextAsync(Consts.GAME_OF_THRONES_PATH);
        regexText = new Regex("Page [0-9]+").Replace(fullText, string.Empty);
    } catch (Exception ex) {
        Trace.WriteLine(ex);
    }

    var vReader = new VReader(Consts.VOCAB_PATH);
    vReader.UploadBinary();
    var bag = MRWordBag.CreateToWords(regexText, 4);

    // create train vectors: 3 input words -> 1 target word
    var allSet = new List<Tuple<double[], double[]>>();
    foreach (var step in bag.Read()) {
        bool isValid = true;
        foreach (var v in step) {
            if (!vReader.Vocab.ContainsWord(v)) {
                isValid = false;
                break;
            }
        }
        if (!isValid) {
            continue;
        }
        var forInput = step.Take(3);
        var input = new List<double>();
        foreach (var i in forInput) {
            input.AddRange(vReader.Vocab.GetRepresentationFor(i).NumericVector.Select(x => (double)x));
        }
        var forOut = step.Last();
        double[] output = vReader.Vocab.GetRepresentationFor(forOut).NumericVector.Select(x => (double)x).ToArray();
        allSet.Add(new Tuple<double[], double[]>(input.ToArray(), output));
    }

    // hold out the last 10 examples for a manual check
    var trainSet = allSet.Take(allSet.Count - 10).ToArray();
    var checkSet = allSet.TakeLast(10).ToArray();

    var trainRates = new double[] { 0.00005d, 0.00001d };
    foreach (var rate in trainRates) {
        foreach (var net in NetsWordTest) {
            Trace.WriteLine($"Train net: layers: {net.HiddenLayersCount} | neurons: {net.Hidden.First().NeuronsCount}\tRate: {rate}");
            var trainer = new NeuralNetTrainer(net, trainSet, 500, 1, rate, 1, Consts.TraceLog);
            var trainResult = trainer.SimpleTrain();

            Trace.WriteLine("-- check net --");
            foreach (var s in checkSet) {
                // decode the net's output and the expected vector back to the nearest vocabulary word
                var response = net.Activate(s.Item1);
                var responseR = new Representation(response.Select(x => (float)x).ToArray());
                var responseWord = vReader.Vocab.Distance(responseR, 1)?.FirstOrDefault()?.Representation;
                var correct = vReader.Vocab.Distance(new Representation(s.Item2.Select(x => (float)x).ToArray()), 1)?.FirstOrDefault()?.Representation;
                Trace.WriteLine($"Correct: {correct?.WordOrNull}\tResponse: {responseWord?.WordOrNull}");
            }

            var name = $"Neural net ({net.HiddenLayersCount}-{net.Hidden.First().NeuronsCount}-epochs-{trainResult.EpochFinished}-error-{trainResult.ResultError}-time-{trainResult.TotalTimeMs})";
            await MRSerializer.ToFile($"d://{name}.txt", net, true);
        }
    }
}
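// A hedged sketch of condensing the manual check above into one accuracy number.
// CheckAccuracy is hypothetical (not part of the test suite), and NeuralNet stands
// in for whatever type NetsWordTest yields; it reuses only the calls already shown
// (net.Activate, Vocab.Distance). A call like
// Trace.WriteLine(CheckAccuracy(net, vReader, checkSet)) could follow the check loop.
private static string CheckAccuracy(NeuralNet net, VReader vReader, Tuple<double[], double[]>[] checkSet) {
    int hits = 0;
    foreach (var s in checkSet) {
        // nearest vocabulary word for the net's output...
        var predicted = vReader.Vocab.Distance(new Representation(net.Activate(s.Item1).Select(x => (float)x).ToArray()), 1)?.FirstOrDefault()?.Representation?.WordOrNull;
        // ...and for the expected target vector
        var expected = vReader.Vocab.Distance(new Representation(s.Item2.Select(x => (float)x).ToArray()), 1)?.FirstOrDefault()?.Representation?.WordOrNull;
        if (predicted != null && predicted == expected) { hits++; }
    }
    return $"Exact-word accuracy: {hits}/{checkSet.Length}";
}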