Example #1
        public void CryptDecryptWord()
        {
            var reader = new VReader(Consts.VOCAB_PATH);

            reader.UploadBinary();

            // Look up the vector for "Hello", wrap it in a fresh Representation
            // and ask the vocabulary for the nearest word: a round trip that
            // should give back "Hello" itself.
            var original  = reader.Vocab.GetRepresentationFor("Hello");
            var candidate = new Representation(original.NumericVector);

            var nearestWord = reader.Vocab.Distance(candidate, 1).FirstOrDefault()?.Representation.WordOrNull;

            Trace.WriteLine(nearestWord);
        }
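The same nearest-word lookup also works on a synthesized vector. The sketch below is not part of the original example (the method name and the words "king" and "queen" are only illustrative; any words present in the vocabulary will do). It reuses the VReader/Representation API shown above to average two word vectors and ask for the closest word.

        public void NearestToAverageSketch()
        {
            var reader = new VReader(Consts.VOCAB_PATH);

            reader.UploadBinary();

            var a = reader.Vocab.GetRepresentationFor("king").NumericVector;
            var b = reader.Vocab.GetRepresentationFor("queen").NumericVector;

            // element-wise average of the two embedding vectors
            var avg = a.Zip(b, (x, y) => (x + y) / 2f).ToArray();

            var nearest = reader.Vocab.Distance(new Representation(avg), 1).FirstOrDefault()?.Representation.WordOrNull;

            Trace.WriteLine(nearest);
        }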
        protected async Task<TrainSet> ScheduleProcessTwitter(TrainSet data)
        {
            var collection = await _twitterCollectionsStore.Get(data.SourceId);

            if (collection == null)
            {
                data.SetFailed("Cannot find source data");
                return(data);
            }

            var sources = await _twitterSourcesStore.GetBy(x => x.CollectionId == collection.Id);

            if (sources == null || !sources.Any())
            {
                data.SetFailed("Cannot find any twitter sources");
                return(data);
            }

            var user = await _userStore.FindByIdAsync(collection.UserId);

            if (user == null)
            {
                data.SetFailed("Cannot find user data");
                return(data);
            }

            var userTwitter = await _userSocialsStore.GetTwitter(user.Id);

            if (userTwitter == null)
            {
                data.SetFailed("No twitter access token");
                return(data);
            }

            try
            {
                OAuthTwitter(userTwitter);
            }
            catch
            {
                data.SetFailed("Error with twitter connections");
                return(data);
            }

            // download the twitter timeline data

            // clamp the requested tweet counts: minimum at least 1, maximum between 100 and 10000
            int min = Math.Max(1, data.MinCount);

            int max = data.MaxCount;

            max = Math.Max(100, max);
            max = Math.Min(10000, max);

            // swap the bounds if they ended up inverted
            if (min > max)
            {
                var t = max;
                max = min;
                min = t;
            }

            int perSource = (int)Math.Ceiling((double)max / sources.Count);
            var entity    = new TrainSetModel();

            var rawData = new StringBuilder();
            int total   = 0;

            // matches URLs so they can be stripped from the tweet text
            var regex = new Regex("http[s]?://[A-Za-z0-9._-]*");

            foreach (var screen in sources)
            {
                long? lastId      = null;
                int   count       = 0;
                var   twitterUser = await UserAsync.GetUserFromId(screen.TwitterId);

                while (perSource > count)
                {
                    var @params = new UserTimelineParameters
                    {
                        MaximumNumberOfTweetsToRetrieve = 50,
                    };

                    if (lastId.HasValue)
                    {
                        // page backwards: only request tweets older than the last batch
                        @params.MaxId = lastId.Value - 1;
                    }

                    var tweets = await TimelineAsync.GetUserTimeline(twitterUser, @params);

                    if (tweets == null || !tweets.Any())
                    {
                        break;
                    }

                    // remember the oldest id seen so the next request does not repeat these tweets
                    lastId = tweets.Min(x => x.Id);

                    count += tweets.Count();
                    foreach (var t in tweets)
                    {
                        rawData.Append(regex.Replace(t.FullText, string.Empty));
                    }
                }

                total += count;
            }

            if (total < min)
            {
                data.SetFailed($"Not enough data available. Available: {total}. Minimum: {min}");
                return(data);
            }

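            // Each window from the word bag is InputWordsCount + 1 words long: the
            // first InputWordsCount word vectors are concatenated into the network
            // input and the vector of the last word becomes the expected output.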
            WordBag wb = WordBag.CreateToWords(rawData.ToString(), data.InputWordsCount + 1);

            _vReader.UploadBinary();

            List<Tuple<string[], string[]>> stringList = new List<Tuple<string[], string[]>>();
            List<Tuple<double[], double[]>> doubleList = new List<Tuple<double[], double[]>>();

            foreach (var s in wb.Read())
            {
                var vectorList = new List<double[]>();
                var wordList   = new List<string>();

                foreach (var ss in s)
                {
                    var word = _vReader.Vocab.GetRepresentationOrNullFor(ss);
                    if (word == null)
                    {
                        break;
                    }

                    // store the word's embedding vector (not the characters of the string)
                    vectorList.Add(word.NumericVector.Select(x => (double)x).ToArray());
                    wordList.Add(ss);
                }

                // skip windows that contain a word missing from the vocabulary
                if (vectorList.Count < s.Length)
                {
                    continue;
                }

                var tmpVector = new List<double>();
                foreach (var i in vectorList.Take(data.InputWordsCount))
                {
                    tmpVector.AddRange(i);
                }

                doubleList.Add(new Tuple<double[], double[]>(tmpVector.ToArray(), vectorList.Last().ToArray()));
                stringList.Add(new Tuple<string[], string[]>(wordList.Take(wordList.Count - 1).ToArray(), new[] { wordList.Last() }));
            }

            entity.Data         = doubleList.ToArray();
            entity.StringSource = stringList.ToArray();

            string dataString = JsonConvert.SerializeObject(entity);

            await _storageBlobClient.SetContainer(CONTAINER_NAME, true);

            var storageKey = await _storageBlobClient.WriteText(dataString);

            if (string.IsNullOrWhiteSpace(storageKey))
            {
                data.SetFailed("Cannot upload train set to storage");
                return(data);
            }

            data.StorageKey = storageKey;
            data.SetReady();

            data.ExamplesCount = entity.Data.Length;

            return(data);
        }
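WordBag.CreateToWords is not shown in this example. A hypothetical stand-in is sketched below (the class name SlidingWordBag and the assumption that it slides one word at a time are ours, not the library's); it only illustrates the contract the method above relies on: Read() yields windows of exactly size words, so the first size - 1 word vectors can be concatenated into the input and the last word's vector used as the target.

        // Hypothetical stand-in for the WordBag used above; not the real implementation.
        public class SlidingWordBag
        {
            private readonly string[] _words;
            private readonly int _size;

            private SlidingWordBag(string text, int size)
            {
                // naive whitespace tokenisation; the real WordBag may normalise differently
                _words = text.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                _size  = size;
            }

            public static SlidingWordBag CreateToWords(string text, int size)
            {
                return new SlidingWordBag(text, size);
            }

            public IEnumerable<string[]> Read()
            {
                // yield every consecutive window of _size words
                for (int i = 0; i + _size <= _words.Length; i++)
                {
                    yield return _words.Skip(i).Take(_size).ToArray();
                }
            }
        }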
Example #3
        public async Task NeuralWordsTest()
        {
            // get GAME OF THRONES

            string regexText = string.Empty;

            try
            {
                var fullText = await File.ReadAllTextAsync(Consts.GAME_OF_THRONES_PATH);

                // strip "Page N" artifacts left over from the source document
                regexText = new Regex("Page [0-9]+").Replace(fullText, string.Empty);
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex);
            }


            var vReader = new VReader(Consts.VOCAB_PATH);

            vReader.UploadBinary();

            var bag = MRWordBag.CreateToWords(regexText, 4);

            // create training vectors
            var allSet = new List<Tuple<double[], double[]>>();

            foreach (var step in bag.Read())
            {
                bool isValid = true;
                foreach (var v in step)
                {
                    if (!vReader.Vocab.ContainsWord(v))
                    {
                        isValid = false;
                        break;
                    }
                }

                if (!isValid)
                {
                    continue;
                }

                var forInput = step.Take(3);
                var input    = new List<double>();
                foreach (var i in forInput)
                {
                    input.AddRange(vReader.Vocab.GetRepresentationFor(i).NumericVector.Select(x => (double)x).ToList());
                }

                var      forOut = step.Last();
                double[] output = vReader.Vocab.GetRepresentationFor(forOut).NumericVector.Select(x => (double)x).ToArray();

                allSet.Add(new Tuple <double[], double[]>(input.ToArray(), output));
            }

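            // hold out the last 10 samples as a small check set and train on the rest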
            var trainSet = allSet.Take(allSet.Count - 10).ToArray();
            var checkSet = allSet.TakeLast(10).ToArray();

            var trainRates = new double[] { 0.00005d, 0.00001d };

            foreach (var rate in trainRates)
            {
                foreach (var net in NetsWordTest)
                {
                    Trace.WriteLine($"Train net: layers: {net.HiddenLayersCount} | neurons: {net.Hidden.First().NeuronsCount}\tRate: {rate}");
                    var trainer     = new NeuralNetTrainer(net, trainSet, 500, 1, rate, 1, Consts.TraceLog);
                    var trainResult = trainer.SimpleTrain();

                    Trace.WriteLine("-- check net --");
                    foreach (var s in checkSet)
                    {
                        var response = net.Activate(s.Item1);

                        var responseR    = new Representation(response.Select(x => (float)x).ToArray());
                        var responseWord = vReader.Vocab.Distance(responseR, 1)?.FirstOrDefault()?.Representation;

                        var correct = vReader.Vocab.Distance(new Representation(s.Item2.Select(x => (float)x).ToArray()), 1)?.FirstOrDefault()?.Representation;

                        Trace.WriteLine($"Correct: {correct?.WordOrNull}\tResponse: {responseWord?.WordOrNull}");
                    }

                    var name = $"Neural net ({net.HiddenLayersCount}-{net.Hidden.First().NeuronsCount}-epochs-{trainResult.EpochFinished}-error-{trainResult.ResultError}-time-{trainResult.TotalTimeMs})";
                    await MRSerializer.ToFile($"d://{name}.txt", net, true);
                }
            }
        }
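The check above judges each prediction only by its nearest vocabulary word. A complementary metric is the cosine similarity between the raw network response and the expected word vector; the helper below is a sketch of ours, not part of the original test, and assumes net.Activate returns a double[] as the casts above suggest.

        // Cosine similarity between the net's raw output and the target word vector.
        // Values near 1.0 mean the predicted vector points in almost the same
        // direction as the target, even when the nearest vocabulary word differs.
        private static double CosineSimilarity(double[] a, double[] b)
        {
            double dot = 0, normA = 0, normB = 0;

            for (int i = 0; i < a.Length; i++)
            {
                dot   += a[i] * b[i];
                normA += a[i] * a[i];
                normB += b[i] * b[i];
            }

            return dot / (Math.Sqrt(normA) * Math.Sqrt(normB));
        }

Inside the check loop it could be logged next to the word comparison, e.g. Trace.WriteLine(CosineSimilarity(response, s.Item2)).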