Esempio n. 1
0
        /// <summary>
        /// Consumes the program header: the next token must satisfy
        /// <c>IsProgram()</c>. On success the token is pushed onto
        /// <c>TokenStack</c>, an LSP code is emitted and the variable area is
        /// generated.
        /// </summary>
        /// <exception cref="ExpectedException">
        /// Thrown when the token read is not the PROGRAM token.
        /// </exception>
        private void Header()
        {
            Token = Tokenization.GetToken();

            if (Token.IsProgram())
            {
                TokenStack.Push(Token);

                // NOTE(review): 0x0010 is the operand handed to LSP; its meaning
                // is not visible from this method — confirm before changing it.
                var lspOperand = new CmsCode(0x0010);
                AddCode(CmsCodeFactory.LSP(lspOperand));

                GenerateVariableArea();
                return;
            }

            throw new ExpectedException("PROGRAM", Token.Value, null);
        }
Esempio n. 2
0
        /// <summary>
        /// Emits a JMP code, then scans tokens until the stream ends or a token
        /// satisfying <c>IsBegin()</c> is found. Each token satisfying
        /// <c>IsVar()</c> causes the following token's value to be registered in
        /// <c>VariableArea</c> at the current <c>CodesLengh</c>, followed by a
        /// call to <c>Malock()</c>. Finally the JMP operand is set to the
        /// resulting <c>CodesLengh</c>.
        /// </summary>
        private void GenerateVariableArea()
        {
            Token = Tokenization.GetTokenIgnoreSpace();

            // The operand starts at zero and is patched after the scan below.
            // NOTE(review): presumably this makes execution jump past the
            // variable storage — confirm against the code that runs CmsCode.
            var skipTarget = new CmsCode(0x00);
            AddCode(CmsCodeFactory.JMP(skipTarget));

            for (; Token != null && !Token.IsBegin(); Token = Tokenization.GetTokenIgnoreSpace())
            {
                if (!Token.IsVar())
                {
                    continue;
                }

                // The token right after the VAR token supplies the variable name.
                Token = Tokenization.GetTokenIgnoreSpace();
                VariableArea.Add(Token.Value, new CmsCode(CodesLengh));
                Malock();
            }

            skipTarget.ValueDecimal = CodesLengh;
        }
Esempio n. 3
0
 /// <summary>
 /// Returns the entry stored in <c>_tokenizations</c> under the given key.
 /// </summary>
 /// <param name="tokenization">Key used to index <c>_tokenizations</c>.</param>
 /// <returns>The value registered for <paramref name="tokenization"/>.</returns>
 public tokenization Tokenization(Tokenization tokenization) => _tokenizations[tokenization];
Esempio n. 4
0
        /// <summary>
        /// Builds a gzip-compressed binary profile from the files listed in
        /// <paramref name="profile"/> and writes it to
        /// <c>profile.ProfileFilePath</c>.
        /// </summary>
        /// <remarks>
        /// Output layout, in write order: <c>PType</c>, min gram, max gram, the
        /// "tlc" flag, the number of files, then for each file its label, the
        /// number of distinct tokens (as a 64-bit integer) and every
        /// token/count pair ordered by descending count. One tab-separated
        /// summary line per file (label, line count, distinct tokens, total
        /// occurrences) is written to the console.
        /// </remarks>
        /// <param name="profile">
        /// Profile description supplying the tokenizer type, gram range, case
        /// setting, input files and output path.
        /// </param>
        public static void Train(Models.LangProfile profile)
        {
            var  tokenizer = Tokenization.Tokenizer(profile.PType);
            int  lo        = int.Parse(profile.MinGram);
            int  hi        = int.Parse(profile.MaxGram);
            // NOTE(review): "tlc" presumably means "to lower case" — confirm against the tokenizer.
            bool tlc       = profile.CaseSensitive == "tlc";
            var  cleaner   = Cleaning.MakeCleaner("none");

            // These knobs are currently disabled: n <= 0 keeps every token,
            // fold == -1 keeps every input line.
            int n      = -1;
            int nfolds = -1;
            int fold   = -1;

            string out_profile = profile.ProfileFilePath;

            using (var bw = new BinaryWriter(new GZipStream(new FileStream(out_profile, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            {
                bw.Write(profile.PType);
                bw.Write(lo);
                bw.Write(hi);
                bw.Write(tlc);
                bw.Write(profile.Files.Count());
                foreach (var eafile in profile.Files)
                {
                    var          langCode = eafile.Label;
                    long         absCnt   = 0;
                    MemoryStream tmpFile  = new MemoryStream();

                    // Pass 1: copy the cleaned lines that belong to the training fold into memory.
                    using (var rd = new StreamReader(eafile.FilePath))
                    using (var wr = new StreamWriter(tmpFile))
                    {
                        string text;
                        while ((text = rd.ReadLine()) != null)
                        {
                            if (fold == -1 || (absCnt % nfolds) != fold)
                            {
                                wr.WriteLine(cleaner(text));
                            }
                            absCnt++;
                        }
                    }

                    // Pass 2: tokenize the buffered text and count token occurrences.
                    // ToArray() is still valid after the writer above has closed the stream.
                    using (var rd = new StreamReader(new MemoryStream(tmpFile.ToArray())))
                    {
                        var distro = new Dictionary<string, long>();
                        foreach (var tok in tokenizer(EnumFromRd(rd), tlc, lo, hi))
                        {
                            // A missing key leaves 'seen' at 0, so one lookup serves both cases.
                            long seen;
                            distro.TryGetValue(tok, out seen);
                            distro[tok] = seen + 1;
                        }

                        IEnumerable<KeyValuePair<string, long>> byFrequency = distro.OrderByDescending(x => x.Value);
                        if (n > 0)
                        {
                            byFrequency = byFrequency.Take(n);
                        }

                        // Materialise once: the query is deferred, so counting it and then
                        // iterating it would sort the whole distribution twice.
                        var ranked = byFrequency.ToList();

                        bw.Write(langCode);
                        // Must stay a 64-bit write to keep the file format unchanged.
                        long distinctCount = ranked.Count;
                        bw.Write(distinctCount);

                        long grams = 0;
                        long occs  = 0;
                        foreach (var kv in ranked)
                        {
                            bw.Write(kv.Key);
                            bw.Write(kv.Value);
                            grams++;
                            occs += kv.Value;
                        }
                        Console.WriteLine("{0}\t{1}\t{2}\t{3}", langCode, absCnt, grams, occs);
                    }
                }
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Advances <c>Token</c> to the value returned by
 /// <c>Tokenization.GetTokenIgnoreSpace()</c>.
 /// </summary>
 public void MoveNextToken()
 {
     Token = Tokenization.GetTokenIgnoreSpace();
 }
Esempio n. 6
0
 /// <summary>
 /// Creates filter options from the given format and tokenization.
 /// </summary>
 /// <param name="format">Value stored in <c>Format</c>.</param>
 /// <param name="tokenization">Value stored in <c>Tokenization</c>.</param>
 public FilterOptions(Format format, Tokenization tokenization)
 {
     this.Format = format;
     this.Tokenization = tokenization;
 }