コード例 #1
0
ファイル: LoadingTests.cs プロジェクト: richorama/NWord2Vec
 // Round-trip test: loads a model from "model.txt", writes it to an in-memory
 // stream with TextModelWriter, reads it back with TextModelReader, and checks
 // that the reloaded model reports the same Words and Size values.
 public void TestReLoadingText()
 {
     var original = Model.Load("model.txt");
     Model reloaded;

     using (var buffer = new MemoryStream())
     {
         // The writer is disposed before rewinding so that everything it wrote
         // is in the buffer. NOTE(review): the `true` argument presumably keeps
         // the underlying stream open after the writer is disposed - confirm.
         using (var textWriter = new TextModelWriter(buffer, true))
         {
             textWriter.Write(original);
         }

         // Rewind to the start so the reader sees what was just written.
         buffer.Position = 0;

         using (var textReader = new TextModelReader(buffer))
         {
             reloaded = Model.Load(textReader);
         }
     }

     Assert.AreEqual(original.Words, reloaded.Words);
     Assert.AreEqual(original.Size, reloaded.Size);
 }
コード例 #2
0
ファイル: LoadingTests.cs プロジェクト: richorama/NWord2Vec
        // Round-trip test: loads a model from "model.txt", writes it to an in-memory
        // stream with TextModelWriter, reads it back with TextModelReader, and checks
        // that the reloaded model reports the same Words and Size values.
        public void TestReLoadingText()
        {
            var   model = Model.Load("model.txt");
            Model m2;

            using (var s = new MemoryStream())
            {
                // Dispose the writer before rewinding so its output is in the stream.
                using (var writer = new TextModelWriter(s, true))
                {
                    writer.Write(model);
                }
                s.Seek(0, SeekOrigin.Begin);

                // The reader is wrapped in a using statement so it is disposed
                // deterministically instead of being left for the finalizer.
                using (var tmr = new TextModelReader(s))
                {
                    m2 = Model.Load(tmr);
                }
            }
            Assert.AreEqual(model.Words, m2.Words);
            Assert.AreEqual(model.Size, m2.Size);
        }
コード例 #3
0
        /// <summary>
        /// Console entry point. Parses the configured input files into a text model and a
        /// concordance, writes both as JSON to <c>../../../text.json</c> and
        /// <c>../../../concordance.json</c>, then prints the results of several queries and
        /// edits on the text. Any exception is caught and its message is printed.
        /// </summary>
        /// <param name="args">
        /// Input file names. When empty, the configuration (including the file names) is
        /// read from <c>../../../config.json</c>.
        /// </param>
        static void Main(string[] args)
        {
            try
            {
                IText        text              = new Text();
                IConcordance concordance       = new Concordance();
                var          concordanceParser = new ConcordanceParser();
                var          configuration     = new GlobalConfiguration();

                if (args.Length != 0)
                {
                    configuration.FileNames = args;
                }
                else
                {
                    using var reader = new StreamReader("../../../config.json");
                    var json = reader.ReadToEnd();
                    configuration = JsonConvert.DeserializeObject<GlobalConfiguration>(json);

                    // Deserialization yields null for an empty or "null" document; fail with a
                    // clear message instead of a NullReferenceException further down.
                    if (configuration?.FileNames is null)
                    {
                        throw new InvalidOperationException(
                            "config.json does not define any input file names.");
                    }
                }

                // NOTE(review): `text` is reassigned on every iteration, so only the result
                // for the last file survives the loop - confirm this is intended.
                foreach (var fileName in configuration.FileNames)
                {
                    // Open read-only: FileStream(path, FileMode.Open) would request
                    // read/write access and fail on read-only input files.
                    using var stream = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read));
                    var textParser = new TextParser();
                    var textReader = new TextModelReader(stream);
                    text = textParser.ParseText(textReader.ReadAllText());
                }

                // NOTE(review): as above, `concordance` keeps only the last file's result.
                foreach (var fileName in configuration.FileNames)
                {
                    using var stream = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read));
                    var textReader = new TextModelReader(stream);
                    concordance = concordanceParser.ParseText(textReader.ReadAllText());
                }

                var jsonText = JsonTextSerializer.Serialize(text);
                var jsonConc = JsonTextSerializer.Serialize(concordance);

                using (var writer = new StreamWriter("../../../text.json"))
                {
                    var textModelWriter = new TextModelWriter(writer);
                    textModelWriter.Write(jsonText);
                }

                using (var writer = new StreamWriter("../../../concordance.json"))
                {
                    var textModelWriter = new TextModelWriter(writer);
                    textModelWriter.Write(jsonConc);
                }
                Console.WriteLine();
                Console.WriteLine("----Select words from question sentences with length 10------------------------");
                Console.WriteLine();
                foreach (var word in text.GetWordsFromQuestionSentences(10))
                {
                    Console.WriteLine(word);
                }
                Console.WriteLine();
                Console.WriteLine("----Order sentences by words count-------------------------");
                Console.WriteLine();
                foreach (var sentence in text.OrderSentencesByWordsCount())
                {
                    Console.Write(sentence);
                    Console.Write(" --- ");
                    Console.Write($"{sentence.WordsCount} words");
                    Console.WriteLine();
                }
                Console.WriteLine();
                Console.WriteLine("-----Deleting words with length 10--------------");
                Console.WriteLine();
                text.DeleteWords(10);
                foreach (var sentence in text.Sentences)
                {
                    Console.WriteLine(sentence);
                }
                Console.WriteLine();
                Console.WriteLine("-----Replacing words: \"In\" replace by \"In word replaced\"----------------");
                Console.WriteLine();
                text.ReplaceWord("In", "In word replaced");
                foreach (var sentence in text.Sentences)
                {
                    Console.WriteLine(sentence);
                }

                Console.WriteLine("------------------------------------");
            }
            catch (Exception e)
            {
                // Best-effort console tool: report the failure message and exit normally.
                Console.WriteLine(e.Message);
            }
        }
コード例 #4
0
ファイル: TagSet.cs プロジェクト: nyanyanya/la-pcfg
        /// <summary>
        /// Writes this tag set to <paramref name="sw"/>. The output starts and ends with
        /// the full type name of <c>TagSet</c>; in between come the "VER" option and, one
        /// nesting level deeper, the "ROOT" option followed by the dumps of
        /// <c>PTs</c> and <c>NTs</c>.
        /// </summary>
        /// <param name="sw">Writer that receives the output.</param>
        public void DumpToStream(TextModelWriter sw)
        {
            var sectionName = typeof(TagSet).FullName;

            // Opening marker and version.
            sw.Write(sectionName);
            sw.WriteOption("VER", VER);

            // Body, written one nesting level deeper.
            sw.NestLevel++;
            sw.WriteOption("ROOT", ROOT);
            PTs.DumpToStream(sw);
            NTs.DumpToStream(sw);
            sw.NestLevel--;

            // Closing marker repeats the type name.
            sw.Write(sectionName);
        }
コード例 #5
0
ファイル: Vocabulary.cs プロジェクト: nyanyanya/la-pcfg
        /// <summary>
        /// Writes this vocabulary to <paramref name="sw"/>. The output starts and ends with
        /// the full type name of <c>Vocabulary</c>; the header options are "SIG", "VER",
        /// "knownWordCount" and "sigCount", followed one nesting level deeper by the dumps
        /// of <c>vocab</c> and <c>signitureVocab</c>.
        /// </summary>
        /// <param name="sw">Writer that receives the output.</param>
        public void DumpToStream(TextModelWriter sw)
        {
            var sectionName = typeof(Vocabulary).FullName;

            // Opening marker.
            sw.Write(sectionName);

            // Header options. No rare-word count is written.
            sw.WriteOption("SIG", SIG);
            sw.WriteOption("VER", VER);
            sw.WriteOption("knownWordCount", vocab.Count);
            sw.WriteOption("sigCount", signitureVocab.Count);

            // Nested body: known words first, then signatures (no rare-word table).
            sw.NestLevel++;
            vocab.DumpToStream(sw);
            signitureVocab.DumpToStream(sw);
            sw.NestLevel--;

            // Closing marker repeats the type name.
            sw.Write(sectionName);
        }
コード例 #6
0
ファイル: LAPCFGrammar.cs プロジェクト: nyanyanya/la-pcfg
        /// <summary>
        /// Writes this grammar to <paramref name="sw"/>. The output starts and ends with the
        /// full type name of <c>LAPCFGrammar</c>. After the header options ("VER",
        /// "NTCount", "PTCount", "ROOTID") come three nested sections - "TerminalRule",
        /// "UnaryRule" and "BinaryRule" - with one tab-separated line per score, followed
        /// by the "TraceCount" option and one nested "TRACE" section per entry of
        /// <c>subtagTraces</c>. Scores that are infinite or NaN are not written.
        /// </summary>
        /// <param name="sw">Writer that receives the output.</param>
        /// <param name="tagSet">Used to turn tag ids into tag strings.</param>
        /// <param name="vocab">Used to turn word ids into word strings.</param>
        public void DumpToStream(TextModelWriter sw, TagSet tagSet, Vocabulary vocab)
        {
            var grammarName = typeof(LAPCFGrammar).FullName;

            // Opening marker and header options.
            sw.Write(grammarName);
            sw.WriteOption("VER", VER);
            sw.WriteOption("NTCount", NTCount);
            sw.WriteOption("PTCount", PTCount);
            sw.WriteOption("ROOTID", ROOTID);

            // Terminal rules: "<tag>_<index>\t<word>\t<score>".
            sw.Write("TerminalRule");
            sw.NestLevel++;
            foreach (var ruleGroup in trules)
            {
                if (ruleGroup == null)
                {
                    continue;
                }

                foreach (var rule in ruleGroup)
                {
                    if (rule == null)
                    {
                        continue;
                    }

                    var wordText = vocab.GetWordString(rule.word);
                    var tagText = tagSet.GetTagString(rule.tag);

                    for (int p = 0; p < rule.scores.Length; ++p)
                    {
                        var score = rule.scores[p];
                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            continue;
                        }

                        sw.Write(string.Format("{0}_{1}\t{2}\t{3}", tagText, p, wordText, score));
                    }
                }
            }
            sw.NestLevel--;

            // Unary rules: "<ptag>_<p>\t<ctag>_<c>\t<score>", scores indexed [c][p].
            sw.Write("UnaryRule");
            sw.NestLevel++;
            foreach (var ruleGroup in urules)
            {
                if (ruleGroup == null)
                {
                    continue;
                }

                foreach (var rule in ruleGroup)
                {
                    if (rule == null)
                    {
                        continue;
                    }

                    var parentTag = tagSet.GetTagString(rule.ptag);
                    var childTag = tagSet.GetTagString(rule.ctag);

                    for (int c = 0; c < rule.scores.Length; ++c)
                    {
                        for (int p = 0; p < rule.scores[c].Length; ++p)
                        {
                            var score = rule.scores[c][p];
                            if (double.IsInfinity(score) || double.IsNaN(score))
                            {
                                continue;
                            }

                            sw.Write(string.Format("{0}_{1}\t{2}_{3}\t{4}", parentTag, p, childTag, c, score));
                        }
                    }
                }
            }
            sw.NestLevel--;

            // Binary rules: "<ptag>_<p>\t<ltag>_<l>\t<rtag>_<r>\t<score>", scores indexed [l][r][p].
            sw.Write("BinaryRule");
            sw.NestLevel++;
            foreach (var outerGroup in brules)
            {
                if (outerGroup == null)
                {
                    continue;
                }

                foreach (var innerGroup in outerGroup)
                {
                    if (innerGroup == null)
                    {
                        continue;
                    }

                    foreach (var rule in innerGroup)
                    {
                        if (rule == null)
                        {
                            continue;
                        }

                        var parentTag = tagSet.GetTagString(rule.ptag);
                        var leftTag = tagSet.GetTagString(rule.ltag);
                        var rightTag = tagSet.GetTagString(rule.rtag);

                        for (int l = 0; l < rule.scores.Length; ++l)
                        {
                            for (int r = 0; r < rule.scores[l].Length; ++r)
                            {
                                for (int p = 0; p < rule.scores[l][r].Length; ++p)
                                {
                                    var score = rule.scores[l][r][p];
                                    if (double.IsInfinity(score) || double.IsNaN(score))
                                    {
                                        continue;
                                    }

                                    sw.Write(
                                        string.Format("{0}_{1}\t{2}_{3}\t{4}_{5}\t{6}",
                                        parentTag, p, leftTag, l, rightTag, r, score)
                                    );
                                }
                            }
                        }
                    }
                }
            }
            sw.NestLevel--;

            // Traces: a count, then per trace its length and one space-joined line per element.
            sw.WriteOption("TraceCount", subtagTraces.Count);
            foreach (var trace in subtagTraces)
            {
                sw.WriteOption("TRACE", trace.Length);
                sw.NestLevel++;
                foreach (var row in trace)
                {
                    sw.Write(string.Join(" ", row));
                }
                sw.NestLevel--;
            }

            // Closing marker repeats the type name.
            sw.Write(grammarName);
        }