コード例 #1
0
        /// <summary>
        /// Processes a single inline text and hands back only the resulting corpus.
        /// </summary>
        /// <remarks>
        /// Delegates to <c>Inline</c>; the word set and the vector dictionary that
        /// call also produces are dropped here.
        /// </remarks>
        /// <param name="inlineText">Text to process; forwarded by reference to <c>Inline</c>.</param>
        /// <param name="corpus">Receives whatever corpus <c>Inline</c> produces (may be <c>null</c>).</param>
        public static void InlineCorpus(ref string inlineText, out AbstractCorpusAdapter corpus)
        {
            // Only needed to satisfy the Inline signature; neither value is read afterwards.
            HashSet<string> unusedWords;
            Dictionary<string, double> unusedVectors;

            Inline(ref inlineText, out corpus, out unusedWords, out unusedVectors);
        }
コード例 #2
0
        /// <summary>
        /// Runs the cleanup and tagging steps over <paramref name="pages"/> and derives the
        /// corpus, the value set of its first "Wort" layer and its context vector.
        /// </summary>
        /// <param name="corpus">
        /// First corpus emitted by the tagger, or <c>null</c> when the tagger emits none or the
        /// corpus reports zero documents or zero tokens.
        /// </param>
        /// <param name="list">
        /// Distinct values of the first "Wort" layer; <c>null</c> when <paramref name="corpus"/> is <c>null</c>.
        /// </param>
        /// <param name="vecs">
        /// Result of <c>ContextToVec</c> for the corpus; <c>null</c> when <paramref name="corpus"/> is
        /// <c>null</c> (and possibly also when <c>ContextToVec</c> itself returns <c>null</c>).
        /// </param>
        /// <param name="pages">Page dictionaries that are queued into the cleanup step.</param>
        /// <param name="cmeta">Key/value pairs that are copied onto the corpus as corpus metadata.</param>
        private static void ExecuteProcessingWorkflow(
            out AbstractCorpusAdapter corpus,
            out HashSet<string> list,
            out Dictionary<string, double> vecs,
            IEnumerable<Dictionary<string, object>> pages,
            Dictionary<string, object> cmeta)
        {
            // Step 1 - queue every page into the cleanup step and run it.
            var cleaner = new StandardCleanup();
            foreach (var rawPage in pages)
            {
                cleaner.Input.Enqueue(rawPage);
            }

            cleaner.Execute();

            // Step 2 - tag the cleaned output.
            var tagger = new RawTextTagger
            {
                Input         = cleaner.Output,
                CorpusBuilder = new CorpusBuilderWriteDirect()
            };
            tagger.Execute();

            // Step 3 - take the first corpus the tagger produced, if any.
            corpus = tagger.Output.FirstOrDefault();

            var hasContent = corpus != null
                             && corpus.CountDocuments != 0
                             && corpus.CountToken != 0;
            if (!hasContent)
            {
                // Nothing usable came out of tagging: report "no result" on all three outputs.
                vecs   = null;
                list   = null;
                corpus = null;
                return;
            }

            // Step 4 - attach the caller-supplied metadata to the corpus.
            foreach (var entry in cmeta)
            {
                corpus.SetCorpusMetadata(entry.Key, entry.Value);
            }

            // Step 5 - derive the word set and the context vector from the corpus.
            var wordLayer = corpus.GetLayers("Wort").First();
            list = new HashSet<string>(wordLayer.Values);
            vecs = ContextToVec(corpus);
        }
コード例 #3
0
        /// <summary>
        /// Wraps <paramref name="inlineText"/> as a single page, detects its language and runs
        /// the processing workflow on it.
        /// </summary>
        /// <param name="inlineText">Text stored under the "Text" key of the single page (page number 1).</param>
        /// <param name="corpus">Corpus output of <c>ExecuteProcessingWorkflow</c>.</param>
        /// <param name="list">Word-set output of <c>ExecuteProcessingWorkflow</c>.</param>
        /// <param name="vecs">Vector output of <c>ExecuteProcessingWorkflow</c>.</param>
        public static void Inline(
            ref string inlineText,
            out AbstractCorpusAdapter corpus,
            out HashSet<string> list,
            out Dictionary<string, double> vecs)
        {
            // The whole text is treated as one page.
            var singlePage = new Dictionary<string, object>();
            singlePage.Add("Text", inlineText);
            singlePage.Add("PAGE", 1);

            var pages = new List<Dictionary<string, object>>();
            pages.Add(singlePage);

            // The detected language becomes the only corpus metadata entry.
            // DetectLanguage takes the list by reference, so the workflow below
            // receives whatever list instance it leaves in 'pages'.
            var cmeta = new Dictionary<string, object>();
            cmeta.Add("LANGUAGE", LanguageDetectorHelper.DetectLanguage(ref pages));

            ExecuteProcessingWorkflow(out corpus, out list, out vecs, pages, cmeta);
        }
コード例 #4
0
        /// <summary>
        /// Builds a weighted value vector from the first document of the corpus' first "Wort" layer.
        /// </summary>
        /// <remarks>
        /// Every layer value occurring in that document is counted. Values whose count does not
        /// exceed <c>(int)(1 + ln(total / 500))</c> are dropped, where <c>total</c> is the summed
        /// <c>Length</c> of the document's entries. Each remaining count is divided by <c>total</c>
        /// and multiplied by the factor <c>GetVectors</c> returns for that value, using the model
        /// selected by the corpus' "LANGUAGE" metadata. Values for which <c>GetVectors</c> returns
        /// no entry are omitted from the result.
        /// </remarks>
        /// <param name="corpus">Corpus to read; may be <c>null</c>.</param>
        /// <returns>
        /// The weighted vector, or <c>null</c> when <paramref name="corpus"/> is <c>null</c>, has no
        /// "Wort" layer, the layer has no documents, or its first document is <c>null</c>.
        /// </returns>
        private static Dictionary <string, double> ContextToVec(AbstractCorpusAdapter corpus)
        {
            // FirstOrDefault instead of First: a corpus without any "Wort" layer should end up in
            // the null-return path below rather than throwing InvalidOperationException.
            var layer = corpus?.GetLayers("Wort")?.FirstOrDefault();
            if (layer == null || !layer.DocumentGuids.Any())
            {
                return(null);
            }

            var doc = layer[layer.DocumentGuids.First()];
            if (doc == null)
            {
                return(null);
            }

            var count = 0.0;
            var dic   = new Dictionary <string, double>();

            foreach (var segment in doc)
            {
                count += segment.Length;
                foreach (var index in segment)
                {
                    var key = layer[index];

                    // Single lookup: TryGetValue leaves 'seen' at 0 for a value not counted yet.
                    double seen;
                    dic.TryGetValue(key, out seen);
                    dic[key] = seen + 1;
                }
            }

            // Minimum frequency grows logarithmically with the document size. With count == 0 the
            // logarithm is negative infinity and the int cast is not well defined, so fall back to 0
            // (dic is empty in that case, so the threshold has no effect on the result).
            var min = count > 0 ? (int)(1 + Math.Log(count / 500)) : 0;

            dic = dic.Where(x => x.Value > min).ToDictionary(x => x.Key, x => x.Value);

            var languageVectors = LanguageVectorModelRepository.GetModel((string)corpus.GetCorpusMetadata("LANGUAGE"));
            var model           = GetVectors(languageVectors, dic.Keys.ToArray());

            // Relative frequency scaled by the model factor; values unknown to the model are skipped.
            return(dic.Where(x => model.ContainsKey(x.Key)).ToDictionary(x => x.Key, x => x.Value / count * model[x.Key]));
        }
コード例 #5
0
 /// <summary>
 /// Creates the web service and stores the corpus loaded from <paramref name="file"/>.
 /// </summary>
 /// <param name="writer">Passed through to the base constructor.</param>
 /// <param name="ip">Passed through to the base constructor.</param>
 /// <param name="port">Passed through to the base constructor.</param>
 /// <param name="file">Argument handed to <c>CorpusLoadHelper.LoadCorpus</c>; also inserted into the start-up console message.</param>
 /// <param name="timeout">Passed through to the base constructor; defaults to 0.</param>
 public WebService(AbstractTableWriter writer, string ip, int port, string file, int timeout = 0)
     : base(writer, ip, port, timeout)
 {
     // Announce the load before it starts; the line is completed with the "Ok" resource afterwards.
     System.Console.Write(Resources.WebInit, file);

     var loaded = CorpusLoadHelper.LoadCorpus(file);
     _corpus = loaded;

     System.Console.WriteLine(Resources.Ok);
 }