internal Dictionary <string, int> GetPsychProfile(string[] documents)
        {
            // Concatenate every document into one text and build its normalized TF-IDF vector.
            string merged = documents.Aggregate((aggr, cur) => aggr + " " + cur);
            double[][] inputs = TFIDF.Normalize(TFIDF.Transform(new[] { merged }));

            // Score the same vector with each defense-mechanism model, in the
            // exact order the keys must appear in the resulting dictionary.
            (string Name, int Score)[] scores =
            {
                ("Отрицание",        modelDenial.Decide(inputs[0])),
                ("Вытеснение",       modelRepression.Decide(inputs[0])),
                ("Регрессия",        modelRegression.Decide(inputs[0])),
                ("Компенсация",      modelCompensation.Decide(inputs[0])),
                ("Проекция",         modelProjection.Decide(inputs[0])),
                ("Замещение",        modelDisplacement.Decide(inputs[0])),
                ("Рационализация",   modelRationalization.Decide(inputs[0])),
                ("Гиперкомпенсация", modelReactionFormation.Decide(inputs[0])),
            };

            Dictionary <string, int> result = new Dictionary <string, int>(9);
            int total = 0;

            foreach ((string name, int score) in scores)
            {
                result.Add(name, score);
                total += score;
            }

            // The final entry ("Общий уровень") is the rounded mean of the eight scores.
            result.Add("Общий уровень", Convert.ToInt32(Math.Round((double)total / 8)));

            return(result);
        }
Beispiel #2
0
        public void TfidfTest()
        {
            // Verifies that the static and cached TF-IDF computations agree on every
            // (file, term) pair, and that "world" ranks the three test files as expected.
            TFIDF  tfidf = new TFIDF(m_path);
            double tfidfJudeAndHeyJude      = Math.Round(TFIDF.CalculateTFIDF(m_path, m_testFile1, "jude"), 5);
            double cacheTfidfJudeAndHeyJude = Math.Round(tfidf.CacheCalculateTFIDF(m_testFile1, "jude"), 5);

            Assert.AreEqual(0.06891, tfidfJudeAndHeyJude);
            Assert.AreEqual(tfidfJudeAndHeyJude, cacheTfidfJudeAndHeyJude);

            // "jude" does not occur in file 2, so both paths must yield 0.
            double tfidfJudeAndHisGotTheWholeWorld      = Math.Round(TFIDF.CalculateTFIDF(m_path, m_testFile2, "jude"), 5);
            double cacheTfidfJudeAndHisGotTheWholeWorld = Math.Round(tfidf.CacheCalculateTFIDF(m_testFile2, "jude"), 5);

            Assert.AreEqual(0, tfidfJudeAndHisGotTheWholeWorld);
            Assert.AreEqual(0, cacheTfidfJudeAndHisGotTheWholeWorld);

            double tfidfWorldAndHeyJude      = Math.Round(TFIDF.CalculateTFIDF(m_path, m_testFile1, "world"), 5);
            double cacheTfidfWorldAndHeyJude = Math.Round(tfidf.CacheCalculateTFIDF(m_testFile1, "world"), 5);

            Assert.AreEqual(tfidfWorldAndHeyJude, cacheTfidfWorldAndHeyJude);

            double tfidfWorldAndHisGotTheWholeWorld      = Math.Round(TFIDF.CalculateTFIDF(m_path, m_testFile2, "world"), 5);
            double cacheTfidfWorldAndHisGotTheWholeWorld = Math.Round(tfidf.CacheCalculateTFIDF(m_testFile2, "world"), 5);

            Assert.AreEqual(tfidfWorldAndHisGotTheWholeWorld, cacheTfidfWorldAndHisGotTheWholeWorld);

            // Fixed local-variable typo: was "cacheTfidfWorldAndHealTheWorls".
            double tfidfWorldAndHealTheWorld      = Math.Round(TFIDF.CalculateTFIDF(m_path, m_testFile3, "world"), 5);
            double cacheTfidfWorldAndHealTheWorld = Math.Round(tfidf.CacheCalculateTFIDF(m_testFile3, "world"), 5);

            Assert.AreEqual(tfidfWorldAndHealTheWorld, cacheTfidfWorldAndHealTheWorld);
            // "world" is most significant in file 2, then file 3, then file 1.
            Assert.IsTrue(tfidfWorldAndHisGotTheWholeWorld > tfidfWorldAndHealTheWorld && tfidfWorldAndHealTheWorld > tfidfWorldAndHeyJude);
        }
Beispiel #3
0
        private static void HandleRunCommand()
        {
            // Executes the currently selected command (TF / IDF / TFIDF) via either
            // the cached instance or the static one-shot API, then advances the menu.
            // On any failure, the error is shown and the user is returned to command
            // selection.
            try
            {
                if (m_useCache)
                {
                    switch (m_command)
                    {
                    case Command.TF:
                        m_result = m_tfidf.CacheCalculateTF(m_fileName, m_term);
                        break;

                    case Command.IDF:
                        m_result = m_tfidf.CacheCalculateIDF(m_term);
                        break;

                    case Command.TFIDF:
                        m_result = m_tfidf.CacheCalculateTFIDF(m_fileName, m_term);
                        break;

                    default:
                        break;
                    }
                }
                else
                {
                    switch (m_command)
                    {
                    case Command.TF:
                        m_result = TFIDF.CalculateTF(m_path, m_fileName, m_term);
                        break;

                    case Command.IDF:
                        m_result = TFIDF.CalculateIDF(m_path, m_term);
                        break;

                    case Command.TFIDF:
                        m_result = TFIDF.CalculateTFIDF(m_path, m_fileName, m_term);
                        break;

                    default:
                        break;
                    }
                }

                m_level = MenuLevel.ShowResult;
            }
            catch (Exception e)
            {
                Console.WriteLine("error: {0}", e.Message);
                Console.WriteLine("press any key to try again");
                Console.ReadKey();
                m_level = MenuLevel.SelectCommand;
            }
        }
Beispiel #4
0
        // Sums the TF-IDF score of every word in `words` with respect to `doc`.
        private double TFIDF(IEnumerable <string> words, Document doc)
        {
            var evaluator = new TFIDF(documents, doc);

            return words.Sum(word => evaluator.Evaluate(word));
        }
Beispiel #5
0
        public TestTFIDF()
        {
            // Load the corpus into allDocumentWords, then run a TF-IDF pass over it.
            readAllWords();

            new TFIDF(allDocumentWords).execute();
        }
Beispiel #6
0
        // Pipeline: documents -> word dictionary -> per-document TF-IDF scores -> vectors.
        public static List <DocumentVector> transformDocuments2Vectors(List <string> documents)
        {
            Dictionary <string, List <string> > dictionary = transformDocuments2Dictionary(documents);
            Dictionary <string, Dictionary <string, double> > scores = new TFIDF(dictionary).execute();

            return(transformTFIDFs2Vectors(documents, scores));
        }
Beispiel #7
0
 public void ArgumanetsValidationTest()
 {
     // Any blank path or term must be rejected before touching the file system.
     foreach (var (path, term) in new[] { ("", ""), ("path", ""), ("", "term") })
     {
         Assert.ThrowsException <ArgumentException>(() => TFIDF.CalculateIDF(path, term));
     }

     // Well-formed but nonexistent locations surface as DirectoryNotFoundException.
     Assert.ThrowsException <DirectoryNotFoundException>(() => TFIDF.CalculateIDF("path", "term"));
     Assert.ThrowsException <ArgumentException>(() => new TFIDF(""));
     Assert.ThrowsException <DirectoryNotFoundException>(() => new TFIDF("invalidPath"));
 }
Beispiel #8
0
        public void TermInFileTest()
        {
            // The fixture is expected to give TF("data") == 0.125; cached and
            // non-cached computations must agree.
            TFIDF  tfidf     = new TFIDF(m_path);
            double tfCached  = tfidf.CacheCalculateTF(m_testFile, "data");
            double tfNoCache = TFIDF.CalculateTF(m_path, m_testFile, "data");

            Assert.AreEqual(0.125, tfNoCache);
            Assert.AreEqual(tfNoCache, tfCached);
        }
Beispiel #9
0
        public void TermNotInDocuments()
        {
            // "green" occurs in none of the corpus documents; both the static and
            // cached IDF paths must produce the same (rounded) expected value.
            double expected = Math.Round(1.58496250072116, 5);
            TFIDF  tfidf    = new TFIDF(m_path);

            Assert.AreEqual(expected, Math.Round(TFIDF.CalculateIDF(m_path, "green"), 5), "CalculateIDF returns incorrect value");
            Assert.AreEqual(expected, Math.Round(tfidf.CacheCalculateIDF("green"), 5), "CacheCalculateIDF returns incorrect value");
        }
Beispiel #10
0
        public void TermInAllDocumentsIDFTest()
        {
            // A term present in every document ("of") must have IDF == 0.
            // Renamed locals: they were copy-pasted as "heroIdf" from the "hero" test
            // even though this test queries "of".
            double ofIdf       = TFIDF.CalculateIDF(m_path, "of");
            TFIDF  tfidf       = new TFIDF(m_path);
            double ofIdfCached = tfidf.CacheCalculateIDF("of");
            double expectedIDF = 0;

            Assert.AreEqual(expectedIDF, ofIdf, "CalculateIDF returns incorrect value");
            Assert.AreEqual(expectedIDF, ofIdfCached, "CacheCalculateIDF returns incorrect value");
        }
Beispiel #11
0
        public void TermInSomeDocumentsIdfTest()
        {
            // "hero" appears in 2 of the 3 corpus documents, so IDF = log2(3/2);
            // the cached and static computations must agree with that value.
            double expectedIDF   = Math.Log((double)3 / (double)2, 2);
            TFIDF  tfidf         = new TFIDF(m_path);
            double heroIdf       = TFIDF.CalculateIDF(m_path, "hero");
            double heroIdfCached = tfidf.CacheCalculateIDF("hero");

            Assert.AreEqual(expectedIDF, heroIdf, "CalculateIDF returns incorrect value");
            Assert.AreEqual(expectedIDF, heroIdfCached, "CacheCalculateIDF returns incorrect value");
        }
Beispiel #12
0
        public void EmptyDirectoryTest()
        {
            // IDF over a directory with no documents must be 0 on both code paths.
            string emptyDirectoryPath = m_path + "empty Directory";

            Directory.CreateDirectory(emptyDirectoryPath);
            try
            {
                double idf      = TFIDF.CalculateIDF(emptyDirectoryPath, "term");
                TFIDF  tfidf    = new TFIDF(emptyDirectoryPath);
                double cacheIdf = tfidf.CacheCalculateIDF("term");

                Assert.AreEqual(0, idf);
                Assert.AreEqual(0, cacheIdf);
            }
            finally
            {
                // Always remove the scratch directory — previously it leaked whenever
                // an assertion (or the code under test) threw, breaking later runs.
                Directory.Delete(emptyDirectoryPath);
            }
        }
Beispiel #13
0
 // Prepares every subsystem in a fixed order: loaders and content processing
 // first, then crawling, reporting, support, TFIDF, and finally indexing.
 // NOTE(review): order may matter (indexEngine last, loaderComponent first) —
 // confirm before reordering.
 public void prepare()
 {
     loaderComponent.prepare();
     contentProcessor.prepare();
     linkResolver.prepare();
     crawlAdHok.prepare();
     crawlerJobEngine.prepare();
     directReportEngine.prepare();
     postReportEngine.prepare();
     supportEngine.prepare();
     TFIDF.prepare();
     indexEngine.prepare();
 }
        internal SVM()
        {
            // Deserialize one pre-trained multiclass SVM per defense mechanism.
            MulticlassSupportVectorMachine <Linear> Load(string path) =>
                Serializer.Load <MulticlassSupportVectorMachine <Linear> >(path);

            modelDenial            = Load(@"Models\Denial.dat");
            modelRepression        = Load(@"Models\Repression.dat");
            modelRegression        = Load(@"Models\Regression.dat");
            modelCompensation      = Load(@"Models\Compensation.dat");
            modelProjection        = Load(@"Models\Projection.dat");
            modelDisplacement      = Load(@"Models\Displacement.dat");
            modelRationalization   = Load(@"Models\Rationalization.dat");
            modelReactionFormation = Load(@"Models\Reaction Formation.dat");

            // Vocabulary is optional at this point; TryLoadVocabulary does not throw.
            // NOTE(review): "does not throw" is inferred from the Try* naming — confirm.
            TFIDF.TryLoadVocabulary();
        }
        /// <summary>
        /// Suggests intents whose vocabulary is within Levenshtein distance 1 of any
        /// user word (i.e. at most one typo away). Returns the assembled response
        /// message and whether at least one probable match was found.
        /// </summary>
        public KeyValuePair <string, bool> ProbableMatchTypoError(List <string> vocabList, List <ChatIntent> intentList)
        {
            string responseMessage = contentManager.IntentPossibleMatchedResponse;
            int    counter         = 0;

            LevenshteinDistance dist         = new LevenshteinDistance();
            TFIDF               getVocab     = new TFIDF();
            List <string>       responseList = new List <string>();

            foreach (string vocab in vocabList)
            {
                foreach (ChatIntent intent in intentList)
                {
                    // Redirect intents are not suggested directly.
                    if (CheckIfRedirect(intent, intentList))
                    {
                        continue;
                    }

                    foreach (string intentVocab in getVocab.GetVocabulary(intent.IntentDescription))
                    {
                        // distance < 2 => identical or one edit apart. counter <= 3
                        // preserves the original cap (it can admit a 4th entry;
                        // Distinct().Take(3) below trims the final list).
                        if (dist.Compute(vocab, intentVocab.ToLower()) < 2 && counter <= 3)
                        {
                            // Was: responseList.Where(x => x.ToString() == ...).Any()
                            // — Contains is the direct equivalent for strings.
                            if (!responseList.Contains(intent.IntentDescription))
                            {
                                counter = counter + 1;
                                responseList.Add(intent.IntentDescription);
                            }
                        }
                    }
                }
            }

            // Cap the suggestions at three distinct intents.
            responseList = (responseList.Count > 1) ? responseList.Distinct().Take(3).ToList() : responseList;

            foreach (string response in responseList)
            {
                responseMessage = responseMessage + "<br>";
                responseMessage = responseMessage + response;
            }

            responseMessage = responseMessage + "<br>" + contentManager.IntentSuggestionResponse;

            // A match was found iff at least one suggestion was collected
            // (replaces the never-assigned hasMatch local).
            return(new KeyValuePair <string, bool>(responseMessage, counter > 0));
        }
Beispiel #16
0
        static void Main(string[] args)
        {
            // End-to-end feature-location experiment over the Rhino corpus:
            // import corpus and queries, build TF/IDF vectors, rank by cosine
            // similarity against the gold set, and export effectiveness measures.
            string relativeLocation = "..\\..\\..\\..\\";

            var config = new Dictionary <string, string>
            {
                { "idPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpusMapping.txt") },
                { "docPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpus.txt") },
                { "qidPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoListOfFeatures.txt") },
                { "qdocPath", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoQueries.txt") },
                { "goldSetDir", System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoFeaturesToGoldSetMethodsMapping") },
                { "effAllPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessAllMethods.txt") },
                { "effBestPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessBestMethods.txt") },
            };

            Console.WriteLine("Running experiment...");
            Console.WriteLine("Importing corpus...");
            TLArtifactsCollection corpusArtifacts = Corpus.Import(config["idPath"], config["docPath"]);

            Console.WriteLine("Computing corpus vectors...");
            Vectorizer corpusVectors = new Vectorizer(corpusArtifacts, "Ordinal");

            Console.WriteLine("Computing corpus tf, df...");
            Normalizer corpusTF = new Normalizer(corpusVectors.Vectors);

            Console.WriteLine("Computing corpus idf...");
            NormalizedVector corpusIDF = InverseDocumentFrequency.Compute(corpusVectors.Frequencies, corpusVectors.Vectors.Count);

            Console.WriteLine("Computing corpus tf-idf...");
            // NOTE(review): corpusTFIDF is computed but never consumed by the
            // similarity step below — confirm whether that is intentional.
            NormalizedVectorCollection corpusTFIDF = TFIDF.Compute(corpusTF.Vectors, corpusIDF);

            Console.WriteLine("Importing queries...");
            TLArtifactsCollection queryArtifacts = Corpus.Import(config["qidPath"], config["qdocPath"]);

            Console.WriteLine("Computing corpus vectors...");
            Vectorizer queryVectors = new Vectorizer(queryArtifacts, "Boolean");

            Console.WriteLine("Computing similarities...");
            TLSimilarityMatrix sims = CosineSimilarity.Compute(corpusTF.Vectors, corpusTF.Lengths, queryVectors.Vectors);

            Console.WriteLine("Importing gold set...");
            TLSimilarityMatrix goldset = AnswerMapping.Import(config["goldSetDir"]);

            Console.WriteLine("Calculating effectiveness measures...");
            Effectiveness.Export(queryArtifacts, sims, goldset, config["effAllPath"], config["effBestPath"]);
            Console.WriteLine("Effectiveness measures written to:\n\t" + config["effAllPath"] + "\n\t" + config["effBestPath"]);
            Console.WriteLine("Experiment complete.");

            Console.WriteLine("\nPress enter key to continue...");
            Console.ReadLine();
        }
Beispiel #17
0
        public TF_IDF()
        {
            // Configure the TF-IDF codebook: logarithmic term frequency with the
            // default inverse-document-frequency weighting.
            this.codebook = new TFIDF()
            {
                Tf  = TermFrequency.Log,
                Idf = InverseDocumentFrequency.Default
            };

            // Pull the training sentences from the IDF dictionary source and fit
            // the codebook on them.
            string[][] sentences = Extract_sentences("IDF_Dictionary");
            codebook.Learn(sentences);
        }
Beispiel #18
0
        public void ArgumanetsValidationTest()
        {
            // Every combination with at least one blank argument must raise
            // ArgumentException before any file access happens.
            var blankArgs = new[]
            {
                ("", "", ""),
                ("path", "filename", ""),
                ("path", "", "term"),
                ("", "filename", "term"),
                ("", "", "term"),
                ("", "filename", ""),
                ("path", "", ""),
            };

            foreach (var (path, file, term) in blankArgs)
            {
                Assert.ThrowsException <ArgumentException>(() => TFIDF.CalculateTFIDF(path, file, term));
            }

            // Well-formed but nonexistent file -> FileNotFoundException.
            Assert.ThrowsException <FileNotFoundException>(() => TFIDF.CalculateTFIDF("invalidPath", "invalidFile", "term"));

            // Same contract on the cached instance.
            TFIDF tfidf = new TFIDF(Directory.GetCurrentDirectory());

            Assert.ThrowsException <ArgumentException>(() => tfidf.CacheCalculateTFIDF("", ""));
            Assert.ThrowsException <ArgumentException>(() => tfidf.CacheCalculateTFIDF("filename", ""));
            Assert.ThrowsException <ArgumentException>(() => tfidf.CacheCalculateTFIDF("", "term"));
            Assert.ThrowsException <FileNotFoundException>(() => tfidf.CacheCalculateTFIDF("filename", "term"));
        }
Beispiel #19
0
        /// <summary>
        /// Aggregates tf/df statistics for every word across docDB, sorts the words
        /// by descending tfidf, and persists the top 20,000 into a TFIDFDictionary.
        /// </summary>
        public void GenerateTFIDFDictionary()
        {
            // word key -> accumulated TFIDF stats across all documents
            Dictionary<int, TFIDF> tfidfDict = new Dictionary<int, TFIDF>();
            foreach (DocModel m in docDB)
            {
                for (int i = 0; i < m.Length; i++)
                {
                    int wordKey = m.Word(i);
                    TFIDF tfidf;
                    if (!tfidfDict.TryGetValue(wordKey, out tfidf))
                    {
                        tfidf = new TFIDF(wordKey, docDB.Count);
                        // NOTE(review): likely redundant — the constructor already
                        // receives wordKey; confirm before removing.
                        tfidf.key = wordKey;
                        tfidfDict.Add(wordKey, tfidf);
                    }
                    tfidf.tf += m.Count(i);
                    // NOTE(review): df is incremented once per word *position* in the
                    // document, not once per document; this is only a correct document
                    // frequency if each word occurs at most once per DocModel — verify.
                    tfidf.df++;
                }
            }

            // Sort descending by tfidf score.
            List<KeyValuePair<int, TFIDF>> tfidfs = tfidfDict.ToList();
            tfidfs.Sort(
                (x1, x2) =>
                {
                    // Exact double comparison is acceptable here: equal scores only
                    // arise from identical computations, and ties return 0.
                    if (x1.Value.tfidf > x2.Value.tfidf)
                    {
                        return -1;
                    }
                    else if (x1.Value.tfidf == x2.Value.tfidf)
                    {
                        return 0;
                    }
                    else
                    {
                        return 1;
                    }
                });
            // Keep at most the 20,000 highest-scoring words and persist them.
            TFIDFDictionary tfidfDictionary = new TFIDFDictionary();
            for (int i = 0; i < tfidfs.Count && i < 20000; i++)
            {
                tfidfDictionary.AddValue(wordDict.GetKey(tfidfs[i].Key), tfidfs[i].Key);
            }
            tfidfDictionary.StoreToDB();
        }
Beispiel #20
0
        private static void HandlePath()
        {
            // Prompt for a corpus directory. A valid path builds the cached TFIDF
            // instance and advances the menu; an invalid one re-prompts.
            PrintHeadLine();
            Console.WriteLine("Please enter your directory path:");
            m_path = Console.ReadLine();

            if (!Directory.Exists(m_path))
            {
                Console.WriteLine("Invalid path: Directory not found");
                Console.WriteLine("Press any key to continue");
                Console.ReadKey();
                return;
            }

            m_tfidf = new TFIDF(m_path);
            m_level = MenuLevel.SelectCommand;
        }
Beispiel #21
0
 // Loads a pre-trained word2vec vocabulary and prepares an SMO kernel plus a
 // TF-IDF codebook for later use.
 public MachineL(string fpath, string langg, List <string[]> LearTF)
 {
     language     = langg;
     W2Vlocation  = fpath;
     W2Vvocublary = new Word2VecTextReader().Read(W2Vlocation);

     Kernels = new SequentialMinimalOptimization <Gaussian>()
     {
         UseComplexityHeuristic = true,
         UseKernelEstimation    = true,
     };

     codeB = new TFIDF()
     {
         Tf  = TermFrequency.Log,
         Idf = InverseDocumentFrequency.Default
     };

     // NOTE(review): fitting the codebook (codeB.Learn(LearTF.ToArray())) is
     // deliberately left disabled here, so the LearTF parameter is currently unused.
     Console.WriteLine("w2v is read");
     Nomiss = new List <string>();
 }
Beispiel #22
0
        /// <summary>
        /// Builds a readable dump of the normalized TF-IDF weights: each content
        /// line followed by its comma-separated weight vector and a blank line.
        /// </summary>
        public string TFIDF_score(string[] webcontent)
        {
            TFIDF idf = new TFIDF();

            double[][] inputs = idf.Transform(webcontent, 0);
            inputs = TFIDF.Normalize(inputs);

            string final = "";

            for (int index = 0; index < inputs.Length; index++)
            {
                final = final + webcontent[index] + "\n";

                foreach (double value in inputs[index])
                {
                    // BUG FIX: was `final = value + ","`, which threw away everything
                    // accumulated so far on every iteration instead of appending.
                    final = final + value + ",";
                }

                final = final + "\n\n";
            }
            return(final);
        }
    // Demo page: indexes two hard-coded news articles and writes their TF-IDF
    // cosine similarity to the response.
    protected void Page_Load(object sender, EventArgs e)
    {
        string s1 = @"Prince Harry is currently in Cape Town awaiting an improvement in weather conditions so that he, along with his fellow Walking With The Wounded teammates, can fly to Antarctica's Novo Airbase and begin acclimating their bodies to the extreme conditions before their 200-mile trek to the South Pole.
By committing to head to the bottom of the earth alongside his team, Harry is setting a whole new precedent in terms of royal charity involvement for the future.
Victoria Arbiter
Victoria Arbiter
Members of the royal family serve to provide continuity, promote British interests, and act as global ambassadors by representing all that is great about Great Britain, but a large percentage of their operational life is devoted to charity work.
For decades royals have travelled the length and breadth of the country, and indeed the globe, on behalf of their many organizations. They act as patron or president, raise awareness of the cause, cut ribbons, unveil plaques, attend dinners, plant trees, and most importantly -- raise money.
A royal patronage is about the best gift a charity can receive short of a wealthy benefactor bequeathing millions of dollars to the cause on their death bed, and in times of recession and economic hardship the survival of many charities rests on the regal shoulders of its patron.
 Prince prepares for South Pole trek Cressida, the new diamond in the rough Princes unite to help Lesotho herd boys
At 87 the Queen has more than 600 patronages and at 92 Prince Philip has about 800. According to a recent Time magazine article, Prince Charles raised $224 million for his charities between April 2012 and March 2013.
Tickets to the upcoming Winter Whites Gala on behalf of homeless charity Centrepoint were going for the princely sum of £500 before selling out almost immediately. The reason for the large price tag and instant sell out? Prince William, Patron of Centrepoint, will be in attendance.
Charities can command top dollar when a senior royal rolls out. Along with said royal comes a legion of reporters and wealthy benefactors, and whenever Kate's involved you can pretty much guarantee the occasion making front page news the following day. That type of attention leaves charity heads googly-eyed.
The royals have always approached charity engagements with enthusiasm, well aware that their presence allows for worldwide exposure. One need only to look at coverage of Diana shaking hands with an AIDS patient in 1989, or her walk through a partially-cleared land mine field in Angola in 1997, to understand the power of a globally recognized figure.
William and Harry, however, have taken things one step further in recent years by rolling up their proverbial sleeves and throwing themselves in at ground level.
In December 2009 Prince William spent the night sleeping rough near Blackfriars Bridge in central London. He did so in order to gain a better understanding of what the homeless community experiences night after night.
Had he simply dished out soup and shaken hands with a few volunteers he still would have drawn attention to the work of the charity Centrepoint, but by actually bedding down on the streets of Central London he significantly heightened public awareness.
In March 2011 Prince Harry joined a team of injured servicemen for the first five days of their trek to the North Pole. Yes, of course it was about raising money for the charity Walking With The Wounded, but as Harry said at the time, it was also about raising an awareness of the debt the country owes to those it sends off to fight.
 Keeping Prince George from prying eyes The royal aunt and uncle Royal baby's 'fun' Uncle Harry
Harry has made no secret of his dedication to the welfare of injured servicemen and women, and the money raised enables the charity to fulfill its mission; however, by taking part alongside his fellow soldiers, Harry gave them far more than a well-funded charity. He showed them that they matter, that their loss matters, and that their lives may continue to inspire.
Looking to the future of the monarchy, Charles has made it clear he wants to push for a more streamlined royal family, but I hope that when the time comes he will make room for extended members of the family to step up and continue their efforts on behalf of their chosen charities.
As the only blood-born princesses of their generation, Beatrice and Eugenie have already shown a readiness to support causes meaningful to them. Were the Queen to give them an official role, their potential could be enormous.
It comes down to simple mathematics: streamline the monarchy, and funding to the smaller charities that rely on a royal patron slips down the tubes.
Royals and charity work will always go hand-in-hand -- and long may it be so. Plaques will remain, trees will grow, and the work of the charity in question will continue, but it is this new hardcore approach that is so exciting.
It won't work for everyone, and it would lose its impact if suddenly every engagement required rigorous training, compression chambers, hard hats, life vests and the likes, but we should salute Prince Harry on his epic polar endeavor.
Harry's physical disability may be limited to a broken toe, but walking alongside those brave wounded warriors will no doubt leave him with an unbreakable spirit.";


        string s2 = @"Now the welfare of footballers is top of Fabrice Muamba's agenda and he has urged FIFA to put the wellbeing of players at the center of any decision on the scheduling of the Qatar 2022 World Cup.
FIFA president Sepp Blatter announced on Twitter last month that no decision on the staging of the 2022 World Cup -- be it in Qatar's summer or winter -- would be made until after the 2014 tournament in Brazil.
I hope FIFA will have a second thought because playing in those conditions is very dangerous for people,the 25-year-old told CNN.
The heat and the humidity in that country can damage people; they have to look at the bigger picture.
 Fabrice Muamba on playing soccer again Muamba: I've played football again
Former England Under-21 international Muamba suffered an on-field heart seizure while playing for Bolton Wanderers against Tottenham Hotspur in March 2012.
Read: Emotional Muamba gets closure
The Congo-born midfielder was revived by medics before making a remarkable recovery in hospital.
You worry, not just me but every player,continued Muamba. The humidity, the heat, playing in those conditions it is very worrying.
President of world football's governing body since 1998, Blatter launched a consultation process on the issue involving all stakeholders in Qatar 2022.
Harold Mayne-Nicholls led the FIFA inspection team which examined each of the bidding countries for the 2022 World Cup before delivering his report in October 2010.
Mayne-Nicholls concluded that Qatar was a high-risk option because of its soaring temperatures -- but it was still chosen by 14 of the 22 executive committee members in the final round of voting in December that year.
 Fabrice Muamba leaves hospital Fabrice Muamba leaves hospital
 Fabrice Muamba tributes Fabrice Muamba tributes
In June and July you cannot play, Mayne-Nicholls told CNN last month when asked about the conditions in Qatar.
It's not for the players. The players will be OK with the cooling system but what about the fans?
You'll have 50,000 fans walking three, four, even six blocks or more like in South Africa where I walked 10 blocks.
They will be walking in 40 degrees and it's too much. One or two crucial cases will damage the entire image of the World Cup and we must be careful.
Read: Devastated Muamba retires from football
Muamba is pleased his experiences have raised awareness of heart conditions in football, but he wants to see the sport continue to prioritize player safety.
We're trying to reach a standard where we're providing the best available equipment for the boys,said Muamba, who began his career with Arsenal. Also making sure every player gets a heart screen so we detect any damage or any medical issue.
What I tried to do was raise awareness of sudden cardiac arrest, having a defibrillator not just in the stadium but in every public place so we can save lives and ensure peoples safety.";

        //string s1 = "Mr.Green killed Colonel Mustard in the study with the candlestick. Mr.Green is not a very nice fellow";

        //string s2 = "Professor Plumb has a green plant in his study";

        //string s3 = "Miss Scarlett watered Professor Plumb's green plant while he was away from his office last week";


        // Build the shared term index from both strings
        BuildIndex(s1, s2);

        // Get the term-frequency table for every word in s1
        Hashtable val = GetVector();

        // Get the term-frequency table for every word in s2
        Hashtable val2 = GetVector2();


        // Compute the cosine similarity of the two strings
        Response.Write(string.Format("Cosine Similarty : {0} ", TFIDF.cosineSimilarityByTFIDF(TotalNoun, val, val2)));
    }
Beispiel #24
0
    public static void Main()
    {
        // Builds a fresh Naive Bayes teacher; the tiny Gaussian regularization
        // keeps zero-variance features from producing degenerate distributions.
        NaiveBayesLearning <NormalDistribution> CreateTeacher()
        {
            var teacher = new NaiveBayesLearning <NormalDistribution>();
            teacher.Options.InnerOption = new NormalOptions {
                Regularization = 1e-12
            };
            return teacher;
        }

        // Load every document and collect its annotation vocabulary (备注词汇).
        var documents = new Document[N];
        var words     = new string[N][];
        for (int i = 0; i < N; ++i)
        {
            documents[i] = new Document(i);
            words[i]     = documents[i].备注词汇;
        }

        // Learn a TF-IDF codebook over all annotation vocabularies.
        var tfIdf = new TFIDF();
        tfIdf.Learn(words);

        // Feature vector layout: four boolean business-status flags followed by
        // the document's TF-IDF weights.
        var inputs = new double[N][];
        for (int i = 0; i < N; ++i)
        {
            var doc = documents[i];
            doc.备注特征向量 = tfIdf.Transform(doc.备注词汇);
            var features = new double[doc.备注特征向量.Length + 4];
            features[0] = doc.能查到正在营业 ? 1.0 : 0.0;
            features[1] = doc.能查到曾经营业 ? 1.0 : 0.0;
            features[2] = doc.无营业信息 ? 1.0 : 0.0;
            features[3] = doc.GPS定位 ? 1.0 : 0.0;
            doc.备注特征向量.CopyTo(features, 4);
            doc.特征向量 = features;
            inputs[i]   = features;
        }

        // Parse the expected label for each of the three classification targets.
        var outputs通讯情况 = new int[N];
        var outputs存在状况 = new int[N];
        var outputs数据有效 = new int[N];
        for (int i = 0; i < N; ++i)
        {
            documents[i].ParseOutput();
            outputs通讯情况[i] = documents[i].通讯情况;
            outputs存在状况[i] = documents[i].存在状况;
            outputs数据有效[i] = documents[i].数据有效 ? 1 : 0;
        }

        // Train one independent Naive Bayes model per target.
        var model通讯情况 = CreateTeacher().Learn(inputs, outputs通讯情况);
        var model存在状况 = CreateTeacher().Learn(inputs, outputs存在状况);
        var model数据有效 = CreateTeacher().Learn(inputs, outputs数据有效);

        // Evaluate on the training set itself, printing every disagreement
        // between the human label and the model's decision.
        var correct通讯情况 = 0;
        var correct存在状况 = 0;
        var correct数据有效 = 0;
        for (int i = 0; i < N; ++i)
        {
            var doc = documents[i];

            var 通讯情况 = model通讯情况.Decide(doc.特征向量);
            if (doc.通讯情况 == 通讯情况)
            {
                ++correct通讯情况;
            }
            else
            {
                Console.WriteLine("Input{0}.txt的通讯情况 你认为:{1} 电脑认为:{2}",
                                  i, Document.通讯情况说明[doc.通讯情况],
                                  Document.通讯情况说明[通讯情况]);
            }

            var 存在状况 = model存在状况.Decide(doc.特征向量);
            if (doc.存在状况 == 存在状况)
            {
                ++correct存在状况;
            }
            else
            {
                Console.WriteLine("Input{0}.txt的存在状况 你认为:{1} 电脑认为:{2}",
                                  i, Document.存在状况说明[doc.存在状况],
                                  Document.存在状况说明[存在状况]);
            }

            var 数据有效 = model数据有效.Decide(doc.特征向量) == 1;
            if (doc.数据有效 == 数据有效)
            {
                ++correct数据有效;
            }
            else
            {
                Console.WriteLine("Input{0}.txt的数据有效 你认为:{1} 电脑认为:{2}",
                                  i, doc.数据有效, 数据有效);
            }
        }

        // Report per-target training accuracy as a percentage.
        Console.WriteLine("通讯情况准确率: {0:F2} %",
                          (double)correct通讯情况 / N * 100);
        Console.WriteLine("存在状况准确率: {0:F2} %",
                          (double)correct存在状况 / N * 100);
        Console.WriteLine("数据有效准确率: {0:F2} %",
                          (double)correct数据有效 / N * 100);
    }
Beispiel #25
0
        /// <summary>
        /// Generate a K-means clustering of the text in one column of a CSV file
        /// and save the clustered rows (with the cluster's top terms) to a file.
        /// </summary>
        /// <param name="filePath"> path of CSV file that contains data</param>
        /// <param name="columnname">Name of Column that has text to be considered</param>
        /// <param name="ct">Column types of the CSV file</param>
        /// <param name="k">Number of clusters</param>
        /// <param name="maxiter">Maximum number of iterations</param>
        /// <param name="outputfile">Path where to save clustered data.</param>
        public void doIt(string filePath, string columnname, List <Type> ct, int k, int maxiter, string outputfile)
        {
            Common c = new Common();

            // Read the CSV into a string matrix and pull out the target text column.
            string[,] dt = c.getStringMatrix(filePath, ',', ct);
            string[] inp = c.getStringVector(dt, columnname);

            List <string> vocab = new List <string>();

            // TF-IDF-encode the documents; `vocab` receives the learned vocabulary
            // so term indices can be mapped back to words below.
            double[][] inputs = TFIDF.Transform(inp, ref vocab);

            inputs = TFIDF.Normalize(inputs);


            // NOTE(review): slices the normalized matrix via a project helper —
            // presumably the rows corresponding to the CSV's data rows; confirm
            // the (0, 1, dt.Length) arguments against Common.getArrayRange.
            double[][] tf_labels = Common.getArrayRange(inputs, 0, 1, dt.Length);


            // Seed centroids deterministically (seed 0) rather than at random.
            double[][] tf_centroids = smart_centroid_initalization(inputs, k, 0);


            // `heter` accumulates per-iteration heterogeneity; result holds the
            // final centroids (Item1) and each row's cluster assignment (Item2).
            List <double>             heter  = new List <double>();
            Tuple <double[][], int[]> result = kmeans(tf_labels, k, tf_centroids, maxiter, ref heter);


            // Per-cluster buckets of vectors and of raw input strings.
            // NOTE(review): `lld` is filled below but never read afterwards.
            List <List <double[]> > lld = new List <List <double[]> >();

            List <List <string> > lll = new List <List <string> >();

            for (int i = 0; i < k; i++)
            {
                lld.Add(new List <double[]>());
                lll.Add(new List <string>());
            }

            for (int i = 0; i < result.Item2.Length; i++)
            {
                lld[result.Item2[i]].Add(tf_labels[i]);
            }

            // For each cluster, build a label string of its highest-weighted terms.
            // NOTE(review): the inner loop takes the top `k` terms, i.e. the number
            // of clusters doubles as the number of terms shown — confirm intended.
            string[] vocabul = new string[k];
            for (int i = 0; i < k; i++)
            {
                // Sort a copy of the centroid weights descending to find top terms.
                List <double> ccc = result.Item1[i].ToList();
                ccc.Sort();
                ccc.Reverse();

                for (int j = 0; j < k; j++)
                {
                    vocabul[i] += vocab[Array.IndexOf(result.Item1[i], ccc[j])] + ": " + result.Item1[i][Array.IndexOf(result.Item1[i], ccc[j])] + "  ";
                }
            }

            // Write one line per input row: the text plus its cluster's top terms.
            using (TextWriter writer = File.CreateText(outputfile))
            {
                for (int i = 0; i < result.Item2.Length; i++)
                {
                    writer.WriteLine(inp[i] + "," + vocabul[result.Item2[i]]);
                    lll[result.Item2[i]].Add(inp[i]);
                }
            }

            // NOTE(review): ReadKey blocks until a key press — fine for a console
            // demo, surprising if this method is called from library code.
            Console.WriteLine("Finished!!!");
            Console.ReadKey();
        }
Beispiel #26
0
        /// <summary>
        /// Chat bot response main entry point. Resolves the user's Message to a
        /// ChatIntent by trying, in order: greeting/goodbye detection, full and
        /// partial suggestion matches, TF-IDF similarity against known questions,
        /// probable matches (spelling/typo), and synonym matches. Falls back to a
        /// "not understood" response on the current node when nothing matches.
        /// </summary>
        public ChatIntent ChatResponseMain()
        {
            // When the matched intent has entities configured, delegate to NLP
            // entity extraction; otherwise the matched intent itself is the reply.
            ChatIntent ResolveEntities(ChatIntent matchedIntent)
            {
                var hasEntity = from ent in db.ChatEntity
                                where ent.ChatIntentId == matchedIntent.ChatIntentId
                                select ent;
                if (hasEntity.Any())
                {
                    AskMeEntityExtraction entity = new AskMeEntityExtraction(Message, matchedIntent.ChatIntentId, SessionId);
                    return entity.GetEntityforIntentfromNLP(matchedIntent);
                }
                return matchedIntent;
            }

            string responseMessage = contentManager.NoIntentMatchedResponse;
            TFIDF  getVocab        = new TFIDF();
            List <ChatIntent> intentListAll = db.ChatIntent.ToList();

            // Intent 0 is the generic fallback intent reused for all plain-text replies.
            ChatIntent responseIntent = intentListAll.Where(x => x.ChatIntentId == 0).FirstOrDefault();

            #region 1.CheckIntentGreetingOrGoodbye
            if (hiBye.Greet())
            {
                return(UpdateIntent(Node, contentManager.GreetResponse, responseIntent));
            }
            else if (hiBye.GoodBye())
            {
                return(UpdateIntent(Node, contentManager.GoodbyeResponse, responseIntent));
            }
            #endregion

            // Candidate intents are the children of the current conversation node;
            // ids 0-2 are reserved/system intents and are excluded.
            List <ChatIntent> intentList = (from intention in intentListAll
                                            where intention.ChatIntentId > 2 && intention.ParentId == Node
                                            select intention).ToList();

            #region 2.CheckIntentFullMatchbySuggestion
            KeyValuePair <int, bool> fullMatch = suggestionMatch.FullSuggestionMatch(intentList);
            if (fullMatch.Value)
            {
                ChatIntent fullMatchIntent = intentList.Where(x => x.ChatIntentId == fullMatch.Key).FirstOrDefault();
                return ResolveEntities(fullMatchIntent);
            }

            KeyValuePair <int, bool> partialMatch = suggestionMatch.PartialSuggestionMatch(intentList);
            if (partialMatch.Value)
            {
                ChatIntent partialMatchIntent = intentList.Where(x => x.ChatIntentId == partialMatch.Key).FirstOrDefault();
                return ResolveEntities(partialMatchIntent);
            }
            #endregion

            // Bail out early when the message yields no usable vocabulary, or is a
            // bare yes/no answer — neither can be matched to an intent here.
            List <string> vocabList = getVocab.GetVocabulary(Message);
            if (vocabList.Count == 0)
            {
                return(UpdateIntent(Node, contentManager.NoIntentMatchedResponse, responseIntent));
            }

            if (Message.ToLower() == "yes" || Message.ToLower() == "no")
            {
                return(UpdateIntent(Node, contentManager.NoIntentMatchedResponse, responseIntent));
            }

            #region 3.TFIDF Match Process
            // Score the message against every known question, keeping only the
            // best similarity score seen per intent.
            SimilarityCalculator      similarityCalculator = new SimilarityCalculator();
            List <ChatIntentQuestion> questionList         = db.ChatIntentQuestion.ToList();
            Dictionary <int, double>  scoreDict            = new Dictionary <int, double>();
            foreach (ChatIntentQuestion question in questionList)
            {
                double compare = similarityCalculator.CompareString(Message, question.QuestionDesc, 1);
                if (scoreDict.TryGetValue(question.ChatIntentId, out double existing))
                {
                    if (existing < compare)
                    {
                        scoreDict[question.ChatIntentId] = compare;
                    }
                }
                else
                {
                    scoreDict.Add(question.ChatIntentId, compare);
                }
            }

            // A score above 0.45 is treated as a confident match; scores in
            // [0.23, 0.45] only produce a list of suggestions for the user.
            if (scoreDict.Where(x => x.Value > 0.45).Any())
            {
                int        maxScoreChatIntentId = scoreDict.OrderByDescending(x => x.Value).Select(y => y.Key).FirstOrDefault();
                ChatIntent maxIntent            = intentListAll.Where(x => x.ChatIntentId == maxScoreChatIntentId).FirstOrDefault();
                Node = maxScoreChatIntentId;
                return ResolveEntities(maxIntent);
            }
            else if (scoreDict.Where(x => x.Value >= 0.23).Any())
            {
                List <int> possibeMatch = scoreDict.OrderByDescending(x => x.Value).Where(x => x.Value >= 0.23).Select(y => y.Key).ToList();
                responseMessage = contentManager.IntentPossibleMatchedResponse;
                foreach (int match in possibeMatch)
                {
                    responseMessage = responseMessage + ", ";
                    string suggestion = intentListAll.Where(x => x.ChatIntentId == match).Select(y => y.IntentDescription).FirstOrDefault();
                    responseMessage = responseMessage + suggestion;
                }
                responseMessage = responseMessage + ", " + contentManager.IntentSuggestionResponse;
                return(UpdateIntent(Node, responseMessage, responseIntent));
            }
            #endregion

            #region 4.Probable Match Process
            KeyValuePair <string, bool> probableMatchCorrect = zPossibleMatch.ProbableMatchCorrectSpelling(vocabList, intentListAll);
            if (probableMatchCorrect.Value)
            {
                common.LogFailureResponse();
                responseMessage = probableMatchCorrect.Key;
                return(UpdateIntent(Node, responseMessage, responseIntent));
            }

            KeyValuePair <string, bool> probableMatchTypo = zPossibleMatch.ProbableMatchTypoError(vocabList, intentListAll);
            if (probableMatchTypo.Value)
            {
                common.LogFailureResponse();
                responseMessage = probableMatchTypo.Key;
                return(UpdateIntent(Node, responseMessage, responseIntent));
            }
            #endregion

            #region 5.Synonym Match Process
            KeyValuePair <string, bool> synMatch = synonymMatch.SynonymMatch(vocabList, intentListAll);
            if (synMatch.Value)
            {
                common.LogFailureResponse();
                responseMessage = synMatch.Key;
                return(UpdateIntent(Node, responseMessage, responseIntent));
            }
            #endregion

            // Nothing matched: reply with the fallback response on the current node.
            if (responseIntent != null)
            {
                responseIntent.ChatIntentId = Node;
                responseIntent.Response     = responseMessage;
            }
            else
            {
                responseIntent = new ChatIntent();
                responseIntent.ChatIntentId = Node;
                responseIntent.Response     = "Sorry I did not understand, Please enter one of the suggestions";
            }
            return(responseIntent);
        }
        /// <summary>
        /// End-to-end demo: downloads blog posts from a GitHub repo, parses them
        /// with a catalyst NLP pipeline, trains FastText (PVDM) document vectors
        /// and a TF-IDF model, then builds per-post "similar post" recommendations
        /// by combining HNSW nearest-neighbour distance over the document vectors
        /// with a TF-IDF-weighted title-proximity score.
        /// </summary>
        private static async Task Main()
        {
            Console.WriteLine("Reading posts from GitHub repo..");
            var posts = await GetBlogPosts();

            Console.WriteLine("Parsing documents..");
            // Model files are cached on disk under "catalyst-models".
            Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));
            var language           = Language.English;
            var pipeline           = Pipeline.For(language);
            // Each post becomes a catalyst Document keyed by a hash of its title.
            var postsWithDocuments = posts
                                     .Select(post =>
            {
                var document = new Document(NormaliseSomeCommonTerms(post.PlainTextContent), language)
                {
                    UID = post.Title.Hash128()
                };
                pipeline.ProcessSingle(document);
                return(Post: post, Document: document);
            })
                                     .ToArray(); // Call ToArray to force evaluation of the document processing now

            Console.WriteLine("Training FastText model..");
            var fastText = new FastText(language, version: 0, tag: "");

            // PVDM = paragraph-vector training, producing one vector per document.
            fastText.Data.Type                  = FastText.ModelType.PVDM;
            fastText.Data.Loss                  = FastText.LossType.NegativeSampling;
            fastText.Data.IgnoreCase            = true;
            fastText.Data.Epoch                 = 50;
            fastText.Data.Dimensions            = 512;
            fastText.Data.MinimumCount          = 1;
            fastText.Data.ContextWindow         = 10;
            fastText.Data.NegativeSamplingCount = 20;
            fastText.Train(
                postsWithDocuments.Select(postsWithDocument => postsWithDocument.Document),
                trainingStatus: update => Console.WriteLine($" Progress: {update.Progress}, Epoch: {update.Epoch}")
                );

            Console.WriteLine("Training TF-IDF model..");
            var tfidf = new TFIDF(pipeline.Language, version: 0, tag: "");
            await tfidf.Train(postsWithDocuments.Select(postWithDocument => postWithDocument.Document));

            Console.WriteLine("Getting average TF-IDF weights per word..");
            // Collect every TF-IDF frequency observed for each token (case-insensitive),
            // then average them so each word gets a single importance weight.
            var tokenValueTFIDF = new Dictionary <string, List <float> >(StringComparer.OrdinalIgnoreCase);

            foreach (var doc in postsWithDocuments.Select(postWithDocument => postWithDocument.Document))
            {
                tfidf.Process(doc);
                foreach (var sentence in doc)
                {
                    foreach (var token in sentence)
                    {
                        if (!tokenValueTFIDF.TryGetValue(token.Value, out var freqs))
                        {
                            freqs = new();
                            tokenValueTFIDF.Add(token.Value, freqs);
                        }
                        freqs.Add(token.Frequency);
                    }
                }
            }
            var averagedTokenValueTFIDF = tokenValueTFIDF.ToDictionary(
                entry => entry.Key,
                entry => entry.Value.Average(), StringComparer.OrdinalIgnoreCase
                );

            Console.WriteLine("Building recommendations..");

            // Combine the blog post data with the FastText-generated vectors
            var results = fastText
                          .GetDocumentVectors()
                          .Select(result =>
            {
                // Each document vector instance will include a "token" string that may be mapped back to the
                // UID of the document for each blog post. If there were a large number of posts to deal with
                // then a dictionary to match UIDs to blog posts would be sensible for performance but I only
                // have a 100+ and so a LINQ "First" scan over the list will suffice.
                var uid           = UID128.Parse(result.Token);
                var postForResult = postsWithDocuments.First(
                    postWithDocument => postWithDocument.Document.UID == uid
                    );
                return(UID: uid, result.Vector, postForResult.Post);
            })
                          .ToArray(); // ToArray since we enumerate multiple times below

            // Construct a graph to search over, as described at
            // https://github.com/curiosity-ai/hnsw-sharp#how-to-build-a-graph
            var graph = new SmallWorld <(UID128 UID, float[] Vector, BlogPost Post), float>(
                distance: (to, from) => CosineDistance.NonOptimized(from.Vector, to.Vector),
                DefaultRandomGenerator.Instance,
                new() { M = 15, LevelLambda = 1 / Math.Log(15) }
                );

            graph.AddItems(results);

            const int maximumNumberOfResultsToReturn = 3;
            var       postsWithSimilarResults        = results
                                                       .Select(result =>
            {
                // Request that the KNNSearch operate over all documents because we can't take the top {n}
                // until we've combined the ordering with the title TFIDF proximity values
                var similarResults = graph
                                     .KNNSearch(result, postsWithDocuments.Length)
                                     .Where(similarResult => similarResult.Item.UID != result.UID);

                // Tokens of the current post's (normalised) title, used to score
                // how close each candidate's title is in TF-IDF terms.
                var tokenValuesInTitle =
                    GetAllTokensForText(NormaliseSomeCommonTerms(result.Post.Title), pipeline)
                    .Select(token => token.Value)
                    .ToHashSet(StringComparer.OrdinalIgnoreCase);

                // Rank candidates by title proximity first, then vector distance,
                // and keep only the top few.
                return(new
                {
                    result.Post,
                    Similar = similarResults
                              .Select(similarResult => new
                    {
                        similarResult.Item.Post,
                        similarResult.Distance,
                        ProximityByTitleTFIDF = GetProximityByTitleTFIDF(
                            NormaliseSomeCommonTerms(similarResult.Item.Post.Title),
                            tokenValuesInTitle,
                            averagedTokenValueTFIDF,
                            pipeline
                            )
                    })
                              .OrderByDescending(similarResult => similarResult.ProximityByTitleTFIDF)
                              .ThenBy(similarResult => similarResult.Distance)
                              .Take(maximumNumberOfResultsToReturn)
                              .ToArray()
                });
            })
                                                       .OrderBy(result => result.Post.Title, StringComparer.OrdinalIgnoreCase)
                                                       .ToArray();

            // Print each post followed by its recommendations, nearest first.
            foreach (var postWithSimilarResults in postsWithSimilarResults)
            {
                Console.WriteLine();
                Console.WriteLine(postWithSimilarResults.Post.Title);
                foreach (var similarResult in postWithSimilarResults.Similar.OrderBy(other => other.Distance))
                {
                    Console.WriteLine($"{similarResult.ProximityByTitleTFIDF:0.000} {similarResult.Distance:0.000} {similarResult.Post.Title}");
                }
            }

            Console.WriteLine();
            Console.WriteLine("Done! Press [Enter] to terminate..");
            Console.ReadLine();
        }
Beispiel #28
0
        /// <summary>
        /// Benchmarks an averaged stochastic gradient descent linear SVM on the
        /// RCV1-v2 text-categorization dataset (binary task: "CCAT" vs rest),
        /// reproducing Leon Bottou's SGD experiment. TF-IDF features are computed
        /// once and cached on disk, as is the learned codebook.
        /// </summary>
        private static void TestLinearASGD()
        {
            // http://leon.bottou.org/projects/sgd

            string codebookPath = "codebook.bin";
            string x_train_fn   = "x_train.txt.gz";
            string x_test_fn    = "x_test.txt.gz";

            Sparse <double>[] xTrain = null, xTest = null;
            bool[]            yTrain = null, yTest = null;

            // Check if we have the precomputed dataset on disk.
            // BUGFIX: the second condition previously re-tested x_train_fn, so a
            // missing test-set cache was never detected.
            if (!File.Exists(x_train_fn) || !File.Exists(x_test_fn))
            {
                Console.WriteLine("Downloading dataset");
                RCV1v2 rcv1v2 = new RCV1v2(@"C:\Temp\");

                // Note: Leon Bottou's SGD inverts training and
                // testing when benchmarking in this dataset
                var trainWords = rcv1v2.Testing.Item1;
                var testWords  = rcv1v2.Training.Item1;

                // Binary labels: does the sample carry the CCAT category?
                string positiveClass = "CCAT";
                yTrain = rcv1v2.Testing.Item2.Apply(x => x.Contains(positiveClass));
                yTest  = rcv1v2.Training.Item2.Apply(x => x.Contains(positiveClass));

                TFIDF tfidf;
                if (!File.Exists(codebookPath))
                {
                    Console.WriteLine("Learning TD-IDF");
                    // Create a TF-IDF considering only words that
                    // exist in both the training and testing sets
                    tfidf = new TFIDF(testWords)
                    {
                        Tf  = TermFrequency.Log,
                        Idf = InverseDocumentFrequency.Default,
                    };

                    // Learn the training set
                    tfidf.Learn(trainWords);

                    Console.WriteLine("Saving codebook");
                    tfidf.Save(codebookPath);
                }
                else
                {
                    Console.WriteLine("Loading codebook");
                    Serializer.Load(codebookPath, out tfidf);
                }

                if (!File.Exists(x_train_fn))
                {
                    // Transform and normalize training set
                    Console.WriteLine("Pre-processing training set");
                    xTrain = tfidf.Transform(trainWords, out xTrain);

                    // L2-normalize each sample vector.
                    Console.WriteLine("Post-processing training set");
                    xTrain = xTrain.Divide(Norm.Euclidean(xTrain, dimension: 1), result: xTrain);

                    Console.WriteLine("Saving training set to disk");
                    SparseFormat.Save(xTrain, yTrain, x_train_fn, compression: SerializerCompression.GZip);
                }

                if (!File.Exists(x_test_fn))
                {
                    // Transform and normalize testing set
                    Console.WriteLine("Pre-processing testing set");
                    xTest = tfidf.Transform(testWords, out xTest);

                    Console.WriteLine("Post-processing testing set");
                    xTest = xTest.Divide(Norm.Euclidean(xTest, dimension: 1), result: xTest);

                    Console.WriteLine("Saving testing set to disk");
                    SparseFormat.Save(xTest, yTest, x_test_fn, compression: SerializerCompression.GZip);
                }
            }
            else
            {
                Console.WriteLine("Loading dataset from disk");
                if (xTrain == null || yTrain == null)
                {
                    SparseFormat.Load(x_train_fn, out xTrain, out yTrain, compression: SerializerCompression.GZip);
                }
                if (xTest == null || yTest == null)
                {
                    SparseFormat.Load(x_test_fn, out xTest, out yTest, compression: SerializerCompression.GZip);
                }
            }

            int positiveTrain = yTrain.Count(x => x);
            int positiveTest  = yTest.Count(x => x);
            int negativeTrain = yTrain.Length - positiveTrain;
            int negativeTest  = yTest.Length - positiveTest;

            // BUGFIX: the second line previously said "Negative samples" although
            // it reports the testing-set counts.
            Console.WriteLine("Training samples: {0} [{1}+, {2}-]", positiveTrain + negativeTrain, positiveTrain, negativeTrain);
            Console.WriteLine("Testing samples: {0} [{1}+, {2}-]", positiveTest + negativeTest, positiveTest, negativeTest);

            // Create and learn a linear sparse binary support vector machine
            var learn = new AveragedStochasticGradientDescent <Linear, Sparse <double> >()
            {
                MaxIterations = 5,
                Tolerance     = 0,
            };

            Console.WriteLine("Learning training set");
            Stopwatch sw  = Stopwatch.StartNew();
            var       svm = learn.Learn(xTrain, yTrain);

            Console.WriteLine(sw.Elapsed);


            Console.WriteLine("Predicting training set");
            sw = Stopwatch.StartNew();
            bool[] trainPred = svm.Decide(xTrain);
            Console.WriteLine(sw.Elapsed);

            var train = new ConfusionMatrix(trainPred, yTrain);

            Console.WriteLine("Train acc: " + train.Accuracy);


            Console.WriteLine("Predicting testing set");
            sw = Stopwatch.StartNew();
            bool[] testPred = svm.Decide(xTest);
            Console.WriteLine(sw.Elapsed);

            var test = new ConfusionMatrix(testPred, yTest);

            Console.WriteLine("Test acc: " + test.Accuracy);
        }
    /// <summary>
    /// Builds a WF-IDF weighted term vector for the Lucene document at index 1
    /// (the second indexed string), writes per-term diagnostics to the HTTP
    /// response, and returns a Hashtable of term -> WF-IDF weight. Side effect:
    /// every new term is also appended to the shared TotalNoun list.
    /// </summary>
    public Hashtable GetVector2()
    {
        StringBuilder sb = new StringBuilder();

        // The Lucene index lives under the web app's App_Data folder.
        string indexPath = AppDomain.CurrentDomain.BaseDirectory.ToString() + "\\App_Data\\";

        DirectoryInfo dirInfo = new DirectoryInfo(indexPath);

        FSDirectory dir = FSDirectory.Open(dirInfo);

        // ht: raw term frequencies; ht2: final WF-IDF weights (returned).
        Hashtable ht = new Hashtable();

        Hashtable ht2 = new Hashtable();

        IndexReader ir = IndexReader.Open(dir, false);

        DefaultSimilarity similarity = new DefaultSimilarity();

        string[] label = null;

        int[] freq = null;


        // Take the terms and frequencies of document #1's term-frequency vector.
        // NOTE(review): if the document has no stored term vectors, the loop body
        // never runs and `label`/`freq` stay null, so the loop below would throw.
        foreach (var obj in ir.GetTermFreqVectors(1))
        {
            label = obj.GetTerms();
            freq  = obj.GetTermFrequencies();
        }

        // NOTE(review): this method reads document #1 (the second string), so the
        // "s1" heading below looks like a copy-paste from GetVector — confirm.
        Response.Write("s1<br>");



        // Record each term's frequency and add unseen terms to the shared corpus
        // vocabulary used later for cosine similarity.
        for (int i = 0; i <= label.Length - 1; i++)
        {
            ht.Add(label[i], freq[i]);


            if (!TotalNoun.Contains(label[i]))
            {
                TotalNoun.Add(label[i]);
            }
        }

        // Compute WF-IDF per term: wf (via TFIDF.WF of the Lucene tf) times idf,
        // and emit the intermediate values as HTML for inspection.
        foreach (DictionaryEntry obj in ht)
        {
            float tf    = similarity.Tf(Convert.ToInt32(obj.Value));
            float wf    = TFIDF.WF(tf);
            Term  t     = new Term("Text", obj.Key.ToString());
            float idf   = similarity.Idf(ir.DocFreq(t), ir.NumDocs());
            float wfidf = wf * idf;

            sb.AppendFormat(string.Format("label:{0} freq:{1}<BR>", obj.Key, obj.Value));
            sb.AppendFormat(string.Format("TF:{0}<br>", similarity.Tf(Convert.ToInt32(obj.Value))));
            sb.AppendFormat(string.Format("IDF:{0}<br>", similarity.Idf(ir.DocFreq(t), ir.NumDocs())));
            sb.AppendFormat(string.Format("WF-IDF:{0}<br>", wfidf));
            ht2.Add(obj.Key, wfidf);
        }

        Response.Write(sb.ToString());
        return(ht2);
    }
        /// <summary>
        /// Demo click handler: builds bag-of-words and TF-IDF representations of
        /// six history-of-education paragraphs, trains a multiclass linear SVM on
        /// the bag-of-words features with hard-coded labels, then finds the
        /// paragraph in which the term "ottomans" has the highest TF-IDF score.
        /// Results are only held in locals (inspect under a debugger).
        /// </summary>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            string[] texts =
            {
                @"The concept of grouping students together in a centralized location for learning has existed since Classical antiquity. Formal schools have existed at least since ancient Greece (see Academy), ancient Rome (see Education in Ancient Rome) ancient India (see Gurukul), and ancient China (see History of education in China). The Byzantine Empire had an established schooling system beginning at the primary level. According to Traditions and Encounters, the founding of the primary education system began in 425 AD and ... military personnel usually had at least a primary education .... The sometimes efficient and often large government of the Empire meant that educated citizens were a must. Although Byzantium lost much of the grandeur of Roman culture and extravagance in the process of surviving, the Empire emphasized efficiency in its war manuals. The Byzantine education system continued until the empire's collapse in 1453 AD.[4]",
                @"In Western Europe a considerable number of cathedral schools were founded during the Early Middle Ages in order to teach future clergy and administrators, with the oldest still existing, and continuously operated, cathedral schools being The King's School, Canterbury (established 597 CE), King's School, Rochester (established 604 CE), St Peter's School, York (established 627 CE) and Thetford Grammar School (established 631 CE). Beginning in the 5th century CE monastic schools were also established throughout Western Europe, teaching both religious and secular subjects.",
                @"Islam was another culture that developed a school system in the modern sense of the word. Emphasis was put on knowledge, which required a systematic way of teaching and spreading knowledge, and purpose-built structures. At first, mosques combined both religious performance and learning activities, but by the 9th century, the madrassa was introduced, a school that was built independently from the mosque, such as al-Qarawiyyin, founded in 859 CE. They were also the first to make the Madrassa system a public domain under the control of the Caliph.",
                @"Under the Ottomans, the towns of Bursa and Edirne became the main centers of learning. The Ottoman system of Külliye, a building complex containing a mosque, a hospital, madrassa, and public kitchen and dining areas, revolutionized the education system, making learning accessible to a wider public through its free meals, health care and sometimes free accommodation.",
                @"In Europe, universities emerged during the 12th century; here, scholasticism was an important tool, and the academicians were called schoolmen. During the Middle Ages and much of the Early Modern period, the main purpose of schools (as opposed to universities) was to teach the Latin language. This led to the term grammar school, which in the United States informally refers to a primary school, but in the United Kingdom means a school that selects entrants based on ability or aptitude. Following this, the school curriculum has gradually broadened to include literacy in the vernacular language as well as technical, artistic, scientific and practical subjects.",
                @"Obligatory school attendance became common in parts of Europe during the 18th century. In Denmark-Norway, this was introduced as early as in 1739-1741, the primary end being to increase the literacy of the almue, i.e. the regular people.[5] Many of the earlier public schools in the United States and elsewhere were one-room schools where a single teacher taught seven grades of boys and girls in the same classroom. Beginning in the 1920s, one-room schools were consolidated into multiple classroom facilities with transportation increasingly provided by kid hacks and school buses."
            };

            string[][] words = texts.Tokenize();

            // Bag-of-words codebook, used both for the SVM features and for the
            // term lookup at the bottom of the method.
            var Bow = new BagOfWords(words);

            // Create a new TF-IDF with options:
            var codebook = new TFIDF()
            {
                Tf  = TermFrequency.Log,
                Idf = InverseDocumentFrequency.Default,
            };

            // Compute the codebook (note: this would have to be done only for the training set)
            codebook.Learn(words);

            // Now, we can use the learned codebook to extract fixed-length
            // representations of the different texts (paragraphs) above:

            // Extract a feature vector from the text 1:
            List <double[]> lstDocumentsScores = new List <double[]>();

            for (int i = 0; i < texts.Length; i++)
            {
                lstDocumentsScores.Add(codebook.Transform(words[i]));
            }

            var teacher = new MulticlassSupportVectorLearning <Linear>()
            {
                // using LIBLINEAR's L2-loss SVC dual for each SVM
                Learner = (p) => new LinearDualCoordinateDescent()
                {
                    Loss = Loss.L2
                }
            };

            double[][] features = Bow.Transform(words);

            teacher.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)

            // Learn a machine
            // NOTE(review): class labels for the six paragraphs are hard-coded.
            var machine = teacher.Learn(features, new int[] { 0, 0, 1, 1, 0, 0 });

            int[] predicted = machine.Decide(features);

            // NOTE(review): `error` is computed but never used afterwards.
            double error = new ZeroOneLoss(new int[] { 0, 0, 1, 1, 0, 0 }).Loss(predicted);

            // Extract a feature vector from the text 2:
            //example
            // double[] bow2 = codebook.Transform(words[1]);

            // NOTE(review): this index comes from the BagOfWords codebook but is
            // used below to index the TF-IDF vectors — this assumes both codebooks
            // assign identical indices to the same words; confirm before relying
            // on the result.
            var indexSerachedTerm = Bow.StringToCode["Ottomans".ToLower()];

            // Find the document where the searched term has the highest TF-IDF score.
            double dblMaxScore     = double.MinValue;
            int    irWhichDocument = int.MinValue;

            for (int i = 0; i < texts.Length; i++)
            {
                if (lstDocumentsScores[i][indexSerachedTerm] > dblMaxScore)
                {
                    irWhichDocument = i;
                    dblMaxScore     = lstDocumentsScores[i][indexSerachedTerm];
                }
            }
        }
Beispiel #31
0
        /// <summary>
        /// Tries to match user vocabulary against chat intents via an external
        /// synonym-lookup service. For each vocab word, downloads its synonyms,
        /// intersects them with each intent's own vocabulary, and collects up to
        /// three intent descriptions as suggestions appended to the response text.
        /// </summary>
        /// <param name="vocabList">Words extracted from the user's message.</param>
        /// <param name="intentList">Candidate intents to match against.</param>
        /// <returns>
        /// Pair of (response message, matched flag). The flag is true when at
        /// least one synonym-based suggestion was found.
        /// </returns>
        public KeyValuePair <string, bool> SynonymMatch(List <string> vocabList, List <ChatIntent> intentList)
        {
            bool   hasMatch                   = false;
            string responseMessage            = contentManager.IntentPossibleMatchedResponse;
            int    counter                    = 0;
            AskMezPossibleMatch possibleMatch = new AskMezPossibleMatch(Message, Node);

            TFIDF         getVocab     = new TFIDF();
            List <string> responseList = new List <string>();

            foreach (string vocab in vocabList)
            {
                // BUG FIX: the original code did `url = url + vocab;`, permanently
                // mutating the instance field. Every later iteration (and every
                // later call to this method) then requested a URL with ALL
                // previously appended words concatenated. Build the request URL
                // in a loop-local instead.
                string requestUrl = url + vocab;
                List <string> synonymList = new List <string>();
                using (WebClient client = new WebClient())
                {
                    try
                    {
                        string json = client.DownloadString(requestUrl);
                        SynonymDto synonym = Newtonsoft.Json.JsonConvert.DeserializeObject <SynonymDto>(json);
                        foreach (var def in synonym.def)
                        {
                            foreach (var tr in def.tr)
                            {
                                foreach (var mean in tr.mean)
                                {
                                    synonymList.Add(mean.text);
                                }
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Best-effort: a failed lookup (network error, malformed
                        // JSON, null `def`) for one word must not abort matching
                        // for the remaining words. NOTE(review): consider logging
                        // the exception instead of silently swallowing it.
                    }
                }

                foreach (ChatIntent intent in intentList)
                {
                    // Skip intents that are pure redirects to other intents.
                    if (possibleMatch.CheckIfRedirect(intent, intentList))
                    {
                        continue;
                    }
                    string        intentDesc      = intent.IntentDescription;
                    List <string> intentvocabList = getVocab.GetVocabulary(intentDesc);

                    // An intent matches when any downloaded synonym appears in
                    // the intent's own vocabulary.
                    bool hasSynonm = synonymList.Intersect(intentvocabList).Any();
                    if (hasSynonm && counter <= 3)
                    {
                        counter = counter + 1;
                        responseList.Add(intentDesc);
                    }
                }
            }
            // De-duplicate and cap the displayed suggestions at three.
            responseList = (responseList.Count > 1) ? responseList.Distinct().Take(3).ToList() : responseList;
            foreach (string response in responseList)
            {
                responseMessage = responseMessage + "<br>";
                responseMessage = responseMessage + response;
            }

            responseMessage = responseMessage + "<br>" + contentManager.IntentSuggestionResponse;

            if (counter > 0)
            {
                return(new KeyValuePair <string, bool>(responseMessage, true));
            }

            return(new KeyValuePair <string, bool>(responseMessage, hasMatch));
        }