Exemplo n.º 1
0
        /// <summary>
        /// Stores the document type sent in the request body on the document identified by
        /// <paramref name="fileId"/>, appends the document's preprocessed text together with
        /// that type to <c>~/Data/data_train.csv</c>, and unlocks the document.
        /// </summary>
        /// <param name="fileId">Key used to look the document up in <c>db.Documents</c>.</param>
        /// <returns>
        /// 400 Bad Request when the body is not a JSON object with an integer <c>type</c> member,
        /// 404 Not Found when no document has the given id, otherwise 200 OK.
        /// </returns>
        public IHttpActionResult UpdateDocumentType(int fileId)
        {
            // NOTE(review): blocking on .Result can deadlock under a synchronization context;
            // making the action async would change its signature, so it is left as is here.
            string jsonContent = Request.Content.ReadAsStringAsync().Result;

            // Reject malformed, empty or non-object bodies up front instead of failing
            // later with a binder/format exception (which surfaced as a 500).
            Newtonsoft.Json.Linq.JObject body;
            try
            {
                body = JsonConvert.DeserializeObject(jsonContent) as Newtonsoft.Json.Linq.JObject;
            }
            catch (JsonException)
            {
                return(BadRequest());
            }

            Newtonsoft.Json.Linq.JToken typeToken = (body == null) ? null : body["type"];
            if (typeToken == null)
            {
                return(BadRequest());
            }

            // The serialized JSON form of the member is what gets stored, as before
            // (e.g. the JSON number 1 is stored as "1").
            string type = JsonConvert.SerializeObject(typeToken);
            int    val;
            if (!Int32.TryParse(type,
                                System.Globalization.NumberStyles.Integer,
                                System.Globalization.CultureInfo.InvariantCulture,
                                out val))
            {
                return(BadRequest());
            }

            // Assumes OCRDatabaseEntities is an Entity Framework context (it exposes
            // Find/SaveChanges) and is therefore disposable.
            using (OCRDatabaseEntities db = new OCRDatabaseEntities())
            {
                Document doc = db.Documents.Find(fileId);

                if (doc == null)
                {
                    return(NotFound());
                }

                doc.DocumentType = type;
                System.Diagnostics.Debug.WriteLine("Type: " + type);
                db.SaveChanges();

                string dataFilePath  = System.Web.HttpContext.Current.Server.MapPath("~/Data/data_train.csv");
                // The entity loaded above is reused; a second Find for the same key is unnecessary.
                string processedText = TextPreprocessorService.parseJSONText(doc.DocumentText);

                processedText = TextPreprocessorService.ProcessText(ref processedText);

                // Append the labelled sample to the training data file.
                FileIO.CSVWrite(processedText, val, dataFilePath);
            }

            fileService.UnlockDocument(fileId);
            return(Ok());
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a linear-kernel C-SVC text classifier from <c>~/Data/data_train.csv</c> and
        /// returns the class probabilities it assigns to <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// The model is rebuilt, cross-validated and exported to a file on every call.
        /// NOTE(review): this happens even when the preprocessed input turns out to be empty;
        /// the order is kept so the export side effect is unchanged.
        /// </remarks>
        /// <param name="input">
        /// Text to classify; it is passed through <c>TextPreprocessorService.parseJSONText</c>
        /// and <c>ProcessText</c> before a feature node is built from it.
        /// </param>
        /// <returns>
        /// Probabilities keyed by class id. Keys 1, 2 and 3 are always present (0 when the
        /// model reports no probability for them); all three are 0 when the preprocessed
        /// text is null or empty.
        /// </returns>
        public Dictionary <int, double> PredictByText(string input)
        {
            // Read the training data: one text column and one numeric class column.
            string        dataFilePath = System.Web.HttpContext.Current.Server.MapPath("~/Data/data_train.csv");
            var           dataTable    = DataAccess.DataTable.New.ReadCsv(dataFilePath);
            List <string> x            = dataTable.Rows.Select(row => row["Text"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["Type"])).ToArray();

            // Sorted list of the distinct words across all training texts.
            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            System.Diagnostics.Debug.WriteLine("Creating problem");
            var problemBuilder = new DataPreprocess.TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            System.Diagnostics.Debug.WriteLine("Creating model");
            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C, 100, true);

            var accuracy = model.GetCrossValidationAccuracy(10);

            System.Diagnostics.Debug.WriteLine(new string('=', 50));
            System.Diagnostics.Debug.WriteLine("Accuracy of the model is {0:P}", accuracy);
            model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));

            System.Diagnostics.Debug.WriteLine(new string('=', 50));
            System.Diagnostics.Debug.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction.");
            System.Diagnostics.Debug.WriteLine(new string('=', 50));

            _predictionDictionary = new Dictionary <int, string> {
                { 1, "ID" }, { 2, "Documents" }, { 3, "Forme" }
            };

            string processedText = TextPreprocessorService.parseJSONText(input);

            processedText = TextPreprocessorService.ProcessText(ref processedText);

            // Default result: every known class with probability 0.
            Dictionary <int, double> dict = new Dictionary <int, double>()
            {
                { 1, 0 }, { 2, 0 }, { 3, 0 }
            };

            // Nothing to classify (also covers a null result from preprocessing).
            if (string.IsNullOrEmpty(processedText))
            {
                return(dict);
            }

            var newX       = TextClassificationProblemBuilder.CreateNode(processedText, vocabulary);
            var predictedY = model.Predict(newX);

            System.Diagnostics.Debug.WriteLine(predictedY);

            // Merge the model's probabilities over the defaults so that keys 1..3 exist
            // even when the training data did not contain every class; callers index
            // them unconditionally.
            Dictionary <int, double> probabilities = model.PredictProbabilities(newX);
            if (probabilities != null)
            {
                foreach (KeyValuePair <int, double> entry in probabilities)
                {
                    dict[entry.Key] = entry.Value;
                }
            }

            System.Diagnostics.Debug.WriteLine("Prob(1): " + dict[1]);
            System.Diagnostics.Debug.WriteLine("Prob(2): " + dict[2]);
            System.Diagnostics.Debug.WriteLine("Prob(3): " + dict[3]);

            // Diagnostic output only: never let an unexpected label break the prediction.
            string predictedLabel;
            if (!_predictionDictionary.TryGetValue((int)predictedY, out predictedLabel))
            {
                predictedLabel = "unknown";
            }
            System.Diagnostics.Debug.WriteLine("The prediction is {0}  value is {1} ", predictedLabel, predictedY);

            return(dict);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Classifies the document with the given id by combining three signals: the text
        /// classifier (<c>PredictByText</c>), the word-count predictor
        /// (<c>PredictByNumOfWords</c>, +0.2 for class 2 when it returns 2) and
        /// <c>OCRService.FaceFlag</c> (+0.75 for class 1 when it equals 1).
        /// The winning class is stored on the document as <c>"-" + classId</c>.
        /// </summary>
        /// <param name="id">Key used to look the document up in <c>db.Documents</c>.</param>
        /// <returns>
        /// <c>null</c> when the document does not exist or no text could be obtained for it;
        /// <c>"0"</c> when no class scores above 0.1 (nothing is saved in that case);
        /// otherwise the stored type string, e.g. <c>"-1"</c>.
        /// </returns>
        public string classify(int id)
        {
            // Assumes OCRDatabaseEntities is an Entity Framework context (it exposes
            // Find/SaveChanges) and is therefore disposable.
            using (OCRDatabaseEntities db = new OCRDatabaseEntities())
            {
                Document doc = db.Documents.Find(id);

                // An unknown id used to fail with a NullReferenceException.
                if (doc == null)
                {
                    return(null);
                }

                string text = doc.DocumentText;

                if (text == null)
                {
                    // No stored text yet: recognize it now.
                    // NOTE(review): this branch preprocesses the text and PredictByText then
                    // preprocesses its input again, whereas stored text is only preprocessed
                    // once. Confirm whether the double pass is intended.
                    text = RecognizeDocText(id);
                    text = TextPreprocessorService.parseJSONText(text);
                    text = TextPreprocessorService.ProcessText(ref text);

                    if (text == null)
                    {
                        return(null);
                    }
                }

                Dictionary <int, double> dict = PredictByText(text);

                // The adjustments below index classes 1..3 directly; make sure they exist.
                foreach (int classId in new[] { 1, 2, 3 })
                {
                    if (!dict.ContainsKey(classId))
                    {
                        dict[classId] = 0;
                    }
                }

                WriteClassScores("ByText", dict);

                int predictionNumOFWords = PredictByNumOfWords();

                System.Diagnostics.Debug.WriteLine(predictionNumOFWords);
                if (predictionNumOFWords == 2)
                {
                    dict[2] += 0.2;
                }

                WriteClassScores("ByNumOfWords", dict);

                // NOTE(review): FaceFlag is read from a static member of OCRService; confirm it
                // refers to this document when several requests run at once.
                int faceFlag = OCRService.FaceFlag;

                System.Diagnostics.Debug.WriteLine("FaceFlag");
                System.Diagnostics.Debug.WriteLine(faceFlag);

                if (faceFlag == 1)
                {
                    dict[1] += 0.75;
                }

                WriteClassScores("ByFace", dict);

                double maxPerc = dict.Values.Max();

                // Too little evidence for any class: report "0" and leave the document untouched.
                if (maxPerc <= 0.1)
                {
                    return("0");
                }

                // maxPerc was taken from these same values, so the exact comparison is safe.
                string retVal = dict.FirstOrDefault(x => x.Value == maxPerc).Key.ToString();

                string type = "-" + retVal;

                doc.DocumentType = type;
                System.Diagnostics.Debug.WriteLine(doc.DocumentType);
                db.SaveChanges();
                return(type);
            }
        }

        /// <summary>
        /// Writes a stage header followed by the scores of classes 1, 2 and 3 to the debug output.
        /// </summary>
        private static void WriteClassScores(string stage, Dictionary <int, double> scores)
        {
            System.Diagnostics.Debug.WriteLine(stage);
            System.Diagnostics.Debug.WriteLine(scores[1].ToString());
            System.Diagnostics.Debug.WriteLine(scores[2].ToString());
            System.Diagnostics.Debug.WriteLine(scores[3].ToString());
        }