/// <summary>
/// Reads a single file through the provider matching <paramref name="filePath"/>,
/// tags the document metadata with the detected language, runs the processing
/// workflow and, when a corpus was produced, writes the results next to
/// <paramref name="modelPath"/>.
/// </summary>
/// <param name="filePath">Path of the input file; also used to select the file-format provider.</param>
/// <param name="modelPath">
/// Target path of the corpus model. The term list is written to
/// <c>modelPath + ".list"</c> and the vectors to <c>modelPath + ".vecs"</c>.
/// </param>
private void ParseFile(string filePath, string modelPath)
{
    List<Dictionary<string, object>> documentPages;
    Dictionary<string, object> documentMeta;

    // Load the pages and the document-level metadata from disk.
    var reader = FileFormatRepository.GetMatchingProvider(filePath);
    reader.ReadFile(filePath, out documentPages, out documentMeta);

    // Store the detected language under the "LANGUAGE" metadata key.
    var detectedLanguage = LanguageDetectorHelper.DetectLanguage(ref documentPages);
    documentMeta.Add("LANGUAGE", detectedLanguage);

    AbstractCorpusAdapter corpus;
    HashSet<string> terms;
    Dictionary<string, double> vectors;
    ExecuteProcessingWorkflow(out corpus, out terms, out vectors, documentPages, documentMeta);

    // Nothing is persisted when the workflow did not yield a corpus.
    if (corpus != null)
    {
        // NOTE(review): the meaning of the trailing 'false' flag is defined by
        // Save/Serialize and is not visible here - it is passed through unchanged.
        corpus.Save(modelPath, false);
        Serializer.Serialize(terms, modelPath + ".list", false);

        // The vectors are serialized as an array of key/value pairs.
        var vectorPairs = vectors.ToArray();
        Serializer.Serialize(vectorPairs, modelPath + ".vecs", false);
    }
}
/// <summary>
/// Runs the processing workflow on a text passed in memory instead of a file.
/// The text is wrapped into a single page (keys <c>"Text"</c> and <c>"PAGE"</c>,
/// page number 1) and the detected language is stored as <c>"LANGUAGE"</c> metadata.
/// </summary>
/// <param name="inlineText">The text to process; it is only read by this method.</param>
/// <param name="corpus">Receives the corpus produced by the workflow.</param>
/// <param name="list">Receives the string set produced by the workflow.</param>
/// <param name="vecs">Receives the string-to-double map produced by the workflow.</param>
public static void Inline(ref string inlineText, out AbstractCorpusAdapter corpus, out HashSet<string> list, out Dictionary<string, double> vecs)
{
    // Build the one and only page of this pseudo document.
    var singlePage = new Dictionary<string, object>();
    singlePage.Add("Text", inlineText);
    singlePage.Add("PAGE", 1);

    var pages = new List<Dictionary<string, object>>();
    pages.Add(singlePage);

    // The document metadata carries nothing but the detected language.
    var cmeta = new Dictionary<string, object>();
    cmeta.Add("LANGUAGE", LanguageDetectorHelper.DetectLanguage(ref pages));

    ExecuteProcessingWorkflow(out corpus, out list, out vecs, pages, cmeta);
}