/// <summary>
/// Fills in the segmented token arrays, both alignment mappings, the raw
/// alignment string and the segmentation method of this pair.
/// </summary>
/// <param name="segmentedSource">Source sentence; split on single spaces.</param>
/// <param name="segmentedTarget">Translation; split on single spaces.</param>
/// <param name="alignment">Alignment string, stored as-is and parsed twice.</param>
/// <param name="segmentationMethod">Value stored in <c>Segmentation</c>.</param>
/// <param name="targetCode">Not read by this method.</param>
private void Initialize(
    string segmentedSource,
    string segmentedTarget,
    string alignment,
    SegmentationMethod segmentationMethod,
    string targetCode)
{
    SegmentedSourceSentence = segmentedSource.Split(' ');
    SegmentedTranslation = segmentedTarget.Split(' ');

    // Both parse calls receive "length - 1" of each token array as bounds.
    var sourceBound = SegmentedSourceSentence.Length - 1;
    var targetBound = SegmentedTranslation.Length - 1;

    // The two calls differ only in bound order and the final flag
    // (presumably the mapping direction; confirm in ParseAlignmentString).
    SegmentedAlignmentSourceToTarget =
        TranslationPair.ParseAlignmentString(alignment, sourceBound, targetBound, false);
    SegmentedAlignmentTargetToSource =
        TranslationPair.ParseAlignmentString(alignment, targetBound, sourceBound, true);

    AlignmentString = alignment;
    Segmentation = segmentationMethod;
}
/// <summary>
/// Command-line entry point: parses the options, checks that the input files
/// exist, then runs the segmentation and writes the result to the output file.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 after a successful run, or after printing help (explicitly requested or
/// because the input/output file option is missing); 1 when the input file or
/// the given bed file does not exist.
/// </returns>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    string inFile = null;
    string outFile = null;
    bool needHelp = false;
    bool isGermline = false;
    string bedPath = null;
    double alpha = Segmentation.DefaultAlpha;
    SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
    SegmentationMethod partitionMethod = SegmentationMethod.Wavelets;
    int maxInterBinDistInSegment = 1000000;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasClean", v => inFile = v },
        { "o|outfile=", "text file to output", v => outFile = v },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        // alpha is a double: parsing it as float first would widen a rounded
        // single-precision value (0.01f becomes 0.00999999977648258).
        { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v) },
        { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationMethod)Enum.Parse(typeof(SegmentationMethod), v) },
        { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
        { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
        { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
        { "d|maxInterBinDistInSegment=", "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: " + maxInterBinDistInSegment, v => maxInterBinDistInSegment = int.Parse(v) },
    };

    // Leftover (unrecognised) arguments are ignored.
    p.Parse(args);

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    // Both the input and the output path are mandatory.
    if (inFile == null || outFile == null)
    {
        ShowHelp(p);
        return 0;
    }

    if (!File.Exists(inFile))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", inFile);
        return 1;
    }

    // The bed file is optional, but must exist when one is given.
    if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
        return 1;
    }

    Segmentation segmentationEngine = new Segmentation(inFile, bedPath, maxInterBinDistInSegment: maxInterBinDistInSegment);
    segmentationEngine.Alpha = alpha;
    segmentationEngine.UndoMethod = undoMethod;
    segmentationEngine.SegmentGenome(outFile, partitionMethod, isGermline);
    return 0;
}
/// <summary>
/// Creates a pair from a translation string together with the segmented
/// source, segmented target and alignment.
/// </summary>
/// <param name="translation">Value stored in <c>Translation</c>.</param>
/// <param name="segmentedSource">Forwarded to <c>Initialize</c>.</param>
/// <param name="segmentedTarget">Forwarded to <c>Initialize</c>.</param>
/// <param name="alignment">Forwarded to <c>Initialize</c>.</param>
/// <param name="segmentationMethod">Forwarded to <c>Initialize</c>.</param>
/// <param name="targetCode">Forwarded to <c>Initialize</c>.</param>
public TranslationPair(
    string translation,
    string segmentedSource,
    string segmentedTarget,
    string alignment,
    SegmentationMethod segmentationMethod,
    string targetCode)
{
    Translation = translation;

    // All remaining state is derived from the segmented inputs.
    Initialize(
        segmentedSource,
        segmentedTarget,
        alignment,
        segmentationMethod,
        targetCode);
}
/// <summary>
/// Creates a pair from a combined "translation ||| alignment" string: the text
/// before the last <c>|||</c> (minus the one character preceding it) becomes
/// the segmented translation, the text after <c>"||| "</c> becomes the alignment.
/// </summary>
/// <param name="segmentedSource">Segmented source sentence, forwarded to <c>Initialize</c>.</param>
/// <param name="translationAndAlignment">
/// Segmented translation and alignment joined by <c>" ||| "</c>.
/// </param>
/// <param name="segmentationMethod">Forwarded to <c>Initialize</c>.</param>
/// <param name="targetLanguage">Forwarded to <c>Initialize</c>.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// The separator is missing, is at the very start of the string, or is not
/// followed by enough characters for the alignment part to start.
/// </exception>
public TranslationPair(
    string segmentedSource,
    string translationAndAlignment,
    SegmentationMethod segmentationMethod,
    string targetLanguage)
{
    // Ordinal search: the separator is a fixed ASCII token, and a
    // culture-sensitive search may treat ignorable characters in the
    // surrounding text as part of the match, shifting the offsets used below.
    var lastSeparator = translationAndAlignment.LastIndexOf("|||", System.StringComparison.Ordinal);
    var alignmentStart = lastSeparator + "||| ".Length;

    // Same exception type the Substring calls below would raise for these
    // inputs, but with a message that names the actual problem.
    if (lastSeparator < 1 || alignmentStart > translationAndAlignment.Length)
    {
        throw new System.ArgumentOutOfRangeException(
            nameof(translationAndAlignment),
            "Expected '<segmented translation> ||| <alignment>'.");
    }

    // "- 1" drops the single character (a space in well-formed input) before the separator.
    var segmentedTranslation = translationAndAlignment.Substring(0, lastSeparator - 1);
    this.RawTranslation = segmentedTranslation;

    var alignment = translationAndAlignment.Substring(alignmentStart);
    this.Initialize(segmentedSource, segmentedTranslation, alignment, segmentationMethod, targetLanguage);
}
/// <summary>
/// Runs the selected partitioning algorithm and writes the results to
/// <paramref name="outPath"/>, logging progress to the console.
/// </summary>
/// <param name="outPath">Path passed to <c>WriteCanvasPartitionResults</c>.</param>
/// <param name="method">CBS selects CBS; every other value runs Wavelets.</param>
/// <param name="isGermline">Forwarded to the Wavelets run only.</param>
public void SegmentGenome(string outPath, SegmentationMethod method, bool isGermline)
{
    if (method == SegmentationMethod.CBS)
    {
        Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
        this.CBS(verbose: 2);
    }
    else
    {
        // Wavelets is both the explicit choice and the fallback.
        Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
        this.Wavelets(isGermline, verbose: 2);
    }

    Console.WriteLine("{0} Write CanvasPartition results:", DateTime.Now);
    this.WriteCanvasPartitionResults(outPath);
    Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
}
/// <summary>
/// Partitions the genome with the requested method, then writes the
/// partitioning results to <paramref name="outPath"/>. Each stage is
/// announced on the console with a timestamp.
/// </summary>
/// <param name="outPath">Destination handed to <c>WriteCanvasPartitionResults</c>.</param>
/// <param name="method">Only CBS selects CBS; anything else uses Wavelets.</param>
/// <param name="isGermline">Passed to the Wavelets run; not used for CBS.</param>
public void SegmentGenome(string outPath, SegmentationMethod method, bool isGermline)
{
    bool runCbs = method == SegmentationMethod.CBS;

    if (!runCbs)
    {
        // Covers SegmentationMethod.Wavelets and any unrecognised value.
        Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
        this.Wavelets(isGermline, verbose: 2);
    }
    else
    {
        Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
        this.CBS(verbose: 2);
    }

    Console.WriteLine("{0} Write CanvasPartition results:", DateTime.Now);
    this.WriteCanvasPartitionResults(outPath);
    Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
}
/// <summary>
/// Records a translation in the short-term store keyed by (source text, model)
/// and, when database caching is enabled in the settings, also writes it to
/// the SQLite translation database.
/// </summary>
/// <param name="sourceText">Source text; first half of the short-term key.</param>
/// <param name="translation">Translation pair to store.</param>
/// <param name="model">Model name; second half of the short-term key.</param>
/// <param name="segmentationMethod">Forwarded to the SQLite writer.</param>
/// <param name="targetLanguage">Forwarded to the SQLite writer.</param>
internal static void WriteTranslationToDb(
    string sourceText,
    TranslationPair translation,
    string model,
    SegmentationMethod segmentationMethod,
    string targetLanguage)
{
    var cacheKey = Tuple.Create(sourceText, model);
    TranslationDbHelper.shortTermMtStorage.GetOrAdd(cacheKey, translation);

    if (!OpusCatMTEngineSettings.Default.CacheMtInDatabase)
    {
        return;
    }

    try
    {
        TranslationDbHelper.WriteTranslationToSqliteDb(
            sourceText,
            translation,
            model,
            segmentationMethod,
            targetLanguage);
    }
    catch (Exception ex)
    {
        // Any failure is logged and answered by running the database setup
        // again; the exception is not rethrown.
        Log.Error(ex.ToString());
        TranslationDbHelper.SetupTranslationDb();
    }
}
/// <summary>
/// Inserts (or replaces) one translation row in the SQLite translation
/// database, creating the database first when the file is missing or empty.
/// </summary>
/// <param name="sourceText">Value for the <c>sourcetext</c> column.</param>
/// <param name="translation">
/// Supplies the translation, the space-joined segmented source and target,
/// and the alignment string.
/// </param>
/// <param name="model">Value for the <c>model</c> column.</param>
/// <param name="segmentationMethod">Stored as its <c>ToString()</c> text.</param>
/// <param name="targetLanguage">Value for the <c>targetlanguage</c> column.</param>
private static void WriteTranslationToSqliteDb(
    string sourceText,
    TranslationPair translation,
    string model,
    SegmentationMethod segmentationMethod,
    string targetLanguage)
{
    var translationDb = new FileInfo(HelperFunctions.GetOpusCatDataPath(OpusCatMTEngineSettings.Default.TranslationDBName));

    // Check Exists before Length: FileInfo.Length throws FileNotFoundException
    // for a missing file, which would skip the create branch below.
    if (translationDb.Exists && translationDb.Length == 0)
    {
        translationDb.Delete();
        // FileInfo caches its state; refresh so Exists reflects the deletion.
        translationDb.Refresh();
    }

    if (!translationDb.Exists)
    {
        TranslationDbHelper.CreateTranslationDb();
    }

    // FullName instead of the implicit FileInfo.ToString(), which is not
    // guaranteed to be the full path.
    using (var connection = new SQLiteConnection($"Data Source={translationDb.FullName};Version=3;"))
    {
        connection.Open();

        using (SQLiteCommand insert =
            new SQLiteCommand(
                "INSERT or REPLACE INTO translations (sourcetext, translation, segmentedsource, segmentedtranslation, alignment, model, additiondate, segmentationmethod, targetlanguage) VALUES (@sourcetext,@translation,@segmentedsource,@segmentedtranslation,@alignment,@model,CURRENT_TIMESTAMP,@segmentationmethod,@targetlanguage)",
                connection))
        {
            insert.Parameters.Add(new SQLiteParameter("@sourcetext", sourceText));
            insert.Parameters.Add(new SQLiteParameter("@translation", translation.Translation));
            insert.Parameters.Add(new SQLiteParameter("@segmentedsource", String.Join(" ", translation.SegmentedSourceSentence)));
            insert.Parameters.Add(new SQLiteParameter("@segmentedtranslation", String.Join(" ", translation.SegmentedTranslation)));
            insert.Parameters.Add(new SQLiteParameter("@alignment", translation.AlignmentString));
            insert.Parameters.Add(new SQLiteParameter("@model", model));
            insert.Parameters.Add(new SQLiteParameter("@segmentationmethod", segmentationMethod.ToString()));
            insert.Parameters.Add(new SQLiteParameter("@targetlanguage", targetLanguage));
            insert.ExecuteNonQuery();
        }
    }
}
/// <summary>
/// Creates a batch translator for the model in <paramref name="modelDir"/>
/// and makes sure a <c>batch.yml</c> config exists in that directory.
/// </summary>
/// <param name="modelDir">Path of the model directory.</param>
/// <param name="sourceLang">Source language; its <c>OriginalCode</c> is stored.</param>
/// <param name="targetLang">Target language; its <c>OriginalCode</c> is stored.</param>
/// <param name="segmentation">Segmentation method stored on the translator.</param>
/// <param name="includePlaceholderTags">Stored on the translator.</param>
/// <param name="includeTagPairs">Stored on the translator.</param>
/// <exception cref="System.InvalidOperationException">
/// <c>batch.yml</c> is absent and the directory does not contain exactly one
/// <c>decoder.yml</c> (raised by <c>Single()</c>).
/// </exception>
public MarianBatchTranslator(
    string modelDir,
    IsoLanguage sourceLang,
    IsoLanguage targetLang,
    SegmentationMethod segmentation,
    bool includePlaceholderTags,
    bool includeTagPairs)
{
    this.SourceCode = sourceLang.OriginalCode;
    this.TargetCode = targetLang.OriginalCode;
    this.segmentation = segmentation;
    this.includePlaceholderTags = includePlaceholderTags;
    this.includeTagPairs = includeTagPairs;
    this.modelDir = new DirectoryInfo(modelDir);
    this.SystemName = $"{this.SourceCode}-{this.TargetCode}_" + this.modelDir.Name;

    // Check if batch.yml exists; if not, create it from decoder.yml.
    var batchYaml = this.modelDir.GetFiles("batch.yml");
    if (batchYaml.Length == 0)
    {
        var decoderYaml = this.modelDir.GetFiles("decoder.yml").Single();
        var deserializer = new Deserializer();

        // Dispose the reader once the config has been read so the handle on
        // decoder.yml is released immediately.
        MarianDecoderConfig decoderSettings;
        using (var reader = decoderYaml.OpenText())
        {
            decoderSettings = deserializer.Deserialize<MarianDecoderConfig>(reader);
        }

        // Settings that distinguish the batch config from the decoder config.
        decoderSettings.miniBatch = "16";
        decoderSettings.log = Path.Combine(this.modelDir.FullName, "batch.log");
        decoderSettings.alignment = "hard";

        var serializer = new Serializer();
        var configPath = Path.Combine(this.modelDir.FullName, "batch.yml");
        using (var writer = File.CreateText(configPath))
        {
            serializer.Serialize(writer, decoderSettings, typeof(MarianDecoderConfig));
        }
    }
}
/// <summary>
/// Runs segmentation, rotation correction, character classification and
/// solving on one wordsearch bitmap, and wraps the proposed solution together
/// with the expected one in an evaluator.
/// </summary>
/// <param name="wordsearchBitmap">Image of the wordsearch to process.</param>
/// <param name="wordsToFind">Words handed to the solver.</param>
/// <param name="correctSolutions">Expected solutions, handed to the evaluator.</param>
/// <param name="segmentationAlgorithm">Algorithm used to segment the bitmap.</param>
/// <param name="segmentationRemoveSmallRowsAndCols">
/// When true, <c>RemoveSmallRowsAndCols</c> is applied to the segmentation.
/// </param>
/// <param name="segmentationMethod">
/// Chooses between the fixed-width grid split and the segmentation-driven
/// split (optionally resizing every character image).
/// </param>
/// <param name="probabilisticRotationCorrectionClassifier">Classifier used for orientation correction.</param>
/// <param name="classifier">Classifier applied to the extracted character images.</param>
/// <param name="wordsearchSolver">Solver applied to the classifier output.</param>
/// <returns>Evaluator built from the solver's solution and <paramref name="correctSolutions"/>.</returns>
internal static WordsearchSolutionEvaluator EvaluateWordsearchBitmap(Bitmap wordsearchBitmap, string[] wordsToFind, Dictionary<string, List<WordPosition>> correctSolutions, SegmentationAlgorithm segmentationAlgorithm, bool segmentationRemoveSmallRowsAndCols, SegmentationMethod segmentationMethod, Classifier probabilisticRotationCorrectionClassifier, Classifier classifier, Solver wordsearchSolver)
{
    // The method is fixed for the whole call, so test it once.
    bool usesFixedWidth = segmentationMethod == SegmentationMethod.FixedWidth;

    /*
     * Wordsearch Segmentation
     */
    Segmentation segmentation = segmentationAlgorithm.Segment(wordsearchBitmap);
    if (segmentationRemoveSmallRowsAndCols)
    {
        // Drop erroneously small rows and columns.
        segmentation = segmentation.RemoveSmallRowsAndCols();
    }

    /*
     * Wordsearch Rotation Correction
     */
    // Fixed width only needs the row/column counts; varied width needs the
    // full Segmentation object.
    WordsearchRotation unrotated = usesFixedWidth
        ? new WordsearchRotation(wordsearchBitmap, segmentation.NumRows, segmentation.NumCols)
        : new WordsearchRotation(wordsearchBitmap, segmentation);

    WordsearchRotation corrected = WordsearchRotationCorrection.CorrectOrientation(unrotated, probabilisticRotationCorrectionClassifier);
    Bitmap rotatedImage = corrected.Bitmap;

    // A different bitmap instance means a rotation was applied, so the
    // segmentation has to be brought in line with the rotated image.
    if (rotatedImage != wordsearchBitmap)
    {
        if (usesFixedWidth)
        {
            // No segmentation was passed to the rotation: build a fresh fixed width one.
            segmentation = new Segmentation(corrected.Rows, corrected.Cols, rotatedImage.Width, rotatedImage.Height);
        }
        else
        {
            segmentation = corrected.Segmentation;
        }
    }

    /*
     * Classification
     */
    // Split the image up into one bitmap per character.
    Bitmap[,] rawCharImgs = null;
    if (usesFixedWidth)
    {
        ResizeBicubic gridResize = new ResizeBicubic(Constants.CHAR_WITH_WHITESPACE_WIDTH * segmentation.NumCols, Constants.CHAR_WITH_WHITESPACE_HEIGHT * segmentation.NumRows);
        Bitmap resizedImage = gridResize.Apply(rotatedImage);

        rawCharImgs = SplitImage.Grid(resizedImage, segmentation.NumRows, segmentation.NumCols);

        // The resized copy is not needed once it has been split.
        resizedImage.Dispose();
    }
    else
    {
        rawCharImgs = SplitImage.Segment(rotatedImage, segmentation);

        if (segmentationMethod == SegmentationMethod.VariedWidthWithResize)
        {
            ResizeBicubic charResize = new ResizeBicubic(Constants.CHAR_WITH_WHITESPACE_WIDTH, Constants.CHAR_WITH_WHITESPACE_HEIGHT);
            int rowCount = rawCharImgs.GetLength(0);
            int colCount = rawCharImgs.GetLength(1);

            for (int row = 0; row < rowCount; row++)
            {
                for (int col = 0; col < colCount; col++)
                {
                    bool alreadyTargetSize =
                        rawCharImgs[row, col].Width == Constants.CHAR_WITH_WHITESPACE_WIDTH
                        && rawCharImgs[row, col].Height == Constants.CHAR_WITH_WHITESPACE_HEIGHT;

                    if (!alreadyTargetSize)
                    {
                        // Swap in the resized image and release the original.
                        Bitmap unresized = rawCharImgs[row, col];
                        rawCharImgs[row, col] = charResize.Apply(unresized);
                        unresized.Dispose();
                    }
                }
            }
        }
    }

    // The full sized rotated image is no longer required.
    // NOTE(review): when no rotation was applied this may be the caller's
    // wordsearchBitmap (the reference comparison above allows for that).
    // Confirm callers expect it to be disposed here.
    rotatedImage.Dispose();

    // Extract the part of each image that holds the character itself.
    Bitmap[,] charImgs = CharImgExtractor.ExtractAll(rawCharImgs);
    rawCharImgs.ToSingleDimension().DisposeAll();

    // Classify every character image, then release the images.
    double[][][] classifierOutput = classifier.Classify(charImgs);
    charImgs.ToSingleDimension().DisposeAll();

    /*
     * Solve Wordsearch
     */
    Solution solution = wordsearchSolver.Solve(classifierOutput, wordsToFind);

    /*
     * Evaluate the Proposed Solution
     */
    return new WordsearchSolutionEvaluator(solution, correctSolutions);
}
/// <summary>
/// Evaluates the full system on a set of images: detects a wordsearch in each
/// image, works out which known wordsearch was found, evaluates the extracted
/// bitmap via <c>EvaluateWordsearchBitmap</c>, and logs summary statistics.
/// </summary>
/// <param name="images">Images to evaluate.</param>
/// <param name="detectionSegmentationAlgorithm">Segmentation algorithm used during detection.</param>
/// <param name="detectionSegmentationRemoveSmallRowsAndCols">Passed to the detection step.</param>
/// <param name="segmentationAlgorithm">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <param name="segmentationRemoveSmallRowsAndCols">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <param name="segmentationMethod">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <param name="probabilisticRotationCorrectionClassifier">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <param name="classifier">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <param name="wordsearchSolver">Passed to <c>EvaluateWordsearchBitmap</c>.</param>
/// <returns>
/// Number of images whose evaluator reports <c>Correct</c>, divided by the
/// total number of images (including images discarded before evaluation).
/// </returns>
private static double Evaluate(List<Image> images, SegmentationAlgorithm detectionSegmentationAlgorithm, bool detectionSegmentationRemoveSmallRowsAndCols, SegmentationAlgorithm segmentationAlgorithm, bool segmentationRemoveSmallRowsAndCols, SegmentationMethod segmentationMethod, Classifier probabilisticRotationCorrectionClassifier, Classifier classifier, Solver wordsearchSolver)
{
    DefaultLog.Info("Evaluating Full System . . .");

    int numCorrect = 0;
    List<WordsearchSolutionEvaluator> evaluators = new List<WordsearchSolutionEvaluator>();

    foreach (Image image in images)
    {
        // Register an interest in the Bitmap of the image
        image.RegisterInterestInBitmap();

        /*
         * Wordsearch Detection
         */
        Tuple<List<IntPoint>, Bitmap> wordsearchImageTuple;
        try
        {
            wordsearchImageTuple = DetectionAlgorithm.ExtractBestWordsearch(image.Bitmap, detectionSegmentationAlgorithm, detectionSegmentationRemoveSmallRowsAndCols);
        }
        finally
        {
            // Original image is no longer required. Deregister even when
            // detection throws so the registration is always balanced.
            image.DeregisterInterestInBitmap();
        }

        // If the system failed to find anything remotely resembling a wordsearch, fail now
        if (wordsearchImageTuple == null)
        {
            continue;
        }

        Bitmap extractedImage = wordsearchImageTuple.Item2;
        WordsearchSolutionEvaluator evaluator;
        try
        {
            // Words to look for and the correct solutions for this image
            string[] wordsToFind = null;
            Dictionary<string, List<WordPosition>> correctSolutions = null;

            if (image.WordsearchImages.Length > 1)
            {
                // More than one wordsearch in the image: work out which one was
                // found, using the same check as EvaluateWordsearchDetection.
                List<IntPoint> coordinates = wordsearchImageTuple.Item1;
                bool found = false;

                foreach (WordsearchImage wordsearchImage in image.WordsearchImages)
                {
                    if (EvaluateWordsearchDetection.IsWordsearch(coordinates, wordsearchImage))
                    {
                        wordsToFind = wordsearchImage.Wordsearch.Words;
                        correctSolutions = wordsearchImage.Wordsearch.Solutions;
                        found = true;
                        break;
                    }
                }

                // Not one of the wordsearches in the image: fail now
                // (the finally block below disposes the extracted bitmap).
                if (!found)
                {
                    continue;
                }
            }
            else
            {
                // Otherwise just use the one wordsearch that's in the image
                wordsToFind = image.WordsearchImages[0].Wordsearch.Words;
                correctSolutions = image.WordsearchImages[0].Wordsearch.Solutions;
            }

            /*
             * Image Segmentation onwards happen in EvaluateWordsearchBitmap
             */
            evaluator = EvaluateWordsearchBitmap(extractedImage, wordsToFind, correctSolutions, segmentationAlgorithm, segmentationRemoveSmallRowsAndCols, segmentationMethod, probabilisticRotationCorrectionClassifier, classifier, wordsearchSolver);
        }
        finally
        {
            // Clean up on every path, including when evaluation throws.
            extractedImage.Dispose();
        }

        // Log Evaluation
        evaluators.Add(evaluator);
        DefaultLog.Info(evaluator.ToString());

        if (evaluator.Correct)
        {
            numCorrect++;
        }
    }

    DefaultLog.Info("System found all words correctly for {0} / {1} Images correctly", numCorrect, images.Count);

    // Calculate some extra statistics
    int numWordsearchesNoWordsFound = 0;
    int numDidntReachEvaluation = images.Count - evaluators.Count;
    double fMeasureSum = 0;
    int numValidFMeasures = 0;

    foreach (WordsearchSolutionEvaluator evaluator in evaluators)
    {
        // If no words were found correctly
        if (evaluator.TruePositive == 0)
        {
            numWordsearchesNoWordsFound++;
        }

        // If there was a valid F-Measure
        if (!double.IsNaN(evaluator.FMeasure))
        {
            fMeasureSum += evaluator.FMeasure;
            numValidFMeasures++;
        }
    }

    DefaultLog.Info("In {0} wordsearches no words were found correctly at all", numWordsearchesNoWordsFound);
    DefaultLog.Info("{0} wordsearch images got discarded before reaching the evaluation stage", numDidntReachEvaluation);
    // Floating-point division: logs NaN when there were no valid F-Measures.
    DefaultLog.Info("Average F-Measure (when not NaN): {0}", fMeasureSum / numValidFMeasures);
    DefaultLog.Info("Full System Evaluation Completed");

    return (double)numCorrect / images.Count;
}