Пример #1
0
        /// <summary>
        /// Fills in the tokenized source and target sentences, both alignment
        /// directions, the raw alignment string and the segmentation method.
        /// </summary>
        /// <param name="segmentedSource">Source sentence whose tokens are separated by single spaces.</param>
        /// <param name="segmentedTarget">Translation whose tokens are separated by single spaces.</param>
        /// <param name="alignment">Alignment string handed to <c>ParseAlignmentString</c> and stored as-is.</param>
        /// <param name="segmentationMethod">Segmentation method recorded on this instance.</param>
        /// <param name="targetCode">Not used by this method.</param>
        private void Initialize(
            string segmentedSource,
            string segmentedTarget,
            string alignment,
            SegmentationMethod segmentationMethod,
            string targetCode)
        {
            this.SegmentedSourceSentence = segmentedSource.Split(' ');
            this.SegmentedTranslation = segmentedTarget.Split(' ');

            // Index of the last token on each side; both parse calls take these as bounds.
            int lastSourceIndex = this.SegmentedSourceSentence.Length - 1;
            int lastTargetIndex = this.SegmentedTranslation.Length - 1;

            // Same alignment string parsed twice: once per direction (the flag selects the direction).
            this.SegmentedAlignmentSourceToTarget =
                TranslationPair.ParseAlignmentString(alignment, lastSourceIndex, lastTargetIndex, false);
            this.SegmentedAlignmentTargetToSource =
                TranslationPair.ParseAlignmentString(alignment, lastTargetIndex, lastSourceIndex, true);

            this.AlignmentString = alignment;
            this.Segmentation = segmentationMethod;
        }
Пример #2
0
        /// <summary>
        /// Command-line entry point: parses the options, validates the input paths and
        /// runs the segmentation, writing the result to the requested output file.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// 0 on success, or when help was shown (explicitly or because the input/output
        /// file option is missing); 1 when the input file or the bed file does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            string             inFile          = null;
            string             outFile         = null;
            bool               needHelp        = false;
            bool               isGermline      = false;
            string             bedPath         = null;
            double             alpha           = Segmentation.DefaultAlpha;
            SegmentSplitUndo   undoMethod      = SegmentSplitUndo.None;
            SegmentationMethod partitionMethod = SegmentationMethod.Wavelets;
            int       maxInterBinDistInSegment = 1000000;
            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasClean", v => inFile = v },
                { "o|outfile=", "text file to output", v => outFile = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                // alpha is a double: parsing it as float would silently round the value
                // (e.g. 0.01 -> 0.00999999977648258) before widening it back.
                { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v) },
                // Enum names are matched case-insensitively so that e.g. "cbs" is accepted.
                { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationMethod)Enum.Parse(typeof(SegmentationMethod), v, true) },
                { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v, true) },
                { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
                { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
                { "d|maxInterBinDistInSegment=", "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: " + maxInterBinDistInSegment, v => maxInterBinDistInSegment = int.Parse(v) },
            };

            // Unrecognized arguments are returned by Parse and deliberately ignored.
            p.Parse(args);

            if (needHelp)
            {
                ShowHelp(p);
                return(0);
            }

            // Both the input and the output file are mandatory.
            if (inFile == null || outFile == null)
            {
                ShowHelp(p);
                return(0);
            }

            if (!File.Exists(inFile))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", inFile);
                return(1);
            }

            // The bed file is optional, but when given it must exist.
            if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
                return(1);
            }

            // The segmentation method is not a constructor argument; it is passed to SegmentGenome below.
            Segmentation SegmentationEngine = new Segmentation(inFile, bedPath, maxInterBinDistInSegment: maxInterBinDistInSegment);

            SegmentationEngine.Alpha      = alpha;
            SegmentationEngine.UndoMethod = undoMethod;
            SegmentationEngine.SegmentGenome(outFile, partitionMethod, isGermline);
            return(0);
        }
Пример #3
0
        /// <summary>
        /// Creates a translation pair from an already separated translation,
        /// segmented source/target strings and alignment string.
        /// </summary>
        /// <param name="translation">Translation text stored in <c>Translation</c>.</param>
        /// <param name="segmentedSource">Space-separated source tokens.</param>
        /// <param name="segmentedTarget">Space-separated target tokens.</param>
        /// <param name="alignment">Alignment string forwarded to <c>Initialize</c>.</param>
        /// <param name="segmentationMethod">Segmentation method forwarded to <c>Initialize</c>.</param>
        /// <param name="targetCode">Target code forwarded to <c>Initialize</c>.</param>
        public TranslationPair(
            string translation,
            string segmentedSource,
            string segmentedTarget,
            string alignment,
            SegmentationMethod segmentationMethod,
            string targetCode)
        {
            Translation = translation;

            // All remaining state is derived from the segmented strings and the alignment.
            Initialize(
                segmentedSource,
                segmentedTarget,
                alignment,
                segmentationMethod,
                targetCode);
        }
Пример #4
0
        /// <summary>
        /// Creates a translation pair from a combined string of the form
        /// "&lt;segmented translation&gt; ||| &lt;alignment&gt;", split at the last "|||".
        /// </summary>
        /// <param name="segmentedSource">Space-separated source tokens.</param>
        /// <param name="translationAndAlignment">
        /// Segmented translation and alignment joined by " ||| "; one character is dropped
        /// on each side of the separator.
        /// </param>
        /// <param name="segmentationMethod">Segmentation method forwarded to <c>Initialize</c>.</param>
        /// <param name="targetLanguage">Target language forwarded to <c>Initialize</c>.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="translationAndAlignment"/> has no "|||" separator preceded by at
        /// least one character, or is too short to hold the alignment part after it.
        /// </exception>
        public TranslationPair(
            string segmentedSource,
            string translationAndAlignment,
            SegmentationMethod segmentationMethod,
            string targetLanguage)
        {
            // Ordinal search: the separator is a fixed ASCII marker, not linguistic text.
            var lastSeparator  = translationAndAlignment.LastIndexOf("|||", System.StringComparison.Ordinal);
            var alignmentStart = lastSeparator + "||| ".Length;

            // Fail with a descriptive message instead of an opaque Substring range error.
            // The exception type matches what Substring would have thrown for the same inputs.
            if (lastSeparator < 1 || alignmentStart > translationAndAlignment.Length)
            {
                throw new System.ArgumentOutOfRangeException(
                    nameof(translationAndAlignment),
                    "Expected a string of the form '<segmented translation> ||| <alignment>'.");
            }

            // "- 1" drops the single character in front of the separator.
            var segmentedTranslation = translationAndAlignment.Substring(0, lastSeparator - 1);

            this.RawTranslation = segmentedTranslation;
            var alignment = translationAndAlignment.Substring(alignmentStart);

            this.Initialize(segmentedSource, segmentedTranslation, alignment, segmentationMethod, targetLanguage);
        }
Пример #5
0
 /// <summary>
 /// Runs the selected partitioning algorithm and writes the result to a file,
 /// logging timestamped progress messages to the console.
 /// </summary>
 /// <param name="outPath">Path passed to <c>WriteCanvasPartitionResults</c>.</param>
 /// <param name="method">Partitioning method; any value other than CBS runs Wavelets.</param>
 /// <param name="isGermline">Forwarded to the Wavelets partitioning; not used by CBS.</param>
 public void SegmentGenome(string outPath, SegmentationMethod method, bool isGermline)
 {
     if (method == SegmentationMethod.CBS)
     {
         Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
         this.CBS(verbose: 2);
     }
     else
     {
         // Wavelets is both the explicit choice and the fallback for every other value.
         Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
         this.Wavelets(isGermline, verbose: 2);
     }

     Console.WriteLine("{0} Write CanvasPartition results:", DateTime.Now);
     this.WriteCanvasPartitionResults(outPath);
     Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
 }
Пример #6
0
        /// <summary>
        /// Partitions the genome with the requested method, then writes the
        /// partitioning results out; progress is reported on the console.
        /// </summary>
        /// <param name="outPath">Destination handed to <c>WriteCanvasPartitionResults</c>.</param>
        /// <param name="method">CBS selects CBS; Wavelets and all other values select Wavelets.</param>
        /// <param name="isGermline">Passed through to the Wavelets partitioning only.</param>
        public void SegmentGenome(string outPath, SegmentationMethod method, bool isGermline)
        {
            switch (method)
            {
                case SegmentationMethod.CBS:
                {
                    Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
                    this.CBS(verbose: 2);
                    break;
                }

                // Covers SegmentationMethod.Wavelets as well as any unrecognized value.
                default:
                {
                    Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
                    this.Wavelets(isGermline, verbose: 2);
                    break;
                }
            }

            Console.WriteLine("{0} Write CanvasPartition results:", DateTime.Now);
            this.WriteCanvasPartitionResults(outPath);
            Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
        }
Пример #7
0
 /// <summary>
 /// Records a translation in the short-term storage and, when database caching is
 /// enabled in the settings, also writes it to the SQLite translation database.
 /// </summary>
 /// <param name="sourceText">Source text; part of the short-term storage key.</param>
 /// <param name="translation">Translation to store.</param>
 /// <param name="model">Model name; part of the short-term storage key.</param>
 /// <param name="segmentationMethod">Segmentation method stored with the database row.</param>
 /// <param name="targetLanguage">Target language stored with the database row.</param>
 internal static void WriteTranslationToDb(
     string sourceText,
     TranslationPair translation,
     string model,
     SegmentationMethod segmentationMethod,
     string targetLanguage)
 {
     // Keyed by (source text, model). NOTE(review): GetOrAdd presumably keeps an
     // already stored entry rather than replacing it - confirm against the storage type.
     var storageKey = Tuple.Create(sourceText, model);
     TranslationDbHelper.shortTermMtStorage.GetOrAdd(storageKey, translation);

     if (!OpusCatMTEngineSettings.Default.CacheMtInDatabase)
     {
         return;
     }

     try
     {
         TranslationDbHelper.WriteTranslationToSqliteDb(
             sourceText,
             translation,
             model,
             segmentationMethod,
             targetLanguage);
     }
     catch (Exception ex)
     {
         // Best effort: a failed write is logged and the database setup is run again.
         Log.Error(ex.ToString());
         TranslationDbHelper.SetupTranslationDb();
     }
 }
Пример #8
0
        /// <summary>
        /// Inserts (or replaces) one translation row in the SQLite translation database,
        /// creating the database first when it is missing or is an empty file.
        /// </summary>
        /// <param name="sourceText">Value for the <c>sourcetext</c> column.</param>
        /// <param name="translation">Supplies the translation, segmented source/target and alignment columns.</param>
        /// <param name="model">Value for the <c>model</c> column.</param>
        /// <param name="segmentationMethod">Stored by name in the <c>segmentationmethod</c> column.</param>
        /// <param name="targetLanguage">Value for the <c>targetlanguage</c> column.</param>
        private static void WriteTranslationToSqliteDb(
            string sourceText,
            TranslationPair translation,
            string model,
            SegmentationMethod segmentationMethod,
            string targetLanguage)
        {
            var translationDb = new FileInfo(HelperFunctions.GetOpusCatDataPath(OpusCatMTEngineSettings.Default.TranslationDBName));

            // FileInfo.Length throws FileNotFoundException for a missing file, so the
            // existence check has to come first.
            if (translationDb.Exists && translationDb.Length == 0)
            {
                translationDb.Delete();

                // FileInfo caches its state; refresh so the Exists check below sees the deletion.
                translationDb.Refresh();
            }

            if (!translationDb.Exists)
            {
                TranslationDbHelper.CreateTranslationDb();
            }

            // FullName is used explicitly rather than relying on FileInfo.ToString().
            using (var connection = new SQLiteConnection($"Data Source={translationDb.FullName};Version=3;"))
            {
                connection.Open();

                using (SQLiteCommand insert =
                           new SQLiteCommand(
                               "INSERT or REPLACE INTO translations (sourcetext, translation, segmentedsource, segmentedtranslation, alignment, model, additiondate, segmentationmethod, targetlanguage) VALUES (@sourcetext,@translation,@segmentedsource,@segmentedtranslation,@alignment,@model,CURRENT_TIMESTAMP,@segmentationmethod,@targetlanguage)", connection))
                {
                    // All values are bound as parameters; the token arrays are stored space-joined.
                    insert.Parameters.Add(new SQLiteParameter("@sourcetext", sourceText));
                    insert.Parameters.Add(new SQLiteParameter("@translation", translation.Translation));
                    insert.Parameters.Add(new SQLiteParameter("@segmentedsource", String.Join(" ", translation.SegmentedSourceSentence)));
                    insert.Parameters.Add(new SQLiteParameter("@segmentedtranslation", String.Join(" ", translation.SegmentedTranslation)));
                    insert.Parameters.Add(new SQLiteParameter("@alignment", translation.AlignmentString));
                    insert.Parameters.Add(new SQLiteParameter("@model", model));
                    insert.Parameters.Add(new SQLiteParameter("@segmentationmethod", segmentationMethod.ToString()));
                    insert.Parameters.Add(new SQLiteParameter("@targetlanguage", targetLanguage));
                    insert.ExecuteNonQuery();
                }
            }
        }
        /// <summary>
        /// Creates a batch translator for a model directory and makes sure the directory
        /// contains a <c>batch.yml</c>, deriving it from <c>decoder.yml</c> when absent.
        /// </summary>
        /// <param name="modelDir">Path of the model directory.</param>
        /// <param name="sourceLang">Source language; its <c>OriginalCode</c> becomes <c>SourceCode</c>.</param>
        /// <param name="targetLang">Target language; its <c>OriginalCode</c> becomes <c>TargetCode</c>.</param>
        /// <param name="segmentation">Segmentation method stored on the instance.</param>
        /// <param name="includePlaceholderTags">Stored on the instance.</param>
        /// <param name="includeTagPairs">Stored on the instance.</param>
        /// <exception cref="System.InvalidOperationException">
        /// <c>batch.yml</c> is missing and the directory does not contain exactly one <c>decoder.yml</c>.
        /// </exception>
        public MarianBatchTranslator(
            string modelDir,
            IsoLanguage sourceLang,
            IsoLanguage targetLang,
            SegmentationMethod segmentation,
            bool includePlaceholderTags,
            bool includeTagPairs)
        {
            this.SourceCode   = sourceLang.OriginalCode;
            this.TargetCode   = targetLang.OriginalCode;
            this.segmentation = segmentation;

            this.includePlaceholderTags = includePlaceholderTags;
            this.includeTagPairs        = includeTagPairs;
            this.modelDir   = new DirectoryInfo(modelDir);
            this.SystemName = $"{this.SourceCode}-{this.TargetCode}_" + this.modelDir.Name;

            // Check whether batch.yml exists; if not, create it from decoder.yml.
            var batchYaml = this.modelDir.GetFiles("batch.yml");

            if (batchYaml.Length == 0)
            {
                var decoderYaml  = this.modelDir.GetFiles("decoder.yml").Single();
                var deserializer = new Deserializer();

                // Dispose the reader so the handle on decoder.yml is released promptly.
                MarianDecoderConfig decoderSettings;
                using (var reader = decoderYaml.OpenText())
                {
                    decoderSettings = deserializer.Deserialize <MarianDecoderConfig>(reader);
                }

                // Settings that differ from decoder.yml in the generated batch configuration.
                decoderSettings.miniBatch = "16";
                decoderSettings.log       = Path.Combine(this.modelDir.FullName, "batch.log");
                decoderSettings.alignment = "hard";

                var serializer = new Serializer();
                var configPath = Path.Combine(this.modelDir.FullName, "batch.yml");
                using (var writer = File.CreateText(configPath))
                {
                    serializer.Serialize(writer, decoderSettings, typeof(MarianDecoderConfig));
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Runs segmentation, rotation correction, character classification and solving
        /// on a wordsearch bitmap, and evaluates the proposed solution against the
        /// known correct solutions.
        /// </summary>
        /// <param name="wordsearchBitmap">Image of the wordsearch. Note that when rotation correction
        /// returns this same bitmap, it is disposed by this method.</param>
        /// <param name="wordsToFind">Words handed to the solver.</param>
        /// <param name="correctSolutions">Correct solutions handed to the evaluator.</param>
        /// <param name="segmentationAlgorithm">Algorithm used to segment the bitmap.</param>
        /// <param name="segmentationRemoveSmallRowsAndCols">Whether to call <c>RemoveSmallRowsAndCols</c> on the segmentation.</param>
        /// <param name="segmentationMethod">Selects fixed-width or varied-width handling, and whether varied-width cells are resized.</param>
        /// <param name="probabilisticRotationCorrectionClassifier">Classifier used by rotation correction.</param>
        /// <param name="classifier">Classifier applied to the extracted character images.</param>
        /// <param name="wordsearchSolver">Solver applied to the classifier output.</param>
        /// <returns>An evaluator built from the proposed solution and <paramref name="correctSolutions"/>.</returns>
        internal static WordsearchSolutionEvaluator EvaluateWordsearchBitmap(Bitmap wordsearchBitmap, string[] wordsToFind,
            Dictionary<string, List<WordPosition>> correctSolutions, SegmentationAlgorithm segmentationAlgorithm, 
            bool segmentationRemoveSmallRowsAndCols, SegmentationMethod segmentationMethod,
            Classifier probabilisticRotationCorrectionClassifier, Classifier classifier, Solver wordsearchSolver)
        {
            // Fixed-width mode only ever uses the row/column counts of the segmentation.
            bool fixedWidth = segmentationMethod == SegmentationMethod.FixedWidth;

            // ---- Segmentation ----
            Segmentation grid = segmentationAlgorithm.Segment(wordsearchBitmap);
            if (segmentationRemoveSmallRowsAndCols)
            {
                grid = grid.RemoveSmallRowsAndCols();
            }

            // ---- Rotation correction ----
            WordsearchRotation unrotated = fixedWidth
                ? new WordsearchRotation(wordsearchBitmap, grid.NumRows, grid.NumCols)
                : new WordsearchRotation(wordsearchBitmap, grid);

            WordsearchRotation corrected = WordsearchRotationCorrection.CorrectOrientation(unrotated, probabilisticRotationCorrectionClassifier);
            Bitmap uprightImage = corrected.Bitmap;

            // A different bitmap instance means the image was rotated, so the
            // segmentation has to be brought in line with the rotated image.
            if (uprightImage != wordsearchBitmap)
            {
                grid = fixedWidth
                    // No segmentation was given to the rotation: build a fresh fixed-width one.
                    ? new Segmentation(corrected.Rows, corrected.Cols,
                        uprightImage.Width, uprightImage.Height)
                    // Otherwise take the segmentation that was rotated along with the image.
                    : corrected.Segmentation;
            }

            // ---- Classification ----
            // Cut the image into one bitmap per character cell.
            Bitmap[,] cellImages = null;

            if (fixedWidth)
            {
                // Scale the whole image so that every cell gets the standard size, then cut a regular grid.
                ResizeBicubic wholeImageResize = new ResizeBicubic(Constants.CHAR_WITH_WHITESPACE_WIDTH * grid.NumCols,
                    Constants.CHAR_WITH_WHITESPACE_HEIGHT * grid.NumRows);
                Bitmap scaledImage = wholeImageResize.Apply(uprightImage);

                cellImages = SplitImage.Grid(scaledImage, grid.NumRows, grid.NumCols);

                // The scaled copy is not needed once it has been split.
                scaledImage.Dispose();
            }
            else
            {
                // Varied row/column widths: cut along the segmentation itself.
                cellImages = SplitImage.Segment(uprightImage, grid);

                if (segmentationMethod == SegmentationMethod.VariedWidthWithResize)
                {
                    ResizeBicubic cellResize = new ResizeBicubic(Constants.CHAR_WITH_WHITESPACE_WIDTH, Constants.CHAR_WITH_WHITESPACE_HEIGHT);

                    int outerCount = cellImages.GetLength(0);
                    int innerCount = cellImages.GetLength(1);

                    for (int outer = 0; outer < outerCount; outer++)
                    {
                        for (int inner = 0; inner < innerCount; inner++)
                        {
                            Bitmap cell = cellImages[outer, inner];

                            // Cells that already have the standard size are left untouched.
                            if (cell.Width == Constants.CHAR_WITH_WHITESPACE_WIDTH
                                && cell.Height == Constants.CHAR_WITH_WHITESPACE_HEIGHT)
                            {
                                continue;
                            }

                            cellImages[outer, inner] = cellResize.Apply(cell);

                            // The unresized original is replaced, so release it.
                            cell.Dispose();
                        }
                    }
                }
            }

            // The full-size (possibly rotated) image is not needed any more.
            uprightImage.Dispose();

            // Crop every cell down to the character itself, without surrounding whitespace.
            Bitmap[,] characterImages = CharImgExtractor.ExtractAll(cellImages);

            // The uncropped cells are not needed any more.
            cellImages.ToSingleDimension().DisposeAll();

            // Classify every character image (one probability per possible class).
            double[][][] classProbabilities = classifier.Classify(characterImages);

            // The character images are not needed once classified.
            characterImages.ToSingleDimension().DisposeAll();

            // ---- Solve ----
            Solution proposedSolution = wordsearchSolver.Solve(classProbabilities, wordsToFind);

            // ---- Evaluate ----
            return new WordsearchSolutionEvaluator(proposedSolution, correctSolutions);
        }
Пример #11
0
        /// <summary>
        /// Evaluates the full system on a set of images: detects a wordsearch in each
        /// image, runs it through <c>EvaluateWordsearchBitmap</c>, and logs per-image
        /// results plus summary statistics.
        /// </summary>
        /// <param name="images">Images to evaluate.</param>
        /// <param name="detectionSegmentationAlgorithm">Segmentation algorithm used during wordsearch detection.</param>
        /// <param name="detectionSegmentationRemoveSmallRowsAndCols">Small row/column removal flag used during detection.</param>
        /// <param name="segmentationAlgorithm">Segmentation algorithm used on the extracted wordsearch.</param>
        /// <param name="segmentationRemoveSmallRowsAndCols">Small row/column removal flag used on the extracted wordsearch.</param>
        /// <param name="segmentationMethod">Segmentation method passed to <c>EvaluateWordsearchBitmap</c>.</param>
        /// <param name="probabilisticRotationCorrectionClassifier">Classifier used for rotation correction.</param>
        /// <param name="classifier">Classifier used for character classification.</param>
        /// <param name="wordsearchSolver">Solver used to find the words.</param>
        /// <returns>
        /// The fraction of all images (including those discarded before evaluation) whose
        /// solution was entirely correct. For an empty list the division yields NaN.
        /// </returns>
        private static double Evaluate(List<Image> images, SegmentationAlgorithm detectionSegmentationAlgorithm, 
            bool detectionSegmentationRemoveSmallRowsAndCols, SegmentationAlgorithm segmentationAlgorithm, 
            bool segmentationRemoveSmallRowsAndCols, SegmentationMethod segmentationMethod, 
            Classifier probabilisticRotationCorrectionClassifier, Classifier classifier, Solver wordsearchSolver)
        {
            DefaultLog.Info("Evaluating Full System . . .");

            List<WordsearchSolutionEvaluator> results = new List<WordsearchSolutionEvaluator>();
            int fullyCorrect = 0;

            foreach (Image image in images)
            {
                // The source bitmap is only needed while detection runs.
                image.RegisterInterestInBitmap();
                Tuple<List<IntPoint>, Bitmap> detection = DetectionAlgorithm.ExtractBestWordsearch(image.Bitmap, detectionSegmentationAlgorithm, detectionSegmentationRemoveSmallRowsAndCols);
                image.DeregisterInterestInBitmap();

                // Nothing resembling a wordsearch was found: this image counts as discarded.
                if (detection == null)
                {
                    continue;
                }

                // Ground truth for the detected wordsearch (always assigned before use below).
                string[] wordsToFind = null;
                Dictionary<string, List<WordPosition>> correctSolutions = null;

                if (image.WordsearchImages.Length <= 1)
                {
                    // Only one wordsearch in the image, so it must be that one.
                    wordsToFind = image.WordsearchImages[0].Wordsearch.Words;
                    correctSolutions = image.WordsearchImages[0].Wordsearch.Solutions;
                }
                else
                {
                    // Several wordsearches: find the one the detected coordinates belong to,
                    // using the same check as the wordsearch detection evaluation.
                    List<IntPoint> detectedCorners = detection.Item1;
                    bool matched = false;

                    foreach (WordsearchImage candidate in image.WordsearchImages)
                    {
                        if (!EvaluateWordsearchDetection.IsWordsearch(detectedCorners, candidate))
                        {
                            continue;
                        }

                        wordsToFind = candidate.Wordsearch.Words;
                        correctSolutions = candidate.Wordsearch.Solutions;
                        matched = true;
                        break;
                    }

                    // The detection is none of the known wordsearches: release the bitmap and discard.
                    if (!matched)
                    {
                        detection.Item2.Dispose();

                        continue;
                    }
                }

                Bitmap wordsearchBitmap = detection.Item2;

                // Segmentation and everything after it happens in EvaluateWordsearchBitmap.
                WordsearchSolutionEvaluator result = EvaluateWordsearchBitmap(wordsearchBitmap, wordsToFind, correctSolutions,
                    segmentationAlgorithm, segmentationRemoveSmallRowsAndCols, segmentationMethod, 
                    probabilisticRotationCorrectionClassifier, classifier, wordsearchSolver);

                wordsearchBitmap.Dispose();

                results.Add(result);
                DefaultLog.Info(result.ToString());

                if (result.Correct)
                {
                    fullyCorrect++;
                }
            }

            DefaultLog.Info("System found all words correctly for {0} / {1} Images correctly", fullyCorrect, images.Count);

            // Summary statistics over the images that made it to evaluation.
            int discardedBeforeEvaluation = images.Count - results.Count;
            int nothingFoundCount = 0;
            int validFMeasureCount = 0;
            double fMeasureTotal = 0;

            for (int i = 0; i < results.Count; i++)
            {
                WordsearchSolutionEvaluator result = results[i];

                // Not a single word was found correctly.
                if (result.TruePositive == 0)
                {
                    nothingFoundCount++;
                }

                // NaN F-Measures are left out of the average.
                if (!double.IsNaN(result.FMeasure))
                {
                    fMeasureTotal += result.FMeasure;
                    validFMeasureCount++;
                }
            }

            DefaultLog.Info("In {0} wordsearches no words were found correctly at all", nothingFoundCount);
            DefaultLog.Info("{0} wordsearch images got discarded before reaching the evaluation stage", discardedBeforeEvaluation);
            DefaultLog.Info("Average F-Measure (when not NaN): {0}", fMeasureTotal / validFMeasureCount);

            DefaultLog.Info("Full System Evaluation Completed");

            return (double)fullyCorrect / images.Count;
        }