An example of training and evaluating a Region Of Interest detection model in Myriad, based on the Passive Aggressive algorithm.  The code below assumes training data are separated into two folders, one for samples known to contain ROI and one for samples known to not contain ROI.  Several initial configurations of the model are prepared, from which the program down-selects the best model by training and testing on the available data.

/*
 * Copyright (c) 2016 Emphysic LLC.  All rights reserved.
 */

package com.emphysic.myriad.core.examples;


import java.io.File;
import java.nio.file.Files;
import java.util.Map;

/**
 * Demonstrates building a sample data set, splitting into test and train subsets, training candidate models,
 * and saving the best model for later use.
 * Created by ccoughlin on 9/12/2016.
 */
public class MachineLearning {
    public static void main(String[] args) throws Exception{
        if (args == null || args.length < 2) {
            System.out.println("Usage: java -jar [jarname] posFolder negFolder");
            System.out.println("Where posFolder is the folder containing ROI samples and");
            System.out.println("negFolder is the folder containing samples that do not contain ROI.\n");
            System.out.println("The unit test resources folder is one option: /resources/data/ml/generated");
            System.out.println("e.g. java -jar  /resources/data/ml/generated/signal /resources/data/ml/generated/noise");
        // Folders containing a set of positive (has ROI) and negative (doesn't have ROI) samples
        String posFolder = args[0];
        String negFolder = args[1];

        // Get a set of candidate models
        MLROIFinder[] candidates = genCandidateModels();

        // Find the values assigned to positive and negative samples for this type of model
        double posLabel = candidates[0].positiveClass();
        double negLabel = candidates[0].negativeClass();

        // Compile our sample data...
        MLDataCompiler mldc = new MLDataCompiler(new File(posFolder), new File(negFolder), posLabel, negLabel);

        // ...75% of which will be used for training 25% for testing
        double trainingRatio = 0.75;
        // ROI detection can often be improved with a preprocessing operation.  Here we'll use simple Sobel edge
        // detection.
        DatasetOperation preproc = new SobelOperation();

        // Finding the best candidate - train/test on the compiled sample data over 10 rounds, then report on which
        // candidate had the highest accuracy.
        MonteCarloCV cv = new MonteCarloCV(mldc.readData(preproc), trainingRatio);

        // Results are returned as the best model and its accuracy between 0 and 1
        Map.Entry<MLROIFinder, Double> bestSGDModel = cv.findBestModel(10, candidates);
        PassiveAggressiveROIFinder best = (PassiveAggressiveROIFinder) bestSGDModel.getKey();
        System.out.println("Best model was " + best + " with an accuracy of "
                + (int)(bestSGDModel.getValue() * 100) + "%"

        // Finally, save our best model for later use
        File outFolder = new File(System.getProperty(""), "myriadml_demo");
        if (!outFolder.exists()) {
        File out = new File(outFolder, "bestModel");
        System.out.println("Saving to " + out.getAbsolutePath());;

     * Generates candidate machine learning models based on a grid search (varying initial parameters).
     * @return array of untrained models with different parameters
    private static MLROIFinder[] genCandidateModels() {
        double[] learningRates = {0.01, 0.05, 0.1, 0.25, 0.5};
        PassiveAggressiveROIFinder[] models = new PassiveAggressiveROIFinder[learningRates.length];
        for (int i=0; i<learningRates.length; i++) {
            models[i] = new PassiveAggressiveROIFinder(learningRates[i]);
        return models;