Blazing fast custom neural() function for DeepLearning

It is approximately 40 times faster because it bypasses the R bridge during test mode.

Put this into your R Script. This function batch predicts all signals contained in the signal file:

Code

library(dplyr, quietly = TRUE) #just load it together with your other packages / install if necessary

# Batch-predict every row of a signal file and write the result as a cache CSV.
#
# Arguments:
#   model  index into the global list `Models` of the model to predict with.
#   XY     signal table: column 1 is the timestamp, the last column is the
#          target, everything in between is a feature.
#
# Reads the globals `Models` and `cachepath` (the calling script sets
# `cachepath` before invoking this function). Writes a two-column CSV
# (`wdate`, `L`) to `cachepath`, newest timestamp first, with the "L"
# probability rounded to 3 decimals.
neural.cache  <- function(model, XY) {
  XY <- data.frame(XY) # convert to data frame. some models will need a matrix, but df OK for caret
  # Need at least timestamp + one feature + target; otherwise 2:(ncol - 1)
  # would count downwards and silently pick the wrong columns.
  stopifnot(ncol(XY) >= 3)

  wdate <- XY[, 1]
  # drop = FALSE keeps a data frame even when there is only one feature column.
  X <- XY[, 2:(ncol(XY) - 1), drop = FALSE]

  probs <- predict(Models[[model]], newdata = X, type = "prob")
  writethis <- cbind(wdate, round(probs["L"], digits = 3))
  # Newest entries first (descending timestamp).
  writethis <- writethis[order(writethis$wdate, decreasing = TRUE), , drop = FALSE]
  write.csv(writethis, file = cachepath, row.names = FALSE)
}

Now copy this into your Zorro script. Adapt the file path(s) to your R script(s) and enable loading of your R script by using #define.

In my case it's

Code

#define XGBOOST  //my favorite

The following helper functions are not strictly necessary (delete the flagprint() calls if you don't include them). I wrote them to really see what's going on under the hood.

Code

// Returns the sum of the status flag constants that are currently set,
// out of INITRUN, FIRSTINITRUN, FIRSTRUN, LOOKBACK, TRAINMODE, EXITRUN,
// PARCYCLE and RULCYCLE. Used as a cheap fingerprint of the current run
// state so that callers can detect when the state changes.
int flagsum()
{
	int a = 0; // must start at zero: local variables are not initialized automatically
	if(is(INITRUN)) a = a + INITRUN;
	if(is(FIRSTINITRUN)) a = a + FIRSTINITRUN;
	if(is(FIRSTRUN)) a = a + FIRSTRUN;
	if(is(LOOKBACK)) a = a + LOOKBACK;
	if(is(TRAINMODE)) a = a + TRAINMODE;
	if(is(EXITRUN)) a = a + EXITRUN;
	if(is(PARCYCLE)) a = a + PARCYCLE;
	if(is(RULCYCLE)) a = a + RULCYCLE;
	return (a);
}


int lastflagsum;

// Prints the current bar, a caller-supplied label and the names of all
// status flags that are currently set, followed by the WFO cycle number.
//
// mystring      label printed after "mode = ".
// noextraprint  when > 0, nothing is printed if the flag combination is the
//               same as at the previous print; otherwise always prints.
//               Callers in this file also omit it (trailing "...");
//               presumably it is then passed as 0 - confirm against the
//               lite-C documentation.
//
// Updates the global lastflagsum whenever something is printed.
void flagprint(string mystring, int noextraprint, ...)
{
	int current;
	current = flagsum(); // evaluate once, use for both the check and the update
	if(current == lastflagsum && noextraprint > 0) return;
	lastflagsum = current;

	print(TO_ANY, "\nBar %d | ", Bar);
	print(TO_ANY, "mode = %s", mystring); // was "\mode": '\m' is not a valid escape sequence
	if(is(INITRUN)) print(TO_ANY, " INITRUN");
	if(is(FIRSTINITRUN)) print(TO_ANY, " FIRSTINITRUN");
	if(is(FIRSTRUN)) print(TO_ANY, " FIRSTRUN");
	if(is(LOOKBACK)) print(TO_ANY, " LOOKBACK");
	if(is(TRAINMODE)) print(TO_ANY, " TRAINMODE");
	if(is(EXITRUN)) print(TO_ANY, " EXITRUN");
	if(is(PARCYCLE)) print(TO_ANY, " PARCYCLE");
	if(is(RULCYCLE)) print(TO_ANY, " RULCYCLE");
	print(TO_ANY, " WFOCycle = %d", WFOCycle);
}

I also put

Code

flagprint("MAIN_FUNCTION", 1);

into my run function. Again, it's only for educational purposes.

Now the important part:

Code

// Helper function to create unique, meaningful file names. All paths live
// below <ZorroFolder>Data\<Script>\<Asset without '/'>.
//
// param   one of SIGNALFILE, PREDCACHE, MODEL, SUBFOLDER (defined below).
// offset  added to WFOCycle for SIGNALFILE only; ignored for the other kinds.
//         Callers in this file also omit it (trailing "..."); presumably it
//         is then passed as 0 - confirm against the lite-C documentation.
//
// Returns the path built by strf(), or an empty string for an unknown param.
string filename(int param, int offset, ...)
{
	#define SIGNALFILE 1 
	#define PREDCACHE  2
	#define MODEL      3
	#define SUBFOLDER  4

	string returnstring;
	returnstring = ""; // defined result for an unknown param instead of an uninitialized pointer
	if(param == SIGNALFILE)
		returnstring = strf("%sData\\%s\\%s\\%d_sigs.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle + offset);
	else if(param == PREDCACHE)
		returnstring = strf("%sData\\%s\\%s\\%d_pred.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	else if(param == MODEL)
		returnstring = strf("%sData\\%s\\%s\\%d_model.ml", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	else if(param == SUBFOLDER)
		returnstring = strf("%sData\\%s\\%s", ZorroFolder, Script, strx(Asset,"/",""));
	return returnstring;
}

//redefine function -> Offset as var
// Looks up one value in a CSV-backed dataset, parsing the file on first use.
//
// Handle    dataset handle passed to dataFind / dataParse / dataVar.
// Format    format string handed to dataParse.
// Filename  CSV file handed to dataParse.
// Column    column passed to dataVar.
// Offset    minutes subtracted from wdate(0) before the lookup (as var, so
//           fractions of a minute are possible).
//
// Returns 0 if the dataset had to be parsed and dataParse failed, otherwise
// whatever dataVar returns for the row found by dataFind.
var dataFromCSV(int Handle,const char* Format, const char* Filename,int Column,var Offset) 
{
	int FirstRow = dataFind(Handle,0);
	if(FirstRow < 0) // data array not yet loaded
	{
		if(!dataParse(Handle,Format,Filename))
			return 0;
	}
	// 1440 minutes per day: convert the minute offset into a date offset
	var LookupDate = wdate(0)-Offset/1440.;
	return dataVar(Handle,dataFind(Handle,LookupDate),Column);
}

// Reads the cached prediction for the current bar from the prediction
// cache file of the current asset and WFO cycle.
//
// new  when nonzero, the dataset is reset with dataNew() first, so the
//      cache file gets parsed again by dataFromCSV().
//
// A minor offset of -0.2 minutes makes entries visible 12 seconds early
// (not enough to enable future peeking), because rounding errors occurring
// during the creation of the signal .csv files can otherwise make entries
// invisible that should be visible.
var advise_cache(int new, ...)
{
	int handle = stridx(Asset)+1; // one dataset handle per asset
	if(new)
		dataNew(handle, 0, 0); //reset cache
	return dataFromCSV(handle, "%w,f", filename(PREDCACHE), 1, -0.2);
}

int cache_errors, big_errors; //check if errors occur because of caching

var neural(int mode, int model, int numSignals, void* Data)
{
	if(!wait(-200)) return 0;

	if(mode == NEURAL_INIT) 
	{
		flagprint("NEURAL_INIT");
		if(!Rstart("", 2)) //enable verbose output
		//if(!Rstart("", 1)) //less output
		{
			print(TO_ANY, "\nError - could not start R session! NEURAL_INIT");
			quit();
		}
		#ifdef ANN
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderANN.r')", 3);
		#endif
		#ifdef XGBOOST
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderXGBOOST.r')", 3);
		#endif
		#ifdef AUTOKERAS
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderAUTOKERAS.r')", 3);
		#endif
		
		Rx("neuralfromZ <- T");
		Rx("neural.init()");
		return 1;
	}
// export batch training samples to a file to be read by R	
	if(mode == NEURAL_TRAIN)
	{
		flagprint("NEURAL_TRAIN");
		//check if signalfile already exists
		if(file_length(filename(SIGNALFILE)) == 0) 
		{
			//NO
			//create subfolders if necessary
			Rx(strf("dir.create(file.path('%s'), recursive = T, showWarnings = T)", slash(filename(SUBFOLDER))));
			file_write(filename(SIGNALFILE),Data,0);
		} else {
			//YES
			//Uncomment next line, if you don't want to retrain your models if the *.ml files already exist
			//if(file_length(filename(MODEL)) > 0) return 0;
		}
		Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE))));

		if(!Rx(strf("neural.train(%i,XY)",model+1),2)) 
			return 0; //model successfully trained
		return 1;
	}
// predict the target	
	if(mode == NEURAL_PREDICT) 
	{
		if(is(FIRSTRUN))
		{
			Rset("wdate",wdate());
			Rset("WFOCycle", WFOCycle);
		}
		flagprint("NEURAL_PREDICT",1);
		var pred, cached_pred;
		cached_pred = advise_cache(); // = zero if not found

		//let's make sure, the cache is working properly. If you want less or more than 100 Bars from the beginning
		//and the end of the cache file, just edit the numbers that are set to 100 right now.
		if(is(TRADEMODE) || cached_pred == 0 || between(WFOBar, 0, 100)|| wdate() >= dataVar(stridx(Asset)+1, 100, 0) //etwas umständlicher für between(WFOBar, Ende-100, Ende)
		)
		{
			Rset("X",(double*)Data,numSignals);
			Rx(strf("Y <- neural.predict(%i,X)",model+1));
			pred = Rd("Y[1]"); //save return from R, no need to query multiple times -> save time!
			if(is(TRADEMODE)) return pred; //abort during live trading, as there is no cache
			
			//if still here, it's testmode
			//uncomment next line if you want to see what's going on:
			//printf("\npred = %.3f, cached = %.3f, Bar %d", pred, cached_pred, WFOBar);
			if(abs(pred - cached_pred) > 0.001) //I'm rounding my classifiers predictions to 3 decimals anyways
			{
				print(TO_ANY, "\nDeviation pred = %.3f, cached_pred = %.3f", pred, cached_pred);
				cache_errors++;
			}
			if(abs(pred - cached_pred) > 0.1)
			{
				print(TO_ANY, "\nWARNING! Huge deviation: pred = %.3f, cached_pred = %.3d", pred, cached_pred);
				big_errors++;
			}
			
			//Somewhere between the testing periods, the error counters must get evaluated and reset:
			if(cache_errors + big_errors > 0 && (WFOBar > 100 || wdate() >= dataVar(stridx(Asset)+1, 100, 0)))
			{
				if(cache_errors > 0)
				{
					print(TO_ANY, "\n%d minor deviations in the predictions cache!", cache_errors);
					cache_errors = 0;
				}
				if(big_errors > 0)
				{
					print(TO_ANY, "\nACHTUNG: %d Grosse Fehler im Predictions Cache!", big_errors);
					big_errors = 0;
				}
			}
		}
		return cached_pred;
	}
// save all trained models	
	if(mode == NEURAL_SAVE) 
	{
		flagprint("NEURAL_SAVE");
		print(TO_ANY,"\nStore %s",strrchr(Data,'\\')+1);
		//return Rx(strf("neural.save('%s')",slash(Data)),2); // Zorros default naming scheme
		return Rx(strf("neural.save('%s')",slash(filename(MODEL))),2);
	}
// load all trained models	
	if(mode == NEURAL_LOAD) 
	{
		flagprint("NEURAL_LOAD");
		cache_errors = 0;
		
		printf("\nLoad %s",strrchr(Data,'\\')+1);
		int r; //don't return yet, as we need to create the cache file if it's not there yet.
		r = Rx(strf("load('%s')",slash(filename(MODEL))),2);
		
		//if cache file is missing, create it:
		if(file_length(filename(PREDCACHE)) == 0)
		{
			if(file_length(slash(filename(SIGNALFILE, 1)) == 0))
			{
				//can this even happen???
				print(TO_ANY, "\nWARNING: Signalfile missing. Cache won't be available!");
				return r;
			}
			Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE, 1))));
			Rx(strf("cachepath <- '%s'", slash(filename(PREDCACHE))));
			Rx(strf("neural.cache(%i,XY)",model+1));
		}
		advise_cache(1); //delete old cache
		return r;
 	}
	return 1;
}

Important note: You must use wdate() as your first signal in your adviseLong or adviseShort functions! This will put a timestamp in the first column of your signal file. This is how to remove it in your neural.train function in R before training:

Code

X <- XY[,2:(ncol(XY)-1)]

Blazing fast custom neural() function for DeepLearning

Blazing fast custom neural() function for DeepLearning - 05/22/20 13:52

Re: Blazing fast custom neural() function for DeepLearning - 05/22/20 14:18

Re: Blazing fast custom neural() function for DeepLearning - 02/04/22 18:06

Re: Blazing fast custom neural() function for DeepLearning - 02/09/22 19:27