Posted By: Smon
Blazing fast custom neural() function for DeepLearning - 05/22/20 13:52
It's approximately 40 times faster because it bypasses the R bridge during test mode.
Put this into your R Script. This function batch predicts all signals contained in the signal file:
Now copy this into your Zorro script. Adapt the file path(s) to your R script(s) and enable loading of your R script by using #define.
In my case it's
The following debug helper is not really necessary (delete the flagprint() calls if you don't include it). I wrote it to really see what's going on under the hood.
I also put
into my run function. Again, it's only for educational purposes.
Now the important part:
Important note: You must use wdate() as your first signal in your adviseLong or adviseShort functions! This will put a timestamp in the first column of your signal file. This is how to remove it in your neural.train function in R before training:
Put this into your R Script. This function batch predicts all signals contained in the signal file:
Code
library(dplyr, quietly = TRUE) # just load it together with your other packages / install if necessary

# Batch-predict every row of a signal file and write the result to the
# prediction cache file.
#
# model: index into the global `Models` list of the model to predict with.
# XY:    the signal file contents; column 1 is the wdate timestamp, the last
#        column is excluded from the predictors (presumably the training
#        target - confirm against your neural.train), everything in between
#        is passed to predict().
#
# Reads the globals `Models` and `cachepath`. Writes a CSV with the columns
# `wdate` and `L` (the "L" column of the predicted probabilities, rounded to
# 3 digits), sorted by descending wdate.
neural.cache <- function(model, XY) {
  XY <- data.frame(XY) # convert to data frame. some models will need a matrix, but df OK for caret
  stopifnot(ncol(XY) >= 3) # need timestamp, at least one signal, and the last column

  wdate <- XY[, 1]
  # drop = FALSE keeps X a data frame even if only one signal column remains;
  # without it a single column collapses to a plain vector.
  X <- XY[, 2:(ncol(XY) - 1), drop = FALSE]

  prob_long <- round(
    predict(Models[[model]], newdata = X, type = "prob")["L"],
    digits = 3
  )

  writethis <- cbind(wdate, prob_long) %>%
    arrange(desc(wdate))
  write.csv(writethis, file = cachepath, row.names = FALSE)
}
Now copy this into your Zorro script. Adapt the file path(s) to your R script(s) and enable loading of your R script by using #define.
In my case it's
Code
// Selects which R script neural() sources during NEURAL_INIT; that code checks #ifdef ANN, XGBOOST and AUTOKERAS.
#define XGBOOST //my favorite
The following debug helper is not really necessary (delete the flagprint() calls if you don't include it). I wrote it to really see what's going on under the hood.
Code
int flagsum() { int a; if(is(INITRUN)) a = a + INITRUN; if(is(FIRSTINITRUN)) a = a + FIRSTINITRUN; if(is(FIRSTRUN)) a = a + FIRSTRUN; if(is(LOOKBACK)) a = a + LOOKBACK; if(is(TRAINMODE)) a = a + TRAINMODE; if(is(EXITRUN)) a = a + EXITRUN; if(is(PARCYCLE)) a = a + PARCYCLE; if(is(RULCYCLE)) a = a + RULCYCLE; return (a); } int lastflagsum; void flagprint(string mystring, int noextraprint, ...) { if(flagsum() == lastflagsum && noextraprint > 0) return; { lastflagsum = flagsum(); print(TO_ANY, "\nBar %d | ", Bar); print(TO_ANY, "\mode = %s", mystring); if(is(INITRUN)) print(TO_ANY, " INITRUN"); if(is(FIRSTINITRUN)) print(TO_ANY, " FIRSTINITRUN"); if(is(FIRSTRUN)) print(TO_ANY, " FIRSTRUN"); if(is(LOOKBACK)) print(TO_ANY, " LOOKBACK"); if(is(TRAINMODE)) print(TO_ANY, " TRAINMODE"); if(is(EXITRUN)) print(TO_ANY, " EXITRUN"); if(is(PARCYCLE)) print(TO_ANY, " PARCYCLE"); if(is(RULCYCLE)) print(TO_ANY, " RULCYCLE"); print(TO_ANY, " WFOCycle = %d", WFOCycle); } }
I also put
Code
// Log the run flags from the run function; the second argument (1) makes flagprint() return without printing while flagsum() still equals the last printed value.
flagprint("MAIN_FUNCTION", 1);
into my run function. Again, it's only for educational purposes.
Now the important part:
Code
string filename(int param, int offset, ...) { //helper function to create unique, meaningful filenames #define SIGNALFILE 1 #define PREDCACHE 2 #define MODEL 3 #define SUBFOLDER 4 string returnstring; if(param == 1) returnstring = (strf("%sData\\%s\\%s\\%d_sigs.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle + offset)); if(param == 2) returnstring = (strf("%sData\\%s\\%s\\%d_pred.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle)); if(param == 3) returnstring = (strf("%sData\\%s\\%s\\%d_model.ml", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle)); if(param == 4) returnstring = (strf("%sData\\%s\\%s", ZorroFolder, Script, strx(Asset,"/",""))); return returnstring; } //redefine function -> Offset as var var dataFromCSV(int Handle,const char* Format, const char* Filename,int Column,var Offset) { if(dataFind(Handle,0) < 0) { // data array not yet loaded if(!dataParse(Handle,Format,Filename)) return 0; } int Row = dataFind(Handle,wdate(0)-Offset/1440.); return dataVar(Handle,Row,Column); } var advise_cache(int new, ...) { //this function reads the cache int temp; if(new) temp = dataNew(stridx(Asset)+1, 0, 0); //reset cache return(dataFromCSV(stridx(Asset)+1, "%w,f", filename(PREDCACHE), 1, -0.2)); //minor Offset of -0.2 minutes which makes entries visible 12 seconds early (not enough to enable future peeking), //because rounding errors occuring during the creation of the signal.csv files can lead to invisibility of entries //that should be visible. } int cache_errors, big_errors; //check if errors occur because of caching var neural(int mode, int model, int numSignals, void* Data) { if(!wait(-200)) return 0; if(mode == NEURAL_INIT) { flagprint("NEURAL_INIT"); if(!Rstart("", 2)) //enable verbose output //if(!Rstart("", 1)) //less output { print(TO_ANY, "\nError - could not start R session! 
NEURAL_INIT"); quit(); } #ifdef ANN Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderANN.r')", 3); #endif #ifdef XGBOOST Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderXGBOOST.r')", 3); #endif #ifdef AUTOKERAS Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderAUTOKERAS.r')", 3); #endif Rx("neuralfromZ <- T"); Rx("neural.init()"); return 1; } // export batch training samples to a file to be read by R if(mode == NEURAL_TRAIN) { flagprint("NEURAL_TRAIN"); //check if signalfile already exists if(file_length(filename(SIGNALFILE)) == 0) { //NO //create subfolders if necessary Rx(strf("dir.create(file.path('%s'), recursive = T, showWarnings = T)", slash(filename(SUBFOLDER)))); file_write(filename(SIGNALFILE),Data,0); } else { //YES //Uncomment next line, if you don't want to retrain your models if the *.ml files already exist //if(file_length(filename(MODEL)) > 0) return 0; } Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE)))); if(!Rx(strf("neural.train(%i,XY)",model+1),2)) return 0; //model successfully trained return 1; } // predict the target if(mode == NEURAL_PREDICT) { if(is(FIRSTRUN)) { Rset("wdate",wdate()); Rset("WFOCycle", WFOCycle); } flagprint("NEURAL_PREDICT",1); var pred, cached_pred; cached_pred = advise_cache(); // = zero if not found //let's make sure, the cache is working properly. If you want less or more than 100 Bars from the beginning //and the end of the cache file, just edit the numbers that are set to 100 right now. if(is(TRADEMODE) || cached_pred == 0 || between(WFOBar, 0, 100)|| wdate() >= dataVar(stridx(Asset)+1, 100, 0) //etwas umständlicher für between(WFOBar, Ende-100, Ende) ) { Rset("X",(double*)Data,numSignals); Rx(strf("Y <- neural.predict(%i,X)",model+1)); pred = Rd("Y[1]"); //save return from R, no need to query multiple times -> save time! 
if(is(TRADEMODE)) return pred; //abort during live trading, as there is no cache //if still here, it's testmode //uncomment next line if you want to see what's going on: //printf("\npred = %.3f, cached = %.3f, Bar %d", pred, cached_pred, WFOBar); if(abs(pred - cached_pred) > 0.001) //I'm rounding my classifiers predictions to 3 decimals anyways { print(TO_ANY, "\nDeviation pred = %.3f, cached_pred = %.3f", pred, cached_pred); cache_errors++; } if(abs(pred - cached_pred) > 0.1) { print(TO_ANY, "\nWARNING! Huge deviation: pred = %.3f, cached_pred = %.3d", pred, cached_pred); big_errors++; } //Somewhere between the testing periods, the error counters must get evaluated and reset: if(cache_errors + big_errors > 0 && (WFOBar > 100 || wdate() >= dataVar(stridx(Asset)+1, 100, 0))) { if(cache_errors > 0) { print(TO_ANY, "\n%d minor deviations in the predictions cache!", cache_errors); cache_errors = 0; } if(big_errors > 0) { print(TO_ANY, "\nACHTUNG: %d Grosse Fehler im Predictions Cache!", big_errors); big_errors = 0; } } } return cached_pred; } // save all trained models if(mode == NEURAL_SAVE) { flagprint("NEURAL_SAVE"); print(TO_ANY,"\nStore %s",strrchr(Data,'\\')+1); //return Rx(strf("neural.save('%s')",slash(Data)),2); // Zorros default naming scheme return Rx(strf("neural.save('%s')",slash(filename(MODEL))),2); } // load all trained models if(mode == NEURAL_LOAD) { flagprint("NEURAL_LOAD"); cache_errors = 0; printf("\nLoad %s",strrchr(Data,'\\')+1); int r; //don't return yet, as we need to create the cache file if it's not there yet. r = Rx(strf("load('%s')",slash(filename(MODEL))),2); //if cache file is missing, create it: if(file_length(filename(PREDCACHE)) == 0) { if(file_length(slash(filename(SIGNALFILE, 1)) == 0)) { //can this even happen??? print(TO_ANY, "\nWARNING: Signalfile missing. 
Cache won't be available!"); return r; } Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE, 1)))); Rx(strf("cachepath <- '%s'", slash(filename(PREDCACHE)))); Rx(strf("neural.cache(%i,XY)",model+1)); } advise_cache(1); //delete old cache return r; } return 1; }
Important note: You must use wdate() as your first signal in your adviseLong or adviseShort functions! This will put a timestamp in the first column of your signal file. This is how to remove it in your neural.train function in R before training:
Code
# Drop the wdate timestamp (first column) and the last column before training.
# drop = FALSE keeps X two-dimensional even when only one signal column remains.
X <- XY[, 2:(ncol(XY) - 1), drop = FALSE]