|
Blazing fast custom neural() function for DeepLearning
#480162
05/22/20 13:52
05/22/20 13:52
|
Joined: Dec 2014
Posts: 206 Germany
Smon
OP
Member
|
OP
Member
Joined: Dec 2014
Posts: 206
Germany
|
It's approximately 40 times faster because it bypasses the R bridge during test mode. Put this into your R script. This function batch-predicts all signals contained in the signal file:
library(dplyr, quietly = TRUE) #just load it together with your other packages / install if necessary
# Batch-predict every row of a signal file and write the predictions cache.
#
# model: index into the global list `Models` (one fitted model per index).
# XY:    signal data as read from the signal file. Column 1 is the timestamp;
#        the last column is excluded from the predictors (presumably the
#        training target); everything in between is passed to predict().
#
# Side effect: writes a two-column CSV (wdate, probability of class "L" rounded
# to 3 digits) to the path held in the global `cachepath`, sorted by wdate in
# descending order. Requires dplyr to be attached for %>%, arrange() and desc().
neural.cache <- function(model, XY) {
  XY <- data.frame(XY) # a data frame is fine for caret; some models may need a matrix
  wdate <- XY[, 1]
  # drop = FALSE keeps X a data frame even when only one predictor column is
  # left; without it the selection collapses to a bare vector.
  X <- XY[, 2:(ncol(XY) - 1), drop = FALSE]
  prob_long <- predict(Models[[model]], newdata = X, type = "prob")["L"]
  writethis <- cbind(wdate, round(prob_long, digits = 3)) %>%
    arrange(desc(wdate))
  write.csv(writethis, file = cachepath, row.names = FALSE)
}
Now copy this into your Zorro script. Adapt the file path(s) to your R script(s) and enable loading of your R script by using #define. In my case it's #define XGBOOST //my favorite Not really necessary (delete function calls if you don't include it). I wrote this to really see what's going on under the hood.
int flagsum()
{
	// Adds up the values of all status flags that are currently set, giving one
	// number that changes whenever the combination of set flags changes.
	// The accumulator must start at 0: a local int is not zero-initialised,
	// so adding to it uninitialised would produce an arbitrary result.
	int a = 0;
	if(is(INITRUN)) a += INITRUN;
	if(is(FIRSTINITRUN)) a += FIRSTINITRUN;
	if(is(FIRSTRUN)) a += FIRSTRUN;
	if(is(LOOKBACK)) a += LOOKBACK;
	if(is(TRAINMODE)) a += TRAINMODE;
	if(is(EXITRUN)) a += EXITRUN;
	if(is(PARCYCLE)) a += PARCYCLE;
	if(is(RULCYCLE)) a += RULCYCLE;
	return a;
}
// Flag sum at the time of the most recent flagprint() output; used to
// suppress repeated printouts of an unchanged flag combination.
int lastflagsum;

// Prints the current bar number, a caller-supplied label, the names of all
// status flags that are currently set, and the current WFO cycle.
//   mystring:     label printed after "mode = ".
//   noextraprint: if > 0, nothing is printed while flagsum() still equals the
//                 value stored at the previous printout; if omitted or 0,
//                 the line is always printed.
void flagprint(string mystring, int noextraprint, ...)
{
	int current = flagsum();
	if(current == lastflagsum && noextraprint > 0) return;
	lastflagsum = current;

	print(TO_ANY, "\nBar %d | ", Bar);
	// The original literal began with the invalid escape "\m"; the plain
	// text below produces the intended "mode = ..." output.
	print(TO_ANY, "mode = %s", mystring);
	if(is(INITRUN)) print(TO_ANY, " INITRUN");
	if(is(FIRSTINITRUN)) print(TO_ANY, " FIRSTINITRUN");
	if(is(FIRSTRUN)) print(TO_ANY, " FIRSTRUN");
	if(is(LOOKBACK)) print(TO_ANY, " LOOKBACK");
	if(is(TRAINMODE)) print(TO_ANY, " TRAINMODE");
	if(is(EXITRUN)) print(TO_ANY, " EXITRUN");
	if(is(PARCYCLE)) print(TO_ANY, " PARCYCLE");
	if(is(RULCYCLE)) print(TO_ANY, " RULCYCLE");
	print(TO_ANY, " WFOCycle = %d", WFOCycle);
}
I also put flagprint("MAIN_FUNCTION", 1); into my run function. Again, it's only for educational purposes. Now the important part:
// Selectors for filename()
#define SIGNALFILE 1
#define PREDCACHE 2
#define MODEL 3
#define SUBFOLDER 4

// Helper that builds unique, meaningful file names below
// <ZorroFolder>Data\<Script>\<Asset without "/">\ .
//   param:  SIGNALFILE -> "<WFOCycle + offset>_sigs.csv"
//           PREDCACHE  -> "<WFOCycle>_pred.csv"
//           MODEL      -> "<WFOCycle>_model.ml"
//           SUBFOLDER  -> the asset folder itself
//   offset: added to WFOCycle for SIGNALFILE only; may be omitted.
// Returns the string produced by strf(); for an unknown selector an empty
// string is returned (the original returned an uninitialised pointer).
string filename(int param, int offset, ...)
{
	if(param == SIGNALFILE)
		return strf("%sData\\%s\\%s\\%d_sigs.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle + offset);
	if(param == PREDCACHE)
		return strf("%sData\\%s\\%s\\%d_pred.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	if(param == MODEL)
		return strf("%sData\\%s\\%s\\%d_model.ml", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	if(param == SUBFOLDER)
		return strf("%sData\\%s\\%s", ZorroFolder, Script, strx(Asset,"/",""));
	return "";
}
// Redefinition of dataFromCSV() that takes Offset as a var, so fractional
// offsets can be passed.
//   Handle:   dataset handle to look up / fill.
//   Format:   format string handed to dataParse().
//   Filename: CSV file handed to dataParse().
//   Column:   column passed to dataVar().
//   Offset:   subtracted from wdate(0) after division by 1440.
// Returns 0 if the file had to be parsed and dataParse() returned 0,
// otherwise the dataVar() value of the row found for the lookup date.
var dataFromCSV(int Handle,const char* Format, const char* Filename,int Column,var Offset)
{
	// A negative dataFind() result is treated as "dataset not loaded yet"
	if(dataFind(Handle,0) < 0)
	{
		if(!dataParse(Handle,Format,Filename))
			return 0;
	}
	var lookup = wdate(0) - Offset/1440.;
	int found = dataFind(Handle,lookup);
	return dataVar(Handle,found,Column);
}
// Reads the cached prediction for the current asset from the predictions
// cache file, using dataset handle stridx(Asset)+1.
//   new: if nonzero, the dataset is reset with dataNew() first (presumably so
//        that dataFromCSV() parses the cache file again); may be omitted.
// Returns whatever dataFromCSV() returns for column 1 of the cache.
var advise_cache(int new, ...)
{
	int handle = stridx(Asset)+1;
	if(new) dataNew(handle, 0, 0); //reset cache
	// A minor offset of -0.2 minutes makes entries visible 12 seconds early
	// (not enough to enable future peeking). Rounding errors that occur while
	// the signal .csv files are created can otherwise make entries invisible
	// that should be visible.
	return dataFromCSV(handle, "%w,f", filename(PREDCACHE), 1, -0.2);
}
int cache_errors, big_errors; //check if errors occur because of caching

// Custom neural() hook that serves predictions from a per-cycle CSV cache
// instead of calling R on every bar.
//   mode:       one of NEURAL_INIT, NEURAL_TRAIN, NEURAL_PREDICT,
//               NEURAL_SAVE, NEURAL_LOAD.
//   model:      zero-based model index; model+1 is passed to the R functions.
//   numSignals: number of signals in Data (used for NEURAL_PREDICT).
//   Data:       mode dependent: training samples (NEURAL_TRAIN), signal
//               array (NEURAL_PREDICT) or a file name (NEURAL_SAVE/LOAD).
// Returns 0 when wait() or R training fails; otherwise 1, the prediction,
// or the result of the Rx() call, depending on mode.
var neural(int mode, int model, int numSignals, void* Data)
{
	if(!wait(-200)) return 0;

	if(mode == NEURAL_INIT)
	{
		flagprint("NEURAL_INIT");
		if(!Rstart("", 2)) //enable verbose output
		//if(!Rstart("", 1)) //less output
		{
			print(TO_ANY, "\nError - could not start R session! NEURAL_INIT");
			quit();
		}
#ifdef ANN
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderANN.r')", 3);
#endif
#ifdef XGBOOST
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderXGBOOST.r')", 3);
#endif
#ifdef AUTOKERAS
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderAUTOKERAS.r')", 3);
#endif
		Rx("neuralfromZ <- T");
		Rx("neural.init()");
		return 1;
	}

	// export batch training samples to a file to be read by R
	if(mode == NEURAL_TRAIN)
	{
		flagprint("NEURAL_TRAIN");
		//check if signalfile already exists
		if(file_length(filename(SIGNALFILE)) == 0)
		{
			//NO
			//create subfolders if necessary
			Rx(strf("dir.create(file.path('%s'), recursive = T, showWarnings = T)", slash(filename(SUBFOLDER))));
			file_write(filename(SIGNALFILE),Data,0);
		} else {
			//YES
			//Uncomment next line, if you don't want to retrain your models if the *.ml files already exist
			//if(file_length(filename(MODEL)) > 0) return 0;
		}
		Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE))));
		if(!Rx(strf("neural.train(%i,XY)",model+1),2))
			return 0; //the R call reported failure
		return 1; //model successfully trained
	}

	// predict the target
	if(mode == NEURAL_PREDICT)
	{
		if(is(FIRSTRUN))
		{
			Rset("wdate",wdate());
			Rset("WFOCycle", WFOCycle);
		}
		flagprint("NEURAL_PREDICT",1);
		var pred, cached_pred;
		cached_pred = advise_cache(); // = zero if not found
		//let's make sure, the cache is working properly. If you want less or more than 100 Bars from the beginning
		//and the end of the cache file, just edit the numbers that are set to 100 right now.
		if(is(TRADEMODE) || cached_pred == 0 || between(WFOBar, 0, 100)|| wdate() >= dataVar(stridx(Asset)+1, 100, 0) //a somewhat roundabout way of writing between(WFOBar, End-100, End)
		)
		{
			Rset("X",(double*)Data,numSignals);
			Rx(strf("Y <- neural.predict(%i,X)",model+1));
			pred = Rd("Y[1]"); //save return from R, no need to query multiple times -> save time!
			if(is(TRADEMODE)) return pred; //abort during live trading, as there is no cache
			//if still here, it's testmode
			//uncomment next line if you want to see what's going on:
			//printf("\npred = %.3f, cached = %.3f, Bar %d", pred, cached_pred, WFOBar);
			if(abs(pred - cached_pred) > 0.001) //I'm rounding my classifiers predictions to 3 decimals anyways
			{
				print(TO_ANY, "\nDeviation pred = %.3f, cached_pred = %.3f", pred, cached_pred);
				cache_errors++;
			}
			if(abs(pred - cached_pred) > 0.1)
			{
				// cached_pred is a var, so it must be printed with %f (was %.3d)
				print(TO_ANY, "\nWARNING! Huge deviation: pred = %.3f, cached_pred = %.3f", pred, cached_pred);
				big_errors++;
			}
			//Somewhere between the testing periods, the error counters must get evaluated and reset:
			if(cache_errors + big_errors > 0 && (WFOBar > 100 || wdate() >= dataVar(stridx(Asset)+1, 100, 0)))
			{
				if(cache_errors > 0)
				{
					print(TO_ANY, "\n%d minor deviations in the predictions cache!", cache_errors);
					cache_errors = 0;
				}
				if(big_errors > 0)
				{
					print(TO_ANY, "\nACHTUNG: %d Grosse Fehler im Predictions Cache!", big_errors);
					big_errors = 0;
				}
			}
			// Cache miss: the cache delivered nothing, so fall back to the
			// prediction just obtained from R instead of returning 0.
			if(cached_pred == 0) return pred;
		}
		return cached_pred;
	}

	// save all trained models
	if(mode == NEURAL_SAVE)
	{
		flagprint("NEURAL_SAVE");
		print(TO_ANY,"\nStore %s",strrchr(Data,'\\')+1);
		//return Rx(strf("neural.save('%s')",slash(Data)),2); // Zorros default naming scheme
		return Rx(strf("neural.save('%s')",slash(filename(MODEL))),2);
	}

	// load all trained models
	if(mode == NEURAL_LOAD)
	{
		flagprint("NEURAL_LOAD");
		// start the new cycle with both deviation counters cleared
		cache_errors = 0;
		big_errors = 0;
		printf("\nLoad %s",strrchr(Data,'\\')+1);
		int r; //don't return yet, as we need to create the cache file if it's not there yet.
		r = Rx(strf("load('%s')",slash(filename(MODEL))),2);
		//if cache file is missing, create it:
		if(file_length(filename(PREDCACHE)) == 0)
		{
			// The comparison belongs outside file_length(); the original
			// compared the path string itself with 0, so this check never fired.
			if(file_length(filename(SIGNALFILE, 1)) == 0)
			{
				//can this even happen???
				print(TO_ANY, "\nWARNING: Signalfile missing. Cache won't be available!");
				return r;
			}
			Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE, 1))));
			Rx(strf("cachepath <- '%s'", slash(filename(PREDCACHE))));
			Rx(strf("neural.cache(%i,XY)",model+1));
		}
		advise_cache(1); //delete old cache
		return r;
	}

	return 1;
}
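Important note: You must use wdate() as your first signal in your adviseLong or adviseShort functions! This will put a timestamp in the first column of your signal file. Remove it in your neural.train function in R before training by leaving column 1 out of the predictors, the same way neural.cache does: X <- XY[,2:(ncol(XY)-1)] (this selection also leaves out the last column).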
Last edited by Smon; 05/23/20 03:43.
|
|
|
Re: Blazing fast custom neural() function for DeepLearning
[Re: Smon]
#480163
05/22/20 14:18
05/22/20 14:18
|
Joined: Dec 2014
Posts: 206 Germany
Smon
OP
Member
|
OP
Member
Joined: Dec 2014
Posts: 206
Germany
|
I hope this helps somebody. I still have questions, and I hope somebody can help me: I'm also trying to speed up my main script. It's really slow with custom indicators that make heavy use of the R bridge. I want to do hyperparameter optimization in addition to parameter optimization, so I really need a way to bypass frequent signal file generation; otherwise I have to wait ages for a result. This is my plan: 1) Calculate a hash from the very first signal and use it as part of my filenames. By doing so, I can make sure never to recreate an existing signal file unnecessarily. 2) If the signal file exists, just skip the whole main script during RULCYCLE. The idea for 2) was this:
if(!is(EXITRUN) && is(RULCYCLE) && file_length(filename(SIGNALFILE)) > 0)
{
adviseLong(NEURAL, 0, 0);
//the real adviseLong() call comes later. But if I'm skipping here, I need to make sure that no series calls come later
//I tried if it's working without this arbitrary adviseLong() call, but it's not.
return;
} My problem is that NEURAL_TRAIN isn't triggered and I don't know why. My flagprint() function shows me this in train mode: Bar 74520 | mode = MAIN_FUNCTION LOOKBACK TRAINMODE RULCYCLE WFOCycle = 1 Bar 93719 | mode = MAIN_FUNCTION TRAINMODE RULCYCLE WFOCycle = 1 Bar 184558 | mode = MAIN_FUNCTION TRAINMODE EXITRUN RULCYCLE WFOCycle = 1 Train USDCAD_L -Sep 2013- Bar 184558 | mode = NEURAL_TRAIN TRAINMODE EXITRUN RULCYCLE WFOCycle = 1 Bar 184558 | mode = NEURAL_SAVE TRAINMODE EXITRUN RULCYCLE WFOCycle = 1 Store MyStrategy_USDCAD_1.ml Bar 0 | mode = MAIN_FUNCTION INITRUN LOOKBACK TRAINMODE RULCYCLE WFOCycle = 2 Bar 90549 | mode = MAIN_FUNCTION FIRSTRUN LOOKBACK TRAINMODE RULCYCLE WFOCycle = 2 Bar 90550 | mode = MAIN_FUNCTION LOOKBACK TRAINMODE RULCYCLE WFOCycle = 2 Bar 109749 | mode = MAIN_FUNCTION TRAINMODE RULCYCLE WFOCycle = 2 So to me it looks like NEURAL_TRAIN is fired from the adviseLong() or adviseShort() function when TRAINMODE, EXITRUN and RULCYCLE are all set. That's why I'm not skipping if EXITRUN is set. However, if I'm using this bypass, only NEURAL_SAVE is triggered, resulting in empty *.ml files (only 1 kB in size). What am I missing? One more question: I still don't really understand how the default naming of the *.ml files works when it comes to retraining the strategy. The last model isn't enumerated by default. As I understand it, it's used for predictions during live trading.
Last edited by Smon; 05/22/20 14:38.
|
|
|
Re: Blazing fast custom neural() function for DeepLearning
[Re: Smon]
#485210
02/04/22 18:06
02/04/22 18:06
|
Joined: Aug 2021
Posts: 101
MegaTanker
Member
|
Member
Joined: Aug 2021
Posts: 101
|
Bit late, but thank you for this! Just got this working and now I can actually play around with models in R. The default implementation made that unrealistic, took like 5 minutes to predict just 5000 bars and then I gave up. With this, Zorro can run through 600k bars in maybe 20 seconds (after the predictions were created). Regular backtest speed basically, because it doesn't need to call R anymore after the predictions are loaded at the start.
Imo something like this should be a built-in option to choose in Zorro directly because it really is a game changer.
Last edited by MegaTanker; 02/04/22 18:07.
|
|
|
|