OP
Member
Joined: Dec 2014
Posts: 206
Germany
|
Bypassing the R bridge during test mode makes this approximately 40 times faster. Put this into your R script. This function batch-predicts all signals contained in the signal file: library(dplyr, quietly = TRUE) #just load it together with your other packages / install if necessary
neural.cache <- function(model, XY) {
  # Batch-predict every row of a signal table and write the rounded
  # predictions, newest timestamp first, to the CSV file named by `cachepath`.
  #
  # model: index into the list `Models` (looked up in the calling environment).
  # XY:    signal table. Column 1 is the timestamp, the last column is left
  #        out of the predictors (presumably the training target - confirm),
  #        and every column in between is handed to predict().
  #
  # `Models` and `cachepath` are not arguments; they must already exist when
  # this function is called.
  XY <- data.frame(XY) # convert to data frame. some models will need a matrix, but df OK for caret
  # With fewer than 3 columns 2:(ncol(XY) - 1) would count backwards and
  # silently select the wrong columns.
  stopifnot(ncol(XY) >= 3)
  wdate <- XY[, 1]
  # drop = FALSE keeps X a data frame even when there is only one signal column.
  X <- XY[, 2:(ncol(XY) - 1), drop = FALSE]
  # Keep only the probability column named "L", as a one-column data frame.
  prob_long <- predict(Models[[model]], newdata = X, type = "prob")["L"]
  writethis <- cbind(wdate, round(prob_long, digits = 3)) %>%
    arrange(desc(wdate))
  write.csv(writethis, file = cachepath, row.names = FALSE)
} Now copy this into your Zorro script. Adapt the file path(s) to your R script(s) and enable loading of your R script by using #define. In my case it's #define XGBOOST //my favorite Not really necessary (delete function calls if you don't include it). I wrote this to really see what's going on under the hood. int flagsum()
{
	// Builds a single number out of the status flags that are currently set,
	// so that a caller can detect a change in run state by comparing two sums.
	// The accumulator must start at zero explicitly: lite-C does not clear
	// local variables, so an uninitialized one holds random content.
	int a = 0;
	if(is(INITRUN)) a += INITRUN;
	if(is(FIRSTINITRUN)) a += FIRSTINITRUN;
	if(is(FIRSTRUN)) a += FIRSTRUN;
	if(is(LOOKBACK)) a += LOOKBACK;
	if(is(TRAINMODE)) a += TRAINMODE;
	if(is(EXITRUN)) a += EXITRUN;
	if(is(PARCYCLE)) a += PARCYCLE;
	if(is(RULCYCLE)) a += RULCYCLE;
	return a;
}
int lastflagsum; // flagsum() value at the time of the last printout

// Prints the current bar, a caller-supplied label, the names of all status
// flags that are currently set, and the WFO cycle.
//   mystring     - label printed after "mode = ".
//   noextraprint - when > 0, the printout is skipped as long as flagsum()
//                  has not changed since the last printout. When omitted or
//                  0, the state is printed on every call.
void flagprint(string mystring, int noextraprint, ...)
{
	// Evaluate the flag state once so the comparison and the stored value
	// are guaranteed to be the same number.
	int current = flagsum();
	if(current == lastflagsum && noextraprint > 0) return;
	lastflagsum = current;

	print(TO_ANY, "\nBar %d | ", Bar);
	// The original literal began with "\m", which is not a valid escape
	// sequence; the label continues the "Bar" line, so no escape is needed.
	print(TO_ANY, "mode = %s", mystring);
	if(is(INITRUN)) print(TO_ANY, " INITRUN");
	if(is(FIRSTINITRUN)) print(TO_ANY, " FIRSTINITRUN");
	if(is(FIRSTRUN)) print(TO_ANY, " FIRSTRUN");
	if(is(LOOKBACK)) print(TO_ANY, " LOOKBACK");
	if(is(TRAINMODE)) print(TO_ANY, " TRAINMODE");
	if(is(EXITRUN)) print(TO_ANY, " EXITRUN");
	if(is(PARCYCLE)) print(TO_ANY, " PARCYCLE");
	if(is(RULCYCLE)) print(TO_ANY, " RULCYCLE");
	print(TO_ANY, " WFOCycle = %d", WFOCycle);
}
I also put flagprint("MAIN_FUNCTION", 1); into my run function. Again, it's only for educational purposes. Now the important part:
// Selectors for filename()
#define SIGNALFILE 1
#define PREDCACHE 2
#define MODEL 3
#define SUBFOLDER 4

// Helper function to create unique, meaningful file names below
// <ZorroFolder>Data\<Script>\<Asset without "/">.
//   param  - one of SIGNALFILE, PREDCACHE, MODEL, SUBFOLDER.
//   offset - added to WFOCycle for SIGNALFILE only; ignored by the other
//            selectors. May be omitted.
// Returns the path built by strf(), or an empty string for an unknown
// selector (the original returned an uninitialized pointer in that case).
string filename(int param, int offset, ...)
{
	// strx() is kept inline in every call: it is passed straight into strf()
	// exactly as before instead of being stored in a local first.
	if(param == SIGNALFILE)
		return strf("%sData\\%s\\%s\\%d_sigs.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle + offset);
	if(param == PREDCACHE)
		return strf("%sData\\%s\\%s\\%d_pred.csv", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	if(param == MODEL)
		return strf("%sData\\%s\\%s\\%d_model.ml", ZorroFolder, Script, strx(Asset,"/",""), WFOCycle);
	if(param == SUBFOLDER)
		return strf("%sData\\%s\\%s", ZorroFolder, Script, strx(Asset,"/",""));
	return "";
}
// Redefinition of dataFromCSV that takes the Offset (in minutes) as a var,
// so fractions of a minute can be passed.
// Parses Filename into the dataset Handle on first use, then returns the
// value in Column of the row that dataFind() reports for the current bar
// time shifted by Offset minutes. Returns 0 when the file cannot be parsed.
var dataFromCSV(int Handle,const char* Format, const char* Filename,int Column,var Offset)
{
	// A negative result from dataFind() is taken to mean that the data
	// array has not been loaded yet.
	if(dataFind(Handle,0) < 0)
	{
		if(!dataParse(Handle,Format,Filename))
			return 0;
	}
	// 1440 minutes per day converts the minute offset to date units.
	var lookupTime = wdate(0)-Offset/1440.;
	int foundRow = dataFind(Handle,lookupTime);
	return dataVar(Handle,foundRow,Column);
}
// Reads the cached prediction for the current bar from the prediction
// cache file of the current asset and WFO cycle.
//   new - when nonzero, the dataset is reset with dataNew() first, so the
//         cache file is parsed again by the read that follows. May be omitted.
// Returns whatever dataFromCSV() yields for column 1 of the cache.
var advise_cache(int new, ...)
{
	// Every asset gets its own dataset handle.
	int handle = stridx(Asset)+1;
	if(new)
		dataNew(handle, 0, 0); //reset cache
	// The small offset of -0.2 minutes makes entries visible 12 seconds early
	// (not enough to enable future peeking). Rounding errors that occur while
	// the signal .csv files are created could otherwise hide entries that
	// should be visible.
	return dataFromCSV(handle, "%w,f", filename(PREDCACHE), 1, -0.2);
}
int cache_errors, big_errors; //check if errors occur because of caching

// Machine-learning callback that is dispatched on `mode`.
//   NEURAL_INIT    - starts the R session, sources the R script selected by
//                    the ANN / XGBOOST / AUTOKERAS define and calls neural.init().
//   NEURAL_TRAIN   - writes Data to the signal file if that file does not
//                    exist yet, reads it into R and calls neural.train().
//   NEURAL_PREDICT - returns the cached prediction; in trade mode, on a cache
//                    miss, and near both ends of the cache it also asks R for
//                    a live prediction and counts deviations from the cache.
//   NEURAL_SAVE    - stores the models under filename(MODEL).
//   NEURAL_LOAD    - loads the models and creates the prediction cache file
//                    if it is missing.
// model is zero-based here and passed to R as model+1.
// Returns 0 when wait() fails or training fails, the prediction in
// NEURAL_PREDICT, the Rx() result in NEURAL_SAVE / NEURAL_LOAD, otherwise 1.
var neural(int mode, int model, int numSignals, void* Data)
{
	if(!wait(-200)) return 0;

	if(mode == NEURAL_INIT)
	{
		flagprint("NEURAL_INIT");
		if(!Rstart("", 2)) //enable verbose output
		//if(!Rstart("", 1)) //less output
		{
			print(TO_ANY, "\nError - could not start R session! NEURAL_INIT");
			quit();
		}
#ifdef ANN
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderANN.r')", 3);
#endif
#ifdef XGBOOST
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderXGBOOST.r')", 3);
#endif
#ifdef AUTOKERAS
		Rx("if(!exists('neural.init', mode='function')) source('D:/Zorro/Strategy/HumanTraderAUTOKERAS.r')", 3);
#endif
		Rx("neuralfromZ <- T");
		Rx("neural.init()");
		return 1;
	}

	// export batch training samples to a file to be read by R
	if(mode == NEURAL_TRAIN)
	{
		flagprint("NEURAL_TRAIN");
		//check if signalfile already exists
		if(file_length(filename(SIGNALFILE)) == 0)
		{
			//NO
			//create subfolders if necessary
			Rx(strf("dir.create(file.path('%s'), recursive = T, showWarnings = T)", slash(filename(SUBFOLDER))));
			file_write(filename(SIGNALFILE),Data,0);
		} else {
			//YES
			//Uncomment next line, if you don't want to retrain your models if the *.ml files already exist
			//if(file_length(filename(MODEL)) > 0) return 0;
		}
		Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE))));
		if(!Rx(strf("neural.train(%i,XY)",model+1),2))
			return 0; //training failed
		return 1; //model successfully trained
	}

	// predict the target
	if(mode == NEURAL_PREDICT)
	{
		if(is(FIRSTRUN))
		{
			Rset("wdate",wdate());
			Rset("WFOCycle", WFOCycle);
		}
		flagprint("NEURAL_PREDICT",1);
		var pred, cached_pred;
		cached_pred = advise_cache(); // = zero if not found
		//let's make sure, the cache is working properly. If you want less or more than 100 Bars from the beginning
		//and the end of the cache file, just edit the numbers that are set to 100 right now.
		//The wdate() comparison is a somewhat clumsier way of writing between(WFOBar, End-100, End).
		if(is(TRADEMODE) || cached_pred == 0 || between(WFOBar, 0, 100) || wdate() >= dataVar(stridx(Asset)+1, 100, 0))
		{
			Rset("X",(double*)Data,numSignals);
			Rx(strf("Y <- neural.predict(%i,X)",model+1));
			pred = Rd("Y[1]"); //save return from R, no need to query multiple times -> save time!
			if(is(TRADEMODE)) return pred; //abort during live trading, as there is no cache
			//if still here, it's testmode
			//uncomment next line if you want to see what's going on:
			//printf("\npred = %.3f, cached = %.3f, Bar %d", pred, cached_pred, WFOBar);
			if(abs(pred - cached_pred) > 0.001) //I'm rounding my classifiers predictions to 3 decimals anyways
			{
				print(TO_ANY, "\nDeviation pred = %.3f, cached_pred = %.3f", pred, cached_pred);
				cache_errors++;
			}
			if(abs(pred - cached_pred) > 0.1)
			{
				//cached_pred is a var, so it needs %f; the original used %.3d here
				print(TO_ANY, "\nWARNING! Huge deviation: pred = %.3f, cached_pred = %.3f", pred, cached_pred);
				big_errors++;
			}
			//Somewhere between the testing periods, the error counters must get evaluated and reset:
			if(cache_errors + big_errors > 0 && (WFOBar > 100 || wdate() >= dataVar(stridx(Asset)+1, 100, 0)))
			{
				if(cache_errors > 0)
				{
					print(TO_ANY, "\n%d minor deviations in the predictions cache!", cache_errors);
					cache_errors = 0;
				}
				if(big_errors > 0)
				{
					print(TO_ANY, "\nACHTUNG: %d Grosse Fehler im Predictions Cache!", big_errors);
					big_errors = 0;
				}
			}
		}
		return cached_pred;
	}

	// save all trained models
	if(mode == NEURAL_SAVE)
	{
		flagprint("NEURAL_SAVE");
		print(TO_ANY,"\nStore %s",strrchr(Data,'\\')+1);
		//return Rx(strf("neural.save('%s')",slash(Data)),2); // Zorros default naming scheme
		return Rx(strf("neural.save('%s')",slash(filename(MODEL))),2);
	}

	// load all trained models
	if(mode == NEURAL_LOAD)
	{
		flagprint("NEURAL_LOAD");
		//start every loaded model with clean deviation counters
		cache_errors = 0;
		big_errors = 0;
		printf("\nLoad %s",strrchr(Data,'\\')+1);
		int r; //don't return yet, as we need to create the cache file if it's not there yet.
		r = Rx(strf("load('%s')",slash(filename(MODEL))),2);
		//if cache file is missing, create it:
		if(file_length(filename(PREDCACHE)) == 0)
		{
			//The comparison belongs outside the file_length() call; the original
			//passed the result of (path == 0) to file_length().
			if(file_length(filename(SIGNALFILE, 1)) == 0)
			{
				//can this even happen???
				print(TO_ANY, "\nWARNING: Signalfile missing. Cache won't be available!");
				return r;
			}
			Rx(strf("XY <- read.csv('%s',header = F)", slash(filename(SIGNALFILE, 1))));
			Rx(strf("cachepath <- '%s'", slash(filename(PREDCACHE))));
			Rx(strf("neural.cache(%i,XY)",model+1));
		}
		advise_cache(1); //delete old cache
		return r;
	}
	return 1;
}
Important note: You must use wdate() as your first signal in your adviseLong or adviseShort functions! This will put a timestamp in the first column of your signal file. This is how to remove it in your neural.train function in R before training:
Last edited by Smon; 05/23/20 03:43.
|