Twin Horizon Bandit Weaver is an adaptive trading strategy that watches the market through two horizons and learns which internal settings work best as conditions change. The first horizon is the quick listener that reacts to recent movement. The second horizon is the slow listener that provides context and steadiness. These two horizons are not just different chart speeds. They are separate views of the same market story, one focused on immediate rhythm and the other focused on the broader cadence.

From each horizon, the strategy builds two families of clues. The first family is a value pressure clue that compares how price behaves against a gently drifting reference that represents a carry-like influence. If price swings wildly while the reference changes smoothly, the value pressure clue becomes stronger. The second family is a structure clue that asks whether recent returns look like they are dominated by one main direction or spread across competing directions. It also captures how active the structure is overall and whether the paired return streams tend to move together or against each other.

Those horizon clues are packaged into a compact signal set and passed into a lightweight learning module that outputs a preference for long exposure and a preference for short exposure. In training mode, the strategy forces learning by taking alternating long and short trades so the learner receives outcomes and does not stall. In trading mode, it converts the two preferences into a single directional intent and then shapes that intent into a position decision.

The most distinctive feature is the parameter orchestra. Instead of fixing window lengths, thresholds, and scaling values forever, the strategy assigns one small bandit agent to each parameter. Each agent has a short menu of allowed choices. When the strategy is flat, all agents pick their choices for the next episode. Most of the time they exploit what has worked best so far, but occasionally they explore a different option to avoid getting stuck.

Rewards are taken from the realized account change when a trade closes. That reward is shared back to all bandit agents that participated in the episode, nudging their chosen settings toward or away from future selection. Risk control stays simple: trades are not allowed to linger beyond a chosen holding limit, entries require a minimum confidence threshold, and a dominance filter can require at least one horizon to show clear directional structure before allowing intent to remain active. The strategy also logs each episode so you can audit what it chose, what it saw, and how it learned.


Code
// ============================================================================
// EIG2TF - RL15 (15 bandit agents: one per parameter)
//
// FIXES INCLUDED (lite-C / Zorro):
// - No string-array initializer: ParName[] is filled by initParNames().
// - No randf(): use random(Max).
// - No NumClosedTotal: reward detected by transition (PrevOpenTotal>0 && NumOpenTotal==0),
//   reward computed as Balance - LastBalance.
// - CSV header + format strings are single literals (no split string concatenation).
// - DO NOT return during LOOKBACK in Train (otherwise forced training trades never happen).
// - Bootstrap in Test/Trade if model not trained (PredL==0 && PredS==0): open a random trade.
// ============================================================================

// Number of tunable parameters; every per-parameter table below has NPAR rows.
#define NPAR 15
// Upper bound on the number of arms (discrete choices) per parameter;
// second dimension of Q[][] and Ncnt[][], and the cap applied by calcArms().
#define MAXARMS 64

// Probability threshold compared against random(1) in selectArm():
// below it a random arm is drawn, otherwise the best-Q arm is used.
#define EPSILON 0.10
// Step size of the incremental Q update in updateArm().
#define ALPHA   0.10

// Parameter kinds stored in ParType[]: P_INT values are rounded to the
// nearest integer by armValue(), P_VAR values are passed through unrounded.
#define P_INT 0
#define P_VAR 1

// Parameter indices
// (row index into ParName/ParType/ParDef/ParMin/ParMax/ParStep/Q/Ncnt/ArmsCount/CurArm)
#define P_TF1       0
#define P_TF2       1
#define P_KMAX1     2
#define P_W1        3
#define P_WEIG1     4
#define P_L1        5
#define P_KMAX2     6
#define P_W2        7
#define P_WEIG2     8
#define P_L2        9
#define P_LEVSCALE  10
#define P_MAXLEV    11
#define P_PREDTHR   12
#define P_HOLDBARS  13
#define P_DOMTHR    14

// -----------------------------
// RL/bandit storage
// -----------------------------
// Display name of each parameter, indexed by the P_* constants.
string ParName[NPAR];

// Fills ParName[] at run time (the file header notes that a string-array
// initializer is deliberately avoided for lite-C).
void initParNames()
{
    // the two time frames
    ParName[P_TF1]      = "TF1";
    ParName[P_TF2]      = "TF2";

    // look-ahead / window lengths, grouped by kind rather than by horizon
    ParName[P_KMAX1]    = "Kmax1";
    ParName[P_KMAX2]    = "Kmax2";
    ParName[P_W1]       = "W1";
    ParName[P_W2]       = "W2";
    ParName[P_WEIG1]    = "Weig1";
    ParName[P_WEIG2]    = "Weig2";
    ParName[P_L1]       = "L1";
    ParName[P_L2]       = "L2";

    // position shaping and trade management
    ParName[P_LEVSCALE] = "LevScale";
    ParName[P_MAXLEV]   = "MaxLev";
    ParName[P_PREDTHR]  = "PredThr";
    ParName[P_HOLDBARS] = "HoldBars";
    ParName[P_DOMTHR]   = "DomThr";
}

// Kind of each parameter in P_* index order: the ten window/time-frame
// parameters are P_INT, LevScale/MaxLev/PredThr are P_VAR, HoldBars is P_INT
// and DomThr is P_VAR. armValue() rounds only the P_INT ones.
int ParType[NPAR] =
{
    P_INT,P_INT,P_INT,P_INT,P_INT,P_INT,P_INT,P_INT,P_INT,P_INT,
    P_VAR,P_VAR,P_VAR,P_INT,P_VAR
};

// Defaults / ranges / steps
// All four tables are filled by initParams(). ParMin/ParMax/ParStep define
// the arm grid used by calcArms() and armValue(); ParDef is assigned but not
// read anywhere else in the visible code.
var ParDef[NPAR];
var ParMin[NPAR];
var ParMax[NPAR];
var ParStep[NPAR];

// Learned value estimates and counts
// Q[p][a]     : running reward estimate of arm a of parameter p (updateArm)
// Ncnt[p][a]  : number of updates applied to that arm (updateArm)
// ArmsCount[p]: number of valid arms of parameter p, at most MAXARMS
// CurArm[p]   : arm currently selected for parameter p (pickParams)
var Q[NPAR][MAXARMS];
int Ncnt[NPAR][MAXARMS];
int ArmsCount[NPAR];
int CurArm[NPAR];

// Number of grid points from mn to mx in increments of stp, limited to the
// range 1..MAXARMS. A non-positive step yields a single arm. The small
// 0.000001 offset keeps the top grid point when the quotient is an integer
// that lands marginally below due to floating-point rounding.
int calcArms(var mn, var mx, var stp)
{
    if(stp <= 0) return 1;

    var span = mx - mn;
    int count = (int)floor(span/stp + 1.000001);

    if(count > MAXARMS) return MAXARMS;
    if(count < 1) return 1;
    return count;
}

// Maps arm index a of parameter p to its concrete value:
// ParMin[p] + a*ParStep[p], limited to [ParMin[p], ParMax[p]], and rounded
// to the nearest integer (via +0.5 and truncation) for P_INT parameters.
var armValue(int p, int a)
{
    var lo  = ParMin[p];
    var hi  = ParMax[p];
    var raw = lo + (var)a*ParStep[p];

    if(raw < lo) raw = lo;
    if(raw > hi) raw = hi;

    if(ParType[p] != P_INT)
        return raw;

    return (var)(int)(raw + 0.5);
}

// Returns the index of the arm of parameter p with the largest Q value.
// Only a strictly larger Q replaces the current leader, so ties resolve to
// the lowest index (arm 0 while all estimates are still equal).
int bestArm(int p)
{
    int winner = 0;
    int idx;

    for(idx = 1; idx < ArmsCount[p]; idx++)
    {
        if(Q[p][idx] > Q[p][winner])
            winner = idx;
    }
    return winner;
}

int selectArm(int p)
{
    // epsilon-greedy using random(Max)
    if(random(1) < EPSILON)
        return (int)random((var)ArmsCount[p]);  // 0..ArmsCount-1
    return bestArm(p);
}

// Moves the estimate of arm a of parameter p a fraction ALPHA of the way
// toward the observed reward and counts the update.
void updateArm(int p, int a, var reward)
{
    var old = Q[p][a];

    Q[p][a]    = old + ALPHA*(reward - old);
    Ncnt[p][a] = Ncnt[p][a] + 1;
}

// Fills the default/min/max/step tables for all NPAR parameters, derives the
// arm count of each parameter from its range, and resets the bandit state
// (current arm, Q estimates, update counts) for every valid arm.
// All ranges are chosen so that the arm count stays within MAXARMS.
void initParams()
{
    // ---- default values ----
    ParDef[P_TF1]      = 1;
    ParDef[P_TF2]      = 5;
    ParDef[P_KMAX1]    = 60;
    ParDef[P_W1]       = 80;
    ParDef[P_WEIG1]    = 80;
    ParDef[P_L1]       = 5;
    ParDef[P_KMAX2]    = 24;
    ParDef[P_W2]       = 40;
    ParDef[P_WEIG2]    = 40;
    ParDef[P_L2]       = 2;
    ParDef[P_LEVSCALE] = 10;
    ParDef[P_MAXLEV]   = 0.5;
    ParDef[P_PREDTHR]  = 0.02;
    ParDef[P_HOLDBARS] = 5;
    ParDef[P_DOMTHR]   = 1.5;

    // ---- lower bounds ----
    ParMin[P_TF1]      = 1;
    ParMin[P_TF2]      = 2;
    ParMin[P_KMAX1]    = 20;
    ParMin[P_W1]       = 30;
    ParMin[P_WEIG1]    = 30;
    ParMin[P_L1]       = 1;
    ParMin[P_KMAX2]    = 10;
    ParMin[P_W2]       = 20;
    ParMin[P_WEIG2]    = 20;
    ParMin[P_L2]       = 1;
    ParMin[P_LEVSCALE] = 2;
    ParMin[P_MAXLEV]   = 0.1;
    ParMin[P_PREDTHR]  = 0.00;
    ParMin[P_HOLDBARS] = 1;
    ParMin[P_DOMTHR]   = 1.1;

    // ---- upper bounds ----
    ParMax[P_TF1]      = 3;
    ParMax[P_TF2]      = 12;
    ParMax[P_KMAX1]    = 120;
    ParMax[P_W1]       = 150;
    ParMax[P_WEIG1]    = 200;
    ParMax[P_L1]       = 20;
    ParMax[P_KMAX2]    = 80;
    ParMax[P_W2]       = 120;
    ParMax[P_WEIG2]    = 150;
    ParMax[P_L2]       = 10;
    ParMax[P_LEVSCALE] = 30;
    ParMax[P_MAXLEV]   = 1.0;
    ParMax[P_PREDTHR]  = 0.20;
    ParMax[P_HOLDBARS] = 30;
    ParMax[P_DOMTHR]   = 5.0;

    // ---- grid steps ----
    ParStep[P_TF1]      = 1;
    ParStep[P_TF2]      = 1;
    ParStep[P_KMAX1]    = 2;
    ParStep[P_W1]       = 2;
    ParStep[P_WEIG1]    = 5;
    ParStep[P_L1]       = 1;
    ParStep[P_KMAX2]    = 2;
    ParStep[P_W2]       = 2;
    ParStep[P_WEIG2]    = 5;
    ParStep[P_L2]       = 1;
    ParStep[P_LEVSCALE] = 1;
    ParStep[P_MAXLEV]   = 0.1;
    ParStep[P_PREDTHR]  = 0.01;
    ParStep[P_HOLDBARS] = 1;
    ParStep[P_DOMTHR]   = 0.1;

    // ---- bandit state ----
    int par, arm;
    par = 0;
    while(par < NPAR)
    {
        ArmsCount[par] = calcArms(ParMin[par], ParMax[par], ParStep[par]);
        CurArm[par] = 0;

        arm = 0;
        while(arm < ArmsCount[par])
        {
            Q[par][arm] = 0;
            Ncnt[par][arm] = 0;
            arm++;
        }
        par++;
    }
}

// Lets every parameter's bandit choose its arm for the next episode.
// Parameters are visited in ascending index order, one selectArm() call each.
void pickParams()
{
    int par = 0;
    while(par < NPAR)
    {
        CurArm[par] = selectArm(par);
        par++;
    }
}

// ----------------------------------------------------------------------------
// run(): per-bar strategy function.
//
// Each call
//   1. configures the session and, once per run, resets the bandit tables;
//   2. converts the currently selected arms into concrete parameter values;
//   3. maintains price / return / feature series on two time frames;
//   4. computes a variance-ratio feature (EV) and a 2x2 covariance
//      eigenvalue feature (dominance, trace, correlation) per time frame;
//   5. closes trades older than HoldBars and performs signal-driven exits;
//   6. settles the bandit reward if the account has just become flat;
//   7. opens new trades (forced alternating trades in Train mode,
//      bootstrap or signal trades otherwise);
//   8. appends one CSV row and updates the plots.
//
// Steps 5-7 are deliberately ordered exits -> reward -> entries: a flat
// account is re-entered on the same bar in Train mode and on the
// bootstrap / reversal paths, so a flat check placed after the entries would
// not observe NumOpenTotal == 0 and the bandits would never be updated
// (assuming, as the flat checks in this function already do, that
// NumOpenTotal reflects exits and entries made earlier in the same call).
// Arms picked in step 6 are converted to parameter values on the next call.
// ----------------------------------------------------------------------------
function run()
{
    // ------------------------------------------------------------------------
    // Session setup
    // ------------------------------------------------------------------------
    BarPeriod = 1440;
    StartDate = 20100101;
    EndDate   = 0;

    set(PLOTNOW|RULES|LOGFILE);

    asset("EUR/USD");
    algo("EIG2TF_RL15");

    // guard against log(0) and division by zero
    var eps = 1e-12;

    if(Train) DataSplit = 50;
    else      DataSplit = 0;

    // IMPORTANT: keep enough history for your biggest windows
    LookBack = 2600;

    // ------------------------------------------------------------------------
    // One-time init
    // ------------------------------------------------------------------------
    static int Inited = 0;
    static int PrevOpenTotal = 0;   // NumOpenTotal at the end of the previous call
    static var LastBalance   = 0;   // Balance baseline of the current episode

    string LogFN = "Log\\EIG2TF_RL15.csv";

    if(is(FIRSTINITRUN))
    {
        Inited = 0;
        PrevOpenTotal = 0;
        LastBalance   = 0;

        // start a fresh CSV with a 32-column header
        file_delete(LogFN);
        file_append(LogFN,"Date,Time,Mode,Bar,TF1,TF2,Kmax1,W1,Weig1,L1,Kmax2,W2,Weig2,L2,LevScale,MaxLev,PredThr,HoldBars,DomThr,Dom1,Tr1,Corr1,Dom2,Tr2,Corr2,EV1,EV2,PredL,PredS,Pred,Lev,Reward\n");
    }

    if(!Inited)
    {
        // optional deterministic randomness:
        // seed(12345);

        initParNames();
        initParams();
        pickParams();

        LastBalance = Balance;
        PrevOpenTotal = NumOpenTotal;

        Inited = 1;
    }

    // ------------------------------------------------------------------------
    // Convert chosen arms -> concrete parameter values
    // ------------------------------------------------------------------------
    int TF1     = (int)armValue(P_TF1, CurArm[P_TF1]);
    int TF2     = (int)armValue(P_TF2, CurArm[P_TF2]);
    // keep the second time frame strictly slower than the first, capped at 12
    if(TF2 <= TF1) TF2 = TF1 + 1;
    if(TF2 > 12)   TF2 = 12;

    int Kmax1   = (int)armValue(P_KMAX1, CurArm[P_KMAX1]);
    int W1      = (int)armValue(P_W1,    CurArm[P_W1]);
    int Weig1   = (int)armValue(P_WEIG1, CurArm[P_WEIG1]);
    int L1      = (int)armValue(P_L1,    CurArm[P_L1]);

    int Kmax2   = (int)armValue(P_KMAX2, CurArm[P_KMAX2]);
    int W2      = (int)armValue(P_W2,    CurArm[P_W2]);
    int Weig2   = (int)armValue(P_WEIG2, CurArm[P_WEIG2]);
    int L2      = (int)armValue(P_L2,    CurArm[P_L2]);

    var LevScale = armValue(P_LEVSCALE, CurArm[P_LEVSCALE]);
    var MaxLev   = armValue(P_MAXLEV,   CurArm[P_MAXLEV]);
    var PredThr  = armValue(P_PREDTHR,  CurArm[P_PREDTHR]);
    int HoldBars = (int)armValue(P_HOLDBARS, CurArm[P_HOLDBARS]);
    var DomThr   = armValue(P_DOMTHR, CurArm[P_DOMTHR]);

    // ------------------------------------------------------------------------
    // Carry proxy + discount rate
    // ------------------------------------------------------------------------
    var carryDaily = 0.015/252.;
    var r_d = 0.0001;

    // ------------------------------------------------------------------------
    // Timeframe 1 series and feature buffers
    // ------------------------------------------------------------------------
    TimeFrame = TF1;
    vars P1   = series(priceClose());
    vars R1tf = series(log(max(eps,P1[0]) / max(eps,P1[1])));
    vars D1   = series(carryDaily*(var)TF1);
    vars Px1  = series(0);
    vars EV1S = series(0);
    vars Dom1S  = series(0);
    vars Tr1S   = series(0);
    vars Corr1S = series(0);

    // ------------------------------------------------------------------------
    // Timeframe 2 series and feature buffers
    // ------------------------------------------------------------------------
    TimeFrame = TF2;
    vars P2   = series(priceClose());
    vars R2tf = series(log(max(eps,P2[0]) / max(eps,P2[1])));
    vars D2   = series(carryDaily*(var)TF2);
    vars Px2  = series(0);
    vars EV2S = series(0);
    vars Dom2S  = series(0);
    vars Tr2S   = series(0);
    vars Corr2S = series(0);

    // back to base timeframe
    TimeFrame = 1;

    // ------------------------------------------------------------------------
    // Warmup gate
    // ------------------------------------------------------------------------
    // deepest series index touched per time frame, converted to base bars
    int NeedTF1 = max(max(Kmax1, W1), (Weig1 + L1 + 2));
    int NeedTF2 = max(max(Kmax2, W2), (Weig2 + L2 + 2));
    int WarmupBars = max(TF1 * NeedTF1, TF2 * NeedTF2) + 10;

    if(Bar < WarmupBars) return;

    // KEY FIX: do NOT return during LOOKBACK in Train
    if(is(LOOKBACK) && !Train) return;

    // ============================================================
    // Feature block A: EV ratios
    // (sample variance of price over W bars divided by the sample
    //  variance of the discounted carry sum Px over the same bars)
    // ============================================================
    TimeFrame = TF1;
    {
        var sumDisc1=0, disc1=1; int k;
        for(k=1;k<=Kmax1;k++){ disc1/=(1+r_d); sumDisc1 += disc1*D1[k]; }
        Px1[0]=sumDisc1;

        var meanP1=0, meanPx1=0; int i;
        for(i=0;i<W1;i++){ meanP1+=P1[i]; meanPx1+=Px1[i]; }
        meanP1/=W1; meanPx1/=W1;

        var varP1=0, varPx1=0;
        for(i=0;i<W1;i++){
            var a=P1[i]-meanP1, b=Px1[i]-meanPx1;
            varP1+=a*a; varPx1+=b*b;
        }
        varP1/=(W1-1); varPx1/=(W1-1);
        EV1S[0]=varP1/(varPx1+eps);
    }

    TimeFrame = TF2;
    {
        var sumDisc2=0, disc2=1; int k2;
        for(k2=1;k2<=Kmax2;k2++){ disc2/=(1+r_d); sumDisc2 += disc2*D2[k2]; }
        Px2[0]=sumDisc2;

        var meanP2=0, meanPx2=0; int j;
        for(j=0;j<W2;j++){ meanP2+=P2[j]; meanPx2+=Px2[j]; }
        meanP2/=W2; meanPx2/=W2;

        var varP2=0, varPx2=0;
        for(j=0;j<W2;j++){
            var a=P2[j]-meanP2, b=Px2[j]-meanPx2;
            varP2+=a*a; varPx2+=b*b;
        }
        varP2/=(W2-1); varPx2/=(W2-1);
        EV2S[0]=varP2/(varPx2+eps);
    }

    // ============================================================
    // Feature block B: Eigen dominance
    // (2x2 covariance of the return series and its copy lagged by L
    //  bars; eigenvalues in closed form from trace and determinant)
    // ============================================================
    TimeFrame = TF1;
    {
        int i; var meanX=0, meanY=0;
        for(i=1;i<=Weig1;i++){ meanX+=R1tf[i]; meanY+=R1tf[i+L1]; }
        meanX/=Weig1; meanY/=Weig1;

        var covXX=0,covYY=0,covXY=0;
        for(i=1;i<=Weig1;i++){
            var dx=R1tf[i]-meanX, dy=R1tf[i+L1]-meanY;
            covXX+=dx*dx; covYY+=dy*dy; covXY+=dx*dy;
        }
        covXX/=(Weig1-1); covYY/=(Weig1-1); covXY/=(Weig1-1);

        var trace=covXX+covYY;
        var det=covXX*covYY-covXY*covXY;
        var root=sqrt(max(0, trace*trace-4*det));

        var lam1=0.5*(trace+root), lam2=0.5*(trace-root);
        var lamMax=ifelse(lam1>=lam2,lam1,lam2);
        var lamMin=ifelse(lam1>=lam2,lam2,lam1);

        Dom1S[0]=lamMax/(lamMin+eps);
        Tr1S[0]=trace;
        Corr1S[0]=clamp(covXY/sqrt(max(eps,covXX*covYY)),-1,1);
    }

    TimeFrame = TF2;
    {
        int j; var meanX2=0, meanY2=0;
        for(j=1;j<=Weig2;j++){ meanX2+=R2tf[j]; meanY2+=R2tf[j+L2]; }
        meanX2/=Weig2; meanY2/=Weig2;

        var covXX2=0,covYY2=0,covXY2=0;
        for(j=1;j<=Weig2;j++){
            var dx2=R2tf[j]-meanX2, dy2=R2tf[j+L2]-meanY2;
            covXX2+=dx2*dx2; covYY2+=dy2*dy2; covXY2+=dx2*dy2;
        }
        covXX2/=(Weig2-1); covYY2/=(Weig2-1); covXY2/=(Weig2-1);

        var trace2=covXX2+covYY2;
        var det2=covXX2*covYY2-covXY2*covXY2;
        var root2=sqrt(max(0, trace2*trace2-4*det2));

        var lam12=0.5*(trace2+root2), lam22=0.5*(trace2-root2);
        var lamMax2=ifelse(lam12>=lam22,lam12,lam22);
        var lamMin2=ifelse(lam12>=lam22,lam22,lam12);

        Dom2S[0]=lamMax2/(lamMin2+eps);
        Tr2S[0]=trace2;
        Corr2S[0]=clamp(covXY2/sqrt(max(eps,covXX2*covYY2)),-1,1);
    }

    // back to base timeframe
    TimeFrame = 1;

    // ============================================================
    // ML + trading
    // ============================================================
    int MethodTrain = PERCEPTRON + FUZZY + BALANCED + RETURNS;
    int MethodPred  = PERCEPTRON + FUZZY + BALANCED;

    // eight model inputs: log-compressed and limited to [-2, 2],
    // correlations passed through as they are already in [-1, 1]
    var Sig[8];
    Sig[0] = clamp(log(max(eps,Dom1S[0])), -2, 2);
    Sig[1] = clamp(0.25*log(max(eps,Tr1S[0])), -2, 2);
    Sig[2] = Corr1S[0];
    Sig[3] = clamp(log(max(eps,Dom2S[0])), -2, 2);
    Sig[4] = clamp(0.25*log(max(eps,Tr2S[0])), -2, 2);
    Sig[5] = Corr2S[0];
    Sig[6] = clamp(log(max(eps,EV1S[0])), -2, 2);
    Sig[7] = clamp(log(max(eps,EV2S[0])), -2, 2);

    // ------------------------------------------------------------
    // Phase 1: exits
    // ------------------------------------------------------------
    // Exit after HoldBars
    if(NumOpenTotal > 0)
        for(open_trades)
            if(TradeBars >= HoldBars)
                exitTrade(ThisTrade);

    var PredL=0, PredS=0, Pred=0, Lev=0;
    static int Flip = 0;
    int Boot = 0;   // 1 = flat and the model returned no signal at all

    if(!Train)
    {
        PredL = adviseLong(MethodPred,0,Sig,8);
        PredS = adviseShort(MethodPred,0,Sig,8);

        // Bootstrap if model not trained / no signal
        if(NumOpenTotal == 0 && PredL == 0 && PredS == 0)
            Boot = 1;
        else
        {
            Pred = (PredL - PredS) / 100.0;
            Lev  = clamp(Pred*LevScale, -MaxLev, MaxLev);

            // Apply Dom filter only when signal is nonzero:
            // the signal survives if at least one horizon reaches DomThr
            if(Lev != 0 && Dom1S[0] < DomThr && Dom2S[0] < DomThr)
                Lev = 0;

            // signal-driven exits only; the matching entries follow in
            // phase 3 so that the reward can be settled in between
            if(Lev > PredThr)       exitShort();
            else if(Lev < -PredThr) exitLong();
            else                    { exitLong();  exitShort(); }
        }
    }

    // ============================================================
    // Phase 2: RL update, reward when going from in-position -> flat
    // (must run before any entry of this bar, see function header)
    // ============================================================
    var Reward = 0;
    if(PrevOpenTotal > 0 && NumOpenTotal == 0)
    {
        Reward = Balance - LastBalance;

        // a zero balance change carries no information; skip the update
        if(Reward != 0)
        {
            int p;
            for(p=0; p<NPAR; p++)
                updateArm(p, CurArm[p], Reward);
        }

        pickParams();
        LastBalance = Balance;
    }

    // ------------------------------------------------------------
    // Phase 3: entries
    // ------------------------------------------------------------
    if(Train)
    {
        // Forced trades so ML gets samples
        if(NumOpenTotal == 0)
        {
            Flip = 1 - Flip;
            LastBalance = Balance;

            if(Flip) { adviseLong(MethodTrain,0,Sig,8); Lots=1; enterLong(); }
            else     { adviseShort(MethodTrain,0,Sig,8); Lots=1; enterShort(); }
        }
    }
    else if(Boot)
    {
        // no usable prediction: open a random trade to generate an outcome
        LastBalance = Balance;
        if(random(1) < 0.5) { Lots=1; enterLong(); }
        else                { Lots=1; enterShort(); }
    }
    else
    {
        if(Lev > PredThr)       { Lots=1; enterLong();  }
        else if(Lev < -PredThr) { Lots=1; enterShort(); }
    }

    PrevOpenTotal = NumOpenTotal;

    // ------------------------------------------------------------------------
    // CSV log output (single literal format string)
    // 11 integer fields (Bar..L2) follow the mode string; the specifier count
    // matches the argument list and the 32-column header.
    // ------------------------------------------------------------------------
    string ModeStr="Trade";
    if(Train) ModeStr="Train"; else if(Test) ModeStr="Test";

    file_append(LogFN, strf("%04i-%02i-%02i,%02i:%02i,%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%.4f,%.3f,%.4f,%d,%.4f,%.8f,%.8f,%.8f,%.8f,%.8f,%.8f,%.8f,%.8f,%.4f,%.4f,%.4f,%.4f,%.6f\n",
        year(0),month(0),day(0), hour(0),minute(0),
        ModeStr, Bar, TF1, TF2,
        Kmax1, W1, Weig1, L1, Kmax2, W2, Weig2, L2,
        LevScale, MaxLev, PredThr, HoldBars, DomThr,
        Dom1S[0],Tr1S[0],Corr1S[0],
        Dom2S[0],Tr2S[0],Corr2S[0],
        EV1S[0],EV2S[0],
        PredL,PredS,Pred,Lev,
        Reward
    ));

    // Plots (optional)
    plot("TF1_Dom", Dom1S[0], NEW, 0);
    plot("TF2_Dom", Dom2S[0], 0, 0);
    plot("EV_TF1",  EV1S[0],  0, 0);
    plot("EV_TF2",  EV2S[0],  0, 0);
    plot("Pred",    Pred, 0, 0);
    plot("Lev",     Lev, 0, 0);
    plot("Reward",  Reward, 0, 0);
}