//bs.ctp Copyright (C) 1989-2001 I.Pedley (CTPP) Fri 29-Mar-2002 at 22:25:46

//ChaOS bootstrap code, one source file containing partition table executable,
//boot sector executable and bootstrap

//the idea is, this one file contains ALL the ChaOS real-mode startup code,
//and, unlike DOS, the bootstrap is never overwritten by the system, so it can
//be re-entered to reboot ChaOS; also, we have here source code for the
//multi-boot partition table, and boot sector, so they can be altered if
//required

//the bootstrap also contains callbacks which are used by ChaOS to transfer
//control to real mode routines such as software interrupts, real mode far calls
//etc. These are used by ChaOS to call BIOS interrupts, for example

///////////////////////////
//partition table section//
///////////////////////////

//The MS-DOS idea of only one active partition on each hard disk precludes
//multi-operating system configurations which include DOS; a solution to this
//is to intercept the boot process in the executable of the partition table,
//and provide a keyboard option to choose which partition to boot from
//
//29.10.2000 default boot partition and timeout controlled by
//       timeout     db  05ah    ;90 timer ticks is about 5 sec
//       def         db  031h    ;default boot partition is 1
//these values, along with partition names name1-name4 can be changed by a host
//to quickly modify boot behaviour. struct PTSECTOR in ChaOS can be used
//to access this layout. CHANGE THINGS AT YOUR PERIL!
//
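//of quaint interest only: this partition table is relocated to 0:0700, rather
//than 0:600 as most partition executables I have seen; this means the table is
//not overwritten by the boot sector loading a DOS root directory to 0:500.
//(NOTE: the layout table below and the code itself - which relocates to bmPART
//and places its data with org 0x690..0x6fe - put the partition table at
//0:0500, not 0:0700; the sentence above looks stale.)
//When ChaOS boots from a DOS-esque partition, all components of the bootstrap
//can be viewed from above in their respective locations: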
//
//                      initial location    resting place       length (hex)
//   partition table     0000:7c00           0000:0500           0200
//   boot sector         0000:7c00           0000:0700           0200
//   root dir 1st sector 0000:0500           0000:0900           0200
//   LOADER.BIN          0000:0900           0000:0b00     about 0800 ish
//   OS.XEC 1st sector  boot sector stack about 0:7a00           0200
//   extmove buffer      5000:0000           5000:0000     up to 127 disk sectors

#include {boot.htm}
#include {bootmap.htm}

//#define FTXEC   2
//#define FTMULTI 3

EX  UC  e1[];
EX  UC  e2[];
EX  UC  e3[];
EX  UC  e4[];
EX  UC  m1;
EX  UC  m2[];
EX  UC  m2s[];
EX  UC  m3s[];
EX  UC  m4s[];
EX  UC  cr;
EX  UC  lf;
EX  UC  actual;
EX  UC  timeout;
EX  UC  def;
EX  UC  edd;
VD  PTL02a(VD);
VD  msg (VD);
EX  UC  ptname1[];
EX  UC  ptname2[];
EX  UC  ptname3[];
EX  UC  ptname4[];
EX  UC  pt;
VD  START(VD);

#pragma inlinedata

#pragma align 1
#pragma rmode16

#asm
    UC  res[bmPART]             ;bmPART bytes reserved ahead of the code
                                ;(presumably so labels resolve to the relocated
                                ;image at bmPART - TODO confirm with the linker)

;PTSTART - entry point of the partition table executable.
;  Sets SS:SP to 0000:bmBIOS, zeroes DS and ES, copies the 512-byte sector
;  from bmBIOS to bmPART and far-jumps to PTL02a in the relocated copy.
;  DL is not touched here.
PTSTART::                       ;label for linker
    cld                         ;code is executing at CS:7c00, and CS is 0
                                ;(cld also makes the movsw below copy upwards)
    jmp     PTL01               ;make our code look a bit different
UC chsig    "Ch"                ;CHAOS "Ch" signature
PTL01:
    cli                         ;no interrupts while SS:SP is being changed
    sub     ax,ax
    mov     ss,ax               ;set up stack to be just below this executable
    mov     sp,bmBIOS           ;SS:SP=0x0000:0x7c00
    mov     si,sp               ;si = source of the copy (this sector)
;copy passed-in es:di to 0000:8000
;    mov     es,ax
;    mov     cs:[0x8000],di
;    mov     cs:[0x8002],ax
    push    ax
    push    ax
    pop     es
    pop     ds                  ;set ds,es to base of memory too
    sti

    mov     di,bmPART           ;es:di=0x0000:0x0500
                                ;ds:si=0x0000:0x7c00
    mov     cx,0x100            ;get ready to move 512 bytes
    op      0xf3                ;REP prefix (was 0xf2 = REPNE, which is only
    movsw                       ;defined for CMPS/SCAS, not for MOVS)
;    rep     movsw

    op      0xea                ;far jump into relocated code at 0x0000:0500
#endasm
    UI      ptofflo=&PTL02a;
#asm
    UI      ptoffhi             ;segment word of the far jump; no initialiser
                                ;given, presumably assembles as 0 - verify
;PTL02a - boot menu, running from the relocated copy.
;  1. probes INT 13h extensions and records the result in bit 0 of 'edd'
;  2. zeroes the byte at 0000:046c and prints the menu line
;  3. waits for a key; when the byte at 046c exceeds 'timeout', or Enter is
;     pressed, the partition in 'def' is used
;  4. stores the chosen index in 'actual' and points SI at its 16-byte entry
;  Falls through to PTL04 logic via the jnz below when the entry is bootable.
PTL02a:
//EDD check
    mov     ax,0x4100
    mov     bx,0x55aa
    int     0x13
    cmp     bx,0xaa55           ;bx comes back swapped when extensions exist
    jnz     @F
    or      byte edd,1
@@:
    mov     di,0x46c            ;ds:di -> byte used as the elapsed-time counter
    mov     dl,byte timeout     ;dl = timeout limit (dl is reloaded before any
                                ;disk access, see the mov dx at PTL04)

    mov     byte [di],0         ;restart the count
PTL02:
    mov     si,&m1              ;m1 has no terminator, so this prints CR LF
    call    msg                 ;and runs on through m1a "Boot 0="
    mov     si,&ptname1
    call    msg
    mov     si,&m2s
    call    msg
    mov     si,&ptname2
    call    msg
    mov     si,&m3s
    call    msg
    mov     si,&ptname3
    call    msg
    mov     si,&m4s
    call    msg
    mov     si,&ptname4
    call    msg
    mov     si,&cr
    call    msg
kbdwait:
    sub     ah,ah
    cmp     byte [di],dl

    jbe     @F                  ;unsigned compare: both operands are byte counts
                                ;(was jle, which made any timeout >= 0x80
                                ;expire immediately)
timedout:
    mov     al,byte def
    jmp     default
@@:
    mov     ah,1
    int     0x16                ;ZF set = no key waiting
    jz      kbdwait
    sub     ax,ax
    int     0x16                ;fetch the key
    sub     ah,ah
    cmp     al,0x0d      ; key pressed, go for default
    jz      timedout
    cmp     al,0x30             ;only '0'..'3' are accepted; anything else
    jl      PTL02a              ;restarts the menu (and the timeout)
    cmp     al,0x33
    jg      PTL02a
default:
    push    ax
    mov     si,&lf
    call    msg
    pop     ax
    sub     al,0x30             ;ASCII digit -> partition index
    mov     byte actual,al      ;store partition # selected by user in actual
    shl     ax,4                ;index * 16 = offset of the entry within pt

    lea     si,pt               ;address of first partition table entry in ds:si
    add     si,ax
PTL03:
    test    byte [si],0x80      ;its bootable so go for it
    jnz     PTL04
    cmp     byte [si],0x00      ;its non-bootable, so loop round
    jnz     ERROR1              ;unless its non-zero, which is a no-no
    shr     ax,4                ;recover the index and turn it back into a digit
    add     al,0x30
    mov     bx,&m2              //address of partition # in displaystring
    mov     byte [bx],al        ;overwrite the first character of m2
    mov     si,&m2
    call    msg
    jmp     PTL02a

;PTL04 - SI points at a partition entry whose first byte has bit 7 set.
;  Loads the INT 13h CHS parameters straight from the entry and jumps to BOOT.
PTL04:
    mov     dx,word [si]        ;save drive/head parms for start of partition
                                ;(dl = first byte of the entry, dh = second)
    mov     cx,word [si+2]      ;save track/sector parms
    mov     bp,si               ;and note this partition table address in bp
    jmp     BOOT                ;bootable

;ERROR1 / ERROR0 - print a message (SI for ERROR0, e1 for ERROR1) and hang.
ERROR1:
    mov     si,&e1              ;otherwise display message
ERROR0:
    call    msg
PTL08:
    jmp     PTL08               ;curtains, get out of that without moving


;BOOT - read one sector (CX/DX as set at PTL04) to 0000:bmBIOS, retrying up to
;  five times with a disk reset between attempts.
BOOT:
    mov     di,5                ;retry count of 5 is in di
RETRYBOOT:
    mov     bx,bmBIOS           ;es:bx = load address (es is 0)
    mov     ax,0x201            ;dx and cx are already set for int 13h
                                ;ah=2 read, al=1 sector
    push    di
    int     0x13
    pop     di
    jnc     GOTBOOTSECTOR
    sub     ax,ax               ;reset disk systems ready for retry
    int     0x13
    dec     di
    jnz     RETRYBOOT
    mov     si,&e2              ;prints "Can't read" + " boot sector"
    call    msg
    mov     si,&e4
    jmp     ERROR0

;GOTBOOTSECTOR - verify the 0xaa55 signature of the sector just read, then
;  far-jump to it with SI = address of the chosen partition entry.
GOTBOOTSECTOR:

    mov     di,bmBIOS+0x1fe     ;di points to boot sector signature
    cmp     word [di],0xaa55
    jz      @F
    mov     si,&e3              ;no signature, boot sector not safe to execute
    call    msg
    mov     si,&e4
    jmp     ERROR0

@@:
    mov     si,bp               ;si = partition entry noted at PTL04

    op      0xea                ;we think it's good enough to jump into
UI  bootoff 0x7c00              ;into boot sector code at 0x0000:0x7c00
UI  bootseg 0

;msg - write the zero-terminated string at DS:SI to the screen.
;  In:    si = address of string
;  Out:   si = address just past the terminating zero
;  Uses:  ax, bx (plus whatever the video BIOS changes)
msg:
    lodsb                       ;al = next character, si advanced
    or      al,al               ;terminator reached?
    jz      msg01
    push    si                  ;keep si safe across the BIOS call
    mov     bx,7
    mov     ah,0x0e
    int     0x10                ;teletype output of al
    pop     si
    jmp     msg
msg01:
    ret

;not much space to store meaningful error messages
;all strings below are zero-terminated for msg
UC  e1  "Bad partition table"
    op  0x0d 0x0a 0x00
UC  e2  "Can't read"            ;printed directly before e4
    op  0x00
UC  e3  "Invalid"               ;printed directly before e4
    op  0x00
UC  e4  " boot sector"
    op  0x0d 0x0a 0x00
UC  m1  0x0d                    ;CR LF with no terminator: msg runs on into m1a
    op  0x0a
UC  m1a "Boot 0="
    op  0x00
UC  m2  "  Not bootable!"       ;first character is overwritten with the
    op  0x0d 0x0a 0x00          ;partition digit before display (see PTL03)
UC  m2s " 1="
    op  0x00
UC  m3s " 2="
    op  0x00
UC  m4s " 3="
    op  0x00
UC  cr  0x0d
    op  0x00
UC  lf  0x0a
    op  0x00

//partition table data area, some fields here are used by OS to trace
//boot sequence (don't change unless you alter the OS code);
//these fields must also match the PTSECTOR structure in \i\drive.htm
//for obvious reasons
org 0x690
UC  netname "NETNAME"
op 0x00 0x00
UC  edd     0x00                ;bit 0 set by PTL02a when the INT 13h/41h probe succeeds
UC  pdrv    0x00
UC  actual  0x00                ;partition index actually chosen (written at 'default')
UC  timeout 0x5a                ;menu timeout, compared against the byte at 0000:046c
UC  def     0x30                ;default choice as an ASCII digit ('0' here)
UC  ptname1 "       "           ;menu names, 7 characters + terminator each
    op 0x00
UC  ptname2 "       "
    op 0x00
UC  ptname3 "       "
    op 0x00
UC  ptname4 "       "
    op 0x00
org 0x6be
UC  pt                          ;first of the four 16-byte partition entries
org 0x6fe
UI  ptsig   0xaa55              ;sector signature

#endasm
///////////////////////
//boot sector section//
///////////////////////

//Notes:
//1- BIOS loads this sector to address 0000:7c00 then does a jump 0000:7c00
//   to enter this executable at START:
//2- floppy disk parameter table at 0000:0078 (vector 1e) is valid
//   this is an 11-byte structure of the form:
//   struct  FDCPARMS    //typical
//       {               //value (1.44Mb)
//        UC spec1;      //0xdf     sort=f,unload=f    1st FDC cmd specify byte
//        UC spec2;      //0x02     hd load=1 mode=DMA 2nd FDC cmd specify byte
//        UC mtrwait;    //0x25     timer ticks till motor off (about 3 sec)
//        UC bps;        //0x02     bytes per sector
//        UC eot;        //0x12     last sector on a track
//        UC gap1;       //0x1b     intersector gap length
//        UC datalen;    //0xff     ff=512
//        UC gap3;       //0x54     gap length for format
//        UC fill;       //0xf6     fill bytes for format command
//        UC headsettle; //0x0f     head settle time (msec)
//        UC motorstart; //0x08     motor start time (1/8sec)
//       };
//   note also that this structure is incorporated into the fdcparms table
//   in ChaOS floppy.ctp
//3- on legacy machines the only other valid input is register DL, which
//   contains the drive ID which delivered the boot sector, usually 0
//   for floppy drive A
//4- Bios Boot spec 1.0 (PnP) states that ES:DI points to the PnP
//   installation check structure (struct PNPBIOS in pnp.hpp)
//   as well DL being set to the drive number. This structure contains
//   the entry point to BIOS PnP services
//5- failure to boot triggers a call to int 19h, which BIOS has set to
//   bootstrap recovery, usually an attempt to boot from the next available
//   device
//6- successful operation of the boot sector goes as follows:
//       -reset diskette system
//       -read first root directory sector to 0000:0900
//       -locate LOADER.BIN in that sector
//       -load LOADER.BIN to address 0000:0b00 and jump to 00b0:0000
//7- size and cluster of LOADER.BIN is taken from the directory entry,
//   but upper 16 bits of size are ignored. In any case, the maximum length
//   of LOADER.BIN is 0x7C00-0x700=0x7500 (29952 bytes), less any stack space
//   in use. LOADER.BIN IS ASSUMED TO BE CONTIGUOUS from its start cluster
//
//8- passed-in parameter to LOADER.BIN are
//       ch=media byte
//       dl=boot drive
//    ax:bx=is logical sector for drive data area
//    root dir sector is still at 0:900
//9- byte at bse1 is a digit displayed at the beginning of the one and only
//   error message from this sector "0:Insert system disk and press a key"
//   by incrementing *bse1 the following error codes are valid:
//       0:  Error resetting disk system
//       1:  Prep track error (root directory sector > sectors per track)
//       2:  Error reading root directory sector
//       3:  LOADER.BIN not found in first root directory sector
//       4:  Prep track error (LOADER.BIN > sectors per track)
//       5:  Error reading LOADER.BIN
//
EX  UC  bse1[];
EX  UC  bsn1[];
EX  UL  bssig;
VD  prep(VD);
VD  BSL02a(VD);

#asm
;BSSTART - boot sector entry: jump over the parameter block to BSL01.
BSSTART:
    jmp     BSL01
    nop
                            ;offset
UC  oem "CHAOS   "          ; 3
UI  bps 512                 ; b    bytes per sector
UC  spc 1                   ; d    sectors per cluster
UI  srs 1                   ; e    sectors reserved
UC  fts 2                   ;10    FATs
UI  rts 224                 ;11    root entries
UI  tls 2880                ;13    total sectors
UC  med 0xf0                ;15    media byte
UI  spf 9                   ;16    sectors per FAT
UI  spt 18                  ;18    sectors per track
UI  hds 2                   ;1a    heads
UL  hid 0                   ;1c    hidden sectors
UI  rtl 0                   ;20    reserved work area:total sectors
UI  r1  0                   ;22
UC  rdr 0                   ;24    reserved work area:drive
UC  rhd 0                   ;25    reserved work area:head
UC  n29 0x29                ;26
UL  r2  0                   ;27
//UC  vol "OSBOOTYBOOT"       ;2b
;;temporary work area overwriting boot sector volume label
UL  dsc 0                   ;2b    logical sector no for disk data area
UI  trk 0                   ;2f    current track
UC  sec 0                   ;31    sector
UL  rsc 0                   ;32    logical sector no for disk root directory
UC  ftn "FAT12   "          ;36

;BSL01 - set SS:SP to 0000:f800, zero DS/ES, copy this sector from bmBIOS to
;  bmBOOT and far-jump to BSL02a in the copy. DL (boot drive) is preserved.
;  NOTE(review): no cld here - the copy relies on the direction flag already
;  being clear on entry.
BSL01:
    cli

//set up boot stack at 7000:f800
//    mov     sp,0xf800       ;stack will grow downwards from 7000:f800
//    mov     ax,0x7000
//    mov     ss,ax
//    sub     ax,ax
//set up boot stack at 0000:f800
//    mov     sp,0xf800       ;stack will grow downwards from 7000:f800
    mov     sp,0xf800         ;stack will grow downwards from 0000:f800
    sub     ax,ax
    mov     ss,ax
    mov     es,ax
    push    es
    pop     ds
    op      0xcc              ;int 3 - NOTE(review): looks like a debugging
                              ;breakpoint left in the boot path; confirm
    mov     si,bmBIOS              ;relocate boot sector image to 0x700
    mov     di,bmBOOT
    mov     cx,0x100
    op      0xf3                    ;REP prefix (was 0xf2 = REPNE, which is only
    movsw                           ;defined for CMPS/SCAS, not for MOVS)

    op      0xea                    ;far jump into relocated boot sector image
#endasm
    UI      bsofflo=&BSL02a;
#asm
    UI      bsoffhi
;BSL02a - main body of the relocated boot sector.
;  Saves the boot drive, computes the logical sector of the root directory
;  (rsc) and of the data area (dsc), reads the first root directory sector to
;  bmROOT and searches its 16 entries for the name held in bsn1.
;  The first character of bse1 is incremented as work progresses so the error
;  message shows how far the boot got.
BSL02a:
//now boot sector is executing at 0000:0720ish
//    mov     rdr,dl                  ;drive passed in by partition table or BIOS
    mov     byte bssig,dl           ;drive passed in by partition table or BIOS
                                    ;(kept in the low byte of the bssig dword)
    inc     byte bse1               ;errno is 1
    sub     ax,ax
    cmp     tls,ax                  ;16-bit total sector count present?
    jz      BSL02
    mov     cx,tls                  ;yes: copy it to the rtl work field
    mov     rtl,cx
BSL02:
    mov     al,fts                  ;al=number of FATS (ah is still 0)
    mul     word spf                ;multiply by sectors per FAT
    add     ax,word hid
    adc     dx,word hid+2           ;add dword hidden sector count
    add     ax,srs                  ;add word reserved sector count
    adc     dx,0                    ;now we have logical sector for root directory
    mov     word rsc,ax
    mov     word rsc+2,dx           ;store in dword rsc
    mov     word dsc,ax
    mov     word dsc+2,dx           ;store in dword dsc

    mov     ax,32                   ;ax is sizeof (DIRECTORY ENTRY)
    mul     word rts                ;multiply root entries
    mov     bx,bps
    add     ax,bx
    dec     ax                      ;add bytes per sector - 1
                                    ;NOTE(review): no carry into dx here, so this
                                    ;assumes rts*32+bps-1 fits in 16 bits
    div     bx                      ;divide by sector size
    add     word dsc,ax             ;dsc = rsc + root directory sectors
    adc     word dsc+2,0
    mov     bx,bmROOT               ;es:bx->0000:0900
    mov     ax,word rsc
    mov     dx,word rsc+2

    call    prep                    ;dx:ax -> trk/rhd/sec
    jc      ERR01
    inc     byte bse1               ;errno is 2
    mov     al,1
    call    read                    ;read root dir 1st sector to 0000:0900
    jc      ERR01
    inc     byte bse1               ;errno is 3

    mov     cx,16                   ;LOADER.BIN must be in first root directory
                                    ;sector, i.e. in the first 16 root entries
nextrootentry:
    mov     di,bx                   ;es:di->root dir entry
    push    cx
    mov     cx,11                   ;11 name bytes to compare
    mov     si,&bsn1
    rep     cmpsb
    pop     cx                      ;pop leaves the compare result in ZF intact
    jz      strap
    add     bx,32                   ;advance to next entry
    loop    nextrootentry

;ERR01 - print the message at bse1, wait for a key, then INT 19h.
ERR01:                              ;error, display "Non-System disk...etc"
    mov     si,&bse1
    call    errmsg
    sub     ax,ax
    int     0x16                    ;press a key
    int     0x19
;ERR02 - error exit used from nextsect while ax, dx and cx are still pushed:
;  discards those three words, then continues as ERR01.
ERR02:
    pop     ax
    pop     ax
    pop     ax
    jmp     ERR01

;strap - BX points at the matching root directory entry.
;  Converts the file size to a sector count, converts the start cluster to a
;  logical sector, then reads that many consecutive sectors to bmLOAD onwards
;  and far-jumps to 0000:0b00.
;  Error digit in bse1: 4 while preparing a sector address, 5 while the read
;  itself is in progress (so a failed read reports 5, as the header notes say).
strap:
    inc     byte bse1               ;errno is 4
//    test    byte [rtPTF],1
//    jnz     skip

    mov     ax,[bx+0x1c]            ;ax is size of LOADER.BIN mod 64k!
    add     ax,511
    mov     cl,9
    shr     ax,cl                   ;round up to whole 512-byte sectors
    mov     cx,ax                   ;cx is sectors we need

    mov     ax,[bx+0x1a]            ;get cluster number of bootstrap
    dec     ax
    dec     ax                      ;first data cluster is number 2
    mov     bl,spc                  ;get sectors per cluster
    sub     bh,bh
    mul     bx
    add     ax,word dsc
    adc     dx,word dsc+2           ;dx:ax is first sector of LOADER.BIN
    mov     bx,bmLOAD               ;set DTA to 0:b00
nextsect:
    push    ax
    push    dx
    push    cx
    call    prep
    jc      ERR02                   ;ERR02 drops the three words pushed above
    inc     byte bse1               ;errno is 5 for the duration of the read
                                    ;(was incremented only after a successful
                                    ;read, so read errors were reported as 4)
    mov     al,1
    call    read
    pop     cx                      ;pops do not disturb the carry from read
    pop     dx
    pop     ax
    jc      ERR01
    add     ax,1                    ;increment logical sector number
    adc     dx,0
    add     bx,bps                  ;add sector size to DTA for next read
    dec     byte bse1               ;errno is 4 again
    loop    nextsect

;BOOTSTRAP is loaded and ready to rock and roll
;pass some info in registers to next stage

skip:
    mov     ch,med                  ;ch is media byte
//    mov     dl,rdr                  ;dl is boot drive
    mov     dl,byte bssig           ;dl is boot drive
    mov     bx,word dsc             ;ax:bx is logical sector for drive data area
    mov     ax,word dsc+2           ;remember root dir sector is still at 0:900

//    op      0xea 0x00 0x00 0xb0 0x00
    op      0xea 0x00 0x0b 0x00 0x00  //jmp     0000:0b00); jump into LOADER.BIN

;    db      0eah
;    dw      0000h
;    dw      00b0h
;  (jmp     00b0:0000)               ;jump into LOADER.BIN



;errmsg - write the zero-terminated string at DS:SI via INT 10h teletype.
;  In:   si = address of string
;  Uses: ax, bx, si
;  The terminating 'ret' is the one at errout, below the commented-out code.
errmsg:                             ;output ASCIIZ string to screen
    lodsb
    or      al,al
    jz      errout                  ;break on zero byte
    mov     ah,0x0e
    mov     bx,7
    int     0x10
    jmp     errmsg

;older 16-bit version of prep, kept for reference only
//prep:                               ;sector is in dx value in ax will be discarded
//    cmp     dx,spt+bmBIOS              ;dx should be less than sectors per track!
//    jnb     setcarry
//    div     spt+bmBIOS                 ;dx is modulo sectors per track
//    inc     dl                      ;now dl is sector number
//    mov     sec+bmBIOS,dl              ;store in temporary work area
//    sub     dx,dx
//    div     hds+bmBIOS                 ;now ax is track number, dl is head (remainder)
//    mov     rhd+bmBIOS,dl              ;store in reserved work area
//    mov     trk+bmBIOS,ax
//    clc
//    ret
//setcarry:
//    stc
errout:
    ret
;prep - convert the logical sector number in DX:AX to track/head/sector.
;  In:   dx:ax = logical sector number
;  Out:  sec = (lsn mod spt) + 1
;        rhd = (lsn / spt) mod hds
;        trk = (lsn / spt) / hds   (low 16 bits)
;        carry always clear (callers still test it)
;  Uses: ax, dx; cx is preserved
;  Each 'op32' makes the following instruction operate on the 32-bit register,
;  so this routine needs a 386 or better.
prep:
    push    dx
    push    ax
    op32
    pop     ax                      ;eax = dx:ax
    push    cx
    op32
    sub     cx,cx                   ;ecx = 0 (clears the upper half for div)
    op32
    sub     dx,dx                   ;edx = 0
    mov     cx,word spt
    op32
    div     cx                      ;eax = lsn / spt, edx = lsn mod spt
    inc     dl                      ;sectors are numbered from 1
    mov     sec,dl
    sub     dx,dx
    mov     cx,word hds
    op32
    div     cx                      ;eax = track, edx = head
    mov     rhd,dl
    mov     trk,ax
    clc
    pop     cx
    ret

;read - INT 13h/AH=2 read using the trk/rhd/sec values left by prep.
;  In:   al = sector count, es:bx = buffer
;  Out:  carry and ax as returned by the BIOS
;  Uses: cx, dx
read:
    mov     ah,2
    mov     dx,word trk             ;dl = track bits 0-7, dh = track bits 8+
    mov     cl,6
    shl     dh,cl                   ;track bits 8-9 moved up to bits 6-7
    or      dh,byte sec             ;combined with the sector number
    mov     cx,dx                   ;ch=sector, cl=track
    xchg    ch,cl                   ;ch=track,  cl=sector
//    mov     dl,byte rdr
    mov     dl,byte bssig           ;dl = boot drive saved at BSL02a
    mov     dh,byte rhd             ;dh = head
    int     0x13
    ret


;boot sector strings, positioned so that bssig ends the 512-byte sector
org 0x8c9
UC  bse1  "0:Insert system disk and press a key "   ;leading digit is the error code
UC  bse2[3] 0x0d 0x0a 0                             ;CR LF and terminator for bse1
UC  bsn1  "LOADER  BIN"                             ;11-byte directory name searched for
UL  bssig 0xaa550000                                ;top word is the 0xaa55 signature;
                                                    ;low byte is reused for the boot drive
#endasm

#pragma noinlinedata
#pragma pmode32
#pragma align 4

/////////////////////
//bootstrap section//
/////////////////////

//first successful boot using this loader and real-mode 32-bit memcpy
//to relocate OS image into extended memory, instead of int 15h/87h extmove
//Conditions during boot are quite variable, so my method below is more
//empirical than entirely logical. For instance, ALT-CTL-DEL under DOS/BIOS
//may or may not leave A20 gated on.
//However, this is the current scheme:
//  1-  Gate A20 on
//  2-  Make a protected mode IDT, filled with irets
//  3-  Switch processor to protected mode, but avoid reloading CS selector
//  4-  Load ES,DS,FS and GS with 4Gb selector, but avoid reloading SS selector
//  5-  Switch back to real mode, again without reloading CS selector
//This I think leaves the processor running real-mode with CS and SS as
//64k selectors, whilst DS,ES,FS and GS are all 4Gb-capable.
//Processor address overrides can now be used to address data anywhere in
//32-bit address space. Code, however remains confined to 64k segments, and
//I also find SS needs to remain a 64k selector, otherwise shit happens.
//Finally, real-mode interrupts during execution in this non-standard mode
//only stack the lower 16-bits of ESP, so at present I run a stack at 0x7c00
//being careful to keep the upper 16 bits of ESP clear.
//I guess that it should be possible to run with a 4Gb stack selector provided
//the real-mode SS:SP points to an address below 1Mb+64k, otherwise interrupts
//in real-mode will be unable to place values on the stack for a valid
//return from interrupt.
//The memcpy  function to relocate data into extended memory using
//this method is run with interrupts inhibited, which works. I don't
//yet fully understand why, but interrupts clearly knock things over.
//Could it be that an interrupt occurring in the middle of a rep movsd with
//an address override cancels the effect of the override on return from
//interrupt?

//using 4Gb real-mode selectors whilst elegant, doesn't seem to work on
//all Intel platforms. So I have reverted to using Int15h/87h extmove
//in this bootstrap, which limits the boot strap load to the first 16Mb
//of memory

#pragma breakpoints on

#include {os.htm}
#include {80x86.htm}
#include {rmode.htm}

//on entry at start boot sector passes the following registers:
//
//   es      0
//   ch      media byte
//   dl      boot drive
//   ax:bx   logical sector for drive data area

//          boot sector is at 0:0x0700
//root directory sector is at 0:0x0900
//        START:: below is at 0:0x0b00

    //message shown by START when the processor check fails
    CH* e15 ="\r\nChaOS needs 80386 processor or better...\r\n...Press any non-shift key to retry boot";

//    BOOTSECTOR*  bs=(BOOTSECTOR*)0x7c00;
    //relocated boot sector image (the boot sector copies itself to bmBOOT)
    BOOTSECTOR*  bs=(BOOTSECTOR*)0x700;

    //values handed over by the boot sector, stored on entry at START
    UL  bsdsc;        //lba of first data sector on boot drive
    UC  bsmed;        //media byte (from ch)
    UC  bsdrv;        //boot drive (from dl)

//ChaOS GDT, loaded just before jump into boot image
//NOTE(review): the 4th initialiser looks like access byte + flags/limit-high
//(0xcf9a = 4Gb 32-bit code, 0xcf92 = 4Gb 32-bit data) - confirm against the
//DESC32 declaration
    DESC32  rmGDT[7]=
           {
            {     0,0,0,   0,     0},
            {     0,0,0,   0,     0},
            {0xffff,0,0,   0xcf9a,0},  //brave, but design states that only
            {     0,0,0,   0,     0},
            {0xffff,0,0,   0xcf92,0},  //2 descriptors are needed
            {     0,0,0,   0,     0},
            {0xffff,0,0x00,0x009b,0}   //USE16 E/R code selector at 0x00000 0x30
           };
    //limit 0x37 = 7 descriptors * 8 bytes - 1
    pdesc   rmGDTboot={0x37, (UL)&rmGDT};

//BIOS GDT for int 0x15/0x87 extmove
//entries 2 and 3 have their base address filled in by extmove() on each call
    DESC32  BIOSGDT[6]=
           {
            {     0,0,0,   0,     0},
            {     0,0,0,   0,     0},  //GDT location
            {0xffff,0,0,   0x0093,0},  //Src data selector BIOSGDT[2]
            {0xffff,0,0,   0x0093,0},  //Dst data selector BIOSGDT[3]
            {     0,0,0,   0,     0},  //BIOS code selector
            {     0,0,0,   0,     0}   //BIOS stack selector
           };

//not necessary, but good practice, a save area for GDTR while in int 0x15
    pdesc   GDTusr;
//temporary IDT for loader
    pdesc   rmIDTboot={0x7ff,  (UL)0x8000};
//this simple record describes the IDT required for real mode
   pdesc    IDTreal={0x7ff, 0};

VD  pmjump(VD);
VD  PMRM(VD);
VD  L02a(VD);
VD  J01(VD);
VD  J02(VD);
VD  R01(VD);
VD  R02(VD);

//RMPM jump labels
VD  RMPM(VD);
VD  RML03(VD);
VD  rmpmjmp(VD);
//RXPM jump labels
VD  RXPM(VD);
VD  RXL03(VD);
VD  rxpmjmp(VD);

//in order for OS to be able to locate the pm/rm switches, their addresses
//are stored just after 'BANNER  ' string
    MODESWITCHES    ms={{'B','A','N','N','E','R',' ',' '},
                        0,(UL)&PMRM,(UL)&RMPM,(UL)&RXPM};




//don't move these data items, or place anything in front of them for now
//note that 'BANNER  ' is dword aligned, not paragraph aligned when loaded

    UL  grloaded;
    UL  retries;      //incremented on each failed disk read attempt
    UI  spsav;        //sp on entry at START; spsav/sssav must stay adjacent,
    UI  sssav;        //RESTART reloads both with a single lss
    UI  spsav1;
    UI  sssav1;
    UC  rmdosPIC1=0xb8;   //NOTE(review): presumably PIC state restored by
    UC  rmdosPIC2=0x0d;   //dosPICs() - confirm against that routine
//    UC* bsptflags=(UC*)rtPTF;

    BOOTINFO bi;


    UI  __pm16CODE=CS16;
    UI  __pm16DATA=DS16;
//    UI  __rm16CODE=0x6000;
//    UI  __rm16CODE=((UI)&PMRM)>>4;
//    UI  __rm16CODE=(UI)&PMRM;
    UI  __rm16CODE=0;
//    UI  __rm16CODE=0x5555;

    UL  rmlastdata=0x5453414c;    //the bytes 'L','A','S','T' in memory order


#pragma rmode16
#pragma align 1

VD  displaystring(CH* str);

VD  call32 main      (VD);
VD  call32 dosPICs   (VD);
VD  call32 dispreturn(VD);

#asm
org 0xb00                 //code will run at 0000:0b00

;START - entry point of LOADER.BIN, reached by far jump from the boot sector.
;  In:   ch = media byte, dl = boot drive, ax:bx = logical sector of data area
;  Saves the entry stack and parameters, checks the processor, resets the
;  boot drive, clears the upper half of esp and edi/ebp/esi, then calls main.
;  The five bytes before bsflags (int3, cli, nop, short jmp) are part of the
;  layout: bsflags must stay at START+5.
START::
        op  0xcc            //either, for debugging,...
        cli               //...or

        nop
        jmp    @F

UI  bsflags  0x0000     //bsflags dword should be at START+5
UC  vflags   0x00
UI  vmode    0x0003     //default BIOS video mode to set
UI  vesamode 0x0101     //default VESA video mode to set 640*480*8
//UI  vesamode 0x0103     //default VESA video mode to set 800*600*8
//UI  vesamode 0x0105     //default VESA video mode to set 1024*768*8
UL  pmvesabank  0
UL  bsendptr    0
CH  osname  "CHAOS   DIR"
CH  osz         0
UL  int10       0
UI  mt8800      0
UI  mte801lo    0
UI  mte801hi    0
UL  memtop      0

@@:
        pusha
        mov     wp spsav,sp         //entry stack, reloaded by RESTART
        mov     wp sssav,ss
        mov     wp bsdsc,bx         //ax:bx = first data sector
        mov     wp bsdsc+2,ax
        mov     bsmed,ch
        mov     bsdrv,dl

//        mov     ax,3
//        int     0x10

        pushf               //do quick processor check before using
        mov     ax,0xf000   //any 386 instructions
        push    ax          //try to set high bits in flags word
        popf
        pushf
        pop     ax          //AX = what was actually stored
        popf                //get the original flags back FIRST...
        and     ah,0xf0     //...then test, so that jnz sees this result
                            //(popf used to follow the and, overwriting ZF and
                            //turning the check into a no-op)
        jnz     chipOK      //got high bits, 386 or higher, go for boot

        mov     si,wp e15   //processor not sufficient, display
        mov     ah,0x0e     //message
@@:
        lodsb
        cmp     al,0
        jz      @F
        mov     bx,7
        int     0x10
        jmp     @B
@@:
        sub     ax,ax
        int     0x16        //wait for a key...
        int     0x19        //...then retry the boot
chipOK:
        mov     ah,0        //reset the boot drive
        mov     dl,bsdrv
        int     0x13
//        cli
//        mov     ax,0x7000
//        mov     ss,ax
//        mov     esp,0xfffc
//        sti
//        sub     esp,esp
        and     esp,0xffff  //keep the upper 16 bits of esp clear
        sub     edi,edi
        sub     ebp,ebp
        sub     esi,esi

        call    main
        int     0x19

;RESTART - re-entry point: zero DS/ES, reload the stack saved at START,
;  call dosPICs, restore the registers pushed at START, set video mode 3,
;  call dispreturn and reboot through INT 19h.
RESTART::
        mov     ax,0
        mov     ds,ax
        mov     es,ax
        lss     sp,dp spsav         //sp from spsav, ss from sssav
        call    dosPICs
        popa
        mov     ax,3
        int     0x10
        call    dispreturn
        int     0x19

;rmDUMMYIRET - do-nothing interrupt handler
rmDUMMYIRET::
        iret
#endasm

#pragma align 4
VD  rmpmrm(VD);
//extmove - copy a block of memory with BIOS int 0x15/ah=0x87.
//  dst, src  linear addresses; only the low 24 bits are placed in the GDT
//  bytes     byte count; halved to a word count, so an odd trailing byte is
//            not copied
//The current GDTR is saved before the call and reloaded afterwards, and
//rmpmrm() is called on the way out (see the note at the end).
//NOTE(review): the status returned in ah is cleared and nothing is returned,
//so a failed move goes unnoticed; the BIOS takes its count from cx, so
//presumably a single call cannot move more than 64k - confirm.
VD  extmove(VD* dst,VD* src,UL bytes)
{
//use with care when mixing with 32-bit rm code
//BIOS int 15/87 uses 16-bit selectors,
//so cannot move data beyond the first 16Mb of memory

    UL  d=(UL)dst;
    UL  s=(UL)src;
    BIOSGDT[2].baseloword=s;            //source base, bits 0-15
    BIOSGDT[2].basemidbyte=s>>16;       //source base, bits 16-23
//    BIOSGDT[2].basehibyte=s>>24;
    BIOSGDT[3].baseloword=d;            //destination base, bits 0-15
    BIOSGDT[3].basemidbyte=d>>16;       //destination base, bits 16-23
//    BIOSGDT[3].basehibyte=d>>24;

asm
    {
        sgdt    fword GDTusr
        mov     ecx,bytes
        shr     ecx,1           //ecx is word count to move
        lea     esi,BIOSGDT     //es:esi is GDT
        mov     ah,0x87
        int     0x15

        sub     ah,ah
        lgdt    fword GDTusr
    }
//also, rm seg limits are reset to 64k-1 by the PM descriptor loads
// in the BIOS call; a call to rmpmrm() fixes this, but be sure
//rmIDT is set up first!
    rmpmrm();
}

//displaystring - write a zero-terminated string to the screen, one character
//at a time, with BIOS int 0x10/ah=0x0e.
//  str  address of the string; the terminating zero is not output
VD  displaystring(CH* str)
{
    while(*str)
        {
         asm
            {
             mov    esi,str      //esi reloaded on every pass
             ea32                //32-bit address size for the lodsb
             lodsb
             mov    ah,0x0e
             mov    bx,7
             int    0x10
            }
         str++;
        }
}
//outstrn - write exactly len characters starting at str to the screen with
//BIOS int 0x10/ah=0x0e; zero bytes are output like any other character.
VD  outstrn(CH* str,UL len)
{
    while(len--)
        {
         asm
            {
             mov    esi,str      //esi reloaded on every pass
             ea32                //32-bit address size for the lodsb
             lodsb
             mov    ah,0x0e
             mov    bx,7
             int    0x10
            }
         str++;
        }
}
//getkey - wait for a keystroke with BIOS int 0x16/ah=0.
//The key code is left in ax with the upper half of eax zero; presumably eax
//is the function result (there is no explicit return) - confirm.
UL  getkey(VD)
{
asm
    {
     sub    eax,eax
     int    0x16
    }
}
//rmmemcmp - compare two byte ranges.
//Leaves eax = 0 when all bytes match (or bytes is 0), -1 when the first
//differing byte of s1 is below that of s2 (unsigned), +1 when it is above.
//Presumably eax is the function result (there is no explicit return).
SL  rmmemcmp(VD* s1,VD* s2,UL bytes)          //ANSI C 7.11.4.1
{
#asm
        mov     esi,s1
        mov     edi,s2
        mov     ecx,bytes
        sub     eax,eax         ;eax=0, and ZF set in case ecx is 0
        ea32
        repz    cmpsb
        jz      @F              ;no difference found
        sbb     eax,eax         ;eax = -1 if s1 byte < s2 byte, else 0
        cmc                     ;carry now set only in the 'above' case
        adc     eax,0           ;-1 stays -1, 0 becomes +1
@@:
#endasm
}
//rmmemcpy - copy bytes from src to dst using 32-bit addresses.
//Whole dwords are moved first, then the remaining 0-3 bytes.
//Interrupts are disabled for the duration and unconditionally re-enabled
//afterwards (see the notes at the top of the bootstrap section).
VD  rmmemcpy(VD* dst,VD* src,UL bytes)        //ANSI C 7.11.2.1
{
#asm
        cli
        mov     esi,src
        mov     edi,dst
        mov     ecx,bytes
        mov     edx,3
        and     edx,ecx         ;edx = bytes mod 4
        shr     ecx,2           ;ecx = whole dwords
        ea32
        rep     movsd
        mov     ecx,edx
        ea32
        rep     movsb
        sti
#endasm
}
//lmemcpy - the loader's memory copy; currently always routed through
//extmove() (BIOS int 0x15/0x87), the rmmemcpy alternatives are disabled.
VD  lmemcpy(VD* dst,VD* src,UL bytes)
{
//    rmmemcpy (dst,src,bytes);
//    if(*bsptflags&ptDBG){rmmemcpy (dst,src,bytes);}
//    else                {extmove(dst,src,bytes);}
    extmove(dst,src,bytes);
}
//VD  memcpy(VD* dst,VD* src,UL bytes)        //ANSI C 7.11.2.1
//{
//#asm
//        mov     esi,src
//        mov     edi,dst
//        mov     ecx,bytes
//        mov     edx,3
//        and     edx,ecx
//        shr     ecx,2
//mc01:
//        ea32
//        movsd
//        loop    mc01
//        mov     ecx,edx
//mc02:
//        ea32
//        movsb
//        loop    mc02
//#endasm
//}

//outdword - print val as an unsigned decimal number (no leading zeros, at
//least one digit) with BIOS int 0x10/ah=0x0e.
VD  outdword (UL val)
{
#asm

    mov     eax,val
    sub     ecx,ecx                 ;digit count
    mov     ebx,10
@@:
    sub     edx,edx
    div     ebx                     ;eax = remaining value, edx = next digit
    push    edx                     ;least significant digit is stacked first
    inc     ecx
    test    eax,eax
    jnz     @B
@@:
    pop     eax                     ;digits come back most significant first
    add     al,0x30
    mov     ah,0x0e
    mov     bx,7
    push    bp                      ;bp kept safe across the video call
    int     0x10
    pop     bp
    loop    @B

#endasm
}
//outhexdword - print val as "0x" + upper-case hex digits (no leading zeros,
//at least one digit) + one trailing space, with BIOS int 0x10/ah=0x0e.
VD  outhexdword(UL val)
{
#asm
    mov     bx,7
    mov     ax,0x0e30               ;'0'
    int     0x10
    mov     bx,7
    mov     ax,0x0e78               ;'x'
    int     0x10

    mov     eax,val
    sub     ecx,ecx                 ;digit count
    mov     ebx,16
@@:
    sub     edx,edx
    div     ebx                     ;eax = remaining value, edx = next digit
    push    edx                     ;least significant digit is stacked first
    inc     ecx
    test    eax,eax
    jnz     @B
@@:
    pop     eax                     ;digits come back most significant first
    cmp     al,9
    jg      hexdigit
    add     al,0x30                 ;0-9
    jmp     digitdone
hexdigit:
    add     al,55  //0x41-10
digitdone:
    mov     ah,0x0e
    mov     bx,7
    int     0x10
    loop    @B

    mov     bx,7
    mov     ax,0x0e20               ;trailing space
    int     0x10
#endasm
}
VD  outhxdword(UL val,UL digits)
{
#asm
//    mov     ax,0x0e30
//    mov     bx,7
//    int     0x10
//    mov     ax,0x0e78
//    mov     bx,7
//    int     0x10

    mov     edx,val
    mov     ecx,digits
    dec     ecx
    and     ecx,7
    inc     ecx
@@:
    cmp     ecx,8
    jz      @F
    rol     eax,4
    inc     ecx
    jmp     @B
@@:
//    mov     ecx,digits
    mov     eax,edx
    rol     eax,4
    and     al,0x0f
    cmp     al,9
    jg      hexdigit
    add     al,0x30
    jmp     digitdone
hexdigit:
    add     al,55  //0x41-10
digitdone:
    mov     ah,0x0e
    mov     bx,7
    int     0x10
    rol     edx,4
    loop    @B

    mov     ax,0x0e20
    mov     bx,7
    int     0x10
#endasm
}
    UL  dcctr=0;
    UL  crypt=0;

VD  decrypt(UC* data,UL sectors)
{
    if(!crypt){return;}
    UL* lptr=(UL*)data;
    UL  ctr=0;

    while(ctrsectors_per_track)+1;
    trk/=bs->sectors_per_track;
//    UL  hd=(trk%bs->heads)&0x3f;
    UL  hd=(trk%bs->heads);
    trk/=bs->heads;
//brk
    UL  bufseg=(UL)buf>>4;
    UL  bufoff=(UL)buf&0x0f;
//    UL  bufseg=(UL)buf>>4;bufseg&=0xf000;
//    UL  bufoff=(UL)buf&0xffff;

if(!quiet)
    {
    displaystring("\rReading ");
    outhexdword(sectors);
    displaystring(" secs,lba ");
    outhexdword(lba);
    displaystring(" buf=");
    outhexdword((UL)buf);

    displaystring(" trk ");
    outhexdword(trk);
    displaystring(" hd ");
    outhexdword(hd);
    displaystring(" sec ");
    outhexdword(sec);
    displaystring(" ");
    }

    UL  retry=8;
asm
    {                             //BIOS int 0x13/0x02 read disk sectors
     push   es
     mov    bx,wp bufseg
     mov    es,bx
     mov    bx,wp bufoff          //es:bs->buffer
//     and    ebx,0xffff

@@:
     mov    al,byte sectors       //al=sectors
     mov    cx,wp trk
     shr    cx,2                  //move top two bits of track into cl
     and    cl,0xc0               //discard lower bits (these go in ch)
     or     cl,byte sec           //cl(7->6)=top bits of track,cl(5->0)=sector
     mov    dh,byte hd            //dh=head
     mov    ch,byte trk           //ch=lower 8 bits of track
     mov    dl,byte drive         //dl=drive 00,01 floppy, 80,81 etc hard disk
     mov    ah,2
//op  0xcc
     int    0x13

     jnc    @F
//op  0xcc
//     mov    ah,0
//     int    0x13

     inc    retries
     dec    retry
     cmp    retry,0
     jnz    @B
     stc
@@:

     pop    es
     pushfd
     pop    eax
     and    eax,1               //return carry flag
     push   eax
    }
}
//checkaddressing: probe whether the dword at `address` can be reached through
//a 32-bit offset and written to.
//address  location to probe
//returns  0 when the location reads differently through [esi] and [si] and
//         accepts a test pattern (the old contents are put back);
//         1 when the two reads match or the pattern does not stick
SL  checkaddressing(UL address)
{
    UL* probe=(UL*)address;

    UL  linear;                 //dword fetched through the 32-bit offset [esi]
    UL  wrapped;                //dword fetched through the 16-bit offset [si]

asm
    {
     mov    esi,address
     mov    eax,dp [esi]
     mov    linear,eax
     mov    eax,dp [si]
     mov    wrapped,eax
    }

    //equal values are taken to mean the 32-bit offset wrapped to 16 bits;
    //this relies on the two locations normally holding different data
    if(linear==wrapped){return 1;}

    UL  saved=*probe;
    *probe='PPTC';
    if(*probe=='PPTC')
        {
         *probe=saved;          //writable: put the old contents back
         return 0;
        }
    return 1;                   //pattern did not stick, memory is not writable
}
//rminp: read one byte from an I/O port.
//port     port number (only DX, the low 16 bits, reaches the IN instruction)
//returns  the byte read, zero-extended; there is no return statement, the
//         result is simply left in EAX by the asm block
UL  rminp(UL port)
{
asm
    {
     sub    eax,eax         //upper 24 bits of the result are zero
     mov    edx,port
     in     al,dx
    }
}
//rmoutp: write one byte to an I/O port.
//port  port number (only DX, the low 16 bits, is used)
//val   value to write (only AL, the low 8 bits, is used)
VD  rmoutp(UL port,UL val)
{
asm
    {
     mov    eax,val
     mov    edx,port
     out    dx,al
    }
}
//dosPICs: put the timer and both interrupt controllers back the way real-mode
//code expects them: timer channel 0 divisor 0, PIC vector bases 0x08 and
//0x70, and the interrupt masks that picsoff() saved in rmdosPIC1/rmdosPIC2.
//The writes below form the controllers' initialisation sequence
//(ICW1..ICW4 followed by the mask) and must stay in this order.
VD  dosPICs(VD)
{
//    rmoutp(0xa0,0x20);    //clear pending IRQs
//    rmoutp(0x20,0x20);    //clear pending IRQs

    //two bytes to the channel 0 counter port; no mode byte is sent first,
    //so this assumes the channel still takes a low byte then a high byte
    rmoutp(0x40,0);
    rmoutp(0x40,0);       //reset timer to 18.2Hz

    rmoutp(0x20,0x11);    //start ICW sequence for both PICS using ICW/OCW select
    rmoutp(0xa0,0x11);    //bit 0x10 along with IC4 (ICW4 write required)

    rmoutp(0x21,0x08);    //ICW2 for PIC1 set interrupt vector base to 0x08
    rmoutp(0xa1,0x70);    //ICW2 for PIC2 set interrupt vector base to 0x70

    rmoutp(0x21,0x04);    //ICW3 for PIC1 enable cascade to PIC2 on IRQ2
    rmoutp(0xa1,0x02);    //ICW3 for PIC2 slave ID for cascade is IRQ2

    rmoutp(0x21,0x01);    //ICW4 for PIC1 Intel mode bit
    rmoutp(0xa1,0x01);    //ICW4 for PIC2 Intel mode bit

    //mask registers last: the values are those read by picsoff()
    rmoutp(0xa1,rmdosPIC2); //restore DOS pic masks
    rmoutp(0x21,rmdosPIC1);

//    rmoutp(0xa0,0x20);    //clear pending IRQs
//    rmoutp(0x20,0x20);    //clear pending IRQs

}
//picsoff: quiesce the hardware before leaving real mode.
//Saves the current PIC masks in rmdosPIC1/rmdosPIC2 (dosPICs() writes them
//back), masks every IRQ on both controllers, turns NMI off, records a
//return-from-shutdown code in CMOS, stops the floppy motors and stores the
//address of RESTART at 0x467.
VD  picsoff(VD)
{
    rmdosPIC1=rminp(0x21);    //remember the masks before overwriting them
    rmdosPIC2=rminp(0xa1);
    rmoutp(0xa1,0xff);        //mask all eight inputs of PIC2
    rmoutp(0x21,0xff);        //mask all eight inputs of PIC1
    rmoutp(0x70,0x8f);        //NMI off, CMOS 0x0f
    rmoutp(0x71,0x0a);        //CMOS(0x0f) is 10, return from shutdown

    rmoutp(0x3f2,0x0c);       //floppy motors off

    //RESTART is stored as one dword: presumably offset in the low word and
    //segment 0 in the high word -- confirm against RESTART's definition
    *(UL*)0x467=(UL)RESTART; //set return from shutdown
}
//inp: read one byte from an I/O port (same operation as rminp).
//port     port number (only DX, the low 16 bits, reaches the IN instruction)
//returns  the byte read, zero-extended; the result is left in EAX by the
//         asm block, there is no return statement
UL  inp(UL port)
{
asm
    {
     mov    edx,port
     sub    eax,eax         //upper 24 bits of the result are zero
     in     al,dx
    }
}
//outp: write one byte to an I/O port (same operation as rmoutp).
//port  port number (only DX, the low 16 bits, is used)
//c     value to write (only AL, the low 8 bits, is used)
VD  outp(UL port,UL c)
{
asm
    {
     mov    edx,port
     mov    eax,c
     out    dx,al
    }
}
VD  gateA20(VD)
{
    while(inp(0x64)&2);
    outp(0x64,0xd1);
    while(inp(0x64)&2);
    outp(0x60,0xdf);
    while(inp(0x64)&2);
//    outp(0x92,2);
}
//VD  gateA20(VD)
//{
//#asm
//        sub     cx,cx
//        in      al,0x92
//        cmp     al,0xff
//        jz      a20_01
//        or      al,2
//        out     0x92,al
//        jmp     a20_04
//a20_01:
//        in      al,0x64             ;gate A20 line on
//        test    al,0x02
//        loopnz  a20_01
//        mov     al,0xd1
//        out     0x64,al
//        sub     cx,cx
//a20_02:
//        in      al,0x64
//        test    al,0x02
//        loopnz  a20_02
//        mov     al,0xdf
//        out     0x60,al
//        sub     cx,cx
//a20_03:
//        in      al,0x64
//        test    al,0x02
//        loopnz  a20_03
//a20_04:
//        nop
//#endasm
//}
//clustertolba: sector address of the first cluster of a directory entry.
//d        directory entry; its clusterlo/clusterhi halves give the cluster
//returns  bsdsc+(cluster-2)*sectors_per_cluster, i.e. clusters are numbered
//         from 2 starting at sector bsdsc
UL  clustertolba(DOSDIRENTRY* d)
{
    UL clus=d->clusterlo+(d->clusterhi<<16);
    return bsdsc+(clus-2)*bs->sectors_per_cluster;
}
#define ftMULTI 4
//verify: decide whether the header sector at c belongs to a bootable image.
//c        first sector of the file; decrypted in place when the encryption
//         flag (bit 0x80 of the id) is present
//returns  0 when the header is acceptable, 1 when it is not
//A header is acceptable when its id is 'PPTC' and it is either a MULTI
//container or carries the STANDALONE flag. The STANDALONE test used to sit in
//an else branch that was only reached when the id was already wrong, so it
//never ran and non-standalone images were reported as bootable, although
//bootstrap() refuses them.
UL  verify(CBJHEADER* c)
{
    if(!crypt){if(c->id&0x80){crypt=1;}}
    decrypt((UC*)c,1);

    if(c->id!='PPTC'){return 1;}                //not a ChaOS image at all
    if(c->ftype==ftMULTI){return 0;}            //container, checked per member
    if(!(c->cflags&STANDALONE)){return 1;}      //cannot be booted on its own
    return 0;
}
//displayversion: print a packed version number as "v<major>.<minor>.<build>".
//ver  bits 31..24 major, bits 23..16 minor, bits 15..0 build
//The major field used `ver>24`, a comparison yielding 0 or 1, instead of the
//shift `ver>>24`.
VD  displayversion(UL ver)
{
    displaystring("v");
    outdword(ver>>24);
    displaystring(".");
    outdword((ver&0xff0000)>>16);
    displaystring(".");
    outdword(ver&0xffff);
}

//greypalette_8: load a 256 entry grey ramp into the video DAC.
//Entry n gets red=green=blue=n>>2; the table is handed to int 0x10 with
//AX=0x1012, BX=0 (first entry), CX=256 (entries) and DX=table offset.
//NOTE(review): the segment half of the table address is not loaded here, so
//ES is presumably already the segment of the stack data -- confirm.
VD  greypalette_8(VD)
{
    UC  dac[768];               //256 entries of three bytes each
    UC* table=&dac;
    UL  level;
    UL  slot=0;

    for(level=0;level<256;level++)
        {
         UC  grey=level>>2;
         dac[slot]=grey;slot++;
         dac[slot]=grey;slot++;
         dac[slot]=grey;slot++;
        }

asm
    {
     mov    edx,table
     mov    ecx,256
     mov    ebx,0
     mov    eax,0x1012
     int    0x10
    }
}

VD  loadgraphic(VD)
{
    DOSDIRENTRY* d=(DOSDIRENTRY*)bmROOT;
    UL n,m;

    UL cluster;
    UL lba;

    for(n=0;n<16;n++)   //look for 'BANNER  ' file in root directory
        {
         if(!rmmemcmp(d,ms.idstring,8))
            {
             cluster=d->clusterlo+(d->clusterhi<<16);
             lba=clustertolba(d);
//             asm
//                {
//                 mov    ax,0x13
//                 int    0x10
//                }
//             greypalette_8();
//             grloaded=1;

//             bsread(bsdrv,lba,125,(UC*)0xa0000,0);

             UL  sectors=125;
             UL  spt2=bs->sectors_per_track;
             UL  readsectors;
             UC* ptr=(UC*)0x60000;

             while(sectors)
                {
                 readsectors=spt2-(lba%spt2);
                 if(readsectors>sectors){readsectors=sectors;}
                 if(bsread(bsdrv,lba,readsectors,ptr,1)){return;}
                 ptr+=512*readsectors;
                 lba+=readsectors;
                 sectors-=readsectors;
                }
             asm
                {
                 mov    ax,0x13
                 int    0x10
                }
             greypalette_8();
             grloaded=1;
             lmemcpy(0xa0000,0x60000,64000);
             vmode=0x13;
             return;
            }
         d++;
        }
}
//makebootinfo: fill in the global BOOTINFO structure bi for the image about
//to be started.
//d       directory entry used for the boot; only the pointer is stored
//lba     sector address recorded in bi.lba
//subdir  non-zero adds biSUBDIRBOOT to the flags
//The drive and partition bytes are picked up from rtDRV and rtPART, the
//trail left by the partition table and boot sector code, so changes to that
//code may break this function.
VD  makebootinfo(DOSDIRENTRY* d,UL lba,UL subdir)
{
    UL  flags=BIDOSDIRVALID;    //bi.usr refers to a valid directory entry
    if(subdir){flags|=biSUBDIRBOOT;}

    bi.drive    =*(UC*)rtDRV;
    bi.partition=*(UC*)rtPART;
    bi.lba      =lba;
    bi.usr      =(VD*)d;
    bi.flags    =flags;
}
SL  bootstrap(DOSDIRENTRY* d,UL subdir)
{
//    UL cluster=d->clusterlo+(d->clusterhi<<16);
//    UL lba=bsdsc+(cluster-2)*bs->sectors_per_cluster; //clustertoLBA
//brk
    UL  cluster=d->clusterlo+(d->clusterhi<<16);

    UL  lba=clustertolba(d);
    UL  bootlba=lba;

    UC  mbuf[2048];
    UC  buf [512];
    UL  quiet=1,multi=0,mhdrlen=0x200;
    SL  bytes=d->size;
    dcctr=0;crypt=0;

BSL01:
    if(bsread(bsdrv,lba,1,buf,1))
        {
         displaystring("\r\nError reading ChaOS header");return 1;
        }
    CBJHEADER* c =(CBJHEADER*)buf;
    CBJMULTI*  cm=(CBJMULTI*)mbuf;
    if(!crypt){if(c->id&0x80){crypt=1;}}
    decrypt((UC*)c,1);
    if(c->id!='PPTC')
        {
BSL02:   displaystring("\r\n'CHAOS' is not a valid bootable ChaOS system file");return 1;
        }
    else
        {
         switch(c->ftype)
            {
             case FTMULTI:
                lmemcpy(cm,c,512);
                lba++;
                while(mhdrlenhdrlen)
                    {
                     if(bsread(bsdrv,lba,1,mbuf+mhdrlen,1))
                        {
                         displaystring("\r\nError reading MULTIXEC header");return 1;
                        }
                     decrypt(mbuf+mhdrlen,1);
                     mhdrlen+=0x200;
                     lba++;
                    }
                multi=1;
                if(!(bsflags&bsALLMULTI))
                    {
                     bytes=cm->lr[0].sectors*0x200;
                    }
                goto BSL01;
             default: goto BSL02;
             case FTXEC:
                if(!(c->cflags&STANDALONE)){goto BSL02;}
                break;
            }
        }

//    if(checkaddressing(c->linear))
//        {
//         displaystring("\r\nUnable to access extended memory");return 1;
//        }

    displaystring("\r\nLoading ChaOS ");
    displayversion(c->ver);

//make OSflags (eventually to be replaced by c->rtflags
    UL osflags=6;
    if(bsflags&bsSYM){osflags=7;}

    UL  minsize=c->hdrlen+c->clen+c->dlen+c->fastrelocs*4;
    if(c->file>minsize){osflags|=1;bsflags|=1;}

////for non debug boot, c->linear needs to be below 16Mb for int 15h/87h to work
//    if(!(*bsptflags&ptDBG)){if(c->linear>0x800000){c->linear=0x800000;}}
////(that's horrible, roll on code to get 4Gb selectors working for a cold boot!)

    UL  extmemaddress=c->linear;
    UC* diskbuf=(UC*)0x50000;

//fill in new rtflags field (eventually to replace OSflags)
//and values passed in CBJHEADER
    c->rtflags=osflags;
    c->rtflags|=rtSTACKALREADY;
    makebootinfo(d,bootlba,subdir);
    c->bootinfo=&bi;
    c->rtflags|=rtBOOTINFOVALID;
    c->rmPIC1=rminp(0x21);
    c->rmPIC2=rminp(0xa1);
    c->heap=memtop;

//move MULTI header into linear memory
    if(multi)
        {
         lmemcpy(c->linear-mhdrlen,mbuf,mhdrlen);
         c->rtflags|=rtMULTIXEC;
        }
//move modified header into boot space
//    lmemcpy(c->linear,buf,512);
//    rmmemcpy(c->linear,buf,512);
    extmemaddress+=512;
    bytes-=512;                     //to reduce sectors to read by 1
    lba++;

//    extmove((VD*)0x100000,buf,512);
//    rmmemcpy((VD*)0x100000,buf,512);

    UL  sectors=(bytes+511)/512;    //Note bytes is now file size less header
//    UL  spt2=bs->sectors_per_track*2;
    UL  spt2=bs->sectors_per_track;
    UL  readsectors;
    UL  n;

    UI  esq,diq,axq,addrq;
    UL  addr;


    if(!(bsflags&bsNOGR)){loadgraphic();}

    if(!grloaded){displaystring("->");}

    while(sectors)
        {
         readsectors=spt2-(lba%spt2);
         if(readsectors>sectors){readsectors=sectors;}
         if(bsread(bsdrv,lba,readsectors,diskbuf,quiet))
            {
//             for(n=0;n");}
        }
    if(!grloaded){displaystring("\r\nBoot Image loaded successfully");}
    else         {esq=0;}

    asm
        {
         push   es
         mov    ax,0x4f0a
         sub    bx,bx
         int    0x10
         mov    bx,es
         pop    es
         cmp    ax,0x4f
         jnz    NOVESABANK     //no vb extensions

         mov    esq,bx
         mov    axq,ax
         mov    diq,di

         push   es
         mov    es,bx
         mov    ax,es:[di]
         pop    es
         mov    addrq,ax
         mov    vflags,2
        }

    UL  pmaddr=esq;
    pmaddr<<=4;
    pmaddr+=diq;
    pmaddr+=addrq;
    pmvesabank=(VD(*)(VD))pmaddr;

NOVESABANK:

    asm
        {
         mov    ax,0x4f00
         mov    edi,bsendptr
         mov    dp [di],'2EBV'
         int    0x10            //VBE get controller info, 'VBE2' set
         cmp    ax,0x004f
         jnz    NOVESA
         add    di,0x200
         mov    ax,0x4f01
         mov    cx,0x101
         sub    bx,bx
         sub    dx,dx
         int    0x10            //VBE get mode info
         cmp    ax,0x004f
         jnz    NOVESA
        }

    bsflags|=bsVESADEV;

    if(bsflags&bsVESA)
        {
//         UL  vkey=getkey()&0xff;
//         if(vkey==0x1b){goto NOVESA;}
         asm
            {
             mov    ax,0x4f00
             mov    edi,bsendptr
             mov    dp [di],'2EBV'
             int    0x10            //VBE get controller info, 'VBE2' set
             cmp    ax,0x004f
             jnz    NOVESA
             add    di,0x200
             mov    ax,0x4f01
             mov    cx,vesamode
             int    0x10            //VBE get mode info
             cmp    ax,0x004f
             jnz    NOVESA
             mov    ax,0x4f02
             mov    bx,vesamode
             or     bx,0x4000
             sub    cx,cx
             sub    dx,dx
//             mov    bx,0x101
             int    0x10            //VBE set mode
             cmp    ax,0x004f
             jnz    NOVESA
             add    di,0x100
             mov    ax,0x4f09
             mov    bx,0x0001
             mov    cx,0x0100
             mov    dx,0x0000
             int    0x10            //VBE get palette data
             cmp    ax,0x004f
             jnz    NOVESA
             or     vflags,1
            }
        }
    else
        {
         asm
            {
             NOVESA:
//             mov    ax,vmode
//             int    0x10
             mov    vflags,0
            }
        }

//    UL  m=0x100000;            //wait till keyboard clears
//    while(m--);

    UL entrypoint=c->boot+c->linear;
    c->vmstart=3;
    c->vmcur=vmode;
    UL osbase=c->linear;

//move modified header into boot space
    lmemcpy(c->linear,buf,512);

asm{cli}
    picsoff();
    gateA20();
    UL  m=0x100000;            //wait till keyboard clears
    while(m--);
asm
    {
     cli
     mov    ebx,osbase
     mov    edx,bytes
     mov    eax,entrypoint
     sub    esi,esi
     mov    si,&pmjump      //NB this works only if code is executing
     mov    dp [esi],eax    //in first 64k of memory

     lgdt   fword rmGDTboot
     lidt   fword rmIDTboot

     sub    edi,edi         //DOS psp is zero for native bootstrap
     sub    esi,esi
     sub    eax,eax
     sub    ecx,ecx

//     mov    ax,ss
//     shl    eax,4
//     add    eax,0x70000
//     add    esp,eax

     mov    eax,0x7f800
     mov    esp,eax

     mov    eax,osflags     //OSflags to DOSLOAD|BIOSBOOT
//     or     eax,rtSTACKALREADY

//     mov    esp,0x80000
//     sub    ebp,ebp

//move real-mode stack so far to
//     mov    ecx,0x7c00
//     sub    ecx,esp
//     shr    ecx,2
//     mov    edi,0x7fffc
//     mov    esi,0x7bfc
//     std
//     ea32
//     rep    movsd
//     cld
//     mov    esp,edi

     mov    esi,cr0
     or     esi,1
     mov    cr0,esi

//     jmp    @F
//@@:

     op 0xeb 0x00

     mov    si,DS32
     mov    es,si
     mov    ss,si
     mov    ds,si
     mov    fs,si
     mov    gs,si

//     op 0x67 0x66 0xea
     op 0x66 0xea
pmjump:
     op 0x90 0x90 0x90 0x90 CS32 0x00

    }
}

//int03: interrupt handler that prints a register dump on the BIOS teletype;
//rminitRMDBG() stores its address in the INT 3 vector.
//i  register image supplied to the handler (general registers, flags, cs, ip)
//DS, ES and SS are read inside the handler, as are the GDT and IDT registers.
//NOTE(review): whether the handler prologue has already changed DS/ES by the
//time they are read here cannot be seen from this function -- confirm.
//The second argument of outhxdword is the number of hex digits requested.
//When bsKEY is set in bsflags the handler waits for a key before returning.
VD  interrupt int03(RMIREGS i)
{
    UL  _ds,_es,_ss;
    pdesc gdtc;
    pdesc idtc;
asm
    {
     //capture the segment registers, zero-extended to 32 bits
     sub    eax,eax
     mov    ax,ds
     mov    _ds,eax
     mov    ax,es
     mov    _es,eax
     mov    ax,ss
     mov    _ss,eax
     //store the descriptor table registers into the two pdesc locals
     ea32
     sgdt   fp gdtc
     ea32
     sidt   fp idtc
    }
    displaystring("\r\nEAX=");
    outhxdword(i.eax,8);
    displaystring("EBX=");
    outhxdword(i.ebx,8);
    displaystring("ECX=");
    outhxdword(i.ecx,8);
    displaystring("EDX=");
    outhxdword(i.edx,8);
    displaystring("ESI=");
    outhxdword(i.esi,8);
    displaystring("EDI=");
    outhxdword(i.edi,8);
    displaystring("\r\nEBP=");
    outhxdword(i.ebp,8);
    displaystring("ESP=");
    outhxdword(i.esp,8);
    displaystring("FLG=");
    outhxdword(i.flags,4);
    displaystring("CS=");
    outhxdword(i.cs,4);
    displaystring("IP=");
    outhxdword(i.ip,4);
    displaystring("\r\nDS=");
    outhxdword(_ds,4);
    displaystring("ES=");
    outhxdword(_es,4);
    displaystring("SS=");
    outhxdword(_ss,4);
    displaystring("GDT=");
    outhxdword(gdtc.linadd,8);
    displaystring(",");
    outhxdword(gdtc.limit,4);
    displaystring("\r\nIDT=");
    outhxdword(idtc.linadd,8);
    displaystring(",");
    outhxdword(idtc.limit,4);

    if(bsflags&bsKEY){getkey();}
}

//rminitRMDBG: install int03() as the real-mode INT 3 handler by storing its
//address in interrupt vector 3 (4 bytes per vector, so address 0x0c).
//The address used to be written `(UL*)4*3`, where the cast applies to the 4
//alone and the result depends on how the compiler multiplies a pointer; the
//product is now formed first and then cast.
//The handler address is stored as one dword: presumably offset in the low
//word and segment 0 in the high word -- confirm.
VD  rminitRMDBG(VD)
{
//brk
    UL* vptr=(UL*)(4*3);
    UL n=(UL)int03;
    *vptr=n;

//asm {
//     lidt   fword IDTreal
//    }
}
//rmmakeTrapGate: fill in one gate descriptor.
//d    descriptor to write
//sel  code selector the gate transfers to
//off  32-bit handler offset, split into low and high words
//The attribute word is fixed at 0x8f00
//(Programming the 80386, Crawford/Gelsinger, p460).
VD  rmmakeTrapGate(SYSDESC32* d,UL sel,UL off)
{
    d->selector    =sel;
    d->offsetloword=off;
    d->offsethiword=off>>16;
    d->attribute   =0x8f00;
}
//rminitIDT: build a table of 256 gates at address 0x8000, every one of them
//pointing at rmDUMMYIRET through selector 8.
VD  rminitIDT(VD)
{
    SYSDESC32* gate=(SYSDESC32*)0x8000;
    UL  remaining=256;

    while(remaining)
        {
         rmmakeTrapGate(gate,8,(UL)&rmDUMMYIRET);
         gate++;
         remaining--;
        }
}
VD  dispreturn(VD)
{
    displaystring("Back in the ChaOS bootstrap -- press non-shift key to boot again\r\n");
    getkey();
}
//rmpmrm: real mode -> protected mode -> real mode round trip.
//Loads the boot GDT, sets CR0 bit 0, loads selector DS32 into ES, DS, FS and
//GS (SS is deliberately left alone), clears CR0 bit 0 again and restores the
//real-mode segment values saved on the stack.
//NOTE(review): interrupts are neither disabled nor re-enabled here; the
//caller presumably guarantees none can arrive in between (main() calls
//rminitIDT() first "or rmpmrm() crashes") -- confirm.
VD  rmpmrm(VD)
{
//this is a golden nugget
//RM->PM->RM sequence, to load data selectors with
//4Gb segment limits
asm
    {
     lgdt   fword rmGDTboot

     //real-mode segment values, popped again at the end
     push   ds
     push   es
     push   fs
     push   gs


     //set bit 0 of CR0: processor enters protected mode
     mov    esi,cr0
     or     esi,1
     mov    cr0,esi

     //0xeb 0x00 is a short jump to the next instruction
     op 0xeb 0x00

     mov    si,DS32
     mov    es,si
//     mov    ss,si
     mov    ds,si
     mov    fs,si
     mov    gs,si

     //clear bit 0 of CR0: back to real mode
     mov    esi,cr0
     and    esi,not 1
     mov    cr0,esi

     op 0xeb 0x00

     pop    gs
     pop    fs
     pop    es
     pop    ds

    }
}
//VD  rmpmrm(VD)
//{
////RM->PM->RM sequence, to load data selectors with
////4Gb segment limits
//
//asm
//    {
//     cli
//
//     mov    si,&R01+3           //PM CS is 0x18, 16bit code selector base 0
//     mov    ax,0x18
//     mov    [si],ax             //patch this into pre-fetch purge jump
//
//     mov    si,&R02+3           //patch CS into
//     mov    ax,cs               //real-mode pre-fetch purge jump
//     mov    [si],ax
//     dec    si
//     dec    si
//     sub    wp [si],bmLOAD
//
//     mov     wp spsav1,sp
//     mov     wp sssav1,ss
//
//     lgdt   fword rmGDTboot
////     lidt   fword rmIDTboot
//
//     mov    esi,1
//     mov    cr0,esi
//
////     op     eb 00
////     jmp    rmpm01
////rmpm01:
//#pragma pmode16
//R01::
//    jmp     far RPR01
//RPR01:
//
//     mov    di,DS32
//     mov    es,di
//     mov    ss,di
//     mov    ds,di
//     mov    fs,di
//     mov    gs,di
//
//     dec    si
//     mov    cr0,esi
//
////     op     eb 00
////     jmp    rmpm02
////rmpm02:
//#pragma rmode16
//R02::
//    jmp     far RPR02
//RPR02:
//
//     mov     ax,0
//     mov     ds,ax
//     mov     es,ax
//     lss     sp,dp spsav1
//
//     sti
//    }
//}


//pmrm switch section
//new version using self-modifying code to patch far jumps used to purge
//processor pre-fetch after mode switch; 16-bit PM code selector is passed in
//in register CS by the far call which gets us here;
//data selector is ASSUMED to be code+0x10
//the only other thing which needs to be passed in is the real mode CS;
//at the moment this is zero, as this loader is located in the first 64k of
//low memory, so by default the code is correct, but if this switch design
//is located elsewhere, then segCS needs to be patched for the line:
//
//jmp   far L02

//PMRM: protected-mode to real-mode thunk, entered by a far call from 16-bit
//protected mode with the real-mode CS in AX.
//sequence:
// 1 save the PM registers on the PM stack and SS:ESP in r.pmss/r.pmesp
// 2 copy the 8 code bytes in r.codebyte over the nops at L02a and patch the
//   segment words of the far jumps J01 and J02
// 3 clear the PE bit of CR0, far jump to L01 (real mode)
// 4 load the PIC masks from r.rmPIC1/r.rmPIC2 and the registers from d
// 5 run the patched bytes, store the resulting registers back into d
// 6 set the PE bit again, far jump to L02, restore the PM stack and
//   registers and return with a 32-bit far return
#asm
#pragma align 16
PMRM::
    cli
    jmp     L00
//data kept inside the code section; DPMIREGS d directly follows rmstack, so
//the real-mode stack set up at L01 grows down from d into rmstack
UC  rmstack[RMSTACKSIZE]
DPMIREGS d
RMINFO   r
L00:
    pushad                  //save general registers
    push    ds
    push    es
    push    fs
    push    gs              //save segment registers

    mov     bx,cs           //get CS
    add     bx,0x10         //pm16 data selector MUST be at CS+0x10 in the GDT
    mov     ds,bx           //make data addressable
    mov     r.pmss,ss
    mov     r.pmesp,esp     //save PM stack pointer
    mov     ss,bx           //load 64k selector into SS
                            //(otherwise RM stack will malfunction)

    //self-modifying code: all the writes below go through DS, so the data
    //selector has to alias this code
    mov     si,&L02a            //8 nops below are replaced by
    mov     ebx,dp r.codebyte   //the code bytes passed in in RMINFO
    mov     [si],ebx
    mov     ebx,dp r.codebyte+4
    mov     [si+4],ebx

    mov     si,&J01+3           //real-mode CS is passed by caller in EAX
    mov     [si],ax             //patch this into real-mode pre-fetch purge jump

    mov     si,&J02+3           //patch CS into
    mov     ax,cs               //protected-mode pre-fetch purge jump
    mov     [si],ax

    //masks are carried across the mode switch in BL/BH and written at L01
    mov     bl,byte r.rmPIC1   //rm PIC masks passed in RMINFO
    mov     bh,byte r.rmPIC2   //rm PIC masks passed in RMINFO

    mov     si,ds
    mov     es,si
    mov     fs,si
    mov     gs,si           //load segment registers with 64k segment limit

//    jmp     L02

    mov     eax,cr0
    and     eax,~FLGPE
    mov     cr0,eax         //processor is now back in real mode, I think

#pragma rmode16
J01::
    jmp     far L01
L01:
    sub     esp,esp
    mov     ax,cs
    mov     ss,ax
    lea     sp,d        //stack switch: sp points to DPMIREGS

    mov     al,bl
//    or      al,2        //inhibit kbd
    out     0x21,al
    mov     al,bh
    out     0xa1,al     //set rmPIC masks as passed in RMINFO

    //pop order fixes the layout read from d: 8 dwords, flags, es, ds, fs, gs
    popad
    popf
    pop     es
    pop     ds
    pop     fs
    pop     gs          //load registers from DPMIREGS

    sti
//the 8 bytes below are overwritten at run time with r.codebyte
L02a:
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop

    //exact mirror of the pops above, so d is refilled in the same layout
    push    gs
    push    fs
    push    ds
    push    es
    pushf
    pushad                  //store return registers back in DPMIREGS

    //NOTE(review): interrupts were enabled by the sti above and there is no
    //cli before PE is set again, unless the patched bytes or the flags they
    //leave behind disable them -- confirm this window is safe
    mov     eax,cr0
    or      eax,FLGPE
    mov     cr0,eax         //processor back to protected mode

#pragma pmode16
J02::
    jmp     far L02
L02:
    mov     ax,cs           //DS is CS+0x10
    add     ax,0x10
    mov     ds,ax           //make data addressable...
    lss     esp,fp r.pmesp  //to retrieve saved PM stack pointer
L03:

    //undo the pushes made at L00
    pop     gs
    pop     fs
    pop     es
    pop     ds

//    op      0xcc

    popad

    op      0x66
    retf                    //long retf
#endasm

//rmpm switch section
//new version using self-modifying code to patch far jumps used to purge
//processor pre-fetch after mode switch; 16-bit PM code selector is set
//above to be 0x68, but eventually will be passed in the MODESWITCHES
//structure by the caller
//PM code seg:off32 to jump to is placed in rmr.ss:rmr.esp prior to using
//this switch

//RMPM: real-mode to protected-mode thunk.
//Entered in real mode with ip, cs and flags on the caller's stack (they are
//lifted from there and the routine leaves through iret).
//outbound: save the caller's SS:SP and registers in rmd, patch the far jump
//at rmpmjmp with rmr.pmesp (offset) and rmr.pmss (selector), record RML03 in
//rmr.rmoff/rmr.rmseg as the way back, set the PE bit, load DS32 into all
//data segment registers, set ESP to 0x60000 and jump.
//return (entered at RML03 in 16-bit protected mode): load DS16 into SS, FS
//and GS, clear the PE bit, reload the registers from rmd, write ip, cs and
//flags (with bit 0x200 set) back to the caller's stack and iret.
#asm
#pragma align 16
#pragma rmode16
RMPM::
    cli
    jmp     RML01
//data kept inside the code section; offsets used below, as implied by the
//push order starting at rmd.ip: 0x00 pushad image, 0x20 flags, 0x22 es,
//0x24 ds, 0x26 fs, 0x28 gs, 0x2a ip, 0x2c cs
UC  rmrmstack[RMSTACKSIZE]
DPMIREGS rmd
RMINFO   rmr
RML01:
    mov     cs:rmd.ss,ss
    mov     cs:rmd.sp,sp

    push    cs
    pop     ss
    lea     sp,rmd.ip   //stack switch

    push    gs          //store registers state in DPMIREGS
    push    fs
    push    ds
    push    es
    pushf
    pushad

    push    cs
    pop     ds

    lea     bp,rmd
    //loads DI from rmd.sp and ES from the word after it (rmd.ss, presumably)
    les     di,dp rmd.sp    //point es:di to caller stack
    mov     ax,es:[di]      //lift caller ip
    mov     [bp+0x2a],ax    //store
    mov     ax,es:[di+2]    //lift caller cs
    mov     [bp+0x2c],ax    //store
    mov     ax,es:[di+4]    //lift caller flags
    mov     [bp+0x20],ax    //store
//DPMIREGS register state save now complete

//move pmss:pmesp pointer into code stream for jump to PM
    mov     si,&rmpmjmp
    mov     eax,dp rmr.pmesp
    mov     [si],eax
    mov     ax,dp rmr.pmss
    mov     [si+4],ax
//store address of return into this thunk in RMINFO
    mov     si,&RML03
    mov     rmr.rmoff,si
    mov     si,cs
    mov     rmr.rmseg,si

    mov     eax,cr0
    or      eax,FLGPE
    mov     cr0,eax         //processor back to protected mode

    //NOTE(review): EAX still holds the CR0 image in its upper half when AX is
    //replaced, and the only consumer visible here (mov esp,eax) is commented
    //out below -- confirm whether the PM target relies on this value
    mov     ax,ss           //take note of current real seg
    shl     eax,4

    jmp     RML02
RML02:                      //clear pipeline

                            //load ChaOS segment registers
    mov     si,DS32     ; si is 4Gb linear data seg
    mov     ss,si       ; load ss
    mov     fs,si       ;  "   fs
    mov     gs,si       ;  "   gs
    mov     es,si       ;  "   es
    mov     ds,si       ;  "   ds

//    mov     ax,sp
//    mov     esp,eax
    mov     esp,0x60000     //place stack for RM debugger here for now

    op      0x66 0xea       //jump to protected mode here
//operand of the far jump above, filled in at run time from rmr
rmpmjmp:
UL  jmpoff
UI  jmpsel

//return from protected mode here
#pragma pmode16
RML03:
    op  0x66
    jmp far RML03a
RML03a:
    mov     ax,DS16         //reload PM seg based at linear address 0
    mov     ss,ax           //ss MUST be loaded with such a 64k selector
//    mov     ds,ax         //before switch to real mode, otherwise
//    mov     es,ax         //RM stack won't work and system hangs
    mov     fs,ax
    mov     gs,ax

    mov     eax,cr0
    and     eax,~FLGPE
    mov     cr0,eax         //processor back to real mode

#pragma rmode16
    jmp far RML04
RML04:

//begin restoration of register state from DPMIREGS
//(CS is current the only valid register)

    mov     ax,cs
    mov     ss,ax
    mov     fs,ax
    mov     gs,ax
    mov     ds,ax

    //SS is loaded a second time here; the value is unchanged
    mov     ss,ax
    lea     sp,rmd          //switch to rmrmstack

    lea     bp,rmd
    les     di,dp rmd.sp    //point es:di to caller stack

                            //build IRET return frame
    mov     ax,[bp+0x2a]    //get saved ip
    mov     es:[di],ax      //update caller stack
    mov     ax,[bp+0x2c]    //get saved cs
    mov     es:[di+2],ax    //update caller stack
    mov     ax,[bp+0x20]    //get saved flags
    or      ax,0x200
    mov     es:[di+4],ax    //update caller stack

    //registers come from rmd, so changes made to rmd on the PM side are
    //what the real-mode caller gets back
    popad
    add     sp,2            //flags will be restored by the IRET below!
    pop     es
    pop     ds
    pop     fs
    pop     gs
                            //important! clear upper 16 bits of PM stack pointer
    sub     esp,esp         //(in case 32-bit stack frame is used in real mode)
    mov     ss,cs:rmd.ss
    mov     sp,cs:rmd.sp


    iret

#endasm

//rxpm switch section
//identical in function to rmpm; this extra switch is intended to provide
//a real-mode GP fault handler.
//in addition it places a byte 0x13 at 0000:8000 to indicate it has been triggered

//RXPM: second real-mode to protected-mode thunk with its own stack (rxrmstack),
//register image (rxd) and RMINFO (rxr).
//Entered in real mode with ip, cs and flags on the caller's stack; leaves
//through iret.
//outbound: save the caller's SS:SP and registers in rxd, patch the far jump
//at rxpmjmp with rxr.pmesp (offset) and rxr.pmss (selector), record RXL03 in
//rxr.rmoff/rxr.rmseg as the way back, store the marker byte 0x13, set the PE
//bit, load DS32 into all data segment registers, set ESP to 0x60000 and jump.
//return (entered at RXL03 in 16-bit protected mode): load DS16 into SS, FS
//and GS, clear the PE bit, reload the registers from rxd, write ip, cs and
//flags (with bit 0x200 set) back to the caller's stack and iret.
#asm
#pragma align 16
#pragma rmode16
RXPM::
    cli
    jmp     RXL01
//data kept inside the code section; offsets used below, as implied by the
//push order starting at rxd.ip: 0x00 pushad image, 0x20 flags, 0x22 es,
//0x24 ds, 0x26 fs, 0x28 gs, 0x2a ip, 0x2c cs
UC  rxrmstack[RMSTACKSIZE]
DPMIREGS rxd
RMINFO   rxr
RXL01:
    mov     cs:rxd.ss,ss
    mov     cs:rxd.sp,sp

    push    cs
    pop     ss
    lea     sp,rxd.ip   //stack switch

    push    gs          //store registers state in DPMIREGS
    push    fs
    push    ds
    push    es
    pushf
    pushad

    push    cs
    pop     ds

    lea     bp,rxd
    //loads DI from rxd.sp and ES from the word after it (rxd.ss, presumably)
    les     di,dp rxd.sp    //point es:di to caller stack
    mov     ax,es:[di]      //lift caller ip
    mov     [bp+0x2a],ax    //store
    mov     ax,es:[di+2]    //lift caller cs
    mov     [bp+0x2c],ax    //store
    mov     ax,es:[di+4]    //lift caller flags
    mov     [bp+0x20],ax    //store
//DPMIREGS register state save now complete

//move pmss:pmesp pointer into code stream for jump to PM
    mov     si,&rxpmjmp
    mov     eax,dp rxr.pmesp
    mov     [si],eax
    mov     ax,dp rxr.pmss
    mov     [si+4],ax
//store address of return into this thunk in RMINFO
    mov     si,&RXL03
    mov     rxr.rmoff,si
    mov     si,cs
    mov     rxr.rmseg,si

    //marker byte, written through DS (= CS here)
    //NOTE(review): the operand is written 8000 without a 0x prefix while the
    //section comment speaks of 0000:8000, and rminitIDT() builds its gate
    //table at 0x8000 -- confirm which address is intended
    mov     byte [8000],0x13

    mov     eax,cr0
    or      eax,FLGPE
    mov     cr0,eax         //processor back to protected mode

    //NOTE(review): EAX still holds the CR0 image in its upper half when AX is
    //replaced, and the only consumer visible here (mov esp,eax) is commented
    //out below -- confirm whether the PM target relies on this value
    mov     ax,ss           //take note of current real seg
    shl     eax,4

    jmp     RXL02
RXL02:                      //clear pipeline

                            //load ChaOS segment registers
    mov     si,DS32     ; si is 4Gb linear data seg
    mov     ss,si       ; load ss
    mov     fs,si       ;  "   fs
    mov     gs,si       ;  "   gs
    mov     es,si       ;  "   es
    mov     ds,si       ;  "   ds

//    mov     ax,sp
//    mov     esp,eax
    mov     esp,0x60000     //place stack for RM debugger here for now

    op      0x66 0xea       //jump to protected mode here
//operand of the far jump above, filled in at run time from rxr
rxpmjmp:
UL  rxjmpoff
UI  rxjmpsel

//return from protected mode here
#pragma pmode16
RXL03:
    op  0x66
    jmp far RXL03a
RXL03a:
    mov     ax,DS16         //reload PM seg based at linear address 0
    mov     ss,ax           //ss MUST be loaded with such a 64k selector
//    mov     ds,ax         //before switch to real mode, otherwise
//    mov     es,ax         //RM stack won't work and system hangs
    mov     fs,ax
    mov     gs,ax

    mov     eax,cr0
    and     eax,~FLGPE
    mov     cr0,eax         //processor back to real mode

#pragma rmode16
    jmp far RXL04
RXL04:

//begin restoration of register state from DPMIREGS
//(CS is current the only valid register)

    mov     ax,cs
    mov     ss,ax
    mov     fs,ax
    mov     gs,ax
    mov     ds,ax

    //SS is loaded a second time here; the value is unchanged
    mov     ss,ax
    lea     sp,rxd          //switch to rxrmstack

    lea     bp,rxd
    les     di,dp rxd.sp    //point es:di to caller stack

                            //build IRET return frame
    mov     ax,[bp+0x2a]    //get saved ip
    mov     es:[di],ax      //update caller stack
    mov     ax,[bp+0x2c]    //get saved cs
    mov     es:[di+2],ax    //update caller stack
    mov     ax,[bp+0x20]    //get saved flags
    or      ax,0x200
    mov     es:[di+4],ax    //update caller stack

    //registers come from rxd, so changes made to rxd on the PM side are
    //what the real-mode caller gets back
    popad
    add     sp,2            //flags will be restored by the IRET below!
    pop     es
    pop     ds
    pop     fs
    pop     gs
                            //important! clear upper 16 bits of PM stack pointer
    sub     esp,esp         //(in case 32-bit stack frame is used in real mode)
    mov     ss,cs:rxd.ss
    mov     sp,cs:rxd.sp


    iret

#endasm

#pragma align 4             //pragmas to correctly finish compilation
#pragma rmode16
UL  getmemtop(VD)
{
    UL  memt=0;
//first call int 0x15/0x88, as a banker, maximum memory report 64Mb less a bit
asm
    {
     sub    eax,eax
     mov    ah,0x88
     int    0x15
     mov    mt8800,ax
    }
//try int 0x15/0xe801, for later BIOS
asm
    {
     sub    eax,eax
     mov    ax,0xe801
     int    0x15
     jc     @F
     mov    mte801lo,ax
     mov    mte801hi,bx
@@:
    }
    if(mte801hi)
        {
         memt=mte801hi<<16;  //memtop is 64k aligned
        }
    else
        {
         memt=mt8800<<10;    //memtop is 1k aligned
        }
    return memt;
}

//main - locate a boot image and start it
//
//after one-off setup (end marker written after rmlastdata, dword at 0x40 saved
//in int10, A20/NMI/IDT/segment limit preparation, memtop probed) the directory
//entries at bmROOT are searched for osname:
//  -11 character match flagged ftSUBDIR: one sector of it is read to 0x9000,
//   up to 9 entries after . and .. are listed, and an image is chosen by
//   number; Esc (0x1b) gives up, Return (0x0d) falls through to the root
//   image search at L02
//  -11 character match without ftSUBDIR: booted directly
//  -L02: any of the first 16 entries matching the first 8 characters of osname
//   is booted
//bootstrap() only comes back on failure; all failure paths end at ERROR, which
//prompts, waits for a key and returns to the caller
VD  main(VD)
{
//brk
    UL* rmld=&rmlastdata;
    rmld++;*rmld='DNE!';    //marker dword stored just after rmlastdata
    bsendptr=(UL)rmld+4;    //first address past the marker
    int10=*(UL*)0x40;

    gateA20();          //A20 line on via kbd controller, port92 set to 2
    rmoutp(0x70,0x80);  //NMI off

    rminitIDT  ();      //do this or rmpmrm() crashes
    rmpmrm();           //set rm 4Gb seg limits

//  if(!(*bsptflags&ptDBG)){rmpmrm();}
    if(   bsflags&bsTRC )  {rminitRMDBG();}

    CH  fname[12];
    DOSDIRENTRY* dir=(DOSDIRENTRY*)bmROOT;
    DOSDIRENTRY* d;
//    CH* osname=(CH*)START+9;
//    CH* osname=(CH*)n1;
//    rmmemcpy(fname,osname,11);
    lmemcpy(fname,osname,11);
    fname[11]=0;            //fname is the terminated copy used for messages
    UL  lba,key;
    UC  buf[512];
    CBJHEADER* c=(CBJHEADER*)buf;   //header view of the sector read into buf
    memtop=getmemtop();

    UL n;
//look first for a CHAOS.DIR subdirectory in the first sector of root dir
    for(n=0;n<16;n++)
        {
         if(!rmmemcmp(dir,osname,11))
            {
             if(dir->attr&ftSUBDIR)
                {
                 lba=clustertolba(dir);
                 if(bsread(bsdrv,lba,1,(UC*)0x9000,1))
                    {
                     displaystring("\r\nError reading boot directory");
                     goto ERROR;
                    }
                 dir=d=(DOSDIRENTRY*)0x9000;
                 d+=2;  //skip . and .. entries
                 //list the candidates; n is reused here, which is safe only
                 //because control never returns to the outer loop from this
                 //branch (every path below leaves via goto)
                 for(n=0;n<9;n++)
                    {
                     //same name[0] test as used on the chosen entry below
                     if(d->name[0]&&(!(d->name[0]&0x80)))
                        {
                         displaystring("\r\n");
                         outdword(n+1);
                         displaystring("\x29\x20");
                         outstrn(d->name,11);
                         displaystring("  ");
                         lba=clustertolba(d);
                         if(!bsread(bsdrv,lba,1,buf,1))
                            {
                             crypt=0;dcctr=0;
                             if(!verify(c))
                                {
                                 if(c->ftype==ftMULTI){displaystring("MULTIXEC");}
                                 else
                                    {
                                     outstrn(c->processname+1,15);
                                     displaystring("  ");
                                     displayversion(c->ver);
                                    }
                                }
                            }
                        }
                     d++;
                    }
                 displaystring("\r\nEnter number of image to boot");
                 displaystring("\r\n   -return to partition table");
                 displaystring("\r\n -boot root image");
                 L01:
                 key=getkey()&0xff;
                 if(key==0x1b) {goto ERROR;}
                 if(key==0x0d) {goto L02;}
                 //only '1'..'9' select an entry; anything else must be ignored
                 //or dir+(key+2-0x31) indexes outside the entries listed above
                 if((key<0x31)||(key>0x39)){goto L01;}
                 d=dir+(key+2-0x31);    //+2 steps over . and ..
                 if(d->name[0]&&(!(d->name[0]&0x80)))
                    {
                     lba=clustertolba(d);
                     if(!bsread(bsdrv,lba,1,buf,1))
                        {
                         crypt=0;dcctr=0;
                         if(verify(c)){goto L01;}
                         bootstrap(d,1);
                         displaystring("\r\nCouldn't load ");
                         outstrn(d->name,11);   //name has no terminator
                         goto ERROR;
                        }
                    }
                 goto L01;  //empty slot or unreadable image: ask again
                }

             bootstrap(dir,0);   //NB..a successful boot will never return
             displaystring("\r\nCouldn't load ");
             displaystring(fname);
             goto ERROR;
            }
         dir++;
        }
//...or look for a CHAOS standalone file in the first 16 entries in root
L02:
    dir=(DOSDIRENTRY*)bmROOT;
    for(n=0;n<16;n++)
        {
         if(!rmmemcmp(dir,osname,8))
            {
             bootstrap(dir,0);   //NB..a successful boot will never return
             displaystring("\r\nCouldn't load ");
             displaystring(fname);
             goto ERROR;
            }
         dir++;
        }

    displaystring(fname);
    displaystring(" not found...");
ERROR:
    displaystring("\r\n...Press any non-shift key to retry boot\r\n");
    getkey();
}