These procedures are included in MathUtil.inc.
Half-floats are 16-bit floating-point numbers, useful for storing large amounts of float data at reduced precision. Recent graphics hardware (including Intel) can use them for colors, normal maps, and vector arrays.
This is an update of previous work, with additional support for +/- infinities and NaNs.
When converting from single to half float, any overflows are converted to infinities.
'HALF-FLOAT
'see IEEE 754
'https://en.wikipedia.org/wiki/IEEE_754-1985
'
'TYPE SIZE SIGN EXPONENT FRACTION
'========================================
'HALF 16 1 5 10
'SINGLE 32 1 8 23
'DOUBLE 64 1 11 52
'EXTENDED 80 1 15 63 (plus 1 explicit integer bit)
function FloatToHalfFloat(sys pf,ph,n) as sys
=============================================
'
'Converts n single precision floats (32 bit) read from pf into
'half-floats (16 bit) written to ph. Returns ph.
'
'  pf  address of the source singles      (4 bytes per element)
'  ph  address of the destination halves  (2 bytes per element)
'  n   element count; nothing is converted when the count is <= 0
'
'CONVERSION RULES:
'  zero                 -> zero (sign kept)
'  NAN                  -> 0x7fff (sign kept, payload not kept)
'  infinity             -> infinity
'  too large for half   -> infinity
'  below 2^-14          -> half denormal
'  below 2^-24          -> zero (sign kept)
'  surplus fraction bits are truncated, not rounded
'
'USE OF REGISTERS:
'edx sign transform
'eax exponent transform
'ecx fraction transform
'rsi source pointer
'rdi dest pointer
'
mov rsi,pf
mov rdi,ph
(
  dec dword n           'iterator down count
  jl exit
  mov eax,[rsi]
  mov edx,eax
  and edx,0x80000000    'hold sign bit only
  shr edx,0x10          'shift sign bit down 16
  and eax,0x7fffffff    'remove sign bit
  mov ecx,eax           'for significand
  '
  '
  'TEST FOR ZERO
  (
    cmp eax,0
    jz fwd nzero
  )
  'TEST FOR NAN
  (
    cmp eax,0x7f800000
    jle exit
    mov eax, 0x7fff     'set NAN
    jmp fwd nzero
  )
  'TEST FOR INFINITY
  (
    jl exit             'flags are still those of the compare with 0x7f800000
    mov eax, 0x7c00     'set infinity
    jmp fwd nzero
  )
  shr eax,0x17          'shift exponent down 23
  shr ecx,0x0d          'reduce fraction 23 bits to 10 bits
  and ecx,0x3ff         'discard the exponent bits left above the fraction
  sub eax,0x70          'adjust exponent bias -112 == (15-127)
  '
  'TEST FOR HALF DENORMAL (LOSING PRECISION)
  'a half exponent field of 0 has no implicit leading 1,
  'so exponent 0 must be handled here as well as negative exponents
  (
    cmp eax,0
    jg exit             'exclude positive exponents (normal numbers)
    cmp eax,-10
    (
      jg exit
      mov eax,0         'SET ZERO: every significant bit would be shifted out
      jmp fwd nzero
    )
    or ecx,0x400        'make the implicit leading 1 explicit
    xchg ecx,eax        'eax significand, ecx exponent (-9..0)
    neg ecx             'make positive
    inc ecx             'shift count is 1-exponent (1..10)
    shr eax,cl          'downshift significand
    mov ecx,eax
    mov eax,0           'zero exponent
  )
  '
  'TEST EXPONENT FOR OVERFLOW
  'exponent field 0x1f is reserved for infinity and NAN,
  'so the largest usable exponent is 0x1e
  (
    cmp eax,0x1f
    jl exit
    mov eax,0x7c00      'CLAMP INFINITY
    jmp fwd nzero
  )
  shl eax,0x0a          'place exponent 10 bits up
  or eax,ecx            'combine exponent and significand
nzero:
  or eax,edx            'combine sign
  mov [rdi],ax          'store
  add rsi,4             'stride next float
  add rdi,2             'stride next half-float
  repeat
)
return ph
end function
function HalfFloatToFloat(sys ph,pf,n) as sys
=============================================
'
'Converts n half-floats (16 bit) read from ph into single precision
'floats (32 bit) written to pf.
'
'  ph  address of the source halves        (2 bytes per element)
'  pf  address of the destination singles  (4 bytes per element)
'  n   element count; nothing is converted when the count is <= 0
'
'CONVERSION RULES:
'  zero            -> zero (sign kept)
'  NAN             -> 0x7fffffff (sign kept, payload not kept)
'  infinity        -> infinity
'  half denormal   -> normalised single (exact)
'  normal          -> single (exact)
'
'NOTE(review): the return value is ph (the source pointer), whereas
'FloatToHalfFloat returns its destination pointer. Left unchanged so
'existing callers are unaffected - confirm whether pf was intended.
'
'USE OF REGISTERS:
'edx sign transform
'eax exponent transform
'ecx significand transform
'rsi source pointer
'rdi dest pointer
'
mov rsi,ph
mov rdi,pf
(
  dec dword n           'iterator down count
  jl exit
  xor eax,eax
  mov ax,[rsi]
  mov edx,eax
  and edx,0x8000        'hold sign bit only
  shl edx,0x10          'shift sign bit up 16
  and eax,0x7fff        'remove sign bit
  mov ecx,eax           'for significand
  '
  '
  'TEST FOR ZERO
  (
    cmp eax,0
    jz fwd nzero
  )
  'TEST FOR NAN
  (
    cmp eax,0x7c00
    jle exit
    mov eax, 0x7fffffff 'set NAN
    jmp fwd nzero
  )
  'TEST FOR INFINITY
  (
    jl exit             'flags are still those of the compare with 0x7c00
    mov eax, 0x7f800000 'set infinity
    jmp fwd nzero
  )
  shr eax,0x0A          'shift exponent down 10
  and ecx,0x3ff         'mask significand 10 bits
  '
  'TEST FOR HALF DENORMAL
  'exponent field 0 means 0.fraction * 2^-14 (no implicit leading 1)
  'the fraction is non-zero here because zero was handled above
  (
    cmp eax,0
    jnz exit
    mov eax,1           'denormals share the scale of exponent field 1
    (
      cmp ecx,0x400
      jge exit          'leading 1 has reached the implicit bit position
      shl ecx,1         'normalise significand
      dec eax           'compensate in the exponent
      repeat
    )
    and ecx,0x3ff       'drop the leading 1, it is implicit in a single
  )
  add eax,0x70          'adjust exponent bias +112 == (127-15)
  shl eax,23            'shift exponent into final position
  shl ecx,0x0d          'shift significand from 10 bits to 23 bits (13)
  '
  or eax,ecx            'combine exponent and significand
nzero:
  or eax,edx            'combine sign
  mov [rdi],eax         'store
  add rsi,2             'stride next half-float
  add rdi,4             'stride next float
  repeat
)
return ph
end function