Oxygen Basic
Programming => Example Code => Topic started by: Peter on April 20, 2013, 06:59:26 AM
-
Deleted
-
Hi Peter,
nice banana.
BTW, does your SW rely on OpenGL or what?
-
No Emil. SimpleWindow uses only the Windows API.
But 80% of it is written in machine code, for more fun.
-
But 80% are written in machine code
Which assembler did you use, is it Oxygen?
Also, did you implement the scaling with an asm-only algorithm?
Do you plan to allow rotation?
-
Scaling is very easy.
You are using a point-to-ellipse algorithm; have you tried to apply blur with your algorithm?
-
simpler blur
point = average of sum of these pixels (x,y) & (x+1,y) & (x-1,y) & (x,y+1) & (x,y-1)
You can achieve that at hardware speed by using SSE & SSE2.
-
Hi Peter,
if you want a better result ,
create getPixelRed ,green ,Blue functions and apply the previous algorithm for color component.
E.g
r_avr = (R(x,y) + R(x+1,y) + ...) / R_Num
g_avr = (G(x,y) + G(x+1,y) + ...) / G_Num
b_avr = (B(x,y) + B(x+1,y) + ...) / B_Num
final_Col = (r_avr << 16) + (g_avr << 8) + b_avr
-
Hi Peter,
I found this, and I think it may help you; it still needs to be optimized.
// Build the 8-bit "shuffle" selector immediate for a shufps instruction.
// a, b, c and d are 2-bit element indices (0..3) placed at bits 1:0, 3:2,
// 5:4 and 7:6 of the result respectively.
// Each argument is parenthesized so that expression arguments (e.g. `i & 3`)
// are shifted as a whole instead of being split by operator precedence.
#define SSE_SHUFFLE(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
// Halves the width of one row of pixels: every output pixel is the per-byte
// average of two horizontally adjacent input pixels (p0 with p1, p2 with p3, ...).
// SSE2 counterpart of the C version, written as 32-bit inline assembly (__asm block).
//
//   source        - input pixels, 4 bytes each; must be 16-byte aligned because
//                   the row is read with aligned 16-byte loads.
//   dest          - output pixels; two pixels (8 bytes) are written for every
//                   four input pixels. The 8-byte stores need no alignment.
//   source_length - number of input pixels. Only whole groups of 4 pixels are
//                   processed: a length below 4 writes nothing and up to 3
//                   trailing pixels are ignored, so the loop can never run past
//                   the buffers the way a "count down until exactly zero" loop
//                   does for 0 or for lengths that are not a multiple of 4.
//
// Each averaged byte is (a + b) >> 1, i.e. rounded down.
void horizontal_average_sse2(ARGB *source, ARGB *dest, size_t source_length)
{
    __asm
    {
        mov       esi, source            // esi = read cursor
        mov       edi, dest              // edi = write cursor
        mov       ecx, source_length     // ecx = number of input pixels
        shr       ecx, 2                 // ecx = number of whole 4-pixel groups
        jz        finished               // nothing to do for fewer than 4 pixels

        pxor      xmm2, xmm2             // xmm2 = 0; loop-invariant, used to zero-extend bytes

    main_loop:
        movdqa    xmm0, [esi]            // xmm0 <- [p3, p2, p1, p0] { four pixels }

        // 0xD8 is the value of SSE_SHUFFLE(0, 2, 1, 3). It is written as a
        // literal because Microsoft documents that C-specific operators such
        // as << are not available inside __asm blocks.
        pshufd    xmm0, xmm0, 0xD8       // xmm0 <- [p3, p1, p2, p0] { partners now sit in matching halves }
        movdqa    xmm1, xmm0             // xmm1 <- [p3, p1, p2, p0] { copy }

        // Widen every byte to a 16-bit word so the additions cannot overflow.
        punpckhbw xmm0, xmm2             // xmm0 <- words of [p3 | p1]
        punpcklbw xmm1, xmm2             // xmm1 <- words of [p2 | p0]

        paddw     xmm0, xmm1             // xmm0 <- words of [p3 + p2 | p1 + p0]
        psrlw     xmm0, 1                // divide each word sum by 2
        packuswb  xmm0, xmm0             // narrow words back to bytes; result is in the low 8 bytes

        movq      qword ptr [edi], xmm0  // store two averaged pixels

        add       esi, 16                // advance source by 4 pixels
        add       edi, 8                 // advance destination by 2 pixels
        dec       ecx                    // one group done
        jnz       main_loop              // repeat while groups remain

    finished:
    }
}