; depends on nasm
;
; Symbols exported to the C side (32-bit x86, Intel/NASM syntax).

        ; multiply
        global  image_mult_buffer_mmx_x86asm
        global  image_mult_buffers_mmx_x86asm

        ; add
        global  image_add_buffers_mmx_x86asm
        global  image_add_buffer_mmx_x86asm

        ; subtract
        global  image_sub_buffer_mmx_x86asm

        ; clear
        global  image_clear_buffer_mmx_x86asm_eq
        global  image_clear_buffer_mmx_x86asm_from
; Do not use the function from mmxlib, since that might be
; unavailable even when mmx is available.
        global  image_get_cpuid

;
; void image_get_cpuid( int oper, int *cpuid1, int *cpuid2, int *cpuid3, int *a )
;
;   oper    ebp+8    value loaded into eax before executing cpuid
;   cpuid1  ebp+12   receives ebx
;   cpuid2  ebp+16   receives edx
;   cpuid3  ebp+20   receives ecx
;   a       ebp+24   receives eax
;
; If the cpuid instruction is not available, all four outputs are set to 0.
;
; Clobbers: eax, ecx, edx, flags.  ebx is saved and restored.
;
image_get_cpuid:
        push    ebp
        mov     ebp, esp

        ; cpuid is available iff bit 21 (200000h) of EFLAGS can be toggled.
        pushf
        pop     eax
        mov     ecx, eax                ; ecx = original EFLAGS
        xor     eax, 200000h
        push    eax
        popf                            ; try to write the flipped bit
        pushf
        pop     eax                     ; eax = EFLAGS as read back
        xor     ecx, eax                ; ecx = bits that actually changed
        test    ecx, 200000h
        jnz     .ok

        ; cpuid not supported: store 0 through every output pointer
        ; (the argument slots themselves are never seen by the caller).
        xor     eax, eax
        mov     ecx, [ebp+12]
        mov     [ecx], eax
        mov     ecx, [ebp+16]
        mov     [ecx], eax
        mov     ecx, [ebp+20]
        mov     [ecx], eax
        mov     ecx, [ebp+24]
        mov     [ecx], eax
        leave
        ret

.ok:
        push    ebx                     ; callee-saved, clobbered by cpuid
        mov     eax, [ebp+8]
        cpuid

        push    ebx                     ; park cpuid's ebx while ebx is a pointer
        mov     ebx, [ebp+24]
        mov     [ebx], eax              ; *a = eax
        pop     ebx

        mov     eax, [ebp+12]
        mov     [eax], ebx              ; *cpuid1 = ebx
        mov     eax, [ebp+16]
        mov     [eax], edx              ; *cpuid2 = edx
        mov     eax, [ebp+20]
        mov     [eax], ecx              ; *cpuid3 = ecx

        pop     ebx
        leave
        ret
; Clear an image to a solid color
;
; void image_clear_buffer_mmx_x86asm_eq( char *d,
;                                        int npixels_div_8,
;                                        int colv );
;
;   d              ebp+8    destination buffer
;   npixels_div_8  ebp+12   number of 24-byte groups to write
;                           (8 pixels each, assuming 3 bytes per pixel)
;   colv           ebp+16   fill value, replicated into every byte;
;                           assumed to be in 0..255
;
; A count <= 0 writes nothing.
;
; Register use: eax = write pointer, ecx = groups left, mm0 = fill pattern.
; Note: pinsrw is not part of the base MMX instruction set.
;
image_clear_buffer_mmx_x86asm_eq:
        push    ebp
        mov     ebp, esp

        mov     ecx, [ebp+12]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        mov     eax, [ebp+16]           ; eax: 00 xx
        shl     eax, 8
        or      eax, [ebp+16]           ; eax: xx xx
        pinsrw  mm0, eax, 0             ; copy the 16-bit pair into all four words
        pinsrw  mm0, eax, 1
        pinsrw  mm0, eax, 2
        pinsrw  mm0, eax, 3

        mov     eax, [ebp+8]

        align   32
.loop:
        movq    [eax], mm0
        movq    [eax+8], mm0
        movq    [eax+16], mm0
        add     eax, 24
        dec     ecx
        jnz     .loop

        emms
.done:
        leave
        ret

; Fill an image by repeating its first 24 bytes
;
; Arguments (cdecl):
;   ebp+8    buffer; its first 24 bytes must already hold the pattern
;   ebp+12   total number of 24-byte groups in the buffer, including
;            the pattern group itself
;
; The pattern group is left as is and copied into each of the following
; (count - 1) groups.  A count <= 1 writes nothing.
;
; Register use: eax = write pointer, ecx = groups left, mm0-mm2 = pattern.
;
image_clear_buffer_mmx_x86asm_from:
        push    ebp
        mov     ebp, esp

        mov     ecx, [ebp+12]
        cmp     ecx, 1
        jle     .done                   ; nothing after the pattern group to fill

        mov     eax, [ebp+8]
        movq    mm0, [eax]
        movq    mm1, [eax+8]
        movq    mm2, [eax+16]
        add     eax, 24                 ; first group to overwrite
        dec     ecx                     ; the pattern group is not rewritten

        align   32
.loop:
        movq    [eax], mm0
        movq    [eax+8], mm1
        movq    [eax+16], mm2
        add     eax, 24
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        leave
        ret
; Add two images
;
; void image_add_buffers_mmx_x86asm( char *d,
;                                    char *s1, char *s2,
;                                    int npixels_mult_3_div_8 )
;
;   d                     ebp+8    destination
;   s1                    ebp+12   first source
;   s2                    ebp+16   second source
;   npixels_mult_3_div_8  ebp+20   number of 8-byte groups to process
;
; Each destination byte becomes the unsigned saturating sum (paddusb) of
; the corresponding bytes of s1 and s2.  A count <= 0 writes nothing.
;
; Register use: ebx = d, eax = s1, edx = s2, ecx = groups left, mm0 = data.
;
image_add_buffers_mmx_x86asm:
        push    ebp
        mov     ebp, esp
        push    ebx                     ; callee-saved

        mov     ebx, [ebp+8]
        mov     eax, [ebp+12]
        mov     edx, [ebp+16]
        mov     ecx, [ebp+20]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        align   32
.loop:
        movq    mm0, [edx]
        add     edx, 8
        paddusb mm0, [eax]
        add     eax, 8
        movq    [ebx], mm0
        add     ebx, 8
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        pop     ebx
        leave
        ret

; Multiply two images
;
; void image_mult_buffers_mmx_x86asm( char *d, char *s1, char *s2,
;                                     int npixels_div_4 )
;
;   d              ebp+8    destination
;   s1             ebp+12   first source
;   s2             ebp+16   second source
;   npixels_div_4  ebp+20   number of 4-byte groups to process
;
; Each destination byte becomes (s1 * s2) >> 8, computed on bytes widened
; to 16 bits.  A count <= 0 writes nothing.
;
; Register use: ebx = d, eax = s1, edx = s2, ecx = groups left,
;               mm0/mm1 = widened operands, mm4 = zero (for unpacking).
;
image_mult_buffers_mmx_x86asm:
        push    ebp
        mov     ebp, esp
        push    ebx                     ; callee-saved

        mov     ebx, [ebp+8]
        mov     eax, [ebp+12]
        mov     edx, [ebp+16]
        mov     ecx, [ebp+20]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        pxor    mm4, mm4

        align   32
.loop:
        movd    mm0, [eax]
        add     eax, 4
        punpcklbw mm0, mm4              ; 4 bytes -> 4 words
        movd    mm1, [edx]
        add     edx, 4
        punpcklbw mm1, mm4

        pmullw  mm0, mm1                ; 16-bit products
        psrlw   mm0, 8                  ; keep the high byte of each product
        packuswb mm0, mm0               ; 4 words -> 4 bytes
        movd    [ebx], mm0
        add     ebx, 4
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        pop     ebx
        leave
        ret

; Subtract an RGB value from an image
;
; void image_sub_buffer_mmx_x86asm( char *d, char *source, int npixels,
;                                   int rgbr, int gbrg, int brgb )
;
; Writes to d the unsigned saturating difference (psubusb) between each
; source byte and the matching byte of a repeating 12-byte pattern.
;
;  edx  dest            ebp+8
;  eax  source          ebp+12
;  mm0  sourcedata      [eax]
;  ecx  loop count      ebp+16   each iteration handles 12 bytes, so this
;                                is presumably the pixel count divided
;                                by 4 - confirm against the caller
;  mm1  pattern 1 rgbr  ebp+20
;  mm2  pattern 2 gbrg  ebp+24
;  mm3  pattern 3 brgb  ebp+28
;
; A count <= 0 writes nothing.
;
; This function can be changed to do twice the amount of work per
; instruction, but it's all memory bound anyway, so there is no
; significant performance gain.
;
image_sub_buffer_mmx_x86asm:
        push    ebp
        mov     ebp, esp

        mov     edx, [ebp+8]
        mov     eax, [ebp+12]
        mov     ecx, [ebp+16]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        movd    mm1, [ebp+20]           ; rgb r
        movd    mm2, [ebp+24]           ; gb rg
        movd    mm3, [ebp+28]           ; b rgb

        ; byte stream:  r g b r  g b r g  b r g b  r g b r ...
        ; offset:       0        4        8        12
        ;
        ; so the three 4-byte words of one iteration hold, in memory order:
        ;   0: r g b r
        ;   4: g b r g
        ;   8: b r g b

        align   32
.loop:
        movd    mm0, [eax]
        psubusb mm0, mm1
        movd    [edx], mm0

        movd    mm0, [eax+4]
        psubusb mm0, mm2
        movd    [edx+4], mm0

        movd    mm0, [eax+8]
        psubusb mm0, mm3
        movd    [edx+8], mm0

        add     eax, 12
        add     edx, 12
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        leave
        ret

; Add an RGB value to an image
;
; void image_add_buffer_mmx_x86asm( char *d, char *source, int npixels,
;                                   int rgbr, int gbrg, int brgb )
;
; Writes to d the unsigned saturating sum (paddusb) of each source byte
; and the matching byte of a repeating 12-byte pattern.
;
;  edx  dest            ebp+8
;  eax  source          ebp+12
;  mm0  sourcedata      [eax]
;  ecx  loop count      ebp+16   each iteration handles 12 bytes, so this
;                                is presumably the pixel count divided
;                                by 4 - confirm against the caller
;  mm1  pattern 1 rgbr  ebp+20
;  mm2  pattern 2 gbrg  ebp+24
;  mm3  pattern 3 brgb  ebp+28
;
; A count <= 0 writes nothing.
;
; This function can be changed to do twice the amount of work per
; instruction, but it's all memory bound anyway, so there is no
; significant performance gain.
;
image_add_buffer_mmx_x86asm:
        push    ebp
        mov     ebp, esp

        mov     edx, [ebp+8]
        mov     eax, [ebp+12]
        mov     ecx, [ebp+16]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        movd    mm1, [ebp+20]           ; rgb r
        movd    mm2, [ebp+24]           ; gb rg
        movd    mm3, [ebp+28]           ; b rgb

        ; byte stream:  r g b r  g b r g  b r g b  r g b r ...
        ; offset:       0        4        8        12
        ;
        ; so the three 4-byte words of one iteration hold, in memory order:
        ;   0: r g b r
        ;   4: g b r g
        ;   8: b r g b

        align   32
.loop:
        movd    mm0, [eax]
        paddusb mm0, mm1
        movd    [edx], mm0

        movd    mm0, [eax+4]
        paddusb mm0, mm2
        movd    [edx+4], mm0

        movd    mm0, [eax+8]
        paddusb mm0, mm3
        movd    [edx+8], mm0

        add     eax, 12
        add     edx, 12
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        leave
        ret

; Multiply an image with a fixed rgb value
;
; void image_mult_buffer_mmx_x86asm( char *d, char *source, int npixels,
;                                    int rgbr, int gbrg, int brgb )
;
; Writes to d, for each source byte, (source * pattern) >> 8, where the
; pattern is a repeating 12-byte sequence and the product is computed on
; bytes widened to 16 bits.
;
;  edx  dest            ebp+8
;  eax  source          ebp+12
;  mm0  sourcedata      [eax]
;  ecx  loop count      ebp+16   each iteration handles 12 bytes, so this
;                                is presumably the pixel count divided
;                                by 4 - confirm against the caller
;  mm1  mult1 rgbr      ebp+20   (widened to 4 words)
;  mm2  mult2 gbrg      ebp+24   (widened to 4 words)
;  mm3  mult3 brgb      ebp+28   (widened to 4 words)
;  mm4  null
;
; A count <= 0 writes nothing.
;
image_mult_buffer_mmx_x86asm:
        push    ebp
        mov     ebp, esp

        mov     edx, [ebp+8]
        mov     eax, [ebp+12]
        mov     ecx, [ebp+16]
        test    ecx, ecx
        jle     .done                   ; empty request: the loop below is do-while

        pxor    mm4, mm4
        movd    mm1, [ebp+20]           ; rgb r
        punpcklbw mm1, mm4
        movd    mm2, [ebp+24]           ; gb rg
        punpcklbw mm2, mm4
        movd    mm3, [ebp+28]           ; b rgb
        punpcklbw mm3, mm4

        ; byte stream:  r g b r  g b r g  b r g b  r g b r ...
        ; offset:       0        4        8        12
        ;
        ; so the three 4-byte words of one iteration hold, in memory order:
        ;   0: r g b r
        ;   4: g b r g
        ;   8: b r g b

        align   32
.loop:
        movd    mm0, [eax]
        punpcklbw mm0, mm4              ; 4 bytes -> 4 words
        pmullw  mm0, mm1
        psrlw   mm0, 8                  ; keep the high byte of each product
        packuswb mm0, mm0               ; 4 words -> 4 bytes
        movd    [edx], mm0

        movd    mm0, [eax+4]
        punpcklbw mm0, mm4
        pmullw  mm0, mm2
        psrlw   mm0, 8
        packuswb mm0, mm0
        movd    [edx+4], mm0

        movd    mm0, [eax+8]
        punpcklbw mm0, mm4
        pmullw  mm0, mm3
        psrlw   mm0, 8
        packuswb mm0, mm0
        movd    [edx+8], mm0

        add     eax, 12
        add     edx, 12
        dec     ecx                     ; plain counter test, independent of the
        jnz     .loop                   ; flags left by the pointer arithmetic

        emms
.done:
        leave
        ret