;                   DETECTPR.ASM                         Agner Fog 2004-05-30
;
; This file contains two functions which are useful for detecting which
; microprocessor a software program is running on:
;
; DetectProcessor
; ===============
; This function returns a 32-bit integer containing information about the
; type of microprocessor and the available instruction set. This function
; also checks if XMM registers are enabled by the operating system.
; Each feature is indicated by a bit in the returned value.
;
; ProcessorName
; =============
; This function produces a zero-terminated ASCII string containing a name
; for the microprocessor in human-readable format.
;
; These functions can be called from high-level language code as well as
; from assembly in 32-bit operating systems such as Windows and Linux.
; Can also be assembled in 16-bit mode.
;
; Further definition of the functions is given below.
;
; The method for detecting whether XMM instructions are enabled by the 
; operating system is different from the method recommended by Intel.
; The method used here has the advantage that it is independent of the 
; ability of the operating system to catch invalid opcode exceptions. The
; method used here has been thoroughly tested on many different versions of
; Intel and AMD microprocessors, and is believed to work reliably. For further
; discussion of this method, see my manual "How to optimize for the Pentium 
; family of microprocessors", 2004. (www.agner.org/assem/pentopt.pdf).
; 
; (c) 2003, 2004 GNU General Public License www.gnu.org/copyleft/gpl.html
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.686
.xmm
.model flat

; PublicAlias: define an additional public near label at the current
; location, so that the code which follows can be reached under a second
; (e.g. underscore-prefixed) name.
PublicAlias MACRO AliasName
        public  AliasName
        AliasName LABEL NEAR
ENDM

.code

; ********** DetectProcessor function **********
; C++ prototype:
; extern "C" int DetectProcessor (void);
;
; Identifies the microprocessor by means of the CPUID instruction and packs
; vendor, family, model and instruction set information into one integer.
; If SSE and FXSAVE are reported and FXSAVE is not emulated, the function
; also verifies that FXSAVE/FXRSTOR really transfer the XMM registers, which
; is taken as the indication that the operating system has enabled them.
;
; return value (EAX; in 16-bit mode DX:AX):
; The return value is 0 if microprocessor has no CPUID instruction
; bits 0-3 = model
; bits 4-7 = family:  4 for 80486, Am486, Am5x86
;                     5 for P1, PMMX, K6
;                     6 for PPro, P2, P3, Athlon, Duron
;                    15 for P4, Athlon64, Opteron
; bit    8 = vendor is Intel
; bit    9 = vendor is AMD
; bit   10 = not used, always 0
; bit   11 = XMM registers enabled by operating system
; bit   12 = floating point instructions supported
; bit   13 = time stamp counter supported
; bit   14 = CMPXCHG8 instruction supported
; bit   15 = conditional move and FCOMI supported (PPro, P2, P3, P4, Athlon, Duron, Opteron)
; bits 16-22 = not used, always 0
; bit   23 = MMX instructions supported (PMMX, P2, P3, P4, K6, Athlon, Duron, Opteron)
; bit   24 = FXSAVE / FXRSTOR supported
; bit   25 = SSE instructions supported (P3, P4, Athlon64, Opteron)
; bit   26 = SSE2 instructions supported (P4, Athlon64, Opteron)
; bit   27 = SSE3 instructions supported (forthcoming "Prescott")
; bit   28 = hyperthreading (P4)
; AMD-specific instructions:
; bit   29 = MMX AMD extension
; bit   30 = 3DNow extension instructions
; bit   31 = 3DNow instructions
;
; Register usage:
; EBX, ESI, EDI and EBP are saved and restored. EAX, ECX, EDX and the flags
; are modified. The ID bit (bit 21) of EFLAGS is left in its toggled state.
; Up to 20FH bytes of stack are used temporarily for the FXSAVE image.

DetectProcessor PROC NEAR
PUBLIC  DetectProcessor
PublicAlias _DetectProcessor          ; Underscore needed when called from Windows

        push    ebx
        push    esi
        push    edi
        xor     edi, edi         ; EDI accumulates the result bits

; detect if CPUID instruction supported by microprocessor:
; CPUID exists if the ID bit (bit 21) of EFLAGS can be changed by software
        pushfd
        pop     eax
        xor     eax, 1 SHL 21    ; try to toggle the ID bit
        push    eax
        popfd
        pushfd
        pop     ebx              ; EBX = flags actually accepted by the CPU
        xor     eax, ebx         ; differences between wanted and actual flags
        and     eax, 1 SHL 21
        jnz     DPEND            ; ID bit did not toggle: CPUID not supported
        xor     eax, eax
        cpuid                    ; function 0: EAX = highest function, EBX:EDX:ECX = vendor
        test    eax, eax
        jz      DPEND            ; function 1 not supported

        ; determine vendor from the second character of the last part of
        ; the vendor string in ECX: "ntel" (GenuineIntel), "cAMD" (AuthenticAMD)
        xor     ebx, ebx
        cmp     ch, 't'
        sete    bl               ; 1 if vendor = Intel
        cmp     ch, 'A'
        sete    dl
        shl     dl, 1            ; 2 if vendor = AMD
        or      bl, dl
        shl     ebx, 8           ; shift vendor info to bit 8-9
        or      edi, ebx         ; EDI now contains vendor information

        mov     eax, 1
        cpuid                    ; function 1: EAX = signature, ECX/EDX = feature flags
        shr     eax, 4           ; discard stepping
        and     eax, 0FFH        ; model in bits 0-3, family in bits 4-7
        or      edi, eax         ; combine bits in EDI
        and     ecx, 1           ; SSE3 feature (ECX bit 0)
        shl     ecx, 27          ; move to bit 27
        or      edi, ecx         ; combine bits in EDI

        ; EDX feature flags that keep their position in the result:
        ; bit 15 (CMOV), 23 (MMX), 24 (FXSR), 25 (SSE), 26 (SSE2), 28 (HT).
        ; All other EDX bits are masked off so that the undocumented result
        ; bits 16-22 are always zero.
        mov     ecx, edx
        and     ecx, 17808000H
        or      edi, ecx         ; combine bits in EDI

        ; shuffle EDX feature bits 0, 4, 8 to result bits 12, 13, 14
        mov     ecx, edx
        and     ecx, 1           ; FPU feature
        shl     ecx, 12-0        ; move to bit 12
        or      edi, ecx         ; combine bits in EDI
        mov     ecx, edx
        and     ecx, 10H         ; time stamp counter feature
        shl     ecx, 13-4        ; move to bit 13
        or      edi, ecx         ; combine bits in EDI
        mov     ecx, edx
        and     ecx, 100H        ; CMPXCHG8 feature
        shl     ecx, 14-8        ; move to bit 14
        or      edi, ecx         ; combine bits in EDI

        test    edi, 200H        ; is it an AMD?
        jz      Not_AMD
        ; Get AMD-specific features
        mov     eax, 80000000H
        cpuid                    ; EAX = highest extended function
        cmp     EAX, 80000001H
        jb      Not_AMD          ; extended feature function not available
        mov     eax, 80000001H
        cpuid                    ; get AMD-specific features into EDX
        mov     eax, edx
        and     eax, 0C0000000H  ; 3DNow instructions support bit 30 - 31
        or      edi, eax         ; combine bits in EDI
        and     edx, 400000H     ; MMX extensions bit 22
        shl     edx, 29-22       ; move to bit 29
        or      edi, edx         ; combine bits in EDI

Not_AMD:
        ; test OS support for XMM registers
        ; The test is only made if both SSE and FXSAVE are reported and
        ; FXSAVE is not emulated (EM bit in CR0 clear).
        test    edi, 2000000H    ; SSE support
        jz      no_SSE_test
        test    edi, 1000000H    ; FXSAVE/FXRSTOR support
        jz      no_SSE_test
        smsw    ax               ; read lower part of CR0 without violating privilege
        test    al, 4            ; test if FXSAVE is emulated
        jnz     no_SSE_test

        ; Allocate a 16-bytes aligned 200H bytes buffer on the stack
        push    ebp
        mov     esi, esp         ; save stack pointer
        sub     esp, 200H        ; allocate space for FXSAVE
        and     esp, -10H        ; align by 16
        mov     ebp, esp
IF @WORDSIZE EQ 2
        @BP EQU <bp>             ; don't use upper word of pointer if 16-bit mode
ELSE
        @BP EQU <ebp>
ENDIF
TESTDATA = 0D95A34BEH            ; random test value
TESTPS   = 10CH                  ; position to write TESTDATA = upper part of XMM6 image

        ; Modify the saved image of XMM6, load it, save it again and see if
        ; the modification made a round trip through the real register.
        ; The original register contents are restored afterwards.
        fxsave  [@BP]            ; save FP/MMX and XMM registers
        mov     eax,[@BP+TESTPS] ; read part of XMM6 register
        xor     DWORD PTR [@BP+TESTPS],TESTDATA  ; change value
        fxrstor [@BP]            ; load changed value into XMM6
        mov     [@BP+TESTPS],eax ; restore old value in buffer, so that the
                                 ; change is seen below only if FXSAVE writes it
        fxsave  [@BP]            ; save again
        mov     ebx,[@BP+TESTPS] ; read changed XMM6 register
        mov     [@BP+TESTPS],eax ; restore old value
        fxrstor [@BP]            ; load restored value into XMM6
        xor     eax, ebx         ; get difference between old and new value
        cmp     eax, TESTDATA    ; test if XMM6 was changed correctly
        sete    al               ; 1 if test successful
        movzx   eax, al          ; clear rest of EAX
        shl     eax, 11          ; move to bit 11
        or      edi, eax         ; combine bits in EDI
        mov     esp, esi         ; restore ESP
        pop     ebp

no_SSE_test:
DPEND:
        mov     eax, edi         ; return result bits
IF @WORDSIZE EQ 2
        mov     edx, eax         ; get high word into DX if 16-bit mode
        shr     edx, 16
ENDIF
        pop     edi
        pop     esi
        pop     ebx
        ret
DetectProcessor ENDP


; ********** ProcessorName function **********
; C++ prototype:
; extern "C" void ProcessorName (char * text);
;
; This function finds the name of the microprocessor. The name is returned
; in the parameter text, which must be a character array of at least 68 bytes.
;
; The text written is:
;   "80486 or lower"                 if the CPUID instruction is not supported,
;   "<name> Family f Model m"        otherwise,
; where <name> is the 48-bytes extended name string from CPUID functions
; 80000002H - 80000004H if these are available, else the 12-characters
; vendor string from CPUID function 0. f and m are the family and model
; numbers returned by DetectProcessor, each written as one hexadecimal
; digit (0-9, A-F). The text is zero-terminated.
;
; Register usage:
; EBX, ESI, EDI and EBP are saved and restored. EAX, ECX, EDX and the flags
; are modified. The direction flag is cleared.

ProcessorName PROC NEAR
PUBLIC ProcessorName
PublicAlias _ProcessorName            ; Underscore needed when called from Windows
        call    DetectProcessor    ; EAX = processor information bits
        push    ebx
        push    esi
        push    edi
        push    ebp
IF @WORDSIZE EQ 4
        mov     edi, [esp+20]      ; text pointer (4 pushes + return address)
ELSE
        mov     bp, sp             ; 16-bit mode
        movzx   edi, WORD PTR [bp+18]
        push    ds
        pop     es                 ; SCASB below addresses through ES
ENDIF
        test    eax, eax
        jnz     IDENTIFYABLE
        ; processor has no CPUID
        ; (characters are in reverse order inside each constant because the
        ; first character of the constant is stored at the highest address)
        mov     DWORD PTR [edi], '8408'   ; write text '80486 or lower'
        mov     DWORD PTR [edi+4], 'ro 6'
        mov     DWORD PTR [edi+8], 'wol '
        mov     DWORD PTR [edi+12], 're'  ; end with 0
        jmp     PNEND
IDENTIFYABLE:
        mov     esi, eax           ; value returned by DetectProcessor
        mov     eax, 80000000H
        cpuid                      ; EAX = highest extended function
        cmp     eax, 80000004H     ; test if extended vendor string available
        jb      no_ext_vendor_string
        mov     ebp, 80000002H     ; first of the three name string functions
        push    edi
VLOOP:  mov     eax, ebp           ; repeat 3 times to get extended vendor string
        cpuid
        mov     [edi], eax         ; store extended vendor string
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     edi, 16
        inc     ebp
        cmp     ebp, 80000004h
        jbe     VLOOP
        pop     edi                ; back to start of text
        jmp     get_family_and_model
no_ext_vendor_string:
        ; get short vendor string
        xor     eax, eax
        cpuid
        mov     [edi],ebx          ; store short vendor string
        mov     [edi+4],edx
        mov     [edi+8],ecx
        mov     BYTE PTR [edi+12],0 ; terminate string
get_family_and_model:
        xor     EAX, EAX
        mov     ECX, 48            ; the name is at most 48 bytes long
        cld
        repne   SCASB              ; find end of text
        jne     APPEND_FM          ; no zero within 48 bytes: append after them
        dec     edi                ; point to the terminating zero
APPEND_FM:
        mov     DWORD PTR [edi], 'maF '    ; append text ' Family ? Model ?'
        mov     DWORD PTR [edi+4], ' yli'
        mov     DWORD PTR [edi+8], 'oM ?'
        mov     DWORD PTR [edi+12], ' led'
        mov     eax, esi
        shr     eax, 4
        and     eax, 0FH            ; family number (0 - 15)
        add     al, '0'             ; convert to ASCII
        cmp     al, '9'
        jna     FAM09
        add     al, 'A'-'9'-1       ; hexadecimal: skip from '9'+1 to 'A'
FAM09:  mov     BYTE PTR [edi+8], al  ; put into string in place of first '?'
        mov     eax, esi
        and     eax, 0FH            ; model number (0 - 15)
        add     al, '0'             ; convert to ASCII
        cmp     al, '9'
        jna     MOD09
        add     al, 'A'-'9'-1       ; hexadecimal: skip from '9'+1 to 'A'
MOD09:  mov     DWORD PTR [edi+16], eax ; put into string, followed by 0
PNEND:  pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret
ProcessorName ENDP
        
END
