{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module Main (main) where import Harpy.X86Assembler import qualified Harpy.X86CodeGen as CG import Harpy.CodeGenMonad (CodeGen, newLabel, defineLabel, ensureBufferSize, callDecl, runCodeGen, ) import Foreign (Ptr, FunPtr, Storable, sizeOf, allocaArray, ) import qualified Sound.Frame.Stereo as Stereo import Data.Word (Word32, ) import Data.Int (Int16, ) import qualified System.IO as IO import Prelude hiding (fst, ) asmFill :: CodeGen e s () asmFill = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) mov eax (0 :: Word32) jmp loopTest defineLabel loopStart mov (Ind ebx) ax add eax (1 :: Word32) add ebx (2 :: Word32) dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart pop ebx pop ecx ret asmSine :: CodeGen e s () asmSine = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) sub esp (4 :: Word32) fldpi -- 440 Hz mov (Ind esp) (880 :: Word32) fimul32 FPTopReg (Ind esp) mov (Ind esp) (44100 :: Word32) fidiv32 FPTopReg (Ind esp) fldz jmp loopTest defineLabel loopStart fld FPTopReg (FPReg 0) fsin mov (Ind esp) (fromIntegral (maxBound :: Int16) :: Word32) fimul32 FPTopReg (Ind esp) fist16p0 (Ind ebx) fadd FPTopReg (FPReg 1) add ebx (2 :: Word32) dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart add esp (4 :: Word32) pop ebx pop ecx ret asmSaw :: CodeGen e s () asmSaw = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) sub esp (4 :: Word32) mov (Ind esp) (440 :: Word32) fild (Ind esp) mov (Ind esp) (44100 :: Word32) fidiv32 FPTopReg (Ind esp) fldz jmp loopTest defineLabel loopStart -- phase->saw: 1-2*x fld FPTopReg (FPReg 0) mov (Ind esp) (-2 :: Word32) fimul32 FPTopReg (Ind esp) mov (Ind esp) (1 :: Word32) fiadd32 FPTopReg (Ind esp) mov (Ind esp) (fromIntegral (maxBound :: Int16) :: Word32) fimul32 FPTopReg (Ind esp) fist16p0 (Ind ebx) -- fraction (x+inc) fadd FPTopReg (FPReg 1) mov (Ind esp) (1 :: Word32) fild (Ind esp) fxch (FPReg 1) fprem fstp (FPReg 1) add ebx (2 :: Word32) dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart add esp (4 :: Word32) pop ebx pop ecx ret asmSawStereo :: CodeGen e s () asmSawStereo = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) let localVarSize = 4+4*2*8 :: Word32 sub esp localVarSize let phaseAddr n = (Disp (4+n*16), esp) incAddr n = (Disp (4+8+n*16), esp) let initAcc n freq = do fldz fstp (phaseAddr n) mov (Ind esp) (freq :: Word32) fild (Ind esp) mov (Ind esp) (441000 :: Word32) fidiv32 FPTopReg (Ind esp) fstp (incAddr n) initAcc 0 4410 initAcc 2 4405 initAcc 1 4395 initAcc 3 4390 jmp loopTest defineLabel loopStart -- phase->saw: 1-2*x let saw n = do fld FPTopReg (phaseAddr n) mov (Ind esp) (-2 :: Word32) fimul32 FPTopReg (Ind esp) mov (Ind esp) (1 :: Word32) fiadd32 FPTopReg (Ind esp) saw 0 saw 1 faddp (FPReg 1) FPTopReg -- is multiplication with 0.5 faster? mov (Ind esp) (2 :: Word32) fidiv32 FPTopReg (Ind esp) mov (Ind esp) (fromIntegral (maxBound :: Int16) :: Word32) fimul32 FPTopReg (Ind esp) fist16p0 (Ind ebx) add ebx (2 :: Word32) saw 2 saw 3 faddp (FPReg 1) FPTopReg mov (Ind esp) (2 :: Word32) fidiv32 FPTopReg (Ind esp) mov (Ind esp) (fromIntegral (maxBound :: Int16) :: Word32) fimul32 FPTopReg (Ind esp) fist16p0 (Ind ebx) add ebx (2 :: Word32) -- fraction (x+inc) let incfrac n = do fld FPTopReg (phaseAddr n) fadd FPTopReg (incAddr n) mov (Ind esp) (1 :: Word32) fild (Ind esp) fxch (FPReg 1) fprem fstp (FPReg 1) -- move one up and pop fstp (phaseAddr n) incfrac 0 incfrac 1 incfrac 2 incfrac 3 dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart add esp localVarSize pop ebx pop ecx ret flds :: FPTopReg -> (Disp, Reg32) -> CodeGen e s () flds FPTopReg (Disp d, Reg32 r) = ensureBufferSize CG.x86_max_instruction_bytes >> CG.x86_fld_membase r d False fist16p :: (Disp, Reg32) -> CodeGen e s () fist16p (Disp d, Reg32 r) = ensureBufferSize CG.x86_max_instruction_bytes >> CG.x86_fist_pop_membase r d CG.FInt16 fist16p0 :: Ind -> CodeGen e s () fist16p0 (Ind r) = fist16p (Disp 0, r) fstsp :: (Disp, Reg32) -> CodeGen e s () fstsp (Disp d, Reg32 r) = ensureBufferSize CG.x86_max_instruction_bytes >> CG.x86_fst_membase r d False True fstsp0 :: Ind -> CodeGen e s () fstsp0 (Ind r) = fstsp (Disp 0, r) fillips :: XMMReg -> Ind -> Word32 -> CodeGen e s () fillips reg store n = do mov store (n :: Word32) fild store fstsp0 store movss reg store shufps reg reg 0 asmSawStereoSIMD :: CodeGen e s () asmSawStereoSIMD = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) -- additional stack is also needed for movlps let localVarSize = 4+4*4 :: Word32 sub esp localVarSize subps xmm0 xmm0 -- clear phases let initAcc n freq = do mov (Ind esp) (freq :: Word32) fild (Ind esp) fstsp (Disp (4+4*n), esp) initAcc 0 4410 initAcc 2 4405 initAcc 1 4395 initAcc 3 4390 movups xmm1 (Disp 4, esp) fillips xmm2 (Ind esp) (441000 :: Word32) divps xmm1 xmm2 fillips xmm2 (Ind esp) ((-2) :: Word32) fillips xmm3 (Ind esp) (1 :: Word32) fillips xmm7 (Ind esp) (2 :: Word32) movups xmm4 xmm3 divps xmm4 xmm7 fillips xmm5 (Ind esp) (fromIntegral (maxBound :: Int16) :: Word32) jmp loopTest defineLabel loopStart -- phase->saw: 1-2*x movups xmm7 xmm0 mulps xmm7 xmm2 addps xmm7 xmm3 haddps xmm7 xmm7 mulps xmm7 xmm4 mulps xmm7 xmm5 -- cvttps2dq xmm7 xmm7 -- pshuflw xmm7 xmm7 0x88 movlps (Ind esp) xmm7 {- -- mov eax (Ind esp) -- crashes at runtime mov eax (Disp 0, esp) mov (Ind ebx) eax add ebx (4 :: Word32) -} flds FPTopReg (Disp 0, esp) fist16p0 (Ind ebx) add ebx (2 :: Word32) flds FPTopReg (Disp 4, esp) fist16p0 (Ind ebx) add ebx (2 :: Word32) -- incfrac addps xmm0 xmm1 cvttps2dq xmm7 xmm0 cvtdq2ps xmm7 xmm7 subps xmm0 xmm7 dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart add esp localVarSize pop ebx pop ecx ret asmSawStereoSIMDFloat :: CodeGen e s () asmSawStereoSIMDFloat = do loopTest <- newLabel loopStart <- newLabel push ecx push ebx mov ebx (Disp 12, esp) mov ecx (Disp 16, esp) -- additional stack is also needed for movlps let localVarSize = 4+4*4 :: Word32 sub esp localVarSize subps xmm0 xmm0 -- clear phases let initAcc n freq = do mov (Ind esp) (freq :: Word32) fild (Ind esp) fstsp (Disp (4+4*n), esp) initAcc 0 4410 initAcc 2 4405 initAcc 1 4395 initAcc 3 4390 movups xmm1 (Disp 4, esp) fillips xmm2 (Ind esp) (441000 :: Word32) divps xmm1 xmm2 {- movups (Ind ebx) xmm1 add ebx (16 :: Word32) mov ecx (Disp (4+4*0), esp) mov (Disp (4*0), ebx) ecx mov ecx (Disp (4+4*1), esp) mov (Disp (4*1), ebx) ecx mov ecx (Disp (4+4*2), esp) mov (Disp (4*2), ebx) ecx mov ecx (Disp (4+4*3), esp) mov (Disp (4*3), ebx) ecx add ebx (16 :: Word32) let writeFloat freq = do mov (Ind esp) (freq :: Word32) fild (Ind esp) fstp (Disp 0, ebx) add ebx (8 :: Word32) writeFloat 4410 writeFloat 4405 writeFloat 4395 writeFloat 4390 mov (Ind esp) (4410 :: Word32) fild (Ind esp) mov (Ind esp) (441000 :: Word32) fidiv32 FPTopReg (Ind esp) fstp (Disp 0, ebx) -} fillips xmm2 (Ind esp) ((-2) :: Word32) fillips xmm3 (Ind esp) (1 :: Word32) fillips xmm7 (Ind esp) (2 :: Word32) movups xmm4 xmm3 divps xmm4 xmm7 jmp loopTest defineLabel loopStart -- phase->saw: 1-2*x movups xmm7 xmm0 mulps xmm7 xmm2 addps xmm7 xmm3 haddps xmm7 xmm7 mulps xmm7 xmm4 movlps (Ind ebx) xmm7 add ebx (8 :: Word32) -- incfrac addps xmm0 xmm1 cvttps2dq xmm7 xmm0 cvtdq2ps xmm7 xmm7 subps xmm0 xmm7 dec ecx defineLabel loopTest cmp ecx (0 :: Word32) jne loopStart -- loop loopStart add esp localVarSize pop ebx pop ecx ret $(callDecl "callMono" [t|Ptr Int16 -> Word32 -> Word32|]) $(callDecl "callStereo" [t|Ptr (Stereo.T Int16) -> Word32 -> Word32|]) $(callDecl "callStereoFloat" [t|Ptr (Stereo.T Float) -> Word32 -> Word32|]) main :: IO () main = let size = 10000000 in allocaArray size $ \p -> do let sizeOfArray :: Storable a => Ptr a -> a -> Int sizeOfArray _ x = size * sizeOf x (_finalState, result) <- runCodeGen (asmSawStereoSIMD >> callStereo p (fromIntegral size)) () () -- runCodeGen (asmSawStereoSIMDFloat >> callStereoFloat p (fromIntegral size)) () () case result of Left err -> putStrLn (show err) Right _ -> IO.withFile "speed.sw" IO.WriteMode $ \h -> IO.hPutBuf h p (sizeOfArray p undefined)