# Square root # https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_(base_2) # int32_t isqrt(int32_t num) { # assert(("sqrt input should be non-negative", num > 0)); # int32_t res = 0; # int32_t bit = 1 << 30; // The second-to-top bit is set. # // Same as ((unsigned) INT32_MAX + 1) / 2. # # // "bit" starts at the highest power of four <= the argument. # while (bit > num) # bit >>= 2; # # while (bit != 0) { # if (num >= res + bit) { # num -= res + bit; # res = (res >> 1) + bit; # } else # res >>= 1; # bit >>= 2; # } # return res; #} # Binary square root # INPUT NUMBER: %rdi # RESULT %rcx # Uses registers %r11-r14 internally # NOTE: %rdi ends up as 0 if result was "true" square root sqrt_binary: # Calls right shift function so internals cannot be stored to registers that it is using # Initialize result register irmovq $0, %rcx # Initialize second-to-top bit # NOTE: adjust size of this to allow bigger numbers irmovq $16777216, %r13 # This loop lowers bit until it is in range of number adjust_highest_bit_loop: # Use r14 as temp storage rrmovq %r13, %r14 # Determine if right shift is to be done subq %rdi, %r14 jle sqrt_calc_loop # Move bit right twice with right shift function rrmovq %r13, %rsi call rs rrmovq %rax, %r13 rrmovq %r13, %rsi call rs rrmovq %rax, %r13 # Start loop again jmp adjust_highest_bit_loop # Loop with actual square root calculation sqrt_calc_loop: # Test if loop is to be continued (leave if bit = 0) # Reuse r14 as temporary storage irmovq $0,%r14 subq %r13, %r14 je sqrt_end # Check if num >= res + bit # Reuse r14 as temporary storage (sum result and bit there) rrmovq %rcx, %r14 addq %r13, %r14 subq %rdi, %r14 jg sqrt_calc_loop_else # Statement num >= res + bit is true # Perform num -= res + bit; and res = (res >> 1) + bit; # Reuse r14 for res + bit rrmovq %rcx, %r14 addq %r13, %r14 # num -= res + bit; subq %r14, %rdi # res = (res >> 1) + bit; # Move bit right once and add bit to result rrmovq %rcx, %rsi call rs rrmovq %rax, %rcx addq %r13,%rcx # Go to end of loop jmp sqrt_calc_loop_end # Else statement, move bit right once and proceed to loop ending sqrt_calc_loop_else: rrmovq %rcx, %rsi call rs rrmovq %rax, %rcx # End of calculation loop, always right shift twice at the end of loop sqrt_calc_loop_end: rrmovq %r13, %rsi call rs rrmovq %rax, %r13 rrmovq %r13, %rsi call rs rrmovq %rax, %r13 # Back to loop beginning jmp sqrt_calc_loop sqrt_end: ret # Right bit shift by one bit, used by square root method # INPUT: %rsi, # OUPTUT: %rax # Uses reqisters %r8-%r11 internally rs: # Bit mask at beginning (3 as decimal because not interested in last bit, multiplied by 2 every round to shift mask to left) irmovq $2, %r9 # Number to add end result (1 at beginning, multiplied by 2 every round) irmovq $1, %r10 # Initialize rax irmovq $0, %rax rsloop: # Move mask to temp register r11 to be used in comparison rrmovq %r9, %r11 # Check with and op with mask if bit is 1 or 0, if bit 1 then add it to result (rax), if no then skip addition andq %rsi, %r11 subq %r9, %r11 jne zerobit addq %r10, %rax zerobit: # End of the loop # Shift counter and mask left addq %r9, %r9 addq %r10, %r10 # Check ending condition by checking if mask has moved left enough steps # NOTE: Use higher numbers if bigger values than this are expected # Reuse temp register r11 for this comparison irmovq $16777216, %r11 subq %r10, %r11 je rsend # Back to beginning of loop jmp rsloop rsend: ret