dnl x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl Copyright 2007, 2008, 2010 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C		c/l
C AMD K8,K9	18
C AMD K10	18
C Intel P4	68
C Intel core2	34
C Intel corei	30.5
C Intel atom	73
C VIA nano	33


C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Syntax: AT&T operand order (source first, destination last), run through m4.
C
C Divides the un-limb number at up, extended below with fn zero limbs, by the
C 2-limb divisor {dp[1], dp[0]}.  Per the title line the divisor is expected
C to be normalized; nothing in this file checks that.
C
C Results, as established by the code below:
C   - un + fn - 2 quotient limbs are stored at qp[0] ... qp[un+fn-3]
C   - the 2-limb remainder is stored back to up[0] (low) and up[1] (high)
C   - rax returns the most significant quotient limb, which is 0 or 1
C
C up[un-1] and up[un-2] are read unconditionally, so un >= 2 is presumably
C a precondition -- TODO confirm against the caller contract.
C
C The bracketed values at the end of some comments, e.g. [4] or [ncp], are
C carried over from the original annotations.  They look like scheduling
C cycle numbers, with ncp presumably meaning not on the critical path --
C TODO confirm.

C INPUT PARAMETERS
C These names record which register carries which argument on entry.  The
C code below refers to the registers directly rather than through the names.
define(`qp',		`%rdi')
define(`fn',		`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`dp',		`%r8')

C ABI_SUPPORT, FUNC_ENTRY, FUNC_EXIT, IFDOS and IFSTD are defined outside this
C file.  NOTE(review): presumably they select between the DOS64 and STD64
C calling conventions and move DOS64 arguments into the STD64 registers.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	FUNC_ENTRY(4)
C NOTE(review): presumably fetches the fifth argument (dp) from the stack,
C since FUNC_ENTRY is only told about four -- confirm the 56 byte offset
C against the FUNC_ENTRY definition.
IFDOS(`	mov	56(%rsp), %r8	')
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	lea	-24(%rdx,%rcx,8), %r12	C r12 = up + 8*un - 24 = &up[un-3]
	mov	%rsi, %r13		C r13 = fn
	push	%rbp
	mov	%rdi, %rbp		C rbp = qp
	push	%rbx
	mov	8(%r8), %r11		C d1 = dp[1]
	mov	16(%r12), %rbx		C nh = up[un-1]
	mov	(%r8), %r8		C d0 = dp[0]
	mov	8(%r12), %r10		C nl = up[un-2]

C Most significant quotient limb: if {nh,nl} >= {d1,d0} subtract the divisor
C once and record a 1 in r15, otherwise leave r15 = 0.
	xor	R32(%r15), R32(%r15)	C msl = 0
	cmp	%rbx, %r11		C flags from d1 - nh
	ja	L(2)			C d1 > nh: top limbs already < divisor
	setb	%dl			C dl = (d1 < nh)
	cmp	%r10, %r8		C flags from d0 - nl
	setbe	%al			C al = (d0 <= nl)
	orb	%al, %dl		C "orb" form to placate Sun tools
	je	L(2)			C both clear: d1 == nh and d0 > nl
	inc	R32(%r15)		C msl = 1
	sub	%r8, %r10		C {nh,nl} -= {d1,d0}
	sbb	%r11, %rbx
L(2):
	lea	-3(%rcx,%r13), %r14	C un + fn - 3
	test	%r14, %r14
	js	L(end)			C negative: no further quotient limbs

C Call mpn_invert_limb with d1 as its argument.  r8, r10 and r11 are saved
C around the call because they are caller-saved and still needed afterwards.
	push	%r8
	push	%r10
	push	%r11
IFSTD(`	mov	%r11, %rdi	')
IFDOS(`	mov	%r11, %rcx	')
	CALL( mpn_invert_limb)
	pop	%r11
	pop	%r10
	pop	%r8

C Correct the value returned for d1 so that it also accounts for d0.  rcx:r9
C is a two-word running value and rdi is the inverse being decremented.
C NOTE(review): this looks like the usual adjustment that produces a 3/2
C inverse for {d1,d0} -- confirm against the C reference code.
	mov	%r11, %rdx		C rdx = d1
	mov	%rax, %rdi		C rdi = value returned by mpn_invert_limb
	imul	%rax, %rdx		C rdx = low limb of d1 * inverse
	mov	%rdx, %r9
	mul	%r8			C rdx:rax = inverse * d0
	xor	R32(%rcx), R32(%rcx)
	add	%r8, %r9		C r9 += d0
	adc	$-1, %rcx		C rcx = carry - 1
	add	%rdx, %r9		C r9 += high limb of inverse * d0
	adc	$0, %rcx
	js	2f			C rcx negative: no correction needed
1:	dec	%rdi			C inverse -= 1
	sub	%r11, %r9		C rcx:r9 -= d1
	sbb	$0, %rcx
	jns	1b			C repeat until rcx:r9 goes negative
2:

	lea	(%rbp,%r14,8), %rbp	C rbp = &qp[un+fn-3], first limb stored
	mov	%r11, %rsi
	neg	%rsi			C -d1

C Register roles inside the loop, derived from the instructions below:
C   rax, rdx  scratch, product halves and the add-back value
C   rbx       nh, high limb of the running remainder
C   rcx       nl + low limb of dinv * nh, used to decide the q adjustment
C   rsi       -d1
C   rdi       dinv
C   rbp       quotient store pointer, moves downwards
C   r8        d0
C   r9        q, quotient limb being formed
C   r10       nl, low limb of the running remainder
C   r11       d1
C   r12       pointer to the next dividend limb to fetch, moves downwards
C   r13       fn
C   r14       loop counter, counts down from un + fn - 3 to 0
C   r15       msl, most significant quotient limb (the return value)

	ALIGN(16)
L(top):	mov	%rdi, %rax		C rax = dinv [di, ncp]
	mul	%rbx			C rdx:rax = dinv * nh [0, 17]
	mov	%r10, %rcx		C rcx = nl
	add	%rax, %rcx		C rcx = nl + low(dinv * nh) [4]
	adc	%rbx, %rdx		C rdx = high(dinv * nh) + nh + carry [5]
	mov	%rdx, %r9		C q = candidate quotient limb [6]
	imul	%rsi, %rdx		C rdx = low limb of -d1 * q [6]
	mov	%r8, %rax		C rax = d0 [ncp]
	lea	(%rdx, %r10), %rbx	C nh = nl - d1 * q [n1 -= ..., 10]
	xor	R32(%r10), R32(%r10)	C next low limb defaults to zero
	mul	%r9			C rdx:rax = d0 * q [7]
	cmp	%r14, %r13		C flags from fn - counter, signed test
	jg	L(19)			C fn > counter: fraction part, keep zero
	mov	(%r12), %r10		C otherwise fetch the next dividend limb
	sub	$8, %r12		C and step the dividend pointer down
L(19):	sub	%r8, %r10		C {nh,nl} -= {d1,d0} [ncp]
	sbb	%r11, %rbx		C [11]
	sub	%rax, %r10		C {nh,nl} -= d0 * q [11]
	sbb	%rdx, %rbx		C [12]
	xor	R32(%rax), R32(%rax)	C add-back value defaults to zero
	xor	R32(%rdx), R32(%rdx)	C
	cmp	%rcx, %rbx		C CF = (nh < rcx) [13]
	cmovnc	%r8, %rax		C CF clear: add-back value = {d1,d0} [14]
	cmovnc	%r11, %rdx		C [14]
	adc	$0, %r9			C CF set: q += 1 instead [adjust q, 14]
	nop				C NOTE(review): presumably scheduling padding
	add	%rax, %r10		C {nh,nl} += add-back value [15]
	adc	%rdx, %rbx		C [16]
	cmp	%r11, %rbx		C flags from nh - d1
	jae	L(fix)			C nh >= d1: may need one more correction
L(bck):	mov	%r9, (%rbp)		C store the quotient limb
	sub	$8, %rbp		C step the quotient pointer down
	dec	%r14
	jns	L(top)

C Store the remainder just above r12.  Once the un - 2 lower dividend limbs
C have been consumed r12 is &up[-1], so these stores hit up[0] and up[1].
C That also holds on the early exit from L(2), where un + fn - 3 < 0.
L(end):	mov	%r10, 8(%r12)
	mov	%rbx, 16(%r12)
	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	pop	%r14
	mov	%r15, %rax		C return msl
	pop	%r15
	FUNC_EXIT()
	ret

C Out-of-line correction, entered with flags from nh - d1 and nh >= d1.
C If {nh,nl} >= {d1,d0} then bump q and subtract the divisor once more.
L(fix):	seta	%dl			C dl = (nh > d1)
	cmp	%r8, %r10		C flags from nl - d0
	setae	%al			C al = (nl >= d0)
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C both clear: nh == d1 and nl < d0
	inc	%r9			C q += 1
	sub	%r8, %r10		C {nh,nl} -= {d1,d0}
	sbb	%r11, %rbx
	jmp	L(bck)
EPILOGUE()