/* mpn_mulmid -- middle product Contributed by David Harvey. THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. Copyright 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" #define CHUNK (200 + MULMID_TOOM42_THRESHOLD) void mpn_mulmid (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) { mp_size_t rn, k; mp_ptr scratch, temp; ASSERT (an >= bn); ASSERT (bn >= 1); ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an)); ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn)); if (bn < MULMID_TOOM42_THRESHOLD) { /* region not tall enough to make toom42 worthwhile for any portion */ if (an < CHUNK) { /* region not too wide either, just call basecase directly */ mpn_mulmid_basecase (rp, ap, an, bp, bn); return; } /* Region quite wide. For better locality, use basecase on chunks: AAABBBCC.. .AAABBBCC. ..AAABBBCC */ k = CHUNK - bn + 1; /* number of diagonals per chunk */ /* first chunk (marked A in the above diagram) */ mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); /* remaining chunks (B, C, etc) */ an -= k; while (an >= CHUNK) { mp_limb_t t0, t1, cy; ap += k, rp += k; t0 = rp[0], t1 = rp[1]; mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ MPN_INCR_U (rp + 1, k + 1, t1 + cy); an -= k; } if (an >= bn) { /* last remaining chunk */ mp_limb_t t0, t1, cy; ap += k, rp += k; t0 = rp[0], t1 = rp[1]; mpn_mulmid_basecase (rp, ap, an, bp, bn); ADDC_LIMB (cy, rp[0], rp[0], t0); MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy); } return; } /* region is tall enough for toom42 */ rn = an - bn + 1; if (rn < MULMID_TOOM42_THRESHOLD) { /* region not wide enough to make toom42 worthwhile for any portion */ TMP_DECL; if (bn < CHUNK) { /* region not too tall either, just call basecase directly */ mpn_mulmid_basecase (rp, ap, an, bp, bn); return; } /* Region quite tall. For better locality, use basecase on chunks: AAAAA.... .AAAAA... ..BBBBB.. ...BBBBB. ....CCCCC */ TMP_MARK; temp = TMP_ALLOC_LIMBS (rn + 2); /* first chunk (marked A in the above diagram) */ bp += bn - CHUNK, an -= bn - CHUNK; mpn_mulmid_basecase (rp, ap, an, bp, CHUNK); /* remaining chunks (B, C, etc) */ bn -= CHUNK; while (bn >= CHUNK) { ap += CHUNK, bp -= CHUNK; mpn_mulmid_basecase (temp, ap, an, bp, CHUNK); mpn_add_n (rp, rp, temp, rn + 2); bn -= CHUNK; } if (bn) { /* last remaining chunk */ ap += CHUNK, bp -= bn; mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn); mpn_add_n (rp, rp, temp, rn + 2); } TMP_FREE; return; } /* we're definitely going to use toom42 somewhere */ if (bn > rn) { /* slice region into chunks, use toom42 on all chunks except possibly the last: AA.... .AA... ..BB.. ...BB. ....CC */ TMP_DECL; TMP_MARK; temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn)); scratch = temp + rn + 2; /* first chunk (marked A in the above diagram) */ bp += bn - rn; mpn_toom42_mulmid (rp, ap, bp, rn, scratch); /* remaining chunks (B, C, etc) */ bn -= rn; while (bn >= rn) { ap += rn, bp -= rn; mpn_toom42_mulmid (temp, ap, bp, rn, scratch); mpn_add_n (rp, rp, temp, rn + 2); bn -= rn; } if (bn) { /* last remaining chunk */ ap += rn, bp -= bn; mpn_mulmid (temp, ap, rn + bn - 1, bp, bn); mpn_add_n (rp, rp, temp, rn + 2); } TMP_FREE; } else { /* slice region into chunks, use toom42 on all chunks except possibly the last: AAABBBCC.. .AAABBBCC. ..AAABBBCC */ TMP_DECL; TMP_MARK; scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn)); /* first chunk (marked A in the above diagram) */ mpn_toom42_mulmid (rp, ap, bp, bn, scratch); /* remaining chunks (B, C, etc) */ rn -= bn; while (rn >= bn) { mp_limb_t t0, t1, cy; ap += bn, rp += bn; t0 = rp[0], t1 = rp[1]; mpn_toom42_mulmid (rp, ap, bp, bn, scratch); ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ MPN_INCR_U (rp + 1, bn + 1, t1 + cy); rn -= bn; } TMP_FREE; if (rn) { /* last remaining chunk */ mp_limb_t t0, t1, cy; ap += bn, rp += bn; t0 = rp[0], t1 = rp[1]; mpn_mulmid (rp, ap, rn + bn - 1, bp, bn); ADDC_LIMB (cy, rp[0], rp[0], t0); MPN_INCR_U (rp + 1, rn + 1, t1 + cy); } } }