diff --git a/include/NTL/ZZ_pX.h b/include/NTL/ZZ_pX.h index d9bd98d..5d1c98f 100644 --- a/include/NTL/ZZ_pX.h +++ b/include/NTL/ZZ_pX.h @@ -548,6 +548,13 @@ inline ZZ_pX power(const ZZ_pX& a, long e) { ZZ_pX x; power(x, a, e); NTL_OPT_RETURN(ZZ_pX, x); } +void ExpTrunc(ZZ_pX& x, const ZZ_pX& a, long n); +inline ZZ_pX ExpTrunc(const ZZ_pX& a, long n) + { ZZ_pX x; ExpTrunc(x, a, n); NTL_OPT_RETURN(ZZ_pX, x); } + + + + // The following data structures and routines allow one // to hand-craft various algorithms, using the FFT convolution // algorithms directly. diff --git a/include/NTL/lzz_pX.h b/include/NTL/lzz_pX.h index 474d791..f864402 100644 --- a/include/NTL/lzz_pX.h +++ b/include/NTL/lzz_pX.h @@ -553,6 +553,9 @@ inline zz_pX power(const zz_pX& a, long e) { zz_pX x; power(x, a, e); NTL_OPT_RETURN(zz_pX, x); } +void ExpTrunc(zz_pX& x, const zz_pX& a, long n); +inline zz_pX ExpTrunc(const zz_pX& a, long n) + { zz_pX x; ExpTrunc(x, a, n); NTL_OPT_RETURN(zz_pX, x); } diff --git a/src/ZZ_pX1.cpp b/src/ZZ_pX1.cpp index b29f418..65da770 100644 --- a/src/ZZ_pX1.cpp +++ b/src/ZZ_pX1.cpp @@ -1753,6 +1753,78 @@ void SqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n) } +// slight variant of https://homepages.loria.fr/PZimmermann/papers/fastnewton.ps.gz §5 +static void ExpTruncFast(ZZ_pX& f, ZZ_pX& g, const ZZ_pX& h, long prec) +{ + switch (prec) { + case 0: + clear(f); + clear(g); + return; + case 1: + f = 1; + clear(g); + return; + case 2: + f = 1 + trunc(h, 2); + g = 1; + return; + case 3: + f = 1 + trunc(h, 3) + MulTrunc(h, h, 3) / 2; + g = 1 - trunc(h, 2); + return; + } + + { + ZZ_pX h2 = MulTrunc(h, h, 4) / 2; + ZZ_pX h3 = MulTrunc(h, h2, 4) / 3; + f = 1 + trunc(h, 4) + h2 + h3; + g = 1 - trunc(h, 2); + } + + // invariant: f = exp(h) + O(x^n), g = exp(-h) + O(x^(n/2)) + for (long n = 4; n < prec; n *= 2) { + + // step 2 + ZZ_pX m1 = MulTrunc(f, sqr(g), n); + g.SetLength(n); + for (long i = n/2; i < n; ++i) + g[i] = -coeff(m1, i); + g.normalize(); + + // step 3 + ZZ_pX q = diff(trunc(h, n)); + + // step 4 + ZZ_pX s = MulTrunc(g, f*q >> (n-1), n); + q.SetLength(2*n-1); + for (long i = 0; i <= deg(s); ++i) + q[n-1+i] = -s[i]; + q.normalize(); + + // step 5 + unsigned long length = NTL::min(deg(q)+2, prec); + ZZ_pX intq; + intq.SetLength(length); + intq[0] = 0; + for (long i = 1; i < length; ++i) + intq[i] = q[i-1] / i; + intq.normalize(); + f = MulTrunc(f, 1 + h - intq, NTL::min(2*n, prec)); + } +} + +void ExpTrunc(ZZ_pX& x, const ZZ_pX& a, long n) +{ + if (n < 1) + LogicError("ExpTrunc: bad args"); + if (ConstTerm(a) != 0) + LogicError("ExpTrunc: nonzero constant term"); + ZZ_pX y; + ExpTruncFast(x, y, a, n); +} + + void FastTraceVec(vec_ZZ_p& S, const ZZ_pX& f) { long n = deg(f); diff --git a/src/ZZ_pXCharPoly.cpp b/src/ZZ_pXCharPoly.cpp index 964d008..991687d 100644 --- a/src/ZZ_pXCharPoly.cpp +++ b/src/ZZ_pXCharPoly.cpp @@ -29,14 +29,15 @@ void HessCharPoly(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& f) CharPoly(g, M); } +// "well-known" algorithm according to https://arxiv.org/pdf/1109.4323.pdf p.11 void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& ff) { ZZ_pX f = ff; MakeMonic(f); long n = deg(f); - if (n <= 0 || deg(a) >= n) - LogicError("CharPoly: bad args"); + if (n <= 0 || deg(a) >= n) + LogicError("CharPolyMod: bad args"); if (IsZero(a)) { clear(g); @@ -44,33 +45,38 @@ void CharPolyMod(ZZ_pX& g, const ZZ_pX& a, const ZZ_pX& ff) return; } - if (n > 25) { - ZZ_pX h; - MinPolyMod(h, a, f); - if (deg(h) == n) { - g = h; - return; - } - } - if (ZZ_p::modulus() < n+1) { - HessCharPoly(g, a, f); + MinPolyMod(g, a, f); + if (deg(g) < n) + HessCharPoly(g, a, f); return; } - vec_ZZ_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1); + // compute tr(x^0 mod f), tr(x^1 mod f), ..., tr(x^(n-1) mod f) as rev(f')/rev(f) + // see for instance https://cr.yp.to/papers/newton.pdf + vec_ZZ_p T(INIT_SIZE, n); + { + ZZ_pX rev_f = reverse(f); + ZZ_pX rev_df = reverse(diff(f)); + ZZ_pX inv_rev_f = InvTrunc(rev_f, n); + ZZ_pX quot = MulTrunc(rev_df, inv_rev_f, n); + VectorCopy(T, quot, n); + } - ZZ_pX h, h1; - negate(h, a); - long i; + // compute tr(a^0 mod f), tr(a^1 mod f), ..., tr(a^n mod f) + vec_ZZ_p tr = ProjectPowers(T, n+1, a, ff); - for (i = 0; i <= n; i++) { - u[i] = i; - add(h1, h, u[i]); - resultant(v[i], f, h1); - } + // recover characteristic polynomial using exp(∫g'/g) = g; + // "well-known" according to https://dl.acm.org/doi/pdf/10.1145/1145768.1145814 Prop. 9 + ZZ_pX trpoly; + trpoly.SetLength(n+1); + trpoly[0] = 0; + for (long i = 1; i < n+1; ++i) + trpoly[i] = -tr[i] / i; + trpoly.normalize(); - interpolate(g, u, v); + ExpTrunc(g, trpoly, n+1); + reverse(g, g, n); } NTL_END_IMPL diff --git a/src/lzz_pX1.cpp b/src/lzz_pX1.cpp index 798f629..e232125 100644 --- a/src/lzz_pX1.cpp +++ b/src/lzz_pX1.cpp @@ -1668,6 +1668,79 @@ void SqrTrunc(zz_pX& x, const zz_pX& a, long n) +// slight variant of https://homepages.loria.fr/PZimmermann/papers/fastnewton.ps.gz §5 +static void ExpTruncFast(zz_pX& f, zz_pX& g, const zz_pX& h, long prec) +{ + switch (prec) { + case 0: + clear(f); + clear(g); + return; + case 1: + f = 1; + clear(g); + return; + case 2: + f = 1 + trunc(h, 2); + g = 1; + return; + case 3: + f = 1 + trunc(h, 3) + MulTrunc(h, h, 3) / 2; + g = 1 - trunc(h, 2); + return; + } + + { + zz_pX h2 = MulTrunc(h, h, 4) / 2; + zz_pX h3 = MulTrunc(h, h2, 4) / 3; + f = 1 + trunc(h, 4) + h2 + h3; + g = 1 - trunc(h, 2); + } + + // invariant: f = exp(h) + O(x^n), g = exp(-h) + O(x^(n/2)) + for (long n = 4; n < prec; n *= 2) { + + // step 2 + zz_pX m1 = MulTrunc(f, sqr(g), n); + g.SetLength(n); + for (long i = n/2; i < n; ++i) + g[i] = -coeff(m1, i); + g.normalize(); + + // step 3 + zz_pX q = diff(trunc(h, n)); + + // step 4 + zz_pX s = MulTrunc(g, f*q >> (n-1), n); + q.SetLength(2*n-1); + for (long i = 0; i <= deg(s); ++i) + q[n-1+i] = -s[i]; + q.normalize(); + + // step 5 + unsigned long length = NTL::min(deg(q)+2, prec); + zz_pX intq; + intq.SetLength(length); + intq[0] = 0; + for (long i = 1; i < length; ++i) + intq[i] = q[i-1] / i; + intq.normalize(); + f = MulTrunc(f, 1 + h - intq, NTL::min(2*n, prec)); + } +} + +void ExpTrunc(zz_pX& x, const zz_pX& a, long n) +{ + if (n < 1) + LogicError("ExpTrunc: bad args"); + if (ConstTerm(a) != 0) + LogicError("ExpTrunc: nonzero constant term"); + zz_pX y; + ExpTruncFast(x, y, a, n); +} + + + void FastTraceVec(vec_zz_p& S, const zz_pX& f) { long n = deg(f); diff --git a/src/lzz_pXCharPoly.cpp b/src/lzz_pXCharPoly.cpp index f971981..70c14bf 100644 --- a/src/lzz_pXCharPoly.cpp +++ b/src/lzz_pXCharPoly.cpp @@ -29,14 +29,15 @@ void HessCharPoly(zz_pX& g, const zz_pX& a, const zz_pX& f) CharPoly(g, M); } +// "well-known" algorithm according to https://arxiv.org/pdf/1109.4323.pdf p.11 void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& ff) { zz_pX f = ff; MakeMonic(f); long n = deg(f); - if (n <= 0 || deg(a) >= n) - LogicError("CharPoly: bad args"); + if (n <= 0 || deg(a) >= n) + LogicError("CharPolyMod: bad args"); if (IsZero(a)) { clear(g); @@ -44,33 +45,38 @@ void CharPolyMod(zz_pX& g, const zz_pX& a, const zz_pX& ff) return; } - if (n > 90 || (zz_p::PrimeCnt() <= 1 && n > 45)) { - zz_pX h; - MinPolyMod(h, a, f); - if (deg(h) == n) { - g = h; - return; - } - } - if (zz_p::modulus() < n+1) { - HessCharPoly(g, a, f); + MinPolyMod(g, a, f); + if (deg(g) < n) + HessCharPoly(g, a, f); return; } - vec_zz_p u(INIT_SIZE, n+1), v(INIT_SIZE, n+1); + // compute tr(x^0 mod f), tr(x^1 mod f), ..., tr(x^(n-1) mod f) as rev(f')/rev(f) + // see for instance https://cr.yp.to/papers/newton.pdf + vec_zz_p T(INIT_SIZE, n); + { + zz_pX rev_f = reverse(f); + zz_pX rev_df = reverse(diff(f)); + zz_pX inv_rev_f = InvTrunc(rev_f, n); + zz_pX quot = MulTrunc(rev_df, inv_rev_f, n); + VectorCopy(T, quot, n); + } - zz_pX h, h1; - negate(h, a); - long i; + // compute tr(a^0 mod f), tr(a^1 mod f), ..., tr(a^n mod f) + vec_zz_p tr = ProjectPowers(T, n+1, a, ff); - for (i = 0; i <= n; i++) { - u[i] = i; - add(h1, h, u[i]); - resultant(v[i], f, h1); - } + // recover characteristic polynomial using exp(∫g'/g) = g; + // "well-known" according to https://dl.acm.org/doi/pdf/10.1145/1145768.1145814 Prop. 9 + zz_pX trpoly; + trpoly.SetLength(n+1); + trpoly[0] = 0; + for (long i = 1; i < n+1; ++i) + trpoly[i] = -tr[i] / i; + trpoly.normalize(); - interpolate(g, u, v); + ExpTrunc(g, trpoly, n+1); + reverse(g, g, n); } NTL_END_IMPL