51Nod 1228. 序列求和/51Nod 1258. 序列求和 V4[等幂求和](伯努利数+多项式逆元/多项式插值) 日付: 10月 26, 2018 リンクを取得 Facebook × Pinterest メール 他のアプリ [序列求和问题 - 51Nod](https://www.51nod.com/onlineJudge/questionCode.html#!problemId=1228) ## Problem 求$S_k(n) = \sum_{i=1}^ n i ^ k \mod 1000000007$ 数据范围:序列求和问题,$1 \le n \le 10^{18}, 1 \le k \le 2000$, 测试样例数$1 \le T \le 5000$ 序列求和V4问题,$1 \le n \le 10^{18}, 1 \le k \le 50000$, 测试样例数$1 \le T \le 500$ ## Solution ### Naive Method 等幂求和问题一般思路是使用递推方法 $$ \begin{aligned} (n+1)^{k+1} - n^{k+1} &= \sum_{i=0}^{k} \binom{k+1}{i}n^i \\ \cdots \\ 1^{k+1} - 0^{k+1} &= \sum_{i=0}^{k}\binom{k+1}{i}1^i \end{aligned} $$ 把等式左右相加得到 $$ (n+1)^{k+1} = \sum_{i=0}^{k}\binom{k+1}{i}(1^i + \cdots + n^i)$$ $$S_k(n) = \frac{(n+1)^{k+1} - \sum_{i=0}^{k-1}\binom{k+1}{i}S_i(n)}{k+1}$$ 这样对每次询问,可以$O(k^2)$时间复杂度递推求出$S_k(n)$ ### Bernoulli Number 伯努利数的前几项为$B_0=1, B_1^{\pm}=\pm \frac{1}{2}, B_2 = \frac{1}{6}, B_3=0, B_4=-\frac{1}{30}$。其中,$B_n^-$是第一伯努利数,$B_n^+$是第二伯努利数,区别在于$B_1$的符号。 对于等幂求和,有伯努利公式 $$S_k(n) = \frac{1}{k+1} \sum_{i=0}^ k \binom{k+1}{i} B_i^+ n^{k+1-i}$$ 对于第一伯努利数,貌似下面的式子是成立的,我一开始就是这样过的,不过不是很确定... $$S_k(n) = \frac{1}{k+1} \sum_{i=0}^ k \binom{k+1}{i} B_i^- (n+1)^{k+1-i}$$ 因此只要预处理出前$k$个伯努利数$B_i$,每次询问就可以线性算出$S_k(n)$,这道题询问很多所以会TLE 伯努利数满足$B_0=1$,且有递推公式$$\sum_{i=0}^n \binom{n+1}{i}B_i^- = 0$$ 因此可以用$O(k^2)$时间算出前$k$项伯努利数 对于V4版问题,因为$k$最大50000,显然$O(k^2)$会超时。需要用更好的方法找出伯努利数。我们可以利用伯努利数的指数母函数(Generating Function)。 $$EG(B_n;x)=\sum_{i=0}^ \infty \frac{B_i}{i!}x^i = \frac{x}{e^x-1} = \frac{x}{\sum_{i=1}^ \infty \frac{x^i}{i!}} = \frac{1}{\sum_{i=0}^ \infty \frac{x^i}{(i+1)!}}$$ 该母函数中$x^i$的系数就是$\frac{B_i}{i!}$,因此求出母函数在模$x^{k+1}$意义下的多项式,就可以得到前$k$个伯努利数了。现在问题转化为了找多项式$\sum_{i=0}^ \infty \frac{x^i}{(i+1)!}$在模$x^{k+1}$意义下的逆元多项式。 求多项式逆元可以用FFT以$O(klog(k))$复杂度计算。 ### Polynomial Interpolation 这两题还可以用插值法做,根据之前的递推式可以看出,最后$S(n)$一定是一个关于$n$的$k+1$次多项式,因此只需要有$k+2$个点的取值就可以算出对于任意的$n$的值。 插值可以用拉格朗日插值法(Lagrange Interpolation)来做,朴素的写法时间复杂度是$O(k^2)$,但是由于本题的一些限制条件(取点可以用$0 \sim k+1$,就可以优化插值复杂度为$O(k)$。 拉格朗日插值公式为$$L(x)=\sum_{j=0}^ k y_j l_j(x), \quad l_j(x)=\prod_{i=0,i \neq j}^ k\frac{x-x_i}{x_j-x_i}$$ 如果限制了$x_j=j$的情况下 $$L(x)=\sum_{j=0}^ k(-1)^{k-j}y_j\frac{\prod_{i=x-j+1}^x i \prod_{i=x-k} ^{x-j-1} i}{(k-j)!j!}$$ 对于右边的分数式,分子明显可以线性递推计算。分母可以维护其逆元,初始逆元是当$j=0$时,$inv(k!)=\prod_{j=1}^ k inv(j)$,从$j$向第$j+1$项递推时,分母变成了$(k-j-1)!(j+1)!$,相当于乘上了$j+1$并除掉了$k-j$,也就相当于分母逆元乘上$k-j$并除掉了$j+1$,等价于乘上$(k-j)inv(j+1)$。可以发现整个过程中使用的逆元都在$[1,k]$范围内,所以可以$O(k)$先预计算$k$以内的逆元,然后分母的整个递推过程复杂度也是$O(k)$。因为其余部分都是常数时间计算的,计算插值的整个循环的复杂度也是$O(k)$。 还有个问题就是计算前$k+1$项$1^ k + 2^ k + \cdots + n^k$如果用快速幂计算的话本身是$O(klog(k))$的,在本问题中还是可能超时,但是因为$i^k$是积性函数,可以用欧拉筛法线性计算。因此整个算法都是$O(k)$的。 ## Code Bernoulli Numbers, $O(k^2)$ Preprocessing, $O(k)$ Query ```c++ #include using namespace std; using LL = long long; LL inv(LL x, LL m) { return x > m ? inv(x % m, m) : x > 1 ? inv(m % x, m) * (m - m / x) % m : x; } LL mpow(LL a, LL k, LL m) { LL r(1); for (a %= m; k; k >>= 1, a = a * a % m) if (k & 1)r = r * a % m; return r; } struct Combine { using LL = long long; LL m; vector> F; Combine(int n, LL mod) : m(mod), F(n + 1) { assert(n <= 5000 && mod > 1); for (int i = 1; i <= n; i++) F[i].resize(i + 1), F[i][0] = 1; } LL com(int n, int k) { return assert(0 <= k && k <= n), k + k > n ? com(n, n - k) : F[n][k] ? F[n][k] : F[n][k] = (com(n - 1, k) + com(n - 1, k - 1)) % m; } }; Combine cb(2001, 1e9 + 7); vector bernouli_n(int n, int m = 1e9 + 7) { vector B(n); B[0] = 1; for (int i = 1; i < n; i++) { LL tmp = 0; for (int j = 0; j < i; j++) tmp = (tmp + 1LL * cb.com(i + 1, j) * B[j] % m) % m; B[i] = (m - tmp) * inv(i + 1, m) % m; } B[1] = inv(2, m); return B; } auto B = bernouli_n(2001); int main() { const int Mod = 1e9 + 7; int t; scanf("%d", &t); while (t--) { LL n, k; scanf("%lld%lld", &n, &k); LL ans = 0; for (int i = 0; i <= k; i++) { ans += B[i] * mpow(n, k + 1 - i, Mod) % Mod * cb.com(k + 1, i) % Mod; ans %= Mod; } ans = ans * inv(k + 1, Mod) % Mod; printf("%lld\n", ans); } return 0; } ``` Bernouli Numbers, $O(klog(k))$ Preprocessing, $O(k)$ Query ```c++ #include using namespace std; namespace MFFT { static const double PI = acos(-1.0); template void arrange(vector &A) { int n = A.size(); assert(n == (n & -n)); for (int i = 1, x = 0, y = 0; i <= n; i++) { if (x > y) swap(A[x], A[y]); x ^= i & -i, y ^= n / (i & -i) >> 1; } } template void fourier(vector> &A, int inv) { assert((inv == 1 || inv == -1) && is_floating_point::value); int n = 1 << (32 - __builtin_clz(A.size() - 1)); A.resize(n), arrange(A); vector> W(n >> 1, {1, 0}); for (int l = 1; l < n; l <<= 1) { complex wl(cos(inv * PI / l), sin(inv * PI / l)), t; for (int i = l - 2; i >= 0; i -= 2) W[i] = W[i >> 1]; for (int i = 1; i < l; i += 2) W[i] = W[i - 1] * wl; for (int i = 0; i < l; i++) for (int s = 0; s < n; s += l + l) t = W[i] * A[s + i + l], A[s + i + l] = A[s + i] - t, A[s + i] += t; } if (inv == -1) for (int i = 0; i < n; i++) A[i] /= n; } template vector multiply(const vector &A, const vector &B, int p = 1e9 + 7) { assert(is_integral::value); using CD = complex; // or long double; using LL = long long; int s = A.size() + B.size() - 1; vector U, V; for (auto x : A) U.emplace_back(x >> 15, x & 32767); for (auto x : B) V.emplace_back(x >> 15, x & 32767); U.resize(s), V.resize(s), fourier(U, 1), fourier(V, 1); for (size_t i = 0, n = U.size(); i + i <= n; i++) { size_t j = (n - i) & (n - 1); auto a = U[i], b = U[j], c = V[i], d = V[j]; U[i] = (a + conj(b)) * (c + conj(d)) * 0.25 - (a - conj(b)) * (c - conj(d)) * CD(0, 0.25); U[j] = (b + conj(a)) * (d + conj(c)) * 0.25 - (b - conj(a)) * (d - conj(c)) * CD(0, 0.25); V[i] = CD(0, 0.5) * (conj(b * d) - a * c), V[j] = CD(0, 0.5) * (conj(a * c) - b * d); } fourier(U, -1), fourier(V, -1); vector R(s); for (int i = 0; i < s; i++) { LL t1 = (LL)(U[i].real() + 0.5) % p; LL t2 = (LL)(V[i].real() + 0.5) % p; LL t3 = (LL)(U[i].imag() + 0.5) % p; R[i] = ((t1 << 30) % p + (t2 << 15) % p + t3) % p; } return R; } int mpow(long long a, int k, int m) { int r(1); for (a %= m; k; k >>= 1, a = a * a % m) if (k & 1)r = r * a % m; return r; } template vector inv(const vector &A, int n, int p) { assert(is_integral::value && A[0]); vector R; function inv_ = [&](int n) { if (n == 1) return R = {mpow(A[0], p - 2, p)}, void(); inv_((n + 1) >> 1); auto Tmp = multiply(A, R, p); Tmp[0] = (2 - Tmp[0] + p) % p; for (size_t i = 1; i < Tmp.size(); i++) Tmp[i] = Tmp[i] ? p - Tmp[i] : 0; R = multiply(R, Tmp, p); R.resize(n); }; inv_(n); return R; } }; // 51Nod. 1228 using LL = long long; LL inv(LL x, LL m) { return x > m ? inv(x % m, m) : x > 1 ? inv(m % x, m) * (m - m / x) % m : x; } LL mpow(LL a, LL k, LL m) { LL r(1); for (a %= m; k; k >>= 1, a = a * a % m) if (k & 1)r = r * a % m; return r; } vector bernouli_n(int n, int m = 1e9 + 7) { vector A; for (int i = 0, tmp = 1; i < n; i++) { tmp = 1LL * tmp * (i + 1) % m; A.push_back(inv(tmp, m)); } auto B = MFFT::inv(A, n, m); for (int i = 1, tmp = 1; i < n; i++) { tmp = 1LL * tmp * i % m; B[i] = 1LL * B[i] * tmp % m; } B[1] = inv(2, m); return B; } auto B = bernouli_n(50001); int main() { using LL = long long; const int Mod = 1e9 + 7; vector Inv(50002, 1); for (int i = 2; i <= 50001; i++) Inv[i] = 1LL * (Mod - Mod / i) * Inv[Mod % i] % Mod; int t; scanf("%d", &t); while (t--) { LL n, k; scanf("%lld%lld", &n, &k); LL ans = 0, tmp1 = mpow(n, k + 1, Mod), tmp2 = 1, invn = inv(n, Mod); for (int i = 0; i <= k; i++) { ans += B[i] * tmp1 % Mod * tmp2 % Mod; tmp1 = tmp1 * invn % Mod; tmp2 = tmp2 * (k + 1 - i) % Mod * Inv[i + 1] % Mod; ans %= Mod; } ans = ans * inv(k + 1, Mod) % Mod; printf("%lld\n", ans); } return 0; } ``` Lagrange Interpolation, $O(k)$ Query ```c++ #include using namespace std; using LL = long long; LL mpow(LL a, LL k, LL m) { LL r(1); for (a %= m; k; k >>= 1, a = a * a % m) if (k & 1)r = r * a % m; return r; } struct Lagrange { using LL = long long; LL n, m; vector Inv, Den; Lagrange(LL n, LL m): n(n), m(m), Inv(n, 1), Den(n, 1) { for (int i = 2; i < n; i++) Inv[i] = (m - m / i) * Inv[m % i] % m, Den[i] = Den[i - 1] * Inv[i] % m; } LL inv(LL x) { return x < n ? Inv[x] : x >= m ? inv(x % m) : (m - m / x) * inv(m % x) % m; } inline LL sub(LL x, LL y) { return (x - y + m) % m; } inline LL add(LL x, LL y) { return (x + y) % m; } // O(k^2), naive template LL interp(const vector &X, const vector &Y, LL x, LL m = 1e9 + 7) { LL ans = 0; for (size_t i = 0; i < X.size(); i++) { LL num = Y.at(i), den = 1; for (size_t j = 0; j < X.size(); j++) if (j != i) num = num * sub(x, X[j]) % m, den = den * sub(X[i], X[j]) % m; ans = (ans + num * inv(den)) % m; } return ans; } // O(k), for X is [0, 1, ..., k] template LL interp_fast(const vector &Y, LL x, LL m = 1e9 + 7) { int k = (int)Y.size() - 1; assert(k < n); if (x <= k) return Y[x]; vector Suf(1, 1); for (LL j = k, tmp = 1; j > 0; j--) Suf.push_back(tmp = tmp * sub(x, j) % m); LL ans = 0, den = Den[k]; for (LL j = 0, pref = 1; j <= k; j++) { LL num = pref * Suf.back() % m; pref = pref * sub(x, j) % m, Suf.pop_back(); LL t = num * den % m * Y[j] % m; ans = (k - j) & 1 ? sub(ans, t) : add(ans, t); if (j < k) den = den * Inv[j + 1] % m * (k - j) % m; } return ans; } }; int main() { const int Mod = 1e9 + 7; int t; scanf("%d", &t); Lagrange lg(2001, Mod); while (t--) { LL n, k; scanf("%lld%lld", &n, &k); vector L(k + 2), P, Y(k + 2); L[1] = 1, Y[1] = 1; for (int x = 2; x <= k + 1; x++) { if (!L[x]) P.push_back(L[x] = x), Y[x] = mpow(x, k, Mod); for (auto p : P) { if (x * p >= k + 1) break; L[x * p] = p; Y[x * p] = 1LL * Y[x] * Y[p] % Mod; if (x % p == 0) break; } } for (int i = 1; i <= k + 1; i++) Y[i] = (Y[i - 1] + Y[i]) % Mod; printf("%lld\n", lg.interp_fast(Y, n, Mod)); } return 0; } ``` コメント
コメント
コメントを投稿