Skip to content

Instantly share code, notes, and snippets.

@Mivik
Created February 19, 2021 06:56
Show Gist options
  • Save Mivik/0dd1068c0d768aa7f53abd78f9d4cf0f to your computer and use it in GitHub Desktop.
Save Mivik/0dd1068c0d768aa7f53abd78f9d4cf0f to your computer and use it in GitHub Desktop.
C++ program that computes subword complexity polynomial. See https://mivik.gitee.io/2021/research/expected-subword-complexity-en/.
// Requires: libgmp-dev
// Recommended compiler options: -std=c++11 -Ofast -lgmp -lgmpxx
// Mivik 2021.1.25
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>
const int N = 60; // maximum length of the string
template<class V, class Inv, class Num, class Neg, class Empty>
class poly { // polynomial with single indeterminate
private:
std::vector<V> v;
public:
poly() {}
poly(const std::initializer_list<V> &list): v(list) {}
inline V &operator[](size_t i) { return v[i]; }
inline V operator[](size_t i) const { return v[i]; }
inline void reset() { v.clear(); }
inline size_t size() const { return v.size(); }
inline void resize(size_t len) { v.resize(len); }
inline void keep(size_t len) { if (v.size() > len) resize(len); }
// Calculate the product of polynomials `a` and `b` and put it in `this`
inline void from_mul(const poly &a, const poly &b) {
reset(); if (!a.size() || !b.size()) return;
resize(a.size() + b.size() - 1);
for (size_t i = a.size() - 1; ~i; --i)
for (size_t j = b.size() - 1; ~j; --j)
v[i + j] += a[i] * b[j];
}
inline poly operator*(const poly &t) const { static poly tmp; tmp.from_mul(*this, t); return tmp; }
inline poly operator*(int t) const {
poly ret; ret.resize(size());
for (size_t i = size() - 1; ~i; --i) ret[i] = v[i] * t;
return ret;
}
// Calculate the multiplicative inverse of `a` and put it in `this`
// Modulo (x ^ lim)
inline void from_inv(const poly &a, size_t lim) {
static poly t, d;
reset(); resize(lim);
v[0] = Inv()(a[0]);
const size_t s = lim << 1;
for (size_t l = 2; l < s; l <<= 1) {
t.from_mul(*this, a);
for (size_t i = 0; i < lim; ++i) t[i] = Neg()(t[i]);
t[0] += Num()(2);
d.from_mul(*this, t);
std::copy(d.v.begin(), d.v.begin() + std::min(l, lim), v.begin());
}
}
// Shift the content in this polynomial by `offset`, equivalent to multiplying (x ^ offset)
inline void shift(int offset) { v.insert(v.begin(), offset, Num()(0)); }
inline void operator+=(const poly &t) {
if (t.size() > size()) resize(t.size());
for (size_t i = std::min(size(), t.size()) - 1; ~i; --i) v[i] += t.v[i];
}
inline void operator-=(const poly &t) {
if (t.size() > size()) resize(t.size());
for (size_t i = std::min(size(), t.size()) - 1; ~i; --i) v[i] -= t.v[i];
while (!v.empty() && Empty()(v.back())) v.pop_back();
}
inline poly operator+(const poly &t) const { poly r = *this; r += t; return r; }
};
typedef __int128_t num;
struct num_inv { inline num operator()(const num &v) const { assert(v == 1); return 1; } };
struct num_num { inline num operator()(int v) const { return num(v); } };
struct num_neg { inline num operator()(const num &v) const { return -v; } };
struct num_empty { inline bool operator()(const num &v) const { return !v; } };
typedef poly<num, num_inv, num_num, num_neg, num_empty> inner;
struct inner_inv {
inline inner operator()(const inner &v) const {
assert(v.size() == 1);
return { num_inv()(v[0]) };
}
};
struct inner_num { inline inner operator()(int v) const { return { num(v) }; } };
struct inner_neg { inline inner operator()(const inner &v) const {
inner r = v;
for (size_t i = r.size() - 1; ~i; --i) r[i] = -r[i];
return r;
} };
struct inner_empty { inline bool operator()(const inner &v) const { return !v.size(); } };
typedef poly<inner, inner_inv, inner_num, inner_neg, inner_empty> aggr;
// Calculate the OGF of words containing a word with length `len` and autocorrelation `cor`
// See https://en.wikipedia.org/wiki/Autocorrelation_(words)#Ordinary_generating_functions
inline const aggr& containing(const aggr &cor, int len) {
static aggr ret, tmp, fin;
ret = { { num(1) }, { num(0), num(-1) } };
tmp.from_mul(ret, cor); tmp.keep(N + 1);
tmp[len] += { num(1) };
fin.from_mul(ret, tmp); fin.keep(N + 1);
ret.from_inv(fin, N + 1);
ret.shift(len);
return ret;
}
namespace gen { // enumeration of autocorrelations
int n;
struct bit_vector { // a simple bit vector containing up to 64 bits
int v[2];
inline bool test(int i) const { return v[i >> 5] & (1 << (i & 31)); }
inline void set(int i) { v[i >> 5] |= 1 << (i & 31); }
inline void reset(int i) { v[i >> 5] &= ~(1 << (i & 31)); }
inline void reset() { v[0] = v[1] = 0; }
inline bool contains(const bit_vector &t) const { return (v[0] & t.v[0]) == t.v[0] && (v[1] & t.v[1]) == t.v[1]; }
} vec;
struct info {
bit_vector vec;
int length;
int nfc; // Number of Free Characters
inner pop; // population of this autocorrelation.
info(bit_vector vec, int length, int nfc, inner pop):
vec(std::move(vec)), length(length), nfc(nfc), pop(pop) {}
};
std::vector<info> rec; // records all the possible autocorrelations with a length <= N
int req[N + 1]; // if (req[i] != 0), we require `i` to be a period.
inline int gcd(int x, int y) { while (x %= y) std::swap(x, y); return y; }
void dfs(
int o, // offset
int lst, // last small period. if the last period is not small then it's 0
int cnt // number of free characters. computed dynamically.
) {
const int n = gen::n - o;
if (!n) {
inner pop; pop.resize(cnt + 1); pop[cnt] = num(1);
for (size_t i = rec.size() - 1; ~i; --i) {
const auto &info = rec[i];
if (info.vec.contains(vec)) pop -= info.pop;
}
rec.emplace_back(vec, gen::n, cnt, pop);
return;
}
int *req = gen::req + o; // ATTENTION!!! name shadowing for convenience
vec.set(o);
for (int p = 1; p < n; ++p) { // enumerate the minimum non-zero period
if (p >= lst || p > (n - lst) + gcd(lst, p)) {
if (p * 2 <= n) { // a small period
const int r = p * (n / p - 1);
bool ok = true;
for (int i = 1; i < r; ++i) // check whether it satisfies previous requirements
if (req[i] && i % p) { ok = false; break; }
if (ok) {
for (int i = 1; i < r; ++i) vec.reset(o + i);
for (int i = p; i < r; i += p) vec.set(o + i);
if (r + p != n) ++req[r + p]; // requirements might overlap so we use int instead of bool
dfs(o + r, p, cnt);
if (r + p != n) --req[r + p];
}
} else {
vec.set(o + p);
dfs(o + p, 0, cnt + (p << 1) - n);
}
}
if (req[p]) return;
vec.reset(o + p);
}
// no period exists
dfs(o + n, 0, cnt + n);
}
inline const std::vector<info>& find_for(int n) {
gen::n = n; rec.clear();
dfs(0, 0, 0); return rec;
}
} // namespace gen
inline std::ostream& operator<<(std::ostream &out, num t) {
static char str[300]; int len = 0;
if (!t) return out << '0';
if (t < 0) { out << '-'; t = -t; }
do { num div = t / 10; str[++len] = (t - div * 10) + '0'; t = div; } while (t);
while (len) out << str[len--]; return out;
}
inner ans[N + 1];
int main() {
aggr cor;
for (int i = 1; i <= N; ++i) {
std::cerr << "Computing for i = " << i << std::endl;
for (const auto &info : gen::find_for(i)) {
const int n = info.length;
cor.reset(); cor.resize(n);
for (int i = 0; i < n; ++i)
if (info.vec.test(i)) cor[i] = { num(1) };
auto &tmp = containing(cor, n);
for (int i = n; i <= N; ++i)
ans[i] += tmp[i] * info.pop;
}
}
for (int i = 0; i <= N; ++i) {
std::cout << i << ": ";
for (int j = 0; j < ans[i].size(); ++j)
std::cout << ans[i][j] << ' ';
std::cout << '\n';
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment