Found the answer.
First, I'll change the notation a bit so the equations become more symmetric.

Using this notation, the plaintext word $PH$ in Matsui's paper becomes $x_0$, and $PL$ becomes $x_1$. The ciphertext word $CH$ becomes $x_{n+1}$, and $CL$ becomes $x_n$.
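With this indexing, the Feistel rounds read
$$
x_{i+1} = x_{i-1} \oplus F(x_i, k_i), \qquad 1 \le i \le n,
$$
where $F$ is the round function and $k_i$ is the $i$-th round key.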
We can find an $n$-round approximation via a graph search. A node in this graph will look like this:
$$
\bigoplus_{i=0}^{n+1} x_i[\delta_i] = \bigoplus_{i=1}^n k_i[\epsilon_i] \tag{1}
$$
where $\delta_i$ and $\epsilon_i$ are bitmasks.
To see what the edges are, say we have a 1-round approximation like this:
$$
x[\alpha] \oplus F(x, k)[\beta] = k[\gamma] \tag{2}
$$
We can instantiate it at round, say, $i$, to get:
$$
x_i[\alpha] \oplus F(x_i, k_i)[\beta] = k_i[\gamma] \tag{3}
$$
and now we can use the Feistel structure, knowing that $x_{i+1} = x_{i-1} \oplus F(x_i, k_i)$, to rewrite $F(x_i, k_i)$ as $x_{i+1} \oplus x_{i-1}$, and so:
$$
x_i[\alpha] \oplus \left(x_{i+1} \oplus x_{i-1}\right)[\beta] = k_i[\gamma] \tag{4}
$$
This will be an edge in our graph. That is, for every one-round approximation, and for every instantiation round $i$, we can create such an edge. If the source of the edge is equation $(1)$, the target of the edge is the following equation, obtained by $\operatorname{XOR}$ing $(1)$ and $(4)$:
$$
\bigoplus_{j=0}^{i-2} x_j[\delta_j] \oplus x_{i-1}[\delta_{i-1} \oplus \beta] \oplus x_i[\delta_i \oplus \alpha] \oplus x_{i+1}[\delta_{i+1} \oplus \beta] \oplus \bigoplus_{j=i+2}^{n+1} x_j[\delta_j] = k_i[\epsilon_i \oplus \gamma] \oplus \bigoplus_{\substack{1 \le j \le n \\ j \neq i}} k_j[\epsilon_j] \tag{5}
$$
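For example, applying a one-round approximation $(2)$ at round $i = 1$ to the all-zero node yields the node with $\delta_0 = \beta$, $\delta_1 = \alpha$, $\delta_2 = \beta$, $\epsilon_1 = \gamma$, and every other mask still $0$.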
An $n$-round approximation is a node like $(1)$ in which the masks $\delta_2, \dots, \delta_{n-1}$ are all $0$. That is, the equation only involves plaintexts, ciphertexts, and keys.
We start our graph search at the trivial approximation, where $\forall i,\ \delta_i = 0$ and $\forall i,\ \epsilon_i = 0$. To see which edges we will actually take, a little bit of nomenclature:
- A state $x_i$ is said to be "hidden" when $1 < i < n$; that is, it is neither a plaintext nor a ciphertext word. A mask $\delta_i$ is called "hidden" under the same condition.
We will only take an edge $e$ that takes us from a node $v: \bigoplus_{i=0}^{n+1} x_i[\delta_i] = \bigoplus_{i=1}^n k_i[\epsilon_i]$ to a node $w: \bigoplus_{i=0}^{n+1} x_i[\delta'_i] = \bigoplus_{i=1}^{n} k_i[\epsilon'_i]$ when at most 2 hidden masks in $w$ are nonzero.
We reach the end state when all hidden masks are zero, and we're not at the start node.
We use the piling-up lemma to compute weights as we go, which we can do in logspace to improve precision.
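Concretely, if two independent approximations hold with biases $\epsilon_a = p_a - \tfrac{1}{2}$ and $\epsilon_b = p_b - \tfrac{1}{2}$, the piling-up lemma says their XOR holds with bias $2\,\epsilon_a \epsilon_b$, so in log space
$$
\log_2\lvert\epsilon\rvert = 1 + \log_2\lvert\epsilon_a\rvert + \log_2\lvert\epsilon_b\rvert,
$$
which is exactly the weight update `1 + a.log2_bias + o.log2_bias` in the code below.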
Below is C++ source code to replicate Matsui's table of results in the appendix. I've used Dijkstra's algorithm for the graph search, but it's really overkill; a dynamic-programming solution would also do, because the only paths we care about apply their one-round approximations at strictly increasing rounds (i.e. they start with the empty approximation and apply one-round approximations at rounds, say, 1, 2, 3, 5, 6, 8, 9, 10, then reach the final state). Dijkstra's already runs instantly, however, so there's no need to overthink it.
The only DES-specific thing here is the table of one-round approximations, `one_round_approximations`. Modifying that makes this find linear chains for any Feistel network, given approximations to the round function (see the sketch after the code).
For `NUM_ROUNDS = 10`, this code outputs:
```
Best approximation:
round_number: 10
state masks = [1: [7, 18, 24, 29], 10: [15], 11: [7, 18, 24, 29]]
key masks = [1: [22], 2: [44], 3: [22], 5: [22], 6: [44], 7: [22], 9: [22]]
applied approximations: [-ACD-DCA-A]
Probability: 0.500047
Log2(Bias): -14.3904
```
which exactly matches Matsui's paper.
```cpp
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <set>
#include <unordered_map>
#include <vector>
constexpr size_t NUM_ROUNDS = 10;
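// Prints the set bit positions of a mask, e.g. 0x21040080 -> [7, 18, 24, 29].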
std::ostream& show_mask(std::ostream& o, uint64_t m) {
int i = 0;
bool first = true;
o << "[";
while (m) {
if (m & 1) {
if (!first) {
o << ", ";
} else {
first = false;
}
o << i;
}
m >>= 1;
++i;
}
o << "]";
return o;
}
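// A linear approximation of the round function, equation (2):
//   x[alpha] ^ F(x, k)[beta] = k[gamma],
// with its bias stored as log2|p - 1/2|.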
struct OneRoundApproximation {
const char* name;
uint32_t alpha;
uint32_t beta;
uint64_t gamma;
double log2_bias;
double probability() const {
double x = std::pow(2.0, log2_bias) + 0.5;
return std::max(x, 1.0 - x);
}
friend auto operator<=>(const OneRoundApproximation&,
const OneRoundApproximation&) = default;
friend std::ostream& operator<<(std::ostream& o,
const OneRoundApproximation& ra) {
o << ra.name;
return o;
}
};
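// A node in the graph, equation (1): a mask for every state word x_0..x_{n+1} and
// every round key, plus the one-round approximations used to reach it. round_number
// is one past the last 0-based position at which an approximation was applied.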
struct Approximation {
std::array<uint32_t, NUM_ROUNDS + 2> state_mask;
std::array<uint64_t, NUM_ROUNDS> round_key_mask;
std::array<std::optional<OneRoundApproximation>, NUM_ROUNDS>
applied_approximations;
int round_number;
friend auto operator<=>(const Approximation&, const Approximation&) = default;
friend std::ostream& operator<<(std::ostream& o, const Approximation& a) {
o << "round_number: " << a.round_number << std::endl;
o << "state masks = [";
int cnt = 0;
for (size_t i = 0; i < NUM_ROUNDS + 2; ++i) {
if (!a.state_mask[i]) continue;
if (cnt++ > 0) o << ", ";
o << i << ": ";
show_mask(o, a.state_mask[i]);
}
o << "]" << std::endl;
cnt = 0;
o << "key masks = [";
for (size_t i = 0; i < NUM_ROUNDS; ++i) {
if (!a.round_key_mask[i]) continue;
if (cnt++ > 0) o << ", ";
o << i << ": ";
show_mask(o, a.round_key_mask[i]);
}
o << "]" << std::endl;
o << "applied approximations: [";
for (size_t i = 0; i < NUM_ROUNDS; ++i) {
auto ma = a.applied_approximations[i];
if (ma == std::nullopt) {
o << "-";
} else {
o << *ma;
}
}
o << "]" << std::endl;
return o;
}
};
struct WeightedApproximation {
Approximation a;
double log2_bias;
double probability() const {
double x = std::pow(2.0, log2_bias) + 0.5;
return std::max(x, 1.0 - x);
}
};
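// The only DES-specific data: Matsui's one-round approximations of the F function
// (named A-E), given as (alpha, beta, gamma) masks plus the bias log2|p - 1/2|.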
std::array<OneRoundApproximation, 5> one_round_approximations = {{
{"A", 0x8000, 0x21040080, 0x400000, std::log2(std::abs(12.0/64.0 - 0.5))},
{"B", 0xd8000000, 0x8000, 0x6c0000000000ULL, std::log2(std::abs(22.0/64.0 - 0.5))},
{"C", 0x20000000, 0x8000, 0x100000000000ULL, std::log2(std::abs(30.0/64.0 - 0.5))},
{"D", 0x8000, 0x1040080, 0x400000, std::log2(std::abs(42.0/64.0 - 0.5))},
{"E", 0x11000, 0x1040080, 0x880000, std::log2(std::abs(16.0/64.0 - 0.5))}
}};
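// Follows one edge of the graph, equation (5): XOR beta into the state masks of the
// two neighbouring rounds, alpha into the current round's state mask, and gamma into
// its round-key mask. `position` is the 0-based round where the approximation is applied.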
Approximation apply_one_round_approximation(const Approximation& a,
const OneRoundApproximation& o,
size_t position) {
Approximation b = a;
b.round_number = position + 1;
b.state_mask[position] ^= o.beta;
b.state_mask[position + 1] ^= o.alpha;
b.state_mask[position + 2] ^= o.beta;
b.round_key_mask[position] ^= o.gamma;
b.applied_approximations[position] = o;
return b;
}
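// Counts the nonzero hidden masks, i.e. masks on states that are neither
// plaintext nor ciphertext words.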
size_t HiddenSize(Approximation& b) {
size_t result = 0;
for (size_t i = 2; i < NUM_ROUNDS; ++i) {
result += b.state_mask[i] != 0;
}
return result;
}
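// All outgoing edges of `a` that use approximation `o`: apply it at every
// still-available round, keeping only nodes with at most 2 nonzero hidden masks.
// The weight update is the piling-up lemma in log space.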
std::vector<WeightedApproximation> Transitions(
const WeightedApproximation& a, const OneRoundApproximation& o) {
std::vector<WeightedApproximation> result;
for (size_t i = a.a.round_number; i < NUM_ROUNDS; ++i) {
Approximation b = apply_one_round_approximation(a.a, o, i);
if (HiddenSize(b) > 2) continue;
result.push_back(
{.a = std::move(b),
.log2_bias = 1 + a.log2_bias + o.log2_bias});
}
return result;
}
std::ostream& operator<<(std::ostream& o, const WeightedApproximation& wa) {
o << wa.a << std::endl;
o << "Probability: " << wa.probability() << std::endl;
o << "Log2(Bias): " << wa.log2_bias << std::endl;
return o;
}
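// Boost-style hash_combine.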
template <class T>
inline void hash_combine(std::size_t& seed, const T& v) {
std::hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
size_t HashApproximation(const Approximation& a) {
size_t h = 0;
for (size_t i = 0; i < NUM_ROUNDS + 2; ++i) {
hash_combine(h, a.state_mask[i]);
}
hash_combine(h, a.round_number);
return h;
}
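// Dijkstra-style best-first search for the approximation with the largest |bias|.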
int main() {
// Order the queue by descending log2 bias, so the best node is popped first;
// the remaining comparisons are only tie-breakers so that distinct nodes
// don't compare equivalent.
auto compare_by_probability = [](const WeightedApproximation& a,
const WeightedApproximation& b) {
if (a.log2_bias != b.log2_bias) return a.log2_bias > b.log2_bias;
if (a.a.round_key_mask != b.a.round_key_mask)
return a.a.round_key_mask < b.a.round_key_mask;
return HashApproximation(a.a) < HashApproximation(b.a);
};
std::set<WeightedApproximation, decltype(compare_by_probability)> queue;
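// Maps each discovered node to its iterator in `queue`; queue.end() marks a node
// whose best bias has already been finalized.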
std::unordered_map<Approximation, decltype(queue.begin()),
decltype(&HashApproximation)>
seen(1, &HashApproximation);
WeightedApproximation wa = {.a = Approximation{},
.log2_bias = std::log2(0.5)};
auto it = queue.insert(wa).first;
seen.emplace(wa.a, it);
WeightedApproximation best_approximation = wa;
while (!queue.empty()) {
WeightedApproximation v = queue.extract(queue.begin()).value();
seen[v.a] = queue.end();
if (v.a.round_number == NUM_ROUNDS && HiddenSize(v.a) == 0) {
if (best_approximation.a.round_number == 0 ||
best_approximation.log2_bias < v.log2_bias) {
best_approximation = v;
}
continue;
}
for (const auto& o : one_round_approximations) {
for (WeightedApproximation w : Transitions(v, o)) {
auto it = seen.find(w.a);
if (it == seen.end()) {
auto [jt, inserted] = queue.insert(std::move(w));
assert(inserted);
seen.emplace(jt->a, std::move(jt));
} else {
if (it->second == queue.end()) continue;
if (it->second->log2_bias < w.log2_bias) {
queue.erase(it->second);
auto jt = queue.insert(w).first;
it->second = jt;
}
}
}
}
}
std::cout << "Best approximation: " << std::endl
<< best_approximation << std::endl;
}
```
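To illustrate the last point: adapting this to another Feistel cipher only means swapping out the approximation table (and `NUM_ROUNDS`), since the search itself only ever looks at the masks and the bias. Here is a minimal sketch of what such an entry could look like; the masks and the 20/64 probability are made up for illustration and are not real approximations of any cipher:

```cpp
// Hypothetical one-round approximation x[alpha] ^ F(x, k)[beta] = k[gamma] for some
// other Feistel cipher's round function, assumed to hold with probability 20/64.
// All masks below are placeholders, not real cryptanalytic data.
std::array<OneRoundApproximation, 1> other_cipher_approximations = {{
    {"X", /*alpha=*/0x00000002, /*beta=*/0x00400000, /*gamma=*/0x0000000000000001ULL,
     std::log2(std::abs(20.0 / 64.0 - 0.5))},
}};
```

The only other change needed is to iterate over this table instead of `one_round_approximations` in `main`.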