# Theory IMO2019_Q1

(*
File:    IMO2019_Q1.thy
Author:  Manuel Eberl, TU München
*)
section ‹Q1›
theory IMO2019_Q1
imports Main
begin

text ‹
Consider a function ‹f : ℤ → ℤ› that fulfils the functional equation
‹f(2a) + 2f(b) = f(f(a+b))› for all ‹a, b ∈ ℤ›.

Then ‹f› is either identically 0 or of the form ‹f(x) = 2x + c› for some constant ‹c ∈ ℤ›.
›

context
fixes f :: "int ⇒ int" and m :: int
assumes f_eq: "f (2 * a) + 2 * f b = f (f (a + b))"
defines "m ≡ (f 0 - f (-2)) div 2"
begin

text ‹
We first show that ‹f› is affine with slope ‹(f(0) - f(-2)) / 2›.
This follows from plugging in ‹(0, b)› and ‹(-1, b + 1)› into the functional equation.
›
lemma f_eq': "f x = m * x + f 0"
proof -
have rec: "f (b + 1) = f b + m" for b
using f_eq[of 0 b] f_eq[of "-1" "b + 1"] by (simp add: m_def)
moreover have "f (b - 1) = f b - m" for b
using rec[of "b - 1"] by simp
ultimately show ?thesis
by (induction x rule: int_induct[of _ 0]) (auto simp: algebra_simps)
qed

text ‹
This version is better for the simplifier because it prevents it from looping.
›
lemma f_eq'_aux [simp]: "NO_MATCH 0 x ⟹ f x = m * x + f 0"
by (rule f_eq')

text ‹
Plugging in ‹(0, 0)› and ‹(0, 1)›.
›
lemma f_classification: "(∀x. f x = 0) ∨ (∀x. f x = 2 * x + f 0)"
using f_eq[of 0 0] f_eq[of 0 1] by auto

end

text ‹
It is now easy to derive the full characterisation of the functions we considered:
›
theorem
fixes f :: "int ⇒ int"
shows "(∀a b. f (2 * a) + 2 * f b = f (f (a + b))) ⟷
(∀x. f x = 0) ∨ (∀x. f x = 2 * x + f 0)" (is "?lhs ⟷ ?rhs")
proof
assume ?lhs
thus ?rhs using f_classification[of f] by blast
next
assume ?rhs
thus ?lhs by smt
qed

end

# Theory IMO2019_Q4

(*
File:    IMO2019_Q4.thy
Author:  Manuel Eberl, TU München
*)
section ‹Q4›
theory IMO2019_Q4
imports "Prime_Distribution_Elementary.More_Dirichlet_Misc"
begin

text ‹
Find all pairs ‹(k, n)› of positive integers such that $k! = \prod_{i=0}^{n-1} (2^n - 2^i)$.
›

subsection ‹Auxiliary facts›

(* TODO: Move stuff from this section? *)
lemma Sigma_insert: "Sigma (insert x A) f = (λy. (x, y))  f x ∪ Sigma A f"
by auto

lemma atLeastAtMost_nat_numeral:
"{(m::nat)..numeral k} =
(if m ≤ numeral k then insert (numeral k) {m..pred_numeral k} else {})"
by (auto simp: numeral_eq_Suc)

lemma greaterThanAtMost_nat_numeral:
"{(m::nat)<..numeral k} =
(if m < numeral k then insert (numeral k) {m<..pred_numeral k} else {})"
by (auto simp: numeral_eq_Suc)

lemma fact_ge_power:
fixes c :: nat
assumes "fact n0 ≥ c ^ n0" "c ≤ n0 + 1"
assumes "n ≥ n0"
shows   "fact n ≥ c ^ n"
using assms(3,1,2)
proof (induction n rule: dec_induct)
case (step n)
have "c * c ^ n ≤ Suc n * fact n"
using step by (intro mult_mono) auto
thus ?case by simp
qed auto

lemma prime_multiplicity_prime:
fixes p q :: "'a :: factorial_semiring"
assumes "prime p" "prime q"
shows   "multiplicity p q = (if p = q then 1 else 0)"
using assms by (auto simp: prime_multiplicity_other)

text ‹
We use Legendre's identity from the library. One could easily prove the property in question
without the library, but it probably still saves a few lines.

@{const legendre_aux} (related to Legendre's identity) is the multiplicity of a given prime
in the prime factorisation of ‹n!›.
›
(* TODO: Move? *)
lemma multiplicity_prime_fact:
fixes p :: nat
assumes "prime p"
shows   "multiplicity p (fact n) = legendre_aux n p"
proof (cases "p ≤ n")
case True
have "fact n = (∏p | prime p ∧ p ≤ n. p ^ legendre_aux n p)"
using legendre_identity'[of "real n"] by simp
also have "multiplicity p … = (∑q | prime q ∧ q ≤ n. multiplicity p (q ^ legendre_aux n q))"
using assms by (subst prime_elem_multiplicity_prod_distrib) auto
also have "… = (∑q∈{p}. legendre_aux n q)"
using assms ‹p ≤ n› prime_multiplicity_other[of p]
by (intro sum.mono_neutral_cong_right)
(auto simp: prime_elem_multiplicity_power_distrib prime_multiplicity_prime split: if_splits)
finally show ?thesis by simp
next
case False
hence "multiplicity p (fact n) = 0"
using assms by (intro not_dvd_imp_multiplicity_0) (auto simp: prime_dvd_fact_iff)
moreover from False have "legendre_aux (real n) p = 0"
by (intro legendre_aux_eq_0) auto
ultimately show ?thesis by simp
qed

text ‹
The following are simple and trivial lower and upper bounds for @{const legendre_aux}:
›
lemma legendre_aux_ge:
assumes "prime p" "k ≥ 1"
shows   "legendre_aux k p ≥ nat ⌊k / p⌋"
proof (cases "k ≥ p")
case True
have "(∑m∈{1}. nat ⌊k / real p ^ m⌋) ≤ (∑m | 0 < m ∧ real p ^ m ≤ k. nat ⌊k / real p ^ m⌋)"
using True finite_sum_legendre_aux[of p] assms by (intro sum_mono2) auto
with assms True show ?thesis by (simp add: legendre_aux_def)
next
case False
with assms have "k / p < 1" by (simp add: field_simps)
hence "nat ⌊k / p⌋ = 0" by simp
with False show ?thesis
qed

lemma legendre_aux_less:
assumes "prime p" "k ≥ 1"
shows   "legendre_aux k p < k / (p - 1)"
proof -
have "(λm. (k / p) * (1 / p) ^ m) sums ((k / p) * (1 / (1 - 1 / p)))"
using assms prime_gt_1_nat[of p] by (intro sums_mult geometric_sums) (auto simp: field_simps)
hence sums: "(λm. k / p ^ Suc m) sums (k / (p - 1))"
using assms prime_gt_1_nat[of p] by (simp add: field_simps of_nat_diff)

have "real (legendre_aux k p) = (∑m∈{0<..nat ⌊log (real p) k⌋}. of_int ⌊k / real p ^ m⌋)"
using assms by (simp add: legendre_aux_altdef1)
also have "… = (∑m<nat ⌊log (real p) k⌋. of_int ⌊k / real p ^ Suc m⌋)"
by (intro sum.reindex_bij_witness[of _ Suc "λi. i - 1"]) (auto simp flip: power_Suc)
also have "… ≤ (∑m<nat ⌊log (real p) k⌋. k / real p ^ Suc m)"
by (intro sum_mono) auto
also have "… < (∑m. k / real p ^ Suc m)"
using sums assms prime_gt_1_nat[of p]
by (intro sum_less_suminf) (auto simp: sums_iff intro!: divide_pos_pos)
also have "… = k / (p - 1)"
using sums by (simp add: sums_iff)
finally show ?thesis
using assms prime_gt_1_nat[of p] by (simp add: of_nat_diff)
qed

subsection ‹Main result›

text ‹
Now we move on to the main result: We fix two numbers ‹n› and ‹k› with the property
in question and derive facts from that.

The triangle number $T = n(n+1)/2$ is of particular importance here, so we introduce an
abbreviation for it.
›

context
fixes k n :: nat and rhs T :: nat
defines "rhs ≡ (∏i<n. 2 ^ n - 2 ^ i)"
defines "T ≡ (n * (n - 1)) div 2"
assumes pos: "k > 0" "n > 0"
assumes k_n: "fact k = rhs"
begin

text ‹
We can rewrite the right-hand side into a more convenient form:
›
lemma rhs_altdef: "rhs = 2 ^ T * (∏i=1..n. 2 ^ i - 1)"
proof -
have "rhs = (∏i<n. 2 ^ i * (2 ^ (n - i) - 1))"
also have "… = 2 ^ (∑i<n. i) * (∏i<n. 2 ^ (n - i) - 1)"
also have "(∑i<n. i) = T"
unfolding T_def using Sum_Ico_nat[of 0 n] by (simp add: atLeast0LessThan)
also have "(∏i<n. 2 ^ (n - i) - 1) = (∏i=1..n. 2 ^ i - 1)"
by (rule prod.reindex_bij_witness[of _ "λi. n - i" "λi. n - i"]) auto
finally show ?thesis .
qed

text ‹
The multiplicity of 2 in the prime factorisation of the right-hand side is precisely ‹T›.
›
lemma multiplicity_2_rhs [simp]: "multiplicity 2 rhs = T"
proof -
have nz: "2 ^ i - 1 ≠ (0 :: nat)" if "i ≥ 1" for i
proof -
from ‹i ≥ 1› have "2 ^ 0 < (2 ^ i :: nat)"
by (intro power_strict_increasing) auto
thus ?thesis by simp
qed

have "multiplicity 2 rhs = T + multiplicity 2 (∏i=1..n. 2 ^ i - 1 :: nat)"
using nz by (simp add: rhs_altdef prime_elem_multiplicity_mult_distrib)
also have "multiplicity 2 (∏i=1..n. 2 ^ i - 1 :: nat) = 0"
by (intro not_dvd_imp_multiplicity_0) (auto simp: prime_dvd_prod_iff)
finally show ?thesis by simp
qed

text ‹
From Legendre's identities and the associated bounds, it can easily be seen that
‹⌊k/2⌋ ≤ T < k›:
›
lemma k_gt_T: "k > T"
proof -
have "T = multiplicity 2 rhs"
by simp
also have "rhs = fact k"
also have "multiplicity 2 (fact k :: nat) = legendre_aux k 2"
also have "… < k"
using legendre_aux_less[of 2 k] pos by simp
finally show ?thesis .
qed

lemma T_ge_half_k: "T ≥ k div 2"
proof -
have "k div 2 ≤ legendre_aux k 2"
using legendre_aux_ge[of 2 k] pos by simp linarith?
also have "… = multiplicity 2 (fact k :: nat)"
also have "… = T" by (simp add: k_n)
finally show "T ≥ k div 2" .
qed

text ‹
It can also be seen fairly easily that the right-hand side is strictly smaller than $2^{n^2}$:
›
lemma rhs_less: "rhs < 2 ^ n⇧2"
proof -
have "rhs = 2 ^ T * (∏i=1..n. 2 ^ i - 1)"
also have "(∏i=1..n. 2 ^ i - 1 :: nat) < (∏i=1..n. 2 ^ i)"
using pos by (intro prod_mono_strict) auto
also have "… = (∏i=0..<n. 2 * 2 ^ i)"
by (intro prod.reindex_bij_witness[of _ Suc "λi. i - 1"]) (auto simp flip: power_Suc)
also have "… = 2 ^ n * 2 ^ (∑i=0..<n. i)"
also have "(∑i=0..<n. i) = T"
unfolding T_def by (simp add: Sum_Ico_nat)
also have "2 ^ T * (2 ^ n * 2 ^ T :: nat) = 2 ^ (2 * T + n)"
also have "2 * T + n = n ^ 2"
by (cases "even n") (auto simp: T_def algebra_simps power2_eq_square)
finally show "rhs < 2 ^ n⇧2"
by simp
qed

text ‹
It is clear that $2^{n^2} \leq 8^T$ and that $8^T < T!$ if $T$ is sufficiently big.
In this case, sufficiently big' means ‹T ≥ 20› and thereby ‹n ≥ 7›. We can therefore
conclude that ‹n› must be less than 7.
›
lemma n_less_7: "n < 7"
proof (rule ccontr)
assume "¬n < 7"
hence "n ≥ 7" by simp
have "T ≥ (7 * 6) div 2"
unfolding T_def using ‹n ≥ 7› by (intro div_le_mono mult_mono) auto
hence "T ≥ 21" by simp

from ‹n ≥ 7› have "(n * 2) div 2 ≤ T"
unfolding T_def by (intro div_le_mono) auto
hence "T ≥ n" by simp

from ‹T ≥ 21› have "sqrt (2 * pi * T) * (T / exp 1) ^ T ≤ fact T"
using fact_bounds[of T] by simp
have "fact T ≤ (fact k :: nat)"
using k_gt_T by (intro fact_mono) (auto simp: T_def)
also have "… = rhs" by fact
also have "rhs < 2 ^ n⇧2" by (rule rhs_less)
also have "n⇧2 = 2 * T + n"
by (cases "even n") (auto simp: T_def algebra_simps power2_eq_square)
also have "… ≤ 3 * T"
using ‹T ≥ n› by (simp add: T_def)
also have "2 ^ (3 * T) = (8 ^ T :: nat)"
finally have "fact T < (8 ^ T :: nat)"
by simp
moreover have "fact T ≥ (8 ^ T :: nat)"
by (rule fact_ge_power[of _ 20]) (use ‹T ≥ 21› in ‹auto simp: fact_numeral›)
ultimately show False by simp
qed

text ‹
We now only have 6 values for ‹n› to check. Together with the bounds that we obtained on ‹k›,
this only leaves a few combinations of ‹n› and ‹k› to check, and we do precisely that
and find that ‹n = k = 1› and ‹n = 2, k = 3› are the only possible combinations.
›
lemma n_k_in_set: "(n, k) ∈ {(1, 1), (2, 3)}"
proof -
define T' where "T' = (λn :: nat. n * (n - 1) div 2)"
define A :: "(nat × nat) set" where "A = (SIGMA n:{1..6}. {T' n<..2 * T' n + 1})"
define P where "P = (λ(n, k). fact k = (∏i<n. 2 ^ n - 2 ^ i :: nat))"
have [simp]: "{0<..Suc 0} = {1}" by auto
have "(n, k) ∈ Set.filter P A"
using k_n pos T_ge_half_k k_gt_T n_less_7
by (auto simp: A_def T'_def T_def Set.filter_def P_def rhs_def)
also have "Set.filter P A = {(1, 1), (2, 3)}"
by (simp add: P_def Set_filter_insert A_def atMost_nat_numeral atMost_Suc T'_def Sigma_insert
greaterThanAtMost_nat_numeral atLeastAtMost_nat_numeral lessThan_nat_numeral fact_numeral
cong: if_weak_cong)
finally show ?thesis .
qed

end

text ‹
Using this, deriving the final result is now trivial:
›
theorem "{(n, k). n > 0 ∧ k > 0 ∧ fact k = (∏i<n. 2 ^ n - 2 ^ i :: nat)} = {(1, 1), (2, 3)}"
(is "?lhs = ?rhs")
proof
show "?lhs ⊆ ?rhs" using n_k_in_set by blast
show "?rhs ⊆ ?lhs" by (auto simp: fact_numeral lessThan_nat_numeral)
qed

end

# Theory IMO2019_Q5

(*
File:    IMO2019_Q5.thy
Author:  Manuel Eberl, TU München
*)
section ‹Q5›
theory IMO2019_Q5
imports Complex_Main
begin

text ‹
Given a sequence $(c_1,\ldots, c_n)$ of coins, each of which can be heads (‹H›) or tails (‹T›),
Harry performs the following process: Let ‹k› be the number of coins that show ‹H›. If ‹k > 0›,
flip the ‹k›-th coin and repeat the process. Otherwise, stop.

What is the average number of steps that this process takes, averaged over all $2^n$
coin sequences of length ‹n›?
›

subsection ‹Definition›

text ‹
We represent coins as Booleans, where @{term True} indicates ‹H› and @{term False} indicates ‹T›.
Coin sequences are then simply lists of Booleans.

The following function flips the ‹i›-th coin in the sequence (in Isabelle, the convention is that
the first list element is indexed with 0).
›
definition flip :: "bool list ⇒ nat ⇒ bool list" where
"flip xs i = xs[i := ¬xs ! i]"

lemma flip_Cons_pos [simp]: "n > 0 ⟹ flip (x # xs) n = x # flip xs (n - 1)"
by (cases n) (auto simp: flip_def)

lemma flip_Cons_0 [simp]: "flip (x # xs) 0 = (¬x) # xs"

lemma flip_append1 [simp]: "n < length xs ⟹ flip (xs @ ys) n = flip xs n @ ys"
and flip_append2 [simp]: "n ≥ length xs ⟹ n < length xs + length ys ⟹
flip (xs @ ys) n = xs @ flip ys (n - length xs)"
by (auto simp: flip_def list_update_append nth_append)

lemma length_flip [simp]: "length (flip xs i) = length xs"

text ‹
The following function computes the number of ‹H› in a coin sequence.
›
definition heads :: "bool list ⇒ nat" where "heads xs = length (filter id xs)"

by (induction xs) (auto simp: heads_Cons)

lemma heads_eq_0 [simp]: "True ∉ set xs ⟹ heads xs = 0"
by (induction xs) (auto simp: heads_Cons)

lemma heads_eq_0_iff [simp]: "heads xs = 0 ⟷ True ∉ set xs"
by (induction xs) (auto simp: heads_Cons)

lemma heads_pos_iff [simp]: "heads xs > 0 ⟷ True ∈ set xs"
by (induction xs) (auto simp: heads_Cons)

text ‹
The following function performs a single step of Harry's process.
›
definition harry_step :: "bool list ⇒ bool list" where
"harry_step xs = flip xs (heads xs - 1)"

lemma length_harry_step [simp]: "length (harry_step xs) = length xs"

text ‹
The following is the measure function for Harry's process, i.e. how many steps the process takes
to terminate starting from the given sequence. We define it like this now and prove the
correctness later.
›
function harry_meas where
"harry_meas xs =
(if xs = [] then 0
else if hd xs then 1 + harry_meas (tl xs)
else if ¬last xs then harry_meas (butlast xs)
else let n = length xs in harry_meas (take (n - 2) (tl xs)) + 2 * n - 1)"
by auto
termination by (relation "Wellfounded.measure length") (auto simp: min_def)

lemmas [simp del] = harry_meas.simps

text ‹
We now prove some simple properties of @{const harry_meas} and @{const harry_step}.
›

text ‹
We prove a more convenient case distinction rule for lists that allows us to
distinguish between lists starting with @{term True}, ending with @{term False}, and
starting with @{term False} and ending with @{term True}.
›
lemma head_last_cases [case_names Nil True False False_True]:
assumes "xs = [] ⟹ P"
assumes "⋀ys. xs = True # ys ⟹ P" "⋀ys. xs = ys @ [False] ⟹ P"
"⋀ys. xs = False # ys @ [True] ⟹ P"
shows   "P"
proof -
consider "length xs = 0" | "length xs = 1" | "length xs ≥ 2" by linarith
thus ?thesis
proof cases
assume "length xs = 1"
hence "xs = [hd xs]" by (cases xs) auto
thus P using assms(2)[of "[]"] assms(3)[of "[]"] by (cases "hd xs") auto
next
assume len: "length xs ≥ 2"
from len obtain x xs' where *: "xs = x # xs'"
by (cases xs) auto
have **: "xs' = butlast xs' @ [last xs']"
using len by (subst append_butlast_last_id) (auto simp: *)
have [simp]: "xs = x # butlast xs' @ [last xs']"
by (subst *, subst **) auto
show P
using assms(2)[of xs'] assms(3)[of "x # butlast xs'"] assms(4)[of "butlast xs'"] **
by (cases x; cases "last xs'") auto
qed (use assms in auto)
qed

lemma harry_meas_Nil [simp]: "harry_meas [] = 0"

lemma harry_meas_True_start [simp]: "harry_meas (True # xs) = 1 + harry_meas xs"
by (subst harry_meas.simps) auto

lemma harry_meas_False_end [simp]: "harry_meas (xs @ [False]) = harry_meas xs"
proof (induction xs)
case (Cons x xs)
thus ?case by (cases x) (auto simp: harry_meas.simps)
qed (auto simp: harry_meas.simps)

lemma harry_meas_False_True: "harry_meas (False # xs @ [True]) = harry_meas xs + 2 * length xs + 3"
by (subst harry_meas.simps) auto

lemma harry_meas_eq_0 [simp]:
assumes "True ∉ set xs"
shows   "harry_meas xs = 0"
using assms by (induction xs rule: rev_induct) auto

text ‹
If the sequence starts with ‹H›, the process runs on the remaining sequence until it
terminates and then flips this ‹H› in another single step.
›
lemma harry_step_True_start [simp]:
"harry_step (True # xs) = (if True ∈ set xs then True # harry_step xs else False # xs)"
by (auto simp: harry_step_def)

text ‹
If the sequence ends in ‹T›, the process simply runs on the remaining sequence as if it
were not present.
›
lemma harry_step_False_end [simp]:
assumes "True ∈ set xs"
shows   "harry_step (xs @ [False]) = harry_step xs @ [False]"
proof -
have "harry_step (xs @ [False]) = flip (xs @ [False]) (heads xs - 1)"
using heads_le_length[of xs] by (auto simp: harry_step_def)
also have "… = harry_step xs @ [False]"
by (subst flip_append1; fastforce simp: harry_step_def)
finally show ?thesis .
qed

text ‹
If the sequence starts with ‹T› and ends with ‹H›, the process runs on the remaining
sequence inbetween as if these two were not present, eventually leaving a sequence that
consists entirely if ‹T› except for a single final ‹H›.
›
lemma harry_step_False_True:
assumes "True ∈ set xs"
shows "harry_step (False # xs @ [True]) = False # harry_step xs @ [True]"
proof -
have "harry_step (False # xs @ [True]) = False # flip (xs @ [True]) (heads xs - 1)"
also have "… = False # harry_step xs @ [True]"
using assms by (subst flip_append1)
(auto simp: harry_step_def Suc_less_SucD heads_le_length less_Suc_eq_le)
finally show ?thesis .
qed

text ‹
That sequence consisting only of ‹T› except for a single final ‹H› is then turned into
an all-‹T› sequence in ‹2n+1› steps.
›
lemma harry_meas_Falses_True [simp]: "harry_meas (replicate n False @ [True]) = 2 * n + 1"
proof (cases "n = 0")
case False
hence "replicate n False @ [True] = False # replicate (n - 1) False @ [True]"
by (cases n) auto
also have "harry_meas … = 2 * n + 1"
using False by (simp add: harry_meas_False_True algebra_simps)
finally show ?thesis .
qed auto

lemma harry_step_Falses_True [simp]:
"n > 0 ⟹ harry_step (replicate n False @ [True]) = True # replicate (n - 1) False @ [True]"
by (cases n) (simp_all add: harry_step_def)

subsection ‹Correctness of the measure›

text ‹
We will now show that @{const harry_meas} indeed counts the length of the process.
As a first step, we will show that if there is a ‹H› in a sequence, applying a single step
decreases the measure by one.
›
lemma harry_meas_step_aux:
assumes "True ∈ set xs"
shows   "harry_meas xs = Suc (harry_meas (harry_step xs))"
using assms
proof (induction xs rule: length_induct)
case (1 xs)
hence IH: "harry_meas ys = Suc (harry_meas (harry_step ys))"
if "length ys < length xs" "True ∈ set ys" for ys
using that by blast

show ?case
case (True ys)
thus ?thesis by (auto simp: IH)
next
case (False ys)
thus ?thesis using "1.prems" by (auto simp: IH)
next
case (False_True ys)
thus ?thesis
proof (cases "True ∈ set ys")
case False
define n where "n = length ys + 1"
have "n > 0" by (simp add: n_def)
from False have "ys = replicate (n - 1) False"
unfolding n_def by (induction ys) auto
with False_True ‹n > 0› have [simp]: "xs = replicate n False @ [True]"
by (cases n) auto
show ?thesis using ‹n > 0› by auto
qed (auto simp: IH False_True harry_step_False_True harry_meas_False_True)
qed (use 1 in auto)
qed

lemma harry_meas_step: "True ∈ set xs ⟹ harry_meas (harry_step xs) = harry_meas xs - 1"
using harry_meas_step_aux[of xs] by simp

text ‹
Next, we show that the measure is zero if and only if there is no ‹H› left in the sequence.
›
lemma harry_meas_eq_0_iff [simp]: "harry_meas xs = 0 ⟷ True ∉ set xs"
proof (induction xs rule: length_induct)
case (1 xs)
show ?case
by (cases xs rule: head_last_cases) (auto simp: 1 harry_meas_False_True 1)
qed

text ‹
It follows by induction that if the measure of a sequence is ‹n›, then iterating the step
less than ‹n› times yields a sequence with at least one ‹H› in it, but iterating it exactly
‹n› times yields a sequence that contains no more ‹H›.
›
lemma True_in_funpow_harry_step:
assumes "n < harry_meas xs"
shows   "True ∈ set ((harry_step ^^ n) xs)"
using assms
proof (induction n arbitrary: xs)
case 0
show ?case by (rule ccontr) (use 0 in auto)
next
case (Suc n)
have "True ∈ set xs" by (rule ccontr) (use Suc in auto)
have "(harry_step ^^ Suc n) xs = (harry_step ^^ n) (harry_step xs)"
by (simp only: funpow_Suc_right o_def)
also have "True ∈ set …"
using Suc ‹True ∈ set xs› by (intro Suc) (auto simp: harry_meas_step)
finally show ?case .
qed

lemma True_notin_funpow_harry_step: "True ∉ set ((harry_step ^^ harry_meas xs) xs)"
proof (induction "harry_meas xs" arbitrary: xs)
case (Suc n)
have "True ∈ set xs" by (rule ccontr) (use Suc in auto)
have "(harry_step ^^ harry_meas xs) xs = (harry_step ^^ Suc n) xs"
by (simp only: Suc)
also have "… = (harry_step ^^ n) (harry_step xs)"
by (simp only: funpow_Suc_right o_def)
also have "… = (harry_step ^^ (harry_meas xs - 1)) (harry_step xs)"
by (simp flip: Suc(2))
also have "harry_meas xs - 1 = harry_meas (harry_step xs)"
using ‹True ∈ set xs› by (subst harry_meas_step) auto
also have "True ∉ set ((harry_step ^^ …) (harry_step xs))"
using Suc ‹True ∈ set xs› by (intro Suc) (auto simp: harry_meas_step)
finally show ?case .
qed auto

text ‹
This shows that the measure is indeed the correct one: It is the smallest number such that
iterating Harry's step that often yields a sequence with no heads in it.
›
theorem "harry_meas xs = (LEAST n. True ∉ set ((harry_step ^^ n) xs))"
proof (rule sym, rule Least_equality, goal_cases)
show "True ∉ set ((harry_step ^^ harry_meas xs) xs)"
by (rule True_notin_funpow_harry_step)
next
case (2 y)
show ?case
by (rule ccontr) (use 2 True_in_funpow_harry_step[of y] in auto)
qed

subsection ‹Average-case analysis›

text ‹
The set of all coin sequences of a given length.
›
definition seqs where "seqs n = {xs :: bool list . length xs = n}"

lemma length_seqs [dest]: "xs ∈ seqs n ⟹ length xs = n"

lemma seqs_0 [simp]: "seqs 0 = {[]}"
by (auto simp: seqs_def)

text ‹
The coin sequences of length ‹n + 1› are simply what is obtained by appending either ‹H›
or ‹T› to each coin sequence of length ‹n›.
›
lemma seqs_Suc: "seqs (Suc n) = (λxs. True # xs)  seqs n ∪ (λxs. False # xs)  seqs n"
by (auto simp: seqs_def length_Suc_conv)

text ‹
The set of coin sequences of length ‹n› is invariant under reversal.
›
lemma seqs_rev [simp]: "rev  seqs n = seqs n"
proof
show "rev  seqs n ⊆ seqs n"
by (auto simp: seqs_def)
hence "rev  rev  seqs n ⊆ rev  seqs n"
by blast
thus "seqs n ⊆ rev  seqs n" by (simp add: image_image)
qed

text ‹
Hence we get a similar decomposition theorem that appends at the end.
›
lemma seqs_Suc': "seqs (Suc n) = (λxs. xs @ [True])  seqs n ∪ (λxs. xs @ [False])  seqs n"
proof -
have "rev  rev  ((λxs. xs @ [True])  seqs n ∪ (λxs. xs @ [False])  seqs n) =
rev  ((λxs. True # xs)  rev  seqs n ∪ (λxs. False # xs)  rev  seqs n)"
unfolding image_Un image_image by simp
also have "(λxs. True # xs)  rev  seqs n ∪ (λxs. False # xs)  rev  seqs n = seqs (Suc n)"
finally show ?thesis by (simp add: image_image)
qed

lemma finite_seqs [intro]: "finite (seqs n)"
by (induction n) (auto simp: seqs_Suc)

lemma card_seqs [simp]: "card (seqs n) = 2 ^ n"
proof (induction n)
case (Suc n)
have "card (seqs (Suc n)) = card ((#) True  seqs n ∪ (#) False  seqs n)"
by (auto simp: seqs_Suc)
also from Suc.IH have "… = 2 ^ Suc n"
by (subst card_Un_disjoint) (auto simp: card_image)
finally show ?case .
qed auto

lemmas seqs_code [code] = seqs_0 seqs_Suc

text ‹
The sum of the measures over all possible coin sequences of a given length (defined
as a recurrence relation; correctness proven later).
›
fun harry_sum :: "nat ⇒ nat" where
"harry_sum 0 = 0"
| "harry_sum (Suc 0) = 1"
| "harry_sum (Suc (Suc n)) = 2 * harry_sum (Suc n) + (2 * n + 4) * 2 ^ n"

lemma Suc_Suc_induct: "P 0 ⟹ P (Suc 0) ⟹ (⋀n. P n ⟹ P (Suc n) ⟹ P (Suc (Suc n))) ⟹ P n"
by induction_schema (pat_completeness, rule wf_measure[of id], auto)

text ‹
The recurrence relation really does describe the sum over all measures:
›
lemma harry_sum_correct: "harry_sum n = sum harry_meas (seqs n)"
proof (induction n rule: Suc_Suc_induct)
case (3 n)
have "seqs (Suc (Suc n)) =
(λxs. xs @ [False])  seqs (Suc n) ∪
(λxs. True # xs @ [True])  seqs n ∪
(λxs. False # xs @ [True])  seqs n"
by (subst (1) seqs_Suc, subst (1 2) seqs_Suc') (simp add: image_Un image_image Un_ac seqs_Suc)
also have "int (sum harry_meas …) =
int (harry_sum (Suc n)) +
int (∑xs∈seqs n. 1 + harry_meas (xs @ [True])) +
int (∑xs∈seqs n. harry_meas (False # xs @ [True]))"
by (subst sum.union_disjoint sum.reindex, auto simp: inj_on_def 3)+
also have "int (∑xs∈seqs n. 1 + harry_meas (xs @ [True])) =
2 ^ n + int (∑xs∈seqs n. harry_meas (xs @ [True]))"
by (subst sum.distrib) auto
also have "(∑xs∈seqs n. harry_meas (False # xs @ [True])) = harry_sum n + (2 * n + 3) * 2 ^ n"
by (auto simp: 3 harry_meas_False_True sum.distrib algebra_simps length_seqs)
also have "harry_sum (Suc n) = (∑xs∈seqs n. harry_meas (xs @ [True])) + harry_sum n"
unfolding seqs_Suc' 3 by (subst sum.union_disjoint sum.reindex, auto simp: inj_on_def)+
hence "int (∑xs∈seqs n. harry_meas (xs @ [True])) = int (harry_sum (Suc n)) - int (harry_sum n)"
by simp
finally have "int (∑x∈seqs (Suc (Suc n)). harry_meas x) =
int (2 * harry_sum (Suc n) + (2 * n + 4) * 2 ^ n)"
hence "(∑x∈seqs (Suc (Suc n)). (harry_meas x)) =
(2 * harry_sum (Suc n) + (2 * n + 4) * 2 ^ n)" by linarith
thus ?case by simp
qed (auto simp: seqs_Suc)

lemma harry_sum_closed_form_aux: "4 * harry_sum n = n * (n + 1) * 2 ^ n"
by (induction n rule: harry_sum.induct) (auto simp: algebra_simps)

text ‹
Solving the recurrence gives us the following solution:
›
theorem harry_sum_closed_form: "harry_sum n = n * (n + 1) * 2 ^ n div 4"
using harry_sum_closed_form_aux[of n] by simp

text ‹
The average is now a simple consequence:
›
definition harry_avg where "harry_avg n = harry_sum n / card (seqs n)"

corollary "harry_avg n = n * (n + 1) / 4"
proof -
have "real (4 * harry_sum n) = n * (n + 1) * 2 ^ n"
by (subst harry_sum_closed_form_aux) auto
hence "real (harry_sum n) = n * (n + 1) * 2 ^ n / 4"
end