# Theory Folds

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Folds
imports "Regular-Sets.Regular_Exp"
begin

section ‹Summation'' for regular expressions›

text ‹
To obtain equational system out of finite set of equivalence classes, a fold operation
on finite sets ‹folds› is defined. The use of ‹SOME› makes ‹folds›
more robust than the ‹fold› in the Isabelle library. The expression ‹folds f›
makes sense when ‹f› is not ‹associative› and ‹commutitive›,
while ‹fold f› does not.
›

definition
folds :: "('a ⇒ 'b ⇒ 'b) ⇒ 'b ⇒ 'a set ⇒ 'b"
where
"folds f z S ≡ SOME x. fold_graph f z S x"

text ‹Plus-combination for a set of regular expressions›

abbreviation
Setalt :: "'a rexp set ⇒ 'a rexp" ("⨄_"  999)
where
"⨄A ≡ folds Plus Zero A"

text ‹
For finite sets, @{term Setalt} is preserved under @{term lang}.
›

lemma folds_plus_simp [simp]:
fixes rs::"('a rexp) set"
assumes a: "finite rs"
shows "lang (⨄rs) = ⋃ (lang  rs)"
unfolding folds_def
apply(rule set_eqI)
apply(rule someI2_ex)
apply(rule_tac finite_imp_fold_graph[OF a])
apply(erule fold_graph.induct)
apply(auto)
done

end


# Theory Myhill_1

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Myhill_1
imports Folds
"HOL-Library.While_Combinator"
begin

section ‹First direction of MN: ‹finite partition ⇒ regular language››

notation
conc (infixr "⋅" 100) and
star ("_⋆"  102)

lemma Pair_Collect [simp]:
shows "(x, y) ∈ {(x, y). P x y} ⟷ P x y"
by simp

text ‹Myhill-Nerode relation›

definition
str_eq :: "'a lang ⇒ ('a list × 'a list) set" ("≈_"  100)
where
"≈A ≡ {(x, y).  (∀z. x @ z ∈ A ⟷ y @ z ∈ A)}"

abbreviation
str_eq_applied :: "'a list ⇒ 'a lang ⇒ 'a list ⇒ bool" ("_ ≈_ _")
where
"x ≈A y ≡ (x, y) ∈ ≈A"

lemma str_eq_conv_Derivs:
"str_eq A = {(u,v). Derivs u A = Derivs v A}"
by (auto simp: str_eq_def Derivs_def)

definition
finals :: "'a lang ⇒ 'a lang set"
where
"finals A ≡ {≈A  {s} | s . s ∈ A}"

lemma lang_is_union_of_finals:
shows "A = ⋃(finals A)"
unfolding finals_def
unfolding Image_def
unfolding str_eq_def
by (auto) (metis append_Nil2)

lemma finals_in_partitions:
shows "finals A ⊆ (UNIV // ≈A)"
unfolding finals_def quotient_def
by auto

subsection ‹Equational systems›

text ‹The two kinds of terms in the rhs of equations.›

datatype 'a trm =
Lam "'a rexp"            (* Lambda-marker *)
| Trn "'a lang" "'a rexp"     (* Transition *)

fun
lang_trm::"'a trm ⇒ 'a lang"
where
"lang_trm (Lam r) = lang r"
| "lang_trm (Trn X r) = X ⋅ lang r"

fun
lang_rhs::"('a trm) set ⇒ 'a lang"
where
"lang_rhs rhs = ⋃ (lang_trm  rhs)"

lemma lang_rhs_set:
shows "lang_rhs {Trn X r | r. P r} = ⋃{lang_trm (Trn X r) | r. P r}"
by (auto)

lemma lang_rhs_union_distrib:
shows "lang_rhs A ∪ lang_rhs B = lang_rhs (A ∪ B)"
by simp

text ‹Transitions between equivalence classes›

definition
transition :: "'a lang ⇒ 'a ⇒ 'a lang ⇒ bool" ("_ ⊨_⇒_" [100,100,100] 100)
where
"Y ⊨c⇒ X ≡ Y ⋅ {[c]} ⊆ X"

text ‹Initial equational system›

definition
"Init_rhs CS X ≡
if ([] ∈ X) then
{Lam One} ∪ {Trn Y (Atom c) | Y c. Y ∈ CS ∧ Y ⊨c⇒ X}
else
{Trn Y (Atom c)| Y c. Y ∈ CS ∧ Y ⊨c⇒ X}"

definition
"Init CS ≡ {(X, Init_rhs CS X) | X.  X ∈ CS}"

subsection ‹Arden Operation on equations›

fun
Append_rexp :: "'a rexp ⇒ 'a trm ⇒ 'a trm"
where
"Append_rexp r (Lam rexp)   = Lam (Times rexp r)"
| "Append_rexp r (Trn X rexp) = Trn X (Times rexp r)"

definition
"Append_rexp_rhs rhs rexp ≡ (Append_rexp rexp)  rhs"

definition
"Arden X rhs ≡
Append_rexp_rhs (rhs - {Trn X r | r. Trn X r ∈ rhs}) (Star (⨄ {r. Trn X r ∈ rhs}))"

subsection ‹Substitution Operation on equations›

definition
"Subst rhs X xrhs ≡
(rhs - {Trn X r | r. Trn X r ∈ rhs}) ∪ (Append_rexp_rhs xrhs (⨄ {r. Trn X r ∈ rhs}))"

definition
Subst_all :: "('a lang × ('a trm) set) set ⇒ 'a lang ⇒ ('a trm) set ⇒ ('a lang × ('a trm) set) set"
where
"Subst_all ES X xrhs ≡ {(Y, Subst yrhs X xrhs) | Y yrhs. (Y, yrhs) ∈ ES}"

definition
"Remove ES X xrhs ≡
Subst_all  (ES - {(X, xrhs)}) X (Arden X xrhs)"

subsection ‹While-combinator and invariants›

definition
"Iter X ES ≡ (let (Y, yrhs) = SOME (Y, yrhs). (Y, yrhs) ∈ ES ∧ X ≠ Y
in Remove ES Y yrhs)"

lemma IterI2:
assumes "(Y, yrhs) ∈ ES"
and     "X ≠ Y"
and     "⋀Y yrhs. ⟦(Y, yrhs) ∈ ES; X ≠ Y⟧ ⟹ Q (Remove ES Y yrhs)"
shows "Q (Iter X ES)"
unfolding Iter_def using assms
by (rule_tac a="(Y, yrhs)" in someI2) (auto)

abbreviation
"Cond ES ≡ card ES ≠ 1"

definition
"Solve X ES ≡ while Cond (Iter X) ES"

definition
"distinctness ES ≡
∀ X rhs rhs'. (X, rhs) ∈ ES ∧ (X, rhs') ∈ ES ⟶ rhs = rhs'"

definition
"soundness ES ≡ ∀(X, rhs) ∈ ES. X = lang_rhs rhs"

definition
"ardenable rhs ≡ (∀ Y r. Trn Y r ∈ rhs ⟶ [] ∉ lang r)"

definition
"ardenable_all ES ≡ ∀(X, rhs) ∈ ES. ardenable rhs"

definition
"finite_rhs ES ≡ ∀(X, rhs) ∈ ES. finite rhs"

lemma finite_rhs_def2:
"finite_rhs ES = (∀ X rhs. (X, rhs) ∈ ES ⟶ finite rhs)"
unfolding finite_rhs_def by auto

definition
"rhss rhs ≡ {X | X r. Trn X r ∈ rhs}"

definition
"lhss ES ≡ {Y | Y yrhs. (Y, yrhs) ∈ ES}"

definition
"validity ES ≡ ∀(X, rhs) ∈ ES. rhss rhs ⊆ lhss ES"

lemma rhss_union_distrib:
shows "rhss (A ∪ B) = rhss A ∪ rhss B"
by (auto simp add: rhss_def)

lemma lhss_union_distrib:
shows "lhss (A ∪ B) = lhss A ∪ lhss B"
by (auto simp add: lhss_def)

definition
"invariant ES ≡ finite ES
∧ finite_rhs ES
∧ soundness ES
∧ distinctness ES
∧ ardenable_all ES
∧ validity ES"

lemma invariantI:
assumes "soundness ES" "finite ES" "distinctness ES" "ardenable_all ES"
"finite_rhs ES" "validity ES"
shows "invariant ES"
using assms by (simp add: invariant_def)

declare [[simproc add: finite_Collect]]

lemma finite_Trn:
assumes fin: "finite rhs"
shows "finite {r. Trn Y r ∈ rhs}"
using assms by (auto intro!: finite_vimageI simp add: inj_on_def)

lemma finite_Lam:
assumes fin: "finite rhs"
shows "finite {r. Lam r ∈ rhs}"
using assms by (auto intro!: finite_vimageI simp add: inj_on_def)

lemma trm_soundness:
assumes finite:"finite rhs"
shows "lang_rhs ({Trn X r| r. Trn X r ∈ rhs}) = X ⋅ (lang (⨄{r. Trn X r ∈ rhs}))"
proof -
have "finite {r. Trn X r ∈ rhs}"
by (rule finite_Trn[OF finite])
then show "lang_rhs ({Trn X r| r. Trn X r ∈ rhs}) = X ⋅ (lang (⨄{r. Trn X r ∈ rhs}))"
by (simp only: lang_rhs_set lang_trm.simps) (auto simp add: conc_def)
qed

lemma lang_of_append_rexp:
"lang_trm (Append_rexp r trm) = lang_trm trm ⋅ lang r"
by (induct rule: Append_rexp.induct)
(auto simp add: conc_assoc)

lemma lang_of_append_rexp_rhs:
"lang_rhs (Append_rexp_rhs rhs r) = lang_rhs rhs ⋅ lang r"
unfolding Append_rexp_rhs_def
by (auto simp add: conc_def lang_of_append_rexp)

subsection ‹Intial Equational Systems›

lemma defined_by_str:
assumes "s ∈ X" "X ∈ UNIV // ≈A"
shows "X = ≈A  {s}"
using assms
unfolding quotient_def Image_def str_eq_def
by auto

lemma every_eqclass_has_transition:
assumes has_str: "s @ [c] ∈ X"
and     in_CS:   "X ∈ UNIV // ≈A"
obtains Y where "Y ∈ UNIV // ≈A" and "Y ⋅ {[c]} ⊆ X" and "s ∈ Y"
proof -
define Y where "Y = ≈A  {s}"
have "Y ∈ UNIV // ≈A"
unfolding Y_def quotient_def by auto
moreover
have "X = ≈A  {s @ [c]}"
using has_str in_CS defined_by_str by blast
then have "Y ⋅ {[c]} ⊆ X"
unfolding Y_def Image_def conc_def
unfolding str_eq_def
by clarsimp
moreover
have "s ∈ Y" unfolding Y_def
unfolding Image_def str_eq_def by simp
ultimately show thesis using that by blast
qed

lemma l_eq_r_in_eqs:
assumes X_in_eqs: "(X, rhs) ∈ Init (UNIV // ≈A)"
shows "X = lang_rhs rhs"
proof
show "X ⊆ lang_rhs rhs"
proof
fix x
assume in_X: "x ∈ X"
{ assume empty: "x = []"
then have "x ∈ lang_rhs rhs" using X_in_eqs in_X
unfolding Init_def Init_rhs_def
by auto
}
moreover
{ assume not_empty: "x ≠ []"
then obtain s c where decom: "x = s @ [c]"
using rev_cases by blast
have "X ∈ UNIV // ≈A" using X_in_eqs unfolding Init_def by auto
then obtain Y where "Y ∈ UNIV // ≈A" "Y ⋅ {[c]} ⊆ X" "s ∈ Y"
using decom in_X every_eqclass_has_transition by metis
then have "x ∈ lang_rhs {Trn Y (Atom c)| Y c. Y ∈ UNIV // ≈A ∧ Y ⊨c⇒ X}"
unfolding transition_def
using decom by (force simp add: conc_def)
then have "x ∈ lang_rhs rhs" using X_in_eqs in_X
unfolding Init_def Init_rhs_def by simp
}
ultimately show "x ∈ lang_rhs rhs" by blast
qed
next
show "lang_rhs rhs ⊆ X" using X_in_eqs
unfolding Init_def Init_rhs_def transition_def
by auto
qed

lemma finite_Init_rhs:
fixes CS::"(('a::finite) lang) set"
assumes finite: "finite CS"
shows "finite (Init_rhs CS X)"
using assms unfolding Init_rhs_def transition_def by simp

lemma Init_ES_satisfies_invariant:
fixes A::"(('a::finite) lang)"
assumes finite_CS: "finite (UNIV // ≈A)"
shows "invariant (Init (UNIV // ≈A))"
proof (rule invariantI)
show "soundness (Init (UNIV // ≈A))"
unfolding soundness_def
using l_eq_r_in_eqs by auto
show "finite (Init (UNIV // ≈A))" using finite_CS
unfolding Init_def by simp
show "distinctness (Init (UNIV // ≈A))"
unfolding distinctness_def Init_def by simp
show "ardenable_all (Init (UNIV // ≈A))"
unfolding ardenable_all_def Init_def Init_rhs_def ardenable_def
by auto
show "finite_rhs (Init (UNIV // ≈A))"
using finite_Init_rhs[OF finite_CS]
unfolding finite_rhs_def Init_def by auto
show "validity (Init (UNIV // ≈A))"
unfolding validity_def Init_def Init_rhs_def rhss_def lhss_def
by auto
qed

subsection ‹Interations›

lemma Arden_preserves_soundness:
assumes l_eq_r: "X = lang_rhs rhs"
and not_empty: "ardenable rhs"
and finite: "finite rhs"
shows "X = lang_rhs (Arden X rhs)"
proof -
define A where "A = lang (⨄{r. Trn X r ∈ rhs})"
define b where "b = {Trn X r | r. Trn X r ∈ rhs}"
define B where "B = lang_rhs (rhs - b)"
have not_empty2: "[] ∉ A"
using finite_Trn[OF finite] not_empty
unfolding A_def ardenable_def by simp
have "X = lang_rhs rhs" using l_eq_r by simp
also have "… = lang_rhs (b ∪ (rhs - b))" unfolding b_def by auto
also have "… = lang_rhs b ∪ B" unfolding B_def by (simp only: lang_rhs_union_distrib)
also have "… = X ⋅ A ∪ B"
unfolding b_def
unfolding trm_soundness[OF finite]
unfolding A_def
by blast
finally have "X = X ⋅ A ∪ B" .
then have "X = B ⋅ A⋆"
by (simp add: reversed_Arden[OF not_empty2])
also have "… = lang_rhs (Arden X rhs)"
unfolding Arden_def A_def B_def b_def
by (simp only: lang_of_append_rexp_rhs lang.simps)
finally show "X = lang_rhs (Arden X rhs)" by simp
qed

lemma Append_preserves_finite:
"finite rhs ⟹ finite (Append_rexp_rhs rhs r)"
by (auto simp: Append_rexp_rhs_def)

lemma Arden_preserves_finite:
"finite rhs ⟹ finite (Arden X rhs)"
by (auto simp: Arden_def Append_preserves_finite)

lemma Append_preserves_ardenable:
"ardenable rhs ⟹ ardenable (Append_rexp_rhs rhs r)"
apply (auto simp: ardenable_def Append_rexp_rhs_def)
by (case_tac x, auto simp: conc_def)

lemma ardenable_set_sub:
"ardenable rhs ⟹ ardenable (rhs - A)"
by (auto simp:ardenable_def)

lemma ardenable_set_union:
"⟦ardenable rhs; ardenable rhs'⟧ ⟹ ardenable (rhs ∪ rhs')"
by (auto simp:ardenable_def)

lemma Arden_preserves_ardenable:
"ardenable rhs ⟹ ardenable (Arden X rhs)"
by (simp only:Arden_def Append_preserves_ardenable ardenable_set_sub)

lemma Subst_preserves_ardenable:
"⟦ardenable rhs; ardenable xrhs⟧ ⟹ ardenable (Subst rhs X xrhs)"
by (simp only: Subst_def Append_preserves_ardenable ardenable_set_union ardenable_set_sub)

lemma Subst_preserves_soundness:
assumes substor: "X = lang_rhs xrhs"
and finite: "finite rhs"
shows "lang_rhs (Subst rhs X xrhs) = lang_rhs rhs" (is "?Left = ?Right")
proof-
define A where "A = lang_rhs (rhs - {Trn X r | r. Trn X r ∈ rhs})"
have "?Left = A ∪ lang_rhs (Append_rexp_rhs xrhs (⨄{r. Trn X r ∈ rhs}))"
unfolding Subst_def
unfolding lang_rhs_union_distrib[symmetric]
by (simp add: A_def)
moreover have "?Right = A ∪ lang_rhs {Trn X r | r. Trn X r ∈ rhs}"
proof-
have "rhs = (rhs - {Trn X r | r. Trn X r ∈ rhs}) ∪ ({Trn X r | r. Trn X r ∈ rhs})" by auto
thus ?thesis
unfolding A_def
unfolding lang_rhs_union_distrib
by simp
qed
moreover
have "lang_rhs (Append_rexp_rhs xrhs (⨄{r. Trn X r ∈ rhs})) = lang_rhs {Trn X r | r. Trn X r ∈ rhs}"
using finite substor by (simp only: lang_of_append_rexp_rhs trm_soundness)
ultimately show ?thesis by simp
qed

lemma Subst_preserves_finite_rhs:
"⟦finite rhs; finite yrhs⟧ ⟹ finite (Subst rhs Y yrhs)"
by (auto simp: Subst_def Append_preserves_finite)

lemma Subst_all_preserves_finite:
assumes finite: "finite ES"
shows "finite (Subst_all ES Y yrhs)"
using assms unfolding Subst_all_def by simp

declare [[simproc del: finite_Collect]]

lemma Subst_all_preserves_finite_rhs:
"⟦finite_rhs ES; finite yrhs⟧ ⟹ finite_rhs (Subst_all ES Y yrhs)"
by (auto intro:Subst_preserves_finite_rhs simp add:Subst_all_def finite_rhs_def)

lemma append_rhs_preserves_cls:
"rhss (Append_rexp_rhs rhs r) = rhss rhs"
apply (auto simp: rhss_def Append_rexp_rhs_def)
apply (case_tac xa, auto simp: image_def)
by (rule_tac x = "Times ra r" in exI, rule_tac x = "Trn x ra" in bexI, simp+)

lemma Arden_removes_cl:
"rhss (Arden Y yrhs) = rhss yrhs - {Y}"
apply (simp add:Arden_def append_rhs_preserves_cls)
by (auto simp: rhss_def)

lemma lhss_preserves_cls:
"lhss (Subst_all ES Y yrhs) = lhss ES"
by (auto simp: lhss_def Subst_all_def)

"X ∉ rhss xrhs ⟹
rhss (Subst rhs X xrhs) = rhss rhs ∪ rhss xrhs - {X}"
apply (simp only:Subst_def append_rhs_preserves_cls rhss_union_distrib)
by (auto simp: rhss_def)

lemma Subst_all_preserves_validity:
assumes sc: "validity (ES ∪ {(Y, yrhs)})"        (is "validity ?A")
shows "validity (Subst_all ES Y (Arden Y yrhs))"  (is "validity ?B")
proof -
{ fix X xrhs'
assume "(X, xrhs') ∈ ?B"
then obtain xrhs
where xrhs_xrhs': "xrhs' = Subst xrhs Y (Arden Y yrhs)"
and X_in: "(X, xrhs) ∈ ES" by (simp add:Subst_all_def, blast)
have "rhss xrhs' ⊆ lhss ?B"
proof-
have "lhss ?B = lhss ES" by (auto simp add:lhss_def Subst_all_def)
moreover have "rhss xrhs' ⊆ lhss ES"
proof-
have "rhss xrhs' ⊆  rhss xrhs ∪ rhss (Arden Y yrhs) - {Y}"
proof -
have "Y ∉ rhss (Arden Y yrhs)"
using Arden_removes_cl by auto
thus ?thesis using xrhs_xrhs' by (auto simp: Subst_updates_cls)
qed
moreover have "rhss xrhs ⊆ lhss ES ∪ {Y}" using X_in sc
apply (simp only:validity_def lhss_union_distrib)
by (drule_tac x = "(X, xrhs)" in bspec, auto simp:lhss_def)
moreover have "rhss (Arden Y yrhs) ⊆ lhss ES ∪ {Y}"
using sc
by (auto simp add: Arden_removes_cl validity_def lhss_def)
ultimately show ?thesis by auto
qed
ultimately show ?thesis by simp
qed
} thus ?thesis by (auto simp only:Subst_all_def validity_def)
qed

lemma Subst_all_satisfies_invariant:
assumes invariant_ES: "invariant (ES ∪ {(Y, yrhs)})"
shows "invariant (Subst_all ES Y (Arden Y yrhs))"
proof (rule invariantI)
have Y_eq_yrhs: "Y = lang_rhs yrhs"
using invariant_ES by (simp only:invariant_def soundness_def, blast)
have finite_yrhs: "finite yrhs"
using invariant_ES by (auto simp:invariant_def finite_rhs_def)
have ardenable_yrhs: "ardenable yrhs"
using invariant_ES by (auto simp:invariant_def ardenable_all_def)
show "soundness (Subst_all ES Y (Arden Y yrhs))"
proof -
have "Y = lang_rhs (Arden Y yrhs)"
using Y_eq_yrhs invariant_ES finite_yrhs
using finite_Trn[OF finite_yrhs]
apply(rule_tac Arden_preserves_soundness)
apply(simp_all)
unfolding invariant_def ardenable_all_def ardenable_def
apply(auto)
done
thus ?thesis using invariant_ES
unfolding invariant_def finite_rhs_def2 soundness_def Subst_all_def
by (auto simp add: Subst_preserves_soundness simp del: lang_rhs.simps)
qed
show "finite (Subst_all ES Y (Arden Y yrhs))"
using invariant_ES by (simp add:invariant_def Subst_all_preserves_finite)
show "distinctness (Subst_all ES Y (Arden Y yrhs))"
using invariant_ES
unfolding distinctness_def Subst_all_def invariant_def by auto
show "ardenable_all (Subst_all ES Y (Arden Y yrhs))"
proof -
{ fix X rhs
assume "(X, rhs) ∈ ES"
hence "ardenable rhs"  using invariant_ES
by (auto simp add:invariant_def ardenable_all_def)
with ardenable_yrhs
have "ardenable (Subst rhs Y (Arden Y yrhs))"
Subst_preserves_ardenable Arden_preserves_ardenable)
} thus ?thesis by (auto simp add:ardenable_all_def Subst_all_def)
qed
show "finite_rhs (Subst_all ES Y (Arden Y yrhs))"
proof-
have "finite_rhs ES" using invariant_ES
by (simp add:invariant_def finite_rhs_def)
moreover have "finite (Arden Y yrhs)"
proof -
have "finite yrhs" using invariant_ES
by (auto simp:invariant_def finite_rhs_def)
thus ?thesis using Arden_preserves_finite by auto
qed
ultimately show ?thesis
qed
show "validity (Subst_all ES Y (Arden Y yrhs))"
using invariant_ES Subst_all_preserves_validity by (auto simp add: invariant_def)
qed

lemma Remove_in_card_measure:
assumes finite: "finite ES"
and     in_ES: "(X, rhs) ∈ ES"
shows "(Remove ES X rhs, ES) ∈ measure card"
proof -
define f where "f x = ((fst x)::'a lang, Subst (snd x) X (Arden X rhs))" for x
define ES' where "ES' = ES - {(X, rhs)}"
have "Subst_all ES' X (Arden X rhs) = f  ES'"
apply (auto simp: Subst_all_def f_def image_def)
by (rule_tac x = "(Y, yrhs)" in bexI, simp+)
then have "card (Subst_all ES' X (Arden X rhs)) ≤ card ES'"
unfolding ES'_def using finite by (auto intro: card_image_le)
also have "… < card ES" unfolding ES'_def
using in_ES finite by (rule_tac card_Diff1_less)
finally show "(Remove ES X rhs, ES) ∈ measure card"
unfolding Remove_def ES'_def by simp
qed

lemma Subst_all_cls_remains:
"(X, xrhs) ∈ ES ⟹ ∃ xrhs'. (X, xrhs') ∈ (Subst_all ES Y yrhs)"
by (auto simp: Subst_all_def)

lemma card_noteq_1_has_more:
assumes card:"Cond ES"
and e_in: "(X, xrhs) ∈ ES"
and finite: "finite ES"
shows "∃(Y, yrhs) ∈ ES. (X, xrhs) ≠ (Y, yrhs)"
proof-
have "card ES > 1" using card e_in finite
by (cases "card ES") (auto)
then have "card (ES - {(X, xrhs)}) > 0"
using finite e_in by auto
then have "(ES - {(X, xrhs)}) ≠ {}" using finite by (rule_tac notI, simp)
then show "∃(Y, yrhs) ∈ ES. (X, xrhs) ≠ (Y, yrhs)"
by auto
qed

lemma iteration_step_measure:
assumes Inv_ES: "invariant ES"
and    X_in_ES: "(X, xrhs) ∈ ES"
and    Cnd:     "Cond ES "
shows "(Iter X ES, ES) ∈ measure card"
proof -
have fin: "finite ES" using Inv_ES unfolding invariant_def by simp
then obtain Y yrhs
where Y_in_ES: "(Y, yrhs) ∈ ES" and not_eq: "(X, xrhs) ≠ (Y, yrhs)"
using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
then have "(Y, yrhs) ∈ ES " "X ≠ Y"
using X_in_ES Inv_ES unfolding invariant_def distinctness_def
by auto
then show "(Iter X ES, ES) ∈ measure card"
apply(rule IterI2)
apply(rule Remove_in_card_measure)
done
qed

lemma iteration_step_invariant:
assumes Inv_ES: "invariant ES"
and    X_in_ES: "(X, xrhs) ∈ ES"
and    Cnd: "Cond ES"
shows "invariant (Iter X ES)"
proof -
have finite_ES: "finite ES" using Inv_ES by (simp add: invariant_def)
then obtain Y yrhs
where Y_in_ES: "(Y, yrhs) ∈ ES" and not_eq: "(X, xrhs) ≠ (Y, yrhs)"
using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
then have "(Y, yrhs) ∈ ES" "X ≠ Y"
using X_in_ES Inv_ES unfolding invariant_def distinctness_def
by auto
then show "invariant (Iter X ES)"
proof(rule IterI2)
fix Y yrhs
assume h: "(Y, yrhs) ∈ ES" "X ≠ Y"
then have "ES - {(Y, yrhs)} ∪ {(Y, yrhs)} = ES" by auto
then show "invariant (Remove ES Y yrhs)" unfolding Remove_def
using Inv_ES
by (rule_tac Subst_all_satisfies_invariant) (simp)
qed
qed

lemma iteration_step_ex:
assumes Inv_ES: "invariant ES"
and    X_in_ES: "(X, xrhs) ∈ ES"
and    Cnd: "Cond ES"
shows "∃xrhs'. (X, xrhs') ∈ (Iter X ES)"
proof -
have finite_ES: "finite ES" using Inv_ES by (simp add: invariant_def)
then obtain Y yrhs
where "(Y, yrhs) ∈ ES" "(X, xrhs) ≠ (Y, yrhs)"
using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
then have "(Y, yrhs) ∈ ES " "X ≠ Y"
using X_in_ES Inv_ES unfolding invariant_def distinctness_def
by auto
then show "∃xrhs'. (X, xrhs') ∈ (Iter X ES)"
apply(rule IterI2)
unfolding Remove_def
apply(rule Subst_all_cls_remains)
using X_in_ES
apply(auto)
done
qed

subsection ‹The conclusion of the first direction›

lemma Solve:
fixes A::"('a::finite) lang"
assumes fin: "finite (UNIV // ≈A)"
and     X_in: "X ∈ (UNIV // ≈A)"
shows "∃rhs. Solve X (Init (UNIV // ≈A)) = {(X, rhs)} ∧ invariant {(X, rhs)}"
proof -
define Inv where "Inv ES ⟷ invariant ES ∧ (∃rhs. (X, rhs) ∈ ES)" for ES
have "Inv (Init (UNIV // ≈A))" unfolding Inv_def
using fin X_in by (simp add: Init_ES_satisfies_invariant, simp add: Init_def)
moreover
{ fix ES
assume inv: "Inv ES" and crd: "Cond ES"
then have "Inv (Iter X ES)"
unfolding Inv_def
by (auto simp add: iteration_step_invariant iteration_step_ex) }
moreover
{ fix ES
assume inv: "Inv ES" and not_crd: "¬Cond ES"
from inv obtain rhs where "(X, rhs) ∈ ES" unfolding Inv_def by auto
moreover
from not_crd have "card ES = 1" by simp
ultimately
have "ES = {(X, rhs)}" by (auto simp add: card_Suc_eq)
then have "∃rhs'. ES = {(X, rhs')} ∧ invariant {(X, rhs')}" using inv
unfolding Inv_def by auto }
moreover
have "wf (measure card)" by simp
moreover
{ fix ES
assume inv: "Inv ES" and crd: "Cond ES"
then have "(Iter X ES, ES) ∈ measure card"
unfolding Inv_def
apply(clarify)
apply(rule_tac iteration_step_measure)
apply(auto)
done }
ultimately
show "∃rhs. Solve X (Init (UNIV // ≈A)) = {(X, rhs)} ∧ invariant {(X, rhs)}"
unfolding Solve_def by (rule while_rule)
qed

lemma every_eqcl_has_reg:
fixes A::"('a::finite) lang"
assumes finite_CS: "finite (UNIV // ≈A)"
and X_in_CS: "X ∈ (UNIV // ≈A)"
shows "∃r. X = lang r"
proof -
from finite_CS X_in_CS
obtain xrhs where Inv_ES: "invariant {(X, xrhs)}"
using Solve by metis

define A where "A = Arden X xrhs"
have "rhss xrhs ⊆ {X}" using Inv_ES
unfolding validity_def invariant_def rhss_def lhss_def
by auto
then have "rhss A = {}" unfolding A_def
by (simp add: Arden_removes_cl)
then have eq: "{Lam r | r. Lam r ∈ A} = A" unfolding rhss_def
by (auto, case_tac x, auto)

have "finite A" using Inv_ES unfolding A_def invariant_def finite_rhs_def
using Arden_preserves_finite by auto
then have fin: "finite {r. Lam r ∈ A}" by (rule finite_Lam)

have "X = lang_rhs xrhs" using Inv_ES unfolding invariant_def soundness_def
by simp
then have "X = lang_rhs A" using Inv_ES
unfolding A_def invariant_def ardenable_all_def finite_rhs_def
by (rule_tac Arden_preserves_soundness) (simp_all add: finite_Trn)
then have "X = lang_rhs {Lam r | r. Lam r ∈ A}" using eq by simp
then have "X = lang (⨄{r. Lam r ∈ A})" using fin by auto
then show "∃r. X = lang r" by blast
qed

lemma bchoice_finite_set:
assumes a: "∀x ∈ S. ∃y. x = f y"
and     b: "finite S"
shows "∃ys. (⋃ S) = ⋃(f  ys) ∧ finite ys"
using bchoice[OF a] b
apply(erule_tac exE)
apply(rule_tac x="fa  S" in exI)
apply(auto)
done

theorem Myhill_Nerode1:
fixes A::"('a::finite) lang"
assumes finite_CS: "finite (UNIV // ≈A)"
shows   "∃r. A = lang r"
proof -
have fin: "finite (finals A)"
using finals_in_partitions finite_CS by (rule finite_subset)
have "∀X ∈ (UNIV // ≈A). ∃r. X = lang r"
using finite_CS every_eqcl_has_reg by blast
then have a: "∀X ∈ finals A. ∃r. X = lang r"
using finals_in_partitions by auto
then obtain rs::"('a rexp) set" where "⋃ (finals A) = ⋃(lang  rs)" "finite rs"
using fin by (auto dest: bchoice_finite_set)
then have "A = lang (⨄rs)"
unfolding lang_is_union_of_finals[symmetric] by simp
then show "∃r. A = lang r" by blast
qed

end


# Theory Myhill_2

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Myhill_2
imports Myhill_1 "HOL-Library.Sublist"
begin

section ‹Second direction of MN: ‹regular language ⇒ finite partition››

subsection ‹Tagging functions›

definition
tag_eq :: "('a list ⇒ 'b) ⇒ ('a list × 'a list) set" ("=_=")
where
"=tag= ≡ {(x, y). tag x = tag y}"

abbreviation
tag_eq_applied :: "'a list ⇒ ('a list ⇒ 'b) ⇒ 'a list ⇒ bool" ("_ =_= _")
where
"x =tag= y ≡ (x, y) ∈ =tag="

lemma [simp]:
shows "(≈A)  {x} = (≈A)  {y} ⟷ x ≈A y"
unfolding str_eq_def by auto

lemma refined_intro:
assumes "⋀x y z. ⟦x =tag= y; x @ z ∈ A⟧ ⟹ y @ z ∈ A"
shows "=tag= ⊆ ≈A"
using assms unfolding str_eq_def tag_eq_def
apply(clarify, simp (no_asm_use))
by metis

lemma finite_eq_tag_rel:
assumes rng_fnt: "finite (range tag)"
shows "finite (UNIV // =tag=)"
proof -
let "?f" =  "λX. tag  X" and ?A = "(UNIV // =tag=)"
have "finite (?f  ?A)"
proof -
have "range ?f ⊆ (Pow (range tag))" unfolding Pow_def by auto
moreover
have "finite (Pow (range tag))" using rng_fnt by simp
ultimately
have "finite (range ?f)" unfolding image_def by (blast intro: finite_subset)
moreover
have "?f  ?A ⊆ range ?f" by auto
ultimately show "finite (?f  ?A)" by (rule rev_finite_subset)
qed
moreover
have "inj_on ?f ?A"
proof -
{ fix X Y
assume X_in: "X ∈ ?A"
and  Y_in: "Y ∈ ?A"
and  tag_eq: "?f X = ?f Y"
then obtain x y
where "x ∈ X" "y ∈ Y" "tag x = tag y"
unfolding quotient_def Image_def image_def tag_eq_def
by (simp) (blast)
with X_in Y_in
have "X = Y"
unfolding quotient_def tag_eq_def by auto
}
then show "inj_on ?f ?A" unfolding inj_on_def by auto
qed
ultimately show "finite (UNIV // =tag=)" by (rule finite_imageD)
qed

lemma refined_partition_finite:
assumes fnt: "finite (UNIV // R1)"
and refined: "R1 ⊆ R2"
and eq1: "equiv UNIV R1" and eq2: "equiv UNIV R2"
shows "finite (UNIV // R2)"
proof -
let ?f = "λX. {R1  {x} | x. x ∈ X}"
and ?A = "UNIV // R2" and ?B = "UNIV // R1"
have "?f  ?A ⊆ Pow ?B"
unfolding image_def Pow_def quotient_def by auto
moreover
have "finite (Pow ?B)" using fnt by simp
ultimately
have "finite (?f  ?A)" by (rule finite_subset)
moreover
have "inj_on ?f ?A"
proof -
{ fix X Y
assume X_in: "X ∈ ?A" and Y_in: "Y ∈ ?A" and eq_f: "?f X = ?f Y"
from quotientE [OF X_in]
obtain x where "X = R2  {x}" by blast
with equiv_class_self[OF eq2] have x_in: "x ∈ X" by simp
then have "R1 {x} ∈ ?f X" by auto
with eq_f have "R1  {x} ∈ ?f Y" by simp
then obtain y
where y_in: "y ∈ Y" and eq_r1_xy: "R1  {x} = R1  {y}" by auto
with eq_equiv_class[OF _ eq1]
have "(x, y) ∈ R1" by blast
with refined have "(x, y) ∈ R2" by auto
with quotient_eqI [OF eq2 X_in Y_in x_in y_in]
have "X = Y" .
}
then show "inj_on ?f ?A" unfolding inj_on_def by blast
qed
ultimately show "finite (UNIV // R2)" by (rule finite_imageD)
qed

lemma tag_finite_imageD:
assumes rng_fnt: "finite (range tag)"
and     refined: "=tag=  ⊆ ≈A"
shows "finite (UNIV // ≈A)"
proof (rule_tac refined_partition_finite [of "=tag="])
show "finite (UNIV // =tag=)" by (rule finite_eq_tag_rel[OF rng_fnt])
next
show "=tag= ⊆ ≈A" using refined .
next
show "equiv UNIV =tag="
and  "equiv UNIV (≈A)"
unfolding equiv_def str_eq_def tag_eq_def refl_on_def sym_def trans_def
by auto
qed

subsection ‹Base cases: @{const Zero}, @{const One} and @{const Atom}›

lemma quot_zero_eq:
shows "UNIV // ≈{} = {UNIV}"
unfolding quotient_def Image_def str_eq_def by auto

lemma quot_zero_finiteI [intro]:
shows "finite (UNIV // ≈{})"
unfolding quot_zero_eq by simp

lemma quot_one_subset:
shows "UNIV // ≈{[]} ⊆ {{[]}, UNIV - {[]}}"
proof
fix x
assume "x ∈ UNIV // ≈{[]}"
then obtain y where h: "x = {z. y ≈{[]} z}"
unfolding quotient_def Image_def by blast
{ assume "y = []"
with h have "x = {[]}" by (auto simp: str_eq_def)
then have "x ∈ {{[]}, UNIV - {[]}}" by simp }
moreover
{ assume "y ≠ []"
with h have "x = UNIV - {[]}" by (auto simp: str_eq_def)
then have "x ∈ {{[]}, UNIV - {[]}}" by simp }
ultimately show "x ∈ {{[]}, UNIV - {[]}}" by blast
qed

lemma quot_one_finiteI [intro]:
shows "finite (UNIV // ≈{[]})"
by (rule finite_subset[OF quot_one_subset]) (simp)

lemma quot_atom_subset:
"UNIV // (≈{[c]}) ⊆ {{[]},{[c]}, UNIV - {[], [c]}}"
proof
fix x
assume "x ∈ UNIV // ≈{[c]}"
then obtain y where h: "x = {z. (y, z) ∈ ≈{[c]}}"
unfolding quotient_def Image_def by blast
show "x ∈ {{[]},{[c]}, UNIV - {[], [c]}}"
proof -
{ assume "y = []" hence "x = {[]}" using h
by (auto simp: str_eq_def) }
moreover
{ assume "y = [c]" hence "x = {[c]}" using h
by (auto dest!: spec[where x = "[]"] simp: str_eq_def) }
moreover
{ assume "y ≠ []" and "y ≠ [c]"
hence "∀ z. (y @ z) ≠ [c]" by (case_tac y, auto)
moreover have "⋀ p. (p ≠ [] ∧ p ≠ [c]) = (∀ q. p @ q ≠ [c])"
by (case_tac p, auto)
ultimately have "x = UNIV - {[],[c]}" using h
by (auto simp add: str_eq_def)
}
ultimately show ?thesis by blast
qed
qed

lemma quot_atom_finiteI [intro]:
shows "finite (UNIV // ≈{[c]})"
by (rule finite_subset[OF quot_atom_subset]) (simp)

subsection ‹Case for @{const Plus}›

definition
tag_Plus :: "'a lang ⇒ 'a lang ⇒ 'a list ⇒ ('a lang × 'a lang)"
where
"tag_Plus A B ≡ λx. (≈A  {x}, ≈B  {x})"

lemma quot_plus_finiteI [intro]:
assumes finite1: "finite (UNIV // ≈A)"
and     finite2: "finite (UNIV // ≈B)"
shows "finite (UNIV // ≈(A ∪ B))"
proof (rule_tac tag = "tag_Plus A B" in tag_finite_imageD)
have "finite ((UNIV // ≈A) × (UNIV // ≈B))"
using finite1 finite2 by auto
then show "finite (range (tag_Plus A B))"
unfolding tag_Plus_def quotient_def
by (rule rev_finite_subset) (auto)
next
show "=tag_Plus A B= ⊆ ≈(A ∪ B)"
unfolding tag_eq_def tag_Plus_def str_eq_def by auto
qed

subsection ‹Case for ‹Times››

definition
"Partitions x ≡ {(x⇩p, x⇩s). x⇩p @ x⇩s = x}"

lemma conc_partitions_elim:
assumes "x ∈ A ⋅ B"
shows "∃(u, v) ∈ Partitions x. u ∈ A ∧ v ∈ B"
using assms unfolding conc_def Partitions_def
by auto

lemma conc_partitions_intro:
assumes "(u, v) ∈ Partitions x ∧ u ∈ A ∧  v ∈ B"
shows "x ∈ A ⋅ B"
using assms unfolding conc_def Partitions_def
by auto

lemma equiv_class_member:
assumes "x ∈ A"
and "≈A  {x} = ≈A  {y}"
shows "y ∈ A"
using assms
apply(simp)
apply(metis append_Nil2)
done

definition
tag_Times :: "'a lang ⇒ 'a lang ⇒ 'a list ⇒ 'a lang × 'a lang set"
where
"tag_Times A B ≡ λx. (≈A  {x}, {(≈B  {x⇩s}) | x⇩p x⇩s. x⇩p ∈ A ∧ (x⇩p, x⇩s) ∈ Partitions x})"

lemma tag_Times_injI:
assumes a: "tag_Times A B x = tag_Times A B y"
and     c: "x @ z ∈ A ⋅ B"
shows "y @ z ∈ A ⋅ B"
proof -
from c obtain u v where
h1: "(u, v) ∈ Partitions (x @ z)" and
h2: "u ∈ A" and
h3: "v ∈ B" by (auto dest: conc_partitions_elim)
from h1 have "x @ z = u @ v" unfolding Partitions_def by simp
then obtain us
where "(x = u @ us ∧ us @ z = v) ∨ (x @ us = u ∧ z = us @ v)"
by (auto simp add: append_eq_append_conv2)
moreover
{ assume eq: "x = u @ us" "us @ z = v"
have "(≈B  {us}) ∈ snd (tag_Times A B x)"
unfolding Partitions_def tag_Times_def using h2 eq
by (auto simp add: str_eq_def)
then have "(≈B  {us}) ∈ snd (tag_Times A B y)"
using a by simp
then obtain u' us' where
q1: "u' ∈ A" and
q2: "≈B  {us} = ≈B  {us'}" and
q3: "(u', us') ∈ Partitions y"
unfolding tag_Times_def by auto
from q2 h3 eq
have "us' @ z ∈ B"
unfolding Image_def str_eq_def by auto
then have "y @ z ∈ A ⋅ B" using q1 q3
unfolding Partitions_def by auto
}
moreover
{ assume eq: "x @ us = u" "z = us @ v"
have "(≈A  {x}) = fst (tag_Times A B x)"
by (simp add: tag_Times_def)
then have "(≈A  {x}) = fst (tag_Times A B y)"
using a by simp
then have "≈A  {x} = ≈A  {y}"
by (simp add: tag_Times_def)
moreover
have "x @ us ∈ A" using h2 eq by simp
ultimately
have "y @ us ∈ A" using equiv_class_member
unfolding Image_def str_eq_def by blast
then have "(y @ us) @ v ∈ A ⋅ B"
using h3 unfolding conc_def by blast
then have "y @ z ∈ A ⋅ B" using eq by simp
}
ultimately show "y @ z ∈ A ⋅ B" by blast
qed

lemma quot_conc_finiteI [intro]:
assumes fin1: "finite (UNIV // ≈A)"
and     fin2: "finite (UNIV // ≈B)"
shows "finite (UNIV // ≈(A ⋅ B))"
proof (rule_tac tag = "tag_Times A B" in tag_finite_imageD)
have "⋀x y z. ⟦tag_Times A B x = tag_Times A B y; x @ z ∈ A ⋅ B⟧ ⟹ y @ z ∈ A ⋅ B"
by (rule tag_Times_injI)
(auto simp add: tag_Times_def tag_eq_def)
then show "=tag_Times A B= ⊆ ≈(A ⋅ B)"
by (rule refined_intro)
(auto simp add: tag_eq_def)
next
have *: "finite ((UNIV // ≈A) × (Pow (UNIV // ≈B)))"
using fin1 fin2 by auto
show "finite (range (tag_Times A B))"
unfolding tag_Times_def
apply(rule finite_subset[OF _ *])
unfolding quotient_def
by auto
qed

subsection ‹Case for @{const "Star"}›

lemma star_partitions_elim:
assumes "x @ z ∈ A⋆" "x ≠ []"
shows "∃(u, v) ∈ Partitions (x @ z). strict_prefix u x ∧ u ∈ A⋆ ∧ v ∈ A⋆"
proof -
have "([], x @ z) ∈ Partitions (x @ z)" "strict_prefix [] x" "[] ∈ A⋆" "x @ z ∈ A⋆"
using assms by (auto simp add: Partitions_def strict_prefix_def)
then show "∃(u, v) ∈ Partitions (x @ z). strict_prefix u x ∧ u ∈ A⋆ ∧ v ∈ A⋆"
by blast
qed

lemma finite_set_has_max2:
"⟦finite A; A ≠ {}⟧ ⟹ ∃ max ∈ A. ∀ a ∈ A. length a ≤ length max"
apply(induct rule:finite.induct)
apply(simp)
by (metis (no_types) all_not_in_conv insert_iff linorder_le_cases order_trans)

lemma finite_strict_prefix_set:
shows "finite {xa. strict_prefix xa (x::'a list)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. strict_prefix xa (xs @ [x])} = {xa. strict_prefix xa xs} ∪ {xs}")
by (auto simp:strict_prefix_def)

lemma append_eq_cases:
assumes a: "x @ y = m @ n" "m ≠ []"
shows "prefix x m ∨ strict_prefix m x"
unfolding prefix_def strict_prefix_def using a
by (auto simp add: append_eq_append_conv2)

lemma star_spartitions_elim2:
assumes a: "x @ z ∈ A⋆"
and     b: "x ≠ []"
shows "∃(u, v) ∈ Partitions x. ∃ (u', v') ∈ Partitions z. strict_prefix u x ∧ u ∈ A⋆ ∧ v @ u' ∈ A ∧ v' ∈ A⋆"
proof -
define S where "S = {u | u v. (u, v) ∈ Partitions x ∧ strict_prefix u x ∧ u ∈ A⋆ ∧ v @ z ∈ A⋆}"
have "finite {u. strict_prefix u x}" by (rule finite_strict_prefix_set)
then have "finite S" unfolding S_def
by (rule rev_finite_subset) (auto)
moreover
have "S ≠ {}" using a b unfolding S_def Partitions_def
by (auto simp: strict_prefix_def)
ultimately have "∃ u_max ∈ S. ∀ u ∈ S. length u ≤ length u_max"
using finite_set_has_max2 by blast
then obtain u_max v
where h0: "(u_max, v) ∈ Partitions x"
and h1: "strict_prefix u_max x"
and h2: "u_max ∈ A⋆"
and h3: "v @ z ∈ A⋆"
and h4: "∀ u v. (u, v) ∈ Partitions x ∧ strict_prefix u x ∧ u ∈ A⋆ ∧ v @ z ∈ A⋆ ⟶ length u ≤ length u_max"
unfolding S_def Partitions_def by blast
have q: "v ≠ []" using h0 h1 b unfolding Partitions_def by auto
from h3 obtain a b
where i1: "(a, b) ∈ Partitions (v @ z)"
and   i2: "a ∈ A"
and   i3: "b ∈ A⋆"
and   i4: "a ≠ []"
unfolding Partitions_def
using q by (auto dest: star_decom)
have "prefix v a"
proof (rule ccontr)
assume a: "¬(prefix v a)"
from i1 have i1': "a @ b = v @ z" unfolding Partitions_def by simp
then have "prefix a v ∨ strict_prefix v a" using append_eq_cases q by blast
then have q: "strict_prefix a v" using a unfolding strict_prefix_def prefix_def by auto
then obtain as where eq: "a @ as = v" unfolding strict_prefix_def prefix_def by auto
have "(u_max @ a, as) ∈ Partitions x" using eq h0 unfolding Partitions_def by auto
moreover
have "strict_prefix (u_max @ a) x" using h0 eq q unfolding Partitions_def prefix_def strict_prefix_def by auto
moreover
have "u_max @ a ∈ A⋆" using i2 h2 by simp
moreover
have "as @ z ∈ A⋆" using i1' i2 i3 eq by auto
ultimately have "length (u_max @ a) ≤ length u_max" using h4 by blast
with i4 show "False" by auto
qed
with i1 obtain za zb
where k1: "v @ za = a"
and   k2: "(za, zb) ∈ Partitions z"
and   k4: "zb = b"
unfolding Partitions_def prefix_def
by (auto simp add: append_eq_append_conv2)
show "∃ (u, v) ∈ Partitions x. ∃ (u', v') ∈ Partitions z. strict_prefix u x ∧ u ∈ A⋆ ∧ v @ u' ∈ A ∧ v' ∈ A⋆"
using h0 h1 h2 i2 i3 k1 k2 k4 unfolding Partitions_def by blast
qed

definition
tag_Star :: "'a lang ⇒ 'a list ⇒ ('a lang) set"
where
"tag_Star A ≡ λx. {≈A  {v} | u v. strict_prefix u x ∧ u ∈ A⋆ ∧ (u, v) ∈ Partitions x}"

lemma tag_Star_non_empty_injI:
assumes a: "tag_Star A x = tag_Star A y"
and     c: "x @ z ∈ A⋆"
and     d: "x ≠ []"
shows "y @ z ∈ A⋆"
proof -
obtain u v u' v'
where a1: "(u,  v) ∈ Partitions x" "(u', v')∈ Partitions z"
and   a2: "strict_prefix u x"
and   a3: "u ∈ A⋆"
and   a4: "v @ u' ∈ A"
and   a5: "v' ∈ A⋆"
using c d by (auto dest: star_spartitions_elim2)
have "(≈A)  {v} ∈ tag_Star A x"
apply(simp add: tag_Star_def Partitions_def str_eq_def)
using a1 a2 a3 by (auto simp add: Partitions_def)
then have "(≈A)  {v} ∈ tag_Star A y" using a by simp
then obtain u1 v1
where b1: "v ≈A v1"
and   b3: "u1 ∈ A⋆"
and   b4: "(u1, v1) ∈ Partitions y"
unfolding tag_Star_def by auto
have c: "v1 @ u' ∈ A⋆" using b1 a4 unfolding str_eq_def by simp
have "u1 @ (v1 @ u') @ v' ∈ A⋆"
using b3 c a5 by (simp only: append_in_starI)
then show "y @ z ∈ A⋆" using b4 a1
unfolding Partitions_def by auto
qed

lemma tag_Star_empty_injI:
assumes a: "tag_Star A x = tag_Star A y"
and     c: "x @ z ∈ A⋆"
and     d: "x = []"
shows "y @ z ∈ A⋆"
proof -
from a have "{} = tag_Star A y" unfolding tag_Star_def using d by auto
then have "y = []"
unfolding tag_Star_def Partitions_def strict_prefix_def prefix_def
by (auto) (metis Nil_in_star append_self_conv2)
then show "y @ z ∈ A⋆" using c d by simp
qed

lemma quot_star_finiteI [intro]:
assumes finite1: "finite (UNIV // ≈A)"
shows "finite (UNIV // ≈(A⋆))"
proof (rule_tac tag = "tag_Star A" in tag_finite_imageD)
have "⋀x y z. ⟦tag_Star A x = tag_Star A y; x @ z ∈ A⋆⟧ ⟹ y @ z ∈ A⋆"
by (case_tac "x = []") (blast intro: tag_Star_empty_injI tag_Star_non_empty_injI)+
then show "=(tag_Star A)= ⊆ ≈(A⋆)"
by (rule refined_intro) (auto simp add: tag_eq_def)
next
have *: "finite (Pow (UNIV // ≈A))"
using finite1 by auto
show "finite (range (tag_Star A))"
unfolding tag_Star_def
by (rule finite_subset[OF _ *])
(auto simp add: quotient_def)
qed

subsection ‹The conclusion of the second direction›

lemma Myhill_Nerode2:
fixes r::"'a rexp"
shows "finite (UNIV // ≈(lang r))"
by (induct r) (auto)

end


# Theory Myhill

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)

theory Myhill
imports Myhill_2 "Regular-Sets.Derivatives"
begin

section ‹The theorem›

theorem Myhill_Nerode:
fixes A::"('a::finite) lang"
shows "(∃r. A = lang r) ⟷ finite (UNIV // ≈A)"
using Myhill_Nerode1 Myhill_Nerode2 by auto

subsection ‹Second direction proved using partial derivatives›

text ‹
An alternaive proof using the notion of partial derivatives for regular
expressions due to Antimirov \cite{Antimirov95}.
›

lemma MN_Rel_Derivs:
shows "x ≈A y ⟷ Derivs x A = Derivs y A"
unfolding Derivs_def str_eq_def
by auto

lemma Myhill_Nerode3:
fixes r::"'a rexp"
shows "finite (UNIV // ≈(lang r))"
proof -
have "finite (UNIV // =(λx. pderivs x r)=)"
proof -
have "range (λx. pderivs x r) ⊆ Pow (pderivs_lang UNIV r)"
unfolding pderivs_lang_def by auto
moreover
have "finite (Pow (pderivs_lang UNIV r))" by (simp add: finite_pderivs_lang)
ultimately
have "finite (range (λx. pderivs x r))"
by (simp add: finite_subset)
then show "finite (UNIV // =(λx. pderivs x r)=)"
by (rule finite_eq_tag_rel)
qed
moreover
have "=(λx. pderivs x r)= ⊆ ≈(lang r)"
unfolding tag_eq_def
by (auto simp add: MN_Rel_Derivs Derivs_pderivs)
moreover
have "equiv UNIV =(λx. pderivs x r)="
and  "equiv UNIV (≈(lang r))"
unfolding equiv_def refl_on_def sym_def trans_def
unfolding tag_eq_def str_eq_def
by auto
ultimately show "finite (UNIV // ≈(lang r))"
by (rule refined_partition_finite)
qed

end


# Theory Closures

(* Author: Christian Urban, Xingyuan Zhang, Chunhan Wu *)
theory Closures
imports Myhill "HOL-Library.Infinite_Set"
begin

section ‹Closure properties of regular languages›

abbreviation
regular :: "'a lang ⇒ bool"
where
"regular A ≡ ∃r. A = lang r"

subsection ‹Closure under ‹∪›, ‹⋅› and ‹⋆››

lemma closure_union [intro]:
assumes "regular A" "regular B"
shows "regular (A ∪ B)"
proof -
from assms obtain r1 r2::"'a rexp" where "lang r1 = A" "lang r2 = B" by auto
then have "A ∪ B = lang (Plus r1 r2)" by simp
then show "regular (A ∪ B)" by blast
qed

lemma closure_seq [intro]:
assumes "regular A" "regular B"
shows "regular (A ⋅ B)"
proof -
from assms obtain r1 r2::"'a rexp" where "lang r1 = A" "lang r2 = B" by auto
then have "A ⋅ B = lang (Times r1 r2)" by simp
then show "regular (A ⋅ B)" by blast
qed

lemma closure_star [intro]:
assumes "regular A"
shows "regular (A⋆)"
proof -
from assms obtain r::"'a rexp" where "lang r = A" by auto
then have "A⋆ = lang (Star r)" by simp
then show "regular (A⋆)" by blast
qed

subsection ‹Closure under complementation›

text ‹Closure under complementation is proved via the
Myhill-Nerode theorem›

lemma closure_complement [intro]:
fixes A::"('a::finite) lang"
assumes "regular A"
shows "regular (- A)"
proof -
from assms have "finite (UNIV // ≈A)" by (simp add: Myhill_Nerode)
then have "finite (UNIV // ≈(-A))" by (simp add: str_eq_def)
then show "regular (- A)" by (simp add: Myhill_Nerode)
qed

subsection ‹Closure under ‹-› and ‹∩››

lemma closure_difference [intro]:
fixes A::"('a::finite) lang"
assumes "regular A" "regular B"
shows "regular (A - B)"
proof -
have "A - B = - (- A ∪ B)" by blast
moreover
have "regular (- (- A ∪ B))"
using assms by blast
ultimately show "regular (A - B)" by simp
qed

lemma closure_intersection [intro]:
fixes A::"('a::finite) lang"
assumes "regular A" "regular B"
shows "regular (A ∩ B)"
proof -
have "A ∩ B = - (- A ∪ - B)" by blast
moreover
have "regular (- (- A ∪ - B))"
using assms by blast
ultimately show "regular (A ∩ B)" by simp
qed

subsection ‹Closure under string reversal›

fun
Rev :: "'a rexp ⇒ 'a rexp"
where
"Rev Zero = Zero"
| "Rev One = One"
| "Rev (Atom c) = Atom c"
| "Rev (Plus r1 r2) = Plus (Rev r1) (Rev r2)"
| "Rev (Times r1 r2) = Times (Rev r2) (Rev r1)"
| "Rev (Star r) = Star (Rev r)"

lemma rev_seq[simp]:
shows "rev  (B ⋅ A) = (rev  A) ⋅ (rev  B)"
unfolding conc_def image_def
by (auto) (metis rev_append)+

lemma rev_star1:
assumes a: "s ∈ (rev  A)⋆"
shows "s ∈ rev  (A⋆)"
using a
proof(induct rule: star_induct)
case (append s1 s2)
have inj: "inj (rev::'a list ⇒ 'a list)" unfolding inj_on_def by auto
have "s1 ∈ rev  A" "s2 ∈ rev  (A⋆)" by fact+
then obtain x1 x2 where "x1 ∈ A" "x2 ∈ A⋆" and eqs: "s1 = rev x1" "s2 = rev x2" by auto
then have "x1 ∈ A⋆" "x2 ∈ A⋆" by (auto)
then have "x2 @ x1 ∈ A⋆" by (auto)
then have "rev (x2 @ x1) ∈ rev  A⋆" using inj by (simp only: inj_image_mem_iff)
then show "s1 @ s2 ∈  rev  A⋆" using eqs by simp
qed (auto)

lemma rev_star2:
assumes a: "s ∈ A⋆"
shows "rev s ∈ (rev  A)⋆"
using a
proof(induct rule: star_induct)
case (append s1 s2)
have inj: "inj (rev::'a list ⇒ 'a list)" unfolding inj_on_def by auto
have "s1 ∈ A"by fact
then have "rev s1 ∈ rev  A" using inj by (simp only: inj_image_mem_iff)
then have "rev s1 ∈ (rev  A)⋆" by (auto)
moreover
have "rev s2 ∈ (rev  A)⋆" by fact
ultimately show "rev (s1 @ s2) ∈  (rev  A)⋆" by (auto)
qed (auto)

lemma rev_star [simp]:
shows " rev  (A⋆) = (rev  A)⋆"
using rev_star1 rev_star2 by auto

lemma rev_lang:
shows "rev  (lang r) = lang (Rev r)"
by (induct r) (simp_all add: image_Un)

lemma closure_reversal [intro]:
assumes "regular A"
shows "regular (rev  A)"
proof -
from assms obtain r::"'a rexp" where "A = lang r" by auto
then have "lang (Rev r) = rev  A" by (simp add: rev_lang)
then show "regular (rev A)" by blast
qed

subsection ‹Closure under left-quotients›

abbreviation
"Deriv_lang A B ≡ ⋃x ∈ A. Derivs x B"

lemma closure_left_quotient:
assumes "regular A"
shows "regular (Deriv_lang B A)"
proof -
from assms obtain r::"'a rexp" where eq: "lang r = A" by auto
have fin: "finite (pderivs_lang B r)" by (rule finite_pderivs_lang)

have "Deriv_lang B (lang r) = (⋃ (lang  pderivs_lang B r))"
by (simp add: Derivs_pderivs pderivs_lang_def)
also have "… = lang (⨄(pderivs_lang B r))" using fin by simp
finally have "Deriv_lang B A = lang (⨄(pderivs_lang B r))" using eq
by simp
then show "regular (Deriv_lang B A)" by auto
qed

subsection ‹Finite and co-finite sets are regular›

lemma singleton_regular:
shows "regular {s}"
proof (induct s)
case Nil
have "{[]} = lang (One)" by simp
then show "regular {[]}" by blast
next
case (Cons c s)
have "regular {s}" by fact
then obtain r where "{s} = lang r" by blast
then have "{c # s} = lang (Times (Atom c) r)"
by (auto simp add: conc_def)
then show "regular {c # s}" by blast
qed

lemma finite_regular:
assumes "finite A"
shows "regular A"
using assms
proof (induct)
case empty
have "{} = lang (Zero)" by simp
then show "regular {}" by blast
next
case (insert s A)
have "regular {s}" by (simp add: singleton_regular)
moreover
have "regular A" by fact
ultimately have "regular ({s} ∪ A)" by (rule closure_union)
then show "regular (insert s A)" by simp
qed

lemma cofinite_regular:
fixes A::"'a::finite lang"
assumes "finite (- A)"
shows "regular A"
proof -
from assms have "regular (- A)" by (simp add: finite_regular)
then have "regular (-(- A))" by (rule closure_complement)
then show "regular A" by simp
qed

subsection ‹Continuation lemma for showing non-regularity of languages›

lemma continuation_lemma:
fixes A B::"'a::finite lang"
assumes reg: "regular A"
and     inf: "infinite B"
shows "∃x ∈ B. ∃y ∈ B. x ≠ y ∧ x ≈A y"
proof -
define eqfun where "eqfun = (λA x::('a::finite list). (≈A)  {x})"
have "finite (UNIV // ≈A)" using reg by (simp add: Myhill_Nerode)
moreover
have "(eqfun A)  B ⊆ UNIV // (≈A)"
unfolding eqfun_def quotient_def by auto
ultimately have "finite ((eqfun A)  B)" by (rule rev_finite_subset)
with inf have "∃a ∈ B. infinite {b ∈ B. eqfun A b = eqfun A a}"
by (rule pigeonhole_infinite)
then obtain a where in_a: "a ∈ B" and "infinite {b ∈ B. eqfun A b = eqfun A a}"
by blast
moreover
have "{b ∈ B. eqfun A b = eqfun A a} = {b ∈ B. b ≈A a}"
unfolding eqfun_def Image_def str_eq_def by auto
ultimately have "infinite {b ∈ B. b ≈A a}" by simp
then have "infinite ({b ∈ B. b ≈A a} - {a})" by simp
moreover
have "{b ∈ B. b ≈A a} - {a} = {b ∈ B. b ≈A a ∧ b ≠ a}" by auto
ultimately have "infinite {b ∈ B. b ≈A a ∧ b ≠ a}" by simp
then have "{b ∈ B. b ≈A a ∧ b ≠ a} ≠ {}"
by (metis finite.emptyI)
then obtain b where "b ∈ B" "b ≠ a" "b ≈A a" by blast
with in_a show "∃x ∈ B. ∃y ∈ B. x ≠ y ∧ x ≈A y"
by blast
qed

subsection ‹The language ‹a⇧n b⇧n› is not regular›

abbreviation
replicate_rev ("_ ^^^ _" [100, 100] 100)
where
"a ^^^ n ≡ replicate n a"

lemma an_bn_not_regular:
shows "¬ regular (⋃n. {CHR ''a'' ^^^ n @ CHR ''b'' ^^^ n})"
proof
define A where "A = (⋃n. {CHR ''a'' ^^^ n @ CHR ''b'' ^^^ n})"
assume as: "regular A"
define B where "B = (⋃n. {CHR ''a'' ^^^ n})"

have sameness: "⋀i j. CHR ''a'' ^^^ i @ CHR ''b'' ^^^ j ∈ A ⟷ i = j"
unfolding A_def
apply auto
apply(drule_tac f="λs. length (filter ((=) (CHR ''a'')) s) = length (filter ((=) (CHR ''b'')) s)"
in arg_cong)
apply(simp)
done

have b: "infinite B"
unfolding infinite_iff_countable_subset
unfolding inj_on_def B_def
by (rule_tac x="λn. CHR ''a'' ^^^ n" in exI) (auto)
moreover
have "∀x ∈ B. ∀y ∈ B. x ≠ y ⟶ ¬ (x ≈A y)"
apply(auto)
unfolding B_def
apply(auto)
apply(drule_tac x="CHR ''b'' ^^^ xa" in spec)
done
ultimately
show "False" using continuation_lemma[OF as] by blast
qed

end


# Theory Closures2

theory Closures2
imports
Closures
Well_Quasi_Orders.Well_Quasi_Orders
begin

section ‹Closure under ‹SUBSEQ› and ‹SUPSEQ››

text ‹Properties about the embedding relation›

lemma subseq_strict_length:
assumes a: "subseq x y" "x ≠ y"
shows "length x < length y"
using a
by (induct) (auto simp add: less_Suc_eq)

lemma subseq_wf:
shows "wf {(x, y). subseq x y ∧ x ≠ y}"
proof -
have "wf (measure length)" by simp
moreover
have "{(x, y). subseq x y ∧ x ≠ y} ⊆ measure length"
unfolding measure_def by (auto simp add: subseq_strict_length)
ultimately
show "wf {(x, y). subseq x y ∧ x ≠ y}" by (rule wf_subset)
qed

lemma subseq_good:
shows "good subseq (f :: nat ⇒ ('a::finite) list)"
using wqo_on_imp_good[where f="f", OF wqo_on_lists_over_finite_sets]
by simp

lemma subseq_Higman_antichains:
assumes a: "∀(x::('a::finite) list) ∈ A. ∀y ∈ A. x ≠ y ⟶ ¬(subseq x y) ∧ ¬(subseq y x)"
shows "finite A"
proof (rule ccontr)
assume "infinite A"
then obtain f::"nat ⇒ 'a::finite list" where b: "inj f" and c: "range f ⊆ A"
by (auto simp add: infinite_iff_countable_subset)
from subseq_good[where f="f"]
obtain i j where d: "i < j" and e: "subseq (f i) (f j) ∨ f i = f j"
unfolding good_def
by auto
have "f i ≠ f j" using b d by (auto simp add: inj_on_def)
moreover
have "f i ∈ A" using c by auto
moreover
have "f j ∈ A" using c by auto
ultimately have "¬(subseq (f i) (f j))" using a by simp
with e show "False" by auto
qed

subsection ‹Sub- and Supersequences›

definition
"SUBSEQ A ≡ {x::('a::finite) list. ∃y ∈ A. subseq x y}"

definition
"SUPSEQ A ≡ {x::('a::finite) list. ∃y ∈ A. subseq y x}"

lemma SUPSEQ_simps [simp]:
shows "SUPSEQ {} = {}"
and   "SUPSEQ {[]} = UNIV"
unfolding SUPSEQ_def by auto

lemma SUPSEQ_atom [simp]:
shows "SUPSEQ {[c]} = UNIV ⋅ {[c]} ⋅ UNIV"
unfolding SUPSEQ_def conc_def
by (auto dest: list_emb_ConsD)

lemma SUPSEQ_union [simp]:
shows "SUPSEQ (A ∪ B) = SUPSEQ A ∪ SUPSEQ B"
unfolding SUPSEQ_def by auto

lemma SUPSEQ_conc [simp]:
shows "SUPSEQ (A ⋅ B) = SUPSEQ A ⋅ SUPSEQ B"
unfolding SUPSEQ_def conc_def
apply(auto)
apply(drule list_emb_appendD)
apply(auto)
by (metis list_emb_append_mono)

lemma SUPSEQ_star [simp]:
shows "SUPSEQ (A⋆) = UNIV"
apply(subst star_unfold_left)
apply(simp only: SUPSEQ_union)
apply(simp)
done

subsection ‹Regular expression that recognises every character›

definition
Allreg :: "'a::finite rexp"
where
"Allreg ≡ ⨄(Atom  UNIV)"

lemma Allreg_lang [simp]:
shows "lang Allreg = (⋃a. {[a]})"
unfolding Allreg_def by auto

lemma [simp]:
shows "(⋃a. {[a]})⋆ = UNIV"
apply(auto)
apply(induct_tac x)
apply(auto)
apply(subgoal_tac "[a] @ list ∈ (⋃a. {[a]})⋆")
apply(simp)
apply(rule append_in_starI)
apply(auto)
done

lemma Star_Allreg_lang [simp]:
shows "lang (Star Allreg) = UNIV"
by simp

fun
UP :: "'a::finite rexp ⇒ 'a rexp"
where
"UP (Zero) = Zero"
| "UP (One) = Star Allreg"
| "UP (Atom c) = Times (Star Allreg) (Times (Atom c) (Star Allreg))"
| "UP (Plus r1 r2) = Plus (UP r1) (UP r2)"
| "UP (Times r1 r2) = Times (UP r1) (UP r2)"
| "UP (Star r) = Star Allreg"

lemma lang_UP:
fixes r::"'a::finite rexp"
shows "lang (UP r) = SUPSEQ (lang r)"
by (induct r) (simp_all)

lemma SUPSEQ_regular:
fixes A::"'a::finite lang"
assumes "regular A"
shows "regular (SUPSEQ A)"
proof -
from assms obtain r::"'a::finite rexp" where "lang r = A" by auto
then have "lang (UP r) = SUPSEQ A" by (simp add: lang_UP)
then show "regular (SUPSEQ A)" by auto
qed

lemma SUPSEQ_subset:
fixes A::"'a::finite list set"
shows "A ⊆ SUPSEQ A"
unfolding SUPSEQ_def by auto

lemma SUBSEQ_complement:
shows "- (SUBSEQ A) = SUPSEQ (- (SUBSEQ A))"
proof -
have "- (SUBSEQ A) ⊆ SUPSEQ (- (SUBSEQ A))"
by (rule SUPSEQ_subset)
moreover
have "SUPSEQ (- (SUBSEQ A)) ⊆ - (SUBSEQ A)"
proof (rule ccontr)
assume "¬ (SUPSEQ (- (SUBSEQ A)) ⊆ - (SUBSEQ A))"
then obtain x where
a: "x ∈ SUPSEQ (- (SUBSEQ A))" and
b: "x ∉ - (SUBSEQ A)" by auto

from a obtain y where c: "y ∈ - (SUBSEQ A)" and d: "subseq y x"
by (auto simp add: SUPSEQ_def)

from b have "x ∈ SUBSEQ A" by simp
then obtain x' where f: "x' ∈ A" and e: "subseq x x'"
by (auto simp add: SUBSEQ_def)

from d e have "subseq y x'"
by (rule subseq_order.order_trans)
then have "y ∈ SUBSEQ A" using f
by (auto simp add: SUBSEQ_def)
with c show "False" by simp
qed
ultimately show "- (SUBSEQ A) = SUPSEQ (- (SUBSEQ A))" by simp
qed

definition
minimal :: "'a::finite list ⇒ 'a lang ⇒ bool"
where
"minimal x A ≡ (∀y ∈ A. subseq y x ⟶ subseq x y)"

lemma main_lemma:
shows "∃M. finite M ∧ SUPSEQ A = SUPSEQ M"
proof -
define M where "M = {x ∈ A. minimal x A}"
have "finite M"
unfolding M_def minimal_def
by (rule subseq_Higman_antichains) (auto simp add: subseq_order.antisym)
moreover
have "SUPSEQ A ⊆ SUPSEQ M"
proof
fix x
assume "x ∈ SUPSEQ A"
then obtain y where "y ∈ A" and "subseq y x" by (auto simp add: SUPSEQ_def)
then have a: "y ∈ {y' ∈ A. subseq y' x}" by simp
obtain z where b: "z ∈ A" "subseq z x" and c: "∀y. subseq y z ∧ y ≠ z ⟶ y ∉ {y' ∈ A. subseq y' x}"
using wfE_min[OF subseq_wf a] by auto
then have "z ∈ M"
unfolding M_def minimal_def
by (auto intro: subseq_order.order_trans)
with b(2) show "x ∈ SUPSEQ M"
by (auto simp add: SUPSEQ_def)
qed
moreover
have "SUPSEQ M ⊆ SUPSEQ A"
by (auto simp add: SUPSEQ_def M_def)
ultimately
show "∃M. finite M ∧ SUPSEQ A = SUPSEQ M" by blast
qed

subsection ‹Closure of @{const SUBSEQ} and @{const SUPSEQ}›

lemma closure_SUPSEQ:
fixes A::"'a::finite lang"
shows "regular (SUPSEQ A)"
proof -
obtain M where a: "finite M" and b: "SUPSEQ A = SUPSEQ M"
using main_lemma by blast
have "regular M" using a by (rule finite_regular)
then have "regular (SUPSEQ M)" by (rule SUPSEQ_regular)
then show "regular (SUPSEQ A)" using b by simp
qed

lemma closure_SUBSEQ:
fixes A::"'a::finite lang"
shows "regular (SUBSEQ A)"
proof -
have "regular (SUPSEQ (- SUBSEQ A))" by (rule closure_SUPSEQ)
then have "regular (- SUBSEQ A)" by (subst SUBSEQ_complement) (simp)
then have "regular (- (- (SUBSEQ A)))" by (rule closure_complement)
then show "regular (SUBSEQ A)" by simp
qed

end


# Theory Non_Regular_Languages

(*
File:     Non_Regular_Languages.thy
Author:   Manuel Eberl <eberlm@in.tum.de>

This file provides some tools for showing the non-regularity of a language, either
via an infinite set of equivalence classes or via the Pumping Lemma.
*)
section ‹Tools for showing non-regularity of a language›
theory Non_Regular_Languages
imports Myhill
begin

subsection ‹Auxiliary material›

lemma bij_betw_image_quotient:
"bij_betw (λy. f - {y}) (f  A) (A // {(a,b). f a = f b})"
by (force simp: bij_betw_def inj_on_def image_image quotient_def)

lemma regular_Derivs_finite:
fixes r :: "'a :: finite rexp"
shows "finite (range (λw. Derivs w (lang r)))"
proof -
have "?thesis ⟷ finite (UNIV // ≈lang r)"
unfolding str_eq_conv_Derivs by (rule bij_betw_finite bij_betw_image_quotient)+
also have "…" by (subst Myhill_Nerode [symmetric]) auto
finally show ?thesis .
qed

lemma Nil_in_Derivs_iff: "[] ∈ Derivs w A ⟷ w ∈ A"
by (auto simp: Derivs_def)

(* TODO: Move to distribution? *)
text ‹
The following operation repeats a list $n$ times (usually written as $w ^ n$).
›
primrec repeat :: "nat ⇒ 'a list ⇒ 'a list" where
"repeat 0 xs = []"
| "repeat (Suc n) xs = xs @ repeat n xs"

lemma repeat_Cons_left: "repeat (Suc n) xs = xs @ repeat n xs" by simp

lemma repeat_Cons_right: "repeat (Suc n) xs = repeat n xs @ xs"
by (induction n) simp_all

lemma repeat_Cons_append_commute [simp]: "repeat n xs @ xs = xs @ repeat n xs"
by (subst repeat_Cons_right [symmetric]) simp

lemma repeat_Cons_add [simp]: "repeat (m + n) xs = repeat m xs @ repeat n xs"
by (induction m) simp_all

lemma repeat_Nil [simp]: "repeat n [] = []"
by (induction n) simp_all

lemma repeat_conv_replicate: "repeat n xs = concat (replicate n xs)"
by (induction n) simp_all

(* TODO: Move to distribution? *)
lemma nth_prefixes [simp]: "n ≤ length xs ⟹ prefixes xs ! n = take n xs"
by (induction xs arbitrary: n) (auto simp: nth_Cons split: nat.splits)

lemma nth_suffixes [simp]: "n ≤ length xs ⟹ suffixes xs ! n = drop (length xs - n) xs"
by (subst suffixes_conv_prefixes) (simp_all add: rev_take)

lemma length_take_prefixes:
assumes "xs ∈ set (take n (prefixes ys))"
shows   "length xs < n"
proof (cases "n ≤ Suc (length ys)")
case True
with assms obtain i where "i < n" "xs = take i ys"
by (subst (asm) nth_image [symmetric]) auto
thus ?thesis by simp
next
case False
with assms have "prefix xs ys" by simp
hence "length xs ≤ length ys" by (rule prefix_length_le)
also from False have "… < n" by simp
finally show ?thesis .
qed

subsection ‹Non-regularity by giving an infinite set of equivalence classes›

text ‹
Non-regularity can be shown by giving an infinite set of non-equivalent words (w.r.t. the
Myhill--Nerode relation).
›
lemma not_regular_langI:
assumes "infinite B" "⋀x y. x ∈ B ⟹ y ∈ B ⟹ x ≠ y ⟹ ∃w. ¬(x @ w ∈ A ⟷ y @ w ∈ A)"
shows   "¬regular_lang (A :: 'a :: finite list set)"
proof -
have "(λw. Derivs w A)  B ⊆ range (λw. Derivs w A)" by blast
moreover from assms(2) have "inj_on (λw. Derivs w A) B"
by (auto simp: inj_on_def Derivs_def)
with assms(1) have "infinite ((λw. Derivs w A)  B)"
by (blast dest: finite_imageD)
ultimately have "infinite (range (λw. Derivs w A))" by (rule infinite_super)
with regular_Derivs_finite show ?thesis by blast
qed

lemma not_regular_langI':
assumes "infinite B" "⋀x y. x ∈ B ⟹ y ∈ B ⟹ x ≠ y ⟹ ∃w. ¬(f x @ w ∈ A ⟷ f y @ w ∈ A)"
shows   "¬regular_lang (A :: 'a :: finite list set)"
proof (rule not_regular_langI)
from assms(2) have "inj_on f B" by (force simp: inj_on_def)
with ‹infinite B› show "infinite (f  B)" by (simp add: finite_image_iff)
qed (insert assms, auto)

subsection ‹The Pumping Lemma›

text ‹
The Pumping lemma can be shown very easily from the Myhill--Nerode theorem: if we have
a word whose length is more than the (finite) number of equivalence classes, then it must
have two different prefixes in the same class and the difference between these two
prefixes can then be pumped''.
›
lemma pumping_lemma_aux:
fixes A :: "'a list set"
defines "δ ≡ λw. Derivs w A"
defines "n ≡ card (range δ)"
assumes "z ∈ A" "finite (range δ)" "length z ≥ n"
shows "∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ A)"
proof -
define P where "P = set (take (Suc n) (prefixes z))"
from ‹length z ≥ n› have [simp]: "card P = Suc n"
unfolding P_def by (subst distinct_card) (auto intro!: distinct_take)
have length_le: "length y ≤ n" if "y ∈ P" for y
using length_take_prefixes[OF that [unfolded P_def]] by simp

have "card (δ  P) ≤ card (range δ)" by (intro card_mono assms) auto
also from assms have "… < card P" by simp
finally have "¬inj_on δ P" by (rule pigeonhole)
then obtain a b where ab: "a ∈ P" "b ∈ P" "a ≠ b" "Derivs a A = Derivs b A"
by (auto simp: inj_on_def δ_def)
from ab have prefix_ab: "prefix a z" "prefix b z" by (auto simp: P_def dest: in_set_takeD)
from ab have length_ab: "length a ≤ n" "length b ≤ n"
by (simp_all add: length_le)

have *: ?thesis
if uz': "prefix u z'" "prefix z' z" "length z' ≤ n"
"u ≠ z'" "Derivs z' A = Derivs u A" for u z'
proof -
from ‹prefix u z'› and ‹u ≠ z'›
obtain v where v [simp]: "z' = u @ v" "v ≠ []"
by (auto simp: prefix_def)
from ‹prefix z' z› obtain w where [simp]: "z = u @ v @ w"
by (auto simp: prefix_def)

hence [simp]: "Derivs (repeat i v) (Derivs u A) = Derivs u A" for i
by (induction i) (use uz' in simp_all)
have "Derivs z A = Derivs (u @ repeat i v @ w) A" for i
using uz' by simp
with ‹z ∈ A› and uz' have "∀i. u @ repeat i v @ w ∈ A"
by (simp add: Nil_in_Derivs_iff [of _ A, symmetric])
moreover have "z = u @ v @ w" by simp
moreover from ‹length z' ≤ n› have "length (u @ v) ≤ n" by simp
ultimately show ?thesis using ‹v ≠ []› by blast
qed

from prefix_ab have "prefix a b ∨ prefix b a" by (rule prefix_same_cases)
with *[of a b] and *[of b a] and ab and prefix_ab and length_ab show ?thesis by blast
qed

theorem pumping_lemma:
fixes r :: "'a :: finite rexp"
obtains n where
"⋀z. z ∈ lang r ⟹ length z ≥ n ⟹
∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ lang r)"
proof -
let ?n = "card (range (λw. Derivs w (lang r)))"
have "∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ ?n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ lang r)"
if "z ∈ lang r" and "length z ≥ ?n" for z
by (intro pumping_lemma_aux[of z] that regular_Derivs_finite)
thus ?thesis by (rule that)
qed

corollary pumping_lemma_not_regular_lang:
fixes A :: "'a :: finite list set"
assumes "⋀n. length (z n) ≥ n" and "⋀n. z n ∈ A"
assumes "⋀n u v w. z n = u @ v @ w ⟹ length (u @ v) ≤ n ⟹ v ≠ [] ⟹
u @ repeat (i n u v w) v @ w ∉ A"
shows   "¬regular_lang A"
proof
assume "regular_lang A"
then obtain r where r: "lang r = A" by blast
from pumping_lemma[of r] guess n .
from this[of "z n"] and assms[of n] obtain u v w
where "z n = u @ v @ w" and "length (u @ v) ≤ n" and "v ≠ []" and
"⋀i. u @ repeat i v @ w ∈ lang r" by (auto simp: r)
with assms(3)[of n u v w] show False by (auto simp: r)
qed

subsection ‹Examples›

text ‹
The language of all words containing the same number of $a$s and $b$s is not regular.
›
lemma "¬regular_lang {w. length (filter id w) = length (filter Not w)}" (is "¬regular_lang ?A")
proof (rule not_regular_langI')
show "infinite (UNIV :: nat set)" by simp
next
fix m n :: nat assume "m ≠ n"
hence "replicate m True @ replicate m False ∈ ?A" and
"replicate n True @ replicate m False ∉ ?A" by simp_all
thus "∃w. ¬(replicate m True @ w ∈ ?A ⟷ replicate n True @ w ∈ ?A)" by blast
qed

text ‹
The language $\{a^i b^i\ |\ i \in \mathbb{N}\}$ is not regular.
›
lemma eq_replicate_iff:
"xs = replicate n x ⟷ set xs ⊆ {x} ∧ length xs = n"
using replicate_length_same[of xs x] by (subst eq_commute) auto

lemma replicate_eq_appendE:
assumes "xs @ ys = replicate n x"
obtains i j where "n = i + j" "xs = replicate i x" "ys = replicate j x"
proof -
have "n = length (replicate n x)" by simp
also note assms [symmetric]
finally have "n = length xs + length ys" by simp
moreover have "xs = replicate (length xs) x" and "ys = replicate (length ys) x"
using assms by (auto simp: eq_replicate_iff)
ultimately show ?thesis using that[of "length xs" "length ys"] by auto
qed

lemma "¬regular_lang (range (λi. replicate i True @ replicate i False))" (is "¬regular_lang ?A")
proof (rule pumping_lemma_not_regular_lang)
fix n :: nat
show "length (replicate n True @ replicate n False) ≥ n" by simp
show "replicate n True @ replicate n False ∈ ?A" by simp
next
fix n :: nat and u v w :: "bool list"
assume decomp: "replicate n True @ replicate n False = u @ v @ w"
and length_le: "length (u @ v) ≤ n" and v_ne: "v ≠ []"
define w1 w2 where "w1 = take (n - length (u@v)) w" and "w2 = drop (n - length (u@v)) w"
have w_decomp: "w = w1 @ w2" by (simp add: w1_def w2_def)

have "replicate n True = take n (replicate n True @ replicate n False)" by simp
also note decomp
also have "take n (u @ v @ w) = u @ v @ w1" using length_le by (simp add: w1_def)
finally have "u @ v @ w1 = replicate n True" by simp
then obtain i j k
where uvw1: "n = i + j + k" "u = replicate i True" "v = replicate j True" "w1 = replicate k True"
by (elim replicate_eq_appendE) auto

have "replicate n False = drop n (replicate n True @ replicate n False)" by simp
also note decomp
finally have [simp]: "w2 = replicate n False" using length_le by (simp add: w2_def)

have "u @ repeat (Suc (Suc 0)) v @ w = replicate (n + j) True @ replicate n False"
by (simp add: uvw1 w_decomp replicate_add [symmetric])
also have "… ∉ ?A"
proof safe
fix m assume *: "replicate (n + j) True @ replicate n False =
replicate m True @ replicate m False" (is "?lhs = ?rhs")
have "n = length (filter Not ?lhs)" by simp
also note *
also have "length (filter Not ?rhs) = m" by simp
finally have [simp]: "m = n" by simp
from * have "v = []" by (simp add: uvw1)
with ‹v ≠ []› show False by contradiction
qed
finally show "u @ repeat (Suc (Suc 0)) v @ w ∉ ?A" .
qed

end
`