# Theory Regular_Set

(*  Author: Tobias Nipkow, Alex Krauss, Christian Urban  *)

section "Regular sets"

theory Regular_Set
imports Main
begin

type_synonym 'a lang = "'a list set"

definition conc :: "'a lang ⇒ 'a lang ⇒ 'a lang" (infixr "@@" 75) where
"A @@ B = {xs@ys | xs ys. xs:A & ys:B}"

text ‹checks the code preprocessor for set comprehensions›
export_code conc checking SML

overloading lang_pow == "compow :: nat ⇒ 'a lang ⇒ 'a lang"
begin
primrec lang_pow :: "nat ⇒ 'a lang ⇒ 'a lang" where
"lang_pow 0 A = {[]}" |
"lang_pow (Suc n) A = A @@ (lang_pow n A)"
end

text ‹for code generation›

definition lang_pow :: "nat ⇒ 'a lang ⇒ 'a lang" where
lang_pow_code_def [code_abbrev]: "lang_pow = compow"

lemma [code]:
"lang_pow (Suc n) A = A @@ (lang_pow n A)"
"lang_pow 0 A = {[]}"

hide_const (open) lang_pow

definition star :: "'a lang ⇒ 'a lang" where
"star A = (⋃n. A ^^ n)"

subsection‹@{term "(@@)"}›

lemma concI[simp,intro]: "u : A ⟹ v : B ⟹ u@v : A @@ B"

lemma concE[elim]:
assumes "w ∈ A @@ B"
obtains u v where "u ∈ A" "v ∈ B" "w = u@v"
using assms by (auto simp: conc_def)

lemma conc_mono: "A ⊆ C ⟹ B ⊆ D ⟹ A @@ B ⊆ C @@ D"
by (auto simp: conc_def)

lemma conc_empty[simp]: shows "{} @@ A = {}" and "A @@ {} = {}"
by auto

lemma conc_epsilon[simp]: shows "{[]} @@ A = A" and "A @@ {[]} = A"

lemma conc_assoc: "(A @@ B) @@ C = A @@ (B @@ C)"
by (auto elim!: concE) (simp only: append_assoc[symmetric] concI)

lemma conc_Un_distrib:
shows "A @@ (B ∪ C) = A @@ B ∪ A @@ C"
and   "(A ∪ B) @@ C = A @@ C ∪ B @@ C"
by auto

lemma conc_UNION_distrib:
shows "A @@ ⋃(M  I) = ⋃((%i. A @@ M i)  I)"
and   "⋃(M  I) @@ A = ⋃((%i. M i @@ A)  I)"
by auto

lemma conc_subset_lists: "A ⊆ lists S ⟹ B ⊆ lists S ⟹ A @@ B ⊆ lists S"
by(fastforce simp: conc_def in_lists_conv_set)

lemma Nil_in_conc[simp]: "[] ∈ A @@ B ⟷ [] ∈ A ∧ [] ∈ B"
by (metis append_is_Nil_conv concE concI)

lemma concI_if_Nil1: "[] ∈ A ⟹ xs : B ⟹ xs ∈ A @@ B"
by (metis append_Nil concI)

lemma conc_Diff_if_Nil1: "[] ∈ A ⟹ A @@ B = (A - {[]}) @@ B ∪ B"
by (fastforce elim: concI_if_Nil1)

lemma concI_if_Nil2: "[] ∈ B ⟹ xs : A ⟹ xs ∈ A @@ B"
by (metis append_Nil2 concI)

lemma conc_Diff_if_Nil2: "[] ∈ B ⟹ A @@ B = A @@ (B - {[]}) ∪ A"
by (fastforce elim: concI_if_Nil2)

lemma singleton_in_conc:
"[x] : A @@ B ⟷ [x] : A ∧ [] : B ∨ [] : A ∧ [x] : B"
by (fastforce simp: Cons_eq_append_conv append_eq_Cons_conv
conc_Diff_if_Nil1 conc_Diff_if_Nil2)

subsection‹@{term "A ^^ n"}›

lemma lang_pow_add: "A ^^ (n + m) = A ^^ n @@ A ^^ m"
by (induct n) (auto simp: conc_assoc)

lemma lang_pow_empty: "{} ^^ n = (if n = 0 then {[]} else {})"
by (induct n) auto

lemma lang_pow_empty_Suc[simp]: "({}::'a lang) ^^ Suc n = {}"

lemma conc_pow_comm:
shows "A @@ (A ^^ n) = (A ^^ n) @@ A"
by (induct n) (simp_all add: conc_assoc[symmetric])

lemma length_lang_pow_ub:
"∀w ∈ A. length w ≤ k ⟹ w : A^^n ⟹ length w ≤ k*n"
by(induct n arbitrary: w) (fastforce simp: conc_def)+

lemma length_lang_pow_lb:
"∀w ∈ A. length w ≥ k ⟹ w : A^^n ⟹ length w ≥ k*n"
by(induct n arbitrary: w) (fastforce simp: conc_def)+

lemma lang_pow_subset_lists: "A ⊆ lists S ⟹ A ^^ n ⊆ lists S"
by(induct n)(auto simp: conc_subset_lists)

subsection‹@{const star}›

lemma star_subset_lists: "A ⊆ lists S ⟹ star A ⊆ lists S"
unfolding star_def by(blast dest: lang_pow_subset_lists)

lemma star_if_lang_pow[simp]: "w : A ^^ n ⟹ w : star A"
by (auto simp: star_def)

lemma Nil_in_star[iff]: "[] : star A"
proof (rule star_if_lang_pow)
show "[] : A ^^ 0" by simp
qed

lemma star_if_lang[simp]: assumes "w : A" shows "w : star A"
proof (rule star_if_lang_pow)
show "w : A ^^ 1" using ‹w : A› by simp
qed

lemma append_in_starI[simp]:
assumes "u : star A" and "v : star A" shows "u@v : star A"
proof -
from ‹u : star A› obtain m where "u : A ^^ m" by (auto simp: star_def)
moreover
from ‹v : star A› obtain n where "v : A ^^ n" by (auto simp: star_def)
thus ?thesis by simp
qed

lemma conc_star_star: "star A @@ star A = star A"
by (auto simp: conc_def)

lemma conc_star_comm:
shows "A @@ star A = star A @@ A"
unfolding star_def conc_pow_comm conc_UNION_distrib
by simp

lemma star_induct[consumes 1, case_names Nil append, induct set: star]:
assumes "w : star A"
and "P []"
and step: "!!u v. u : A ⟹ v : star A ⟹ P v ⟹ P (u@v)"
shows "P w"
proof -
{ fix n have "w : A ^^ n ⟹ P w"
by (induct n arbitrary: w) (auto intro: ‹P []› step star_if_lang_pow) }
with ‹w : star A› show "P w" by (auto simp: star_def)
qed

lemma star_empty[simp]: "star {} = {[]}"
by (auto elim: star_induct)

lemma star_epsilon[simp]: "star {[]} = {[]}"
by (auto elim: star_induct)

lemma star_idemp[simp]: "star (star A) = star A"
by (auto elim: star_induct)

lemma star_unfold_left: "star A = A @@ star A ∪ {[]}" (is "?L = ?R")
proof
show "?L ⊆ ?R" by (rule, erule star_induct) auto
qed auto

lemma concat_in_star: "set ws ⊆ A ⟹ concat ws : star A"
by (induct ws) simp_all

lemma in_star_iff_concat:
"w ∈ star A = (∃ws. set ws ⊆ A ∧ w = concat ws)"
(is "_ = (∃ws. ?R w ws)")
proof
assume "w : star A" thus "∃ws. ?R w ws"
proof induct
case Nil have "?R [] []" by simp
thus ?case ..
next
case (append u v)
then obtain ws where "set ws ⊆ A ∧ v = concat ws" by blast
with append have "?R (u@v) (u#ws)" by auto
thus ?case ..
qed
next
assume "∃us. ?R w us" thus "w : star A"
by (auto simp: concat_in_star)
qed

lemma star_conv_concat: "star A = {concat ws|ws. set ws ⊆ A}"
by (fastforce simp: in_star_iff_concat)

lemma star_insert_eps[simp]: "star (insert [] A) = star(A)"
proof-
{ fix us
have "set us ⊆ insert [] A ⟹ ∃vs. concat us = concat vs ∧ set vs ⊆ A"
(is "?P ⟹ ∃vs. ?Q vs")
proof
let ?vs = "filter (%u. u ≠ []) us"
show "?P ⟹ ?Q ?vs" by (induct us) auto
qed
} thus ?thesis by (auto simp: star_conv_concat)
qed

lemma star_unfold_left_Nil: "star A = (A - {[]}) @@ (star A) ∪ {[]}"
by (metis insert_Diff_single star_insert_eps star_unfold_left)

lemma star_Diff_Nil_fold: "(A - {[]}) @@ star A = star A - {[]}"
proof -
have "[] ∉ (A - {[]}) @@ star A" by simp
thus ?thesis using star_unfold_left_Nil by blast
qed

lemma star_decom:
assumes a: "x ∈ star A" "x ≠ []"
shows "∃a b. x = a @ b ∧ a ≠ [] ∧ a ∈ A ∧ b ∈ star A"
using a by (induct rule: star_induct) (blast)+

subsection ‹Left-Quotients of languages›

definition Deriv :: "'a ⇒ 'a lang ⇒ 'a lang"
where "Deriv x A = { xs. x#xs ∈ A }"

definition Derivs :: "'a list ⇒ 'a lang ⇒ 'a lang"
where "Derivs xs A = { ys. xs @ ys ∈ A }"

abbreviation
Derivss :: "'a list ⇒ 'a lang set ⇒ 'a lang"
where
"Derivss s As ≡ ⋃ (Derivs s  As)"

lemma Deriv_empty[simp]:   "Deriv a {} = {}"
and Deriv_epsilon[simp]: "Deriv a {[]} = {}"
and Deriv_char[simp]:    "Deriv a {[b]} = (if a = b then {[]} else {})"
and Deriv_union[simp]:   "Deriv a (A ∪ B) = Deriv a A ∪ Deriv a B"
and Deriv_inter[simp]:   "Deriv a (A ∩ B) = Deriv a A ∩ Deriv a B"
and Deriv_compl[simp]:   "Deriv a (-A) = - Deriv a A"
and Deriv_Union[simp]:   "Deriv a (Union M) = Union(Deriv a  M)"
and Deriv_UN[simp]:      "Deriv a (UN x:I. S x) = (UN x:I. Deriv a (S x))"
by (auto simp: Deriv_def)

lemma Der_conc [simp]:
shows "Deriv c (A @@ B) = (Deriv c A) @@ B ∪ (if [] ∈ A then Deriv c B else {})"
unfolding Deriv_def conc_def

lemma Deriv_star [simp]:
shows "Deriv c (star A) = (Deriv c A) @@ star A"
proof -
have "Deriv c (star A) = Deriv c ({[]} ∪ A @@ star A)"
by (metis star_unfold_left sup.commute)
also have "... = Deriv c (A @@ star A)"
unfolding Deriv_union by (simp)
also have "... = (Deriv c A) @@ (star A) ∪ (if [] ∈ A then Deriv c (star A) else {})"
by simp
also have "... =  (Deriv c A) @@ star A"
unfolding conc_def Deriv_def
using star_decom by (force simp add: Cons_eq_append_conv)
finally show "Deriv c (star A) = (Deriv c A) @@ star A" .
qed

lemma Deriv_diff[simp]:
shows "Deriv c (A - B) = Deriv c A - Deriv c B"

lemma Deriv_lists[simp]: "c : S ⟹ Deriv c (lists S) = lists S"

lemma Derivs_simps [simp]:
shows "Derivs [] A = A"
and   "Derivs (c # s) A = Derivs s (Deriv c A)"
and   "Derivs (s1 @ s2) A = Derivs s2 (Derivs s1 A)"
unfolding Derivs_def Deriv_def by auto

lemma in_fold_Deriv: "v ∈ fold Deriv w L ⟷ w @ v ∈ L"
by (induct w arbitrary: L) (simp_all add: Deriv_def)

lemma Derivs_alt_def [code]: "Derivs w L = fold Deriv w L"
by (induct w arbitrary: L) simp_all

lemma Deriv_code [code]:
"Deriv x A = tl  Set.filter (λxs. case xs of x' # _ ⇒ x = x' | _ ⇒ False) A"
by (auto simp: Deriv_def Set.filter_def image_iff tl_def split: list.splits)

subsection ‹Shuffle product›

definition Shuffle (infixr "∥" 80) where
"Shuffle A B = ⋃{shuffles xs ys | xs ys. xs ∈ A ∧ ys ∈ B}"

lemma Deriv_Shuffle[simp]:
"Deriv a (A ∥ B) = Deriv a A ∥ B ∪ A ∥ Deriv a B"
unfolding Shuffle_def Deriv_def by (fastforce simp: Cons_in_shuffles_iff neq_Nil_conv)

lemma shuffle_subset_lists:
assumes "A ⊆ lists S" "B ⊆ lists S"
shows "A ∥ B ⊆ lists S"
unfolding Shuffle_def proof safe
fix x and zs xs ys :: "'a list"
assume zs: "zs ∈ shuffles xs ys" "x ∈ set zs" and "xs ∈ A" "ys ∈ B"
with assms have "xs ∈ lists S" "ys ∈ lists S" by auto
with zs show "x ∈ S" by (induct xs ys arbitrary: zs rule: shuffles.induct) auto
qed

lemma Nil_in_Shuffle[simp]: "[] ∈ A ∥ B ⟷ [] ∈ A ∧ [] ∈ B"
unfolding Shuffle_def by force

lemma shuffle_Un_distrib:
shows "A ∥ (B ∪ C) = A ∥ B ∪ A ∥ C"
and   "A ∥ (B ∪ C) = A ∥ B ∪ A ∥ C"
unfolding Shuffle_def by fast+

lemma shuffle_UNION_distrib:
shows "A ∥ ⋃(M  I) = ⋃((%i. A ∥ M i)  I)"
and   "⋃(M  I) ∥ A = ⋃((%i. M i ∥ A)  I)"
unfolding Shuffle_def by fast+

lemma Shuffle_empty[simp]:
"A ∥ {} = {}"
"{} ∥ B = {}"
unfolding Shuffle_def by auto

lemma Shuffle_eps[simp]:
"A ∥ {[]} = A"
"{[]} ∥ B = B"
unfolding Shuffle_def by auto

subsection ‹Arden's Lemma›

lemma arden_helper:
assumes eq: "X = A @@ X ∪ B"
shows "X = (A ^^ Suc n) @@ X ∪ (⋃m≤n. (A ^^ m) @@ B)"
proof (induct n)
case 0
show "X = (A ^^ Suc 0) @@ X ∪ (⋃m≤0. (A ^^ m) @@ B)"
using eq by simp
next
case (Suc n)
have ih: "X = (A ^^ Suc n) @@ X ∪ (⋃m≤n. (A ^^ m) @@ B)" by fact
also have "… = (A ^^ Suc n) @@ (A @@ X ∪ B) ∪ (⋃m≤n. (A ^^ m) @@ B)" using eq by simp
also have "… = (A ^^ Suc (Suc n)) @@ X ∪ ((A ^^ Suc n) @@ B) ∪ (⋃m≤n. (A ^^ m) @@ B)"
by (simp add: conc_Un_distrib conc_assoc[symmetric] conc_pow_comm)
also have "… = (A ^^ Suc (Suc n)) @@ X ∪ (⋃m≤Suc n. (A ^^ m) @@ B)"
finally show "X = (A ^^ Suc (Suc n)) @@ X ∪ (⋃m≤Suc n. (A ^^ m) @@ B)" .
qed

lemma Arden:
assumes "[] ∉ A"
shows "X = A @@ X ∪ B ⟷ X = star A @@ B"
proof
assume eq: "X = A @@ X ∪ B"
{ fix w assume "w : X"
let ?n = "size w"
from ‹[] ∉ A› have "∀u ∈ A. length u ≥ 1"
by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq)
hence "∀u ∈ A^^(?n+1). length u ≥ ?n+1"
by (metis length_lang_pow_lb nat_mult_1)
hence "∀u ∈ A^^(?n+1)@@X. length u ≥ ?n+1"
by(auto simp only: conc_def length_append)
hence "w ∉ A^^(?n+1)@@X" by auto
hence "w : star A @@ B" using ‹w : X› using arden_helper[OF eq, where n="?n"]
by (auto simp add: star_def conc_UNION_distrib)
} moreover
{ fix w assume "w : star A @@ B"
hence "∃n. w ∈ A^^n @@ B" by(auto simp: conc_def star_def)
hence "w : X" using arden_helper[OF eq] by blast
} ultimately show "X = star A @@ B" by blast
next
assume eq: "X = star A @@ B"
have "star A = A @@ star A ∪ {[]}"
by (rule star_unfold_left)
then have "star A @@ B = (A @@ star A ∪ {[]}) @@ B"
by metis
also have "… = (A @@ star A) @@ B ∪ B"
unfolding conc_Un_distrib by simp
also have "… = A @@ (star A @@ B) ∪ B"
by (simp only: conc_assoc)
finally show "X = A @@ X ∪ B"
using eq by blast
qed

lemma reversed_arden_helper:
assumes eq: "X = X @@ A ∪ B"
shows "X = X @@ (A ^^ Suc n) ∪ (⋃m≤n. B @@ (A ^^ m))"
proof (induct n)
case 0
show "X = X @@ (A ^^ Suc 0) ∪ (⋃m≤0. B @@ (A ^^ m))"
using eq by simp
next
case (Suc n)
have ih: "X = X @@ (A ^^ Suc n) ∪ (⋃m≤n. B @@ (A ^^ m))" by fact
also have "… = (X @@ A ∪ B) @@ (A ^^ Suc n) ∪ (⋃m≤n. B @@ (A ^^ m))" using eq by simp
also have "… = X @@ (A ^^ Suc (Suc n)) ∪ (B @@ (A ^^ Suc n)) ∪ (⋃m≤n. B @@ (A ^^ m))"
also have "… = X @@ (A ^^ Suc (Suc n)) ∪ (⋃m≤Suc n. B @@ (A ^^ m))"
finally show "X = X @@ (A ^^ Suc (Suc n)) ∪ (⋃m≤Suc n. B @@ (A ^^ m))" .
qed

theorem reversed_Arden:
assumes nemp: "[] ∉ A"
shows "X = X @@ A ∪ B ⟷ X = B @@ star A"
proof
assume eq: "X = X @@ A ∪ B"
{ fix w assume "w : X"
let ?n = "size w"
from ‹[] ∉ A› have "∀u ∈ A. length u ≥ 1"
by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq)
hence "∀u ∈ A^^(?n+1). length u ≥ ?n+1"
by (metis length_lang_pow_lb nat_mult_1)
hence "∀u ∈ X @@ A^^(?n+1). length u ≥ ?n+1"
by(auto simp only: conc_def length_append)
hence "w ∉ X @@ A^^(?n+1)" by auto
hence "w : B @@ star A" using ‹w : X› using reversed_arden_helper[OF eq, where n="?n"]
by (auto simp add: star_def conc_UNION_distrib)
} moreover
{ fix w assume "w : B @@ star A"
hence "∃n. w ∈ B @@ A^^n" by (auto simp: conc_def star_def)
hence "w : X" using reversed_arden_helper[OF eq] by blast
} ultimately show "X = B @@ star A" by blast
next
assume eq: "X = B @@ star A"
have "star A = {[]} ∪ star A @@ A"
unfolding conc_star_comm[symmetric]
by(metis Un_commute star_unfold_left)
then have "B @@ star A = B @@ ({[]} ∪ star A @@ A)"
by metis
also have "… = B ∪ B @@ (star A @@ A)"
unfolding conc_Un_distrib by simp
also have "… = B ∪ (B @@ star A) @@ A"
by (simp only: conc_assoc)
finally show "X = X @@ A ∪ B"
using eq by blast
qed

end


# Theory Regular_Exp

(*  Author: Tobias Nipkow *)

section "Regular expressions"

theory Regular_Exp
imports Regular_Set
begin

datatype (atoms: 'a) rexp =
is_Zero: Zero |
is_One: One |
Atom 'a |
Plus "('a rexp)" "('a rexp)" |
Times "('a rexp)" "('a rexp)" |
Star "('a rexp)"

primrec lang :: "'a rexp => 'a lang" where
"lang Zero = {}" |
"lang One = {[]}" |
"lang (Atom a) = {[a]}" |
"lang (Plus r s) = (lang r) Un (lang s)" |
"lang (Times r s) = conc (lang r) (lang s)" |
"lang (Star r) = star(lang r)"

abbreviation (input) regular_lang where "regular_lang A ≡ (∃r. lang r = A)"

primrec nullable :: "'a rexp ⇒ bool" where
"nullable Zero = False" |
"nullable One = True" |
"nullable (Atom c) = False" |
"nullable (Plus r1 r2) = (nullable r1 ∨ nullable r2)" |
"nullable (Times r1 r2) = (nullable r1 ∧ nullable r2)" |
"nullable (Star r) = True"

lemma nullable_iff [code_abbrev]: "nullable r ⟷ [] ∈ lang r"
by (induct r) (auto simp add: conc_def split: if_splits)

primrec rexp_empty where
"rexp_empty Zero ⟷ True"
| "rexp_empty One ⟷ False"
| "rexp_empty (Atom a) ⟷ False"
| "rexp_empty (Plus r s) ⟷ rexp_empty r ∧ rexp_empty s"
| "rexp_empty (Times r s) ⟷ rexp_empty r ∨ rexp_empty s"
| "rexp_empty (Star r) ⟷ False"

(* TODO Fixme: This code_abbrev rule does not work. Why? *)
lemma rexp_empty_iff [code_abbrev]: "rexp_empty r ⟷ lang r = {}"
by (induction r) auto

text‹Composition on rhs usually complicates matters:›
lemma map_map_rexp:
"map_rexp f (map_rexp g r) = map_rexp (λr. f (g r)) r"
unfolding rexp.map_comp o_def ..

lemma map_rexp_ident[simp]: "map_rexp (λx. x) = (λr. r)"
unfolding id_def[symmetric] fun_eq_iff rexp.map_id id_apply by (intro allI refl)

lemma atoms_lang: "w : lang r ⟹ set w ⊆ atoms r"
proof(induction r arbitrary: w)
case Times thus ?case by fastforce
next
case Star thus ?case by (fastforce simp add: star_conv_concat)
qed auto

lemma lang_eq_ext: "(lang r = lang s) =
(∀w ∈ lists(atoms r ∪ atoms s). w ∈ lang r ⟷ w ∈ lang s)"
by (auto simp: atoms_lang[unfolded subset_iff])

lemma lang_eq_ext_Nil_fold_Deriv:
fixes r s
defines "𝔅 ≡ {(fold Deriv w (lang r), fold Deriv w (lang s))| w. w∈lists (atoms r ∪ atoms s)}"
shows "lang r = lang s ⟷ (∀(K, L) ∈ 𝔅. [] ∈ K ⟷ [] ∈ L)"
unfolding lang_eq_ext 𝔅_def by (subst (1 2) in_fold_Deriv[of "[]", simplified, symmetric]) auto

subsection ‹Term ordering›

instantiation rexp :: (order) "{order}"
begin

fun le_rexp :: "('a::order) rexp ⇒ ('a::order) rexp ⇒ bool"
where
"le_rexp Zero _ = True"
| "le_rexp _ Zero = False"
| "le_rexp One _ = True"
| "le_rexp _ One = False"
| "le_rexp (Atom a) (Atom b) = (a <= b)"
| "le_rexp (Atom _) _ = True"
| "le_rexp _ (Atom _) = False"
| "le_rexp (Star r) (Star s) = le_rexp r s"
| "le_rexp (Star _) _ = True"
| "le_rexp _ (Star _) = False"
| "le_rexp (Plus r r') (Plus s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Plus _ _) _ = True"
| "le_rexp _ (Plus _ _) = False"
| "le_rexp (Times r r') (Times s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"

(* The class instance stuff is by Dmitriy Traytel *)

definition less_eq_rexp where "r ≤ s ≡ le_rexp r s"
definition less_rexp where "r < s ≡ le_rexp r s ∧ r ≠ s"

lemma le_rexp_Zero: "le_rexp r Zero ⟹ r = Zero"
by (induction r) auto

lemma le_rexp_refl: "le_rexp r r"
by (induction r) auto

lemma le_rexp_antisym: "⟦le_rexp r s; le_rexp s r⟧ ⟹ r = s"
by (induction r s rule: le_rexp.induct) (auto dest: le_rexp_Zero)

lemma le_rexp_trans: "⟦le_rexp r s; le_rexp s t⟧ ⟹ le_rexp r t"
proof (induction r s arbitrary: t rule: le_rexp.induct)
fix v t assume "le_rexp (Atom v) t" thus "le_rexp One t" by (cases t) auto
next
fix s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
fix s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
fix s t assume "le_rexp (Star s) t" thus "le_rexp One t" by (cases t) auto
next
fix v u t assume "le_rexp (Atom v) (Atom u)" "le_rexp (Atom u) t"
thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s t assume "le_rexp (Star s) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
fix r s t
assume IH: "⋀t. le_rexp r s ⟹ le_rexp s t ⟹ le_rexp r t"
and "le_rexp (Star r) (Star s)" "le_rexp (Star s) t"
thus "le_rexp (Star r) t" by (cases t) auto
next
fix r s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
fix r s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
fix r1 r2 s1 s2 t
assume "⋀t. r1 = s1 ⟹ le_rexp r2 s2 ⟹ le_rexp s2 t ⟹ le_rexp r2 t"
"⋀t. r1 ≠ s1 ⟹ le_rexp r1 s1 ⟹ le_rexp s1 t ⟹ le_rexp r1 t"
"le_rexp (Plus r1 r2) (Plus s1 s2)" "le_rexp (Plus s1 s2) t"
thus "le_rexp (Plus r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
next
fix r1 r2 s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Plus r1 r2) t" by (cases t) auto
next
fix r1 r2 s1 s2 t
assume "⋀t. r1 = s1 ⟹ le_rexp r2 s2 ⟹ le_rexp s2 t ⟹ le_rexp r2 t"
"⋀t. r1 ≠ s1 ⟹ le_rexp r1 s1 ⟹ le_rexp s1 t ⟹ le_rexp r1 t"
"le_rexp (Times r1 r2) (Times s1 s2)" "le_rexp (Times s1 s2) t"
thus "le_rexp (Times r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
qed auto

instance proof
qed (auto simp add: less_eq_rexp_def less_rexp_def
intro: le_rexp_refl le_rexp_antisym le_rexp_trans)

end

instantiation rexp :: (linorder) "{linorder}"
begin

lemma le_rexp_total: "le_rexp (r :: 'a :: linorder rexp) s ∨ le_rexp s r"
by (induction r s rule: le_rexp.induct) auto

instance proof
qed (unfold less_eq_rexp_def less_rexp_def, rule le_rexp_total)

end

end


# Theory NDerivative

section ‹Normalizing Derivative›

theory NDerivative
imports
Regular_Exp
begin

subsection ‹Normalizing operations›

text ‹associativity, commutativity, idempotence, zero›

fun nPlus :: "'a::order rexp ⇒ 'a rexp ⇒ 'a rexp"
where
"nPlus Zero r = r"
| "nPlus r Zero = r"
| "nPlus (Plus r s) t = nPlus r (nPlus s t)"
| "nPlus r (Plus s t) =
(if r = s then (Plus s t)
else if le_rexp r s then Plus r (Plus s t)
else Plus s (nPlus r t))"
| "nPlus r s =
(if r = s then r
else if le_rexp r s then Plus r s
else Plus s r)"

lemma lang_nPlus[simp]: "lang (nPlus r s) = lang (Plus r s)"
by (induction r s rule: nPlus.induct) auto

text ‹associativity, zero, one›

fun nTimes :: "'a::order rexp ⇒ 'a rexp ⇒ 'a rexp"
where
"nTimes Zero _ = Zero"
| "nTimes _ Zero = Zero"
| "nTimes One r = r"
| "nTimes r One = r"
| "nTimes (Times r s) t = Times r (nTimes s t)"
| "nTimes r s = Times r s"

lemma lang_nTimes[simp]: "lang (nTimes r s) = lang (Times r s)"
by (induction r s rule: nTimes.induct) (auto simp: conc_assoc)

primrec norm :: "'a::order rexp ⇒ 'a rexp"
where
"norm Zero = Zero"
| "norm One = One"
| "norm (Atom a) = Atom a"
| "norm (Plus r s) = nPlus (norm r) (norm s)"
| "norm (Times r s) = nTimes (norm r) (norm s)"
| "norm (Star r) = Star (norm r)"

lemma lang_norm[simp]: "lang (norm r) = lang r"
by (induct r) auto

primrec nderiv :: "'a::order ⇒ 'a rexp ⇒ 'a rexp"
where
"nderiv _ Zero = Zero"
| "nderiv _ One = Zero"
| "nderiv a (Atom b) = (if a = b then One else Zero)"
| "nderiv a (Plus r s) = nPlus (nderiv a r) (nderiv a s)"
| "nderiv a (Times r s) =
(let r's = nTimes (nderiv a r) s
in if nullable r then nPlus r's (nderiv a s) else r's)"
| "nderiv a (Star r) = nTimes (nderiv a r) (Star r)"

lemma lang_nderiv: "lang (nderiv a r) = Deriv a (lang r)"
by (induction r) (auto simp: Let_def nullable_iff)

lemma deriv_no_occurrence:
"x ∉ atoms r ⟹ nderiv x r = Zero"
by (induction r) auto

lemma atoms_nPlus[simp]: "atoms (nPlus r s) = atoms r ∪ atoms s"
by (induction r s rule: nPlus.induct) auto

lemma atoms_nTimes: "atoms (nTimes r s) ⊆ atoms r ∪ atoms s"
by (induction r s rule: nTimes.induct) auto

lemma atoms_norm: "atoms (norm r) ⊆ atoms r"
by (induction r) (auto dest!:subsetD[OF atoms_nTimes])

lemma atoms_nderiv: "atoms (nderiv a r) ⊆ atoms r"
by (induction r) (auto simp: Let_def dest!:subsetD[OF atoms_nTimes])

end


# Theory Equivalence_Checking

section ‹Deciding Regular Expression Equivalence›

theory Equivalence_Checking
imports
NDerivative
"HOL-Library.While_Combinator"
begin

subsection ‹Bisimulation between languages and regular expressions›

coinductive bisimilar :: "'a lang ⇒ 'a lang ⇒ bool" where
"([] ∈ K ⟷ [] ∈ L)
⟹ (⋀x. bisimilar (Deriv x K) (Deriv x L))
⟹ bisimilar K L"

lemma equal_if_bisimilar:
assumes "bisimilar K L" shows "K = L"
proof (rule set_eqI)
fix w
from ‹bisimilar K L› show "w ∈ K ⟷ w ∈ L"
proof (induct w arbitrary: K L)
case Nil thus ?case by (auto elim: bisimilar.cases)
next
case (Cons a w K L)
from ‹bisimilar K L› have "bisimilar (Deriv a K) (Deriv a L)"
by (auto elim: bisimilar.cases)
then have "w ∈ Deriv a K ⟷ w ∈ Deriv a L" by (rule Cons(1))
thus ?case by (auto simp: Deriv_def)
qed
qed

lemma language_coinduct:
fixes R (infixl "∼" 50)
assumes "K ∼ L"
assumes "⋀K L. K ∼ L ⟹ ([] ∈ K ⟷ [] ∈ L)"
assumes "⋀K L x. K ∼ L ⟹ Deriv x K ∼ Deriv x L"
shows "K = L"
apply (rule equal_if_bisimilar)
apply (rule bisimilar.coinduct[of R, OF ‹K ∼ L›])
apply (auto simp: assms)
done

type_synonym 'a rexp_pair = "'a rexp * 'a rexp"
type_synonym 'a rexp_pairs = "'a rexp_pair list"

definition is_bisimulation ::  "'a::order list ⇒ 'a rexp_pair set ⇒ bool"
where
"is_bisimulation as R =
(∀(r,s)∈ R. (atoms r ∪ atoms s ⊆ set as) ∧ (nullable r ⟷ nullable s) ∧
(∀a∈set as. (nderiv a r, nderiv a s) ∈ R))"

lemma bisim_lang_eq:
assumes bisim: "is_bisimulation as ps"
assumes "(r, s) ∈ ps"
shows "lang r = lang s"
proof -
define ps' where "ps' = insert (Zero, Zero) ps"
from bisim have bisim': "is_bisimulation as ps'"
by (auto simp: ps'_def is_bisimulation_def)
let ?R = "λK L. (∃(r,s)∈ps'. K = lang r ∧ L = lang s)"
show ?thesis
proof (rule language_coinduct[where R="?R"])
from ‹(r, s) ∈ ps›
have "(r, s) ∈ ps'" by (auto simp: ps'_def)
thus "?R (lang r) (lang s)" by auto
next
fix K L assume "?R K L"
then obtain r s where rs: "(r, s) ∈ ps'"
and KL: "K = lang r" "L = lang s" by auto
with bisim' have "nullable r ⟷ nullable s"
by (auto simp: is_bisimulation_def)
thus "[] ∈ K ⟷ [] ∈ L" by (auto simp: nullable_iff KL)
fix a
show "?R (Deriv a K) (Deriv a L)"
proof cases
assume "a ∈ set as"
with rs bisim'
have "(nderiv a r, nderiv a s) ∈ ps'"
by (auto simp: is_bisimulation_def)
thus ?thesis by (force simp: KL lang_nderiv)
next
assume "a ∉ set as"
with bisim' rs
have "a ∉ atoms r" "a ∉ atoms s" by (auto simp: is_bisimulation_def)
then have "nderiv a r = Zero" "nderiv a s = Zero"
by (auto intro: deriv_no_occurrence)
then have "Deriv a K = lang Zero"
"Deriv a L = lang Zero"
unfolding KL lang_nderiv[symmetric] by auto
thus ?thesis by (auto simp: ps'_def)
qed
qed
qed

subsection ‹Closure computation›

definition closure ::
"'a::order list ⇒ 'a rexp_pair ⇒ ('a rexp_pairs * 'a rexp_pair set) option"
where
"closure as = rtrancl_while (%(r,s). nullable r = nullable s)
(%(r,s). map (λa. (nderiv a r, nderiv a s)) as)"

definition pre_bisim :: "'a::order list ⇒ 'a rexp ⇒ 'a rexp ⇒
'a rexp_pairs * 'a rexp_pair set ⇒ bool"
where
"pre_bisim as r s = (λ(ws,R).
(r,s) ∈ R ∧ set ws ⊆ R ∧
(∀(r,s)∈ R. atoms r ∪ atoms s ⊆ set as) ∧
(∀(r,s)∈ R - set ws. (nullable r ⟷ nullable s) ∧
(∀a∈set as. (nderiv a r, nderiv a s) ∈ R)))"

theorem closure_sound:
assumes result: "closure as (r,s) = Some([],R)"
and atoms: "atoms r ∪ atoms s ⊆ set as"
shows "lang r = lang s"
proof-
let ?test = "While_Combinator.rtrancl_while_test (%(r,s). nullable r = nullable s)"
let ?step = "While_Combinator.rtrancl_while_step (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)"
{ fix st assume inv: "pre_bisim as r s st" and test: "?test st"
have "pre_bisim as r s (?step st)"
proof (cases st)
fix ws R assume "st = (ws, R)"
with test obtain r s t where st: "st = ((r, s) # t, R)" and "nullable r = nullable s"
by (cases ws) auto
with inv show ?thesis using atoms_nderiv[of _ r] atoms_nderiv[of _ s]
unfolding st rtrancl_while_test.simps rtrancl_while_step.simps pre_bisim_def Ball_def
by simp_all blast+
qed
}
moreover
from atoms
have "pre_bisim as r s ([(r,s)],{(r,s)})" by (simp add: pre_bisim_def)
ultimately have pre_bisim_ps: "pre_bisim as r s ([],R)"
by (rule while_option_rule[OF _ result[unfolded closure_def rtrancl_while_def]])
then have "is_bisimulation as R" "(r, s) ∈ R"
by (auto simp: pre_bisim_def is_bisimulation_def)
thus "lang r = lang s" by (rule bisim_lang_eq)
qed

subsection ‹Bisimulation-free proof of closure computation›

text‹The equivalence check can be viewed as the product construction
of two automata. The state space is the reflexive transitive closure of
the pair of next-state functions, i.e. derivatives.›

lemma rtrancl_nderiv_nderivs: defines "nderivs == foldl (%r a. nderiv a r)"
shows "{((r,s),(nderiv a r,nderiv a s))| r s a. a : A}^* =
{((r,s),(nderivs r w,nderivs s w))| r s w. w : lists A}" (is "?L = ?R")
proof-
note [simp] = nderivs_def
{ fix r s r' s'
have "((r,s),(r',s')) : ?L ⟹ ((r,s),(r',s')) : ?R"
proof(induction rule: converse_rtrancl_induct2)
case refl show ?case by (force intro!: foldl.simps(1)[symmetric])
next
case step thus ?case by(force intro!: foldl.simps(2)[symmetric])
qed
} moreover
{ fix r s r' s'
{ fix w have "∀x∈set w. x ∈ A ⟹ ((r, s), nderivs r w, nderivs s w) :?L"
proof(induction w rule: rev_induct)
case Nil show ?case by simp
next
case snoc thus ?case by (auto elim!: rtrancl_into_rtrancl)
qed
}
hence "((r,s),(r',s')) : ?R ⟹ ((r,s),(r',s')) : ?L" by auto
} ultimately show ?thesis by (auto simp: in_lists_conv_set) blast
qed

lemma nullable_nderivs:
"nullable (foldl (%r a. nderiv a r) r w) = (w : lang r)"
by (induct w arbitrary: r) (simp_all add: nullable_iff lang_nderiv Deriv_def)

theorem closure_sound_complete:
assumes result: "closure as (r,s) = Some(ws,R)"
and atoms: "set as = atoms r ∪ atoms s"
shows "ws = [] ⟷ lang r = lang s"
proof -
have leq: "(lang r = lang s) =
(∀(r',s') ∈ {((r0,s0),(nderiv a r0,nderiv a s0))| r0 s0 a. a : set as}^*  {(r,s)}.
nullable r' = nullable s')"
by(simp add: atoms rtrancl_nderiv_nderivs Ball_def lang_eq_ext imp_ex nullable_nderivs
del:Un_iff)
have "{(x,y). y ∈ set ((λ(p,q). map (λa. (nderiv a p, nderiv a q)) as) x)} =
{((r,s), nderiv a r, nderiv a s) |r s a. a ∈ set as}"
by auto
with atoms rtrancl_while_Some[OF result[unfolded closure_def]]
show ?thesis by (auto simp add: leq Ball_def split: if_splits)
qed

subsection ‹The overall procedure›

primrec add_atoms :: "'a rexp ⇒ 'a list ⇒ 'a list"
where
| "add_atoms (Atom a) = List.insert a"

lemma set_add_atoms: "set (add_atoms r as) = atoms r ∪ set as"
by (induct r arbitrary: as) auto

definition check_eqv :: "nat rexp ⇒ nat rexp ⇒ bool" where
"check_eqv r s =
(let nr = norm r; ns = norm s; as = add_atoms nr (add_atoms ns [])
in case closure as (nr, ns) of
Some([],_) ⇒ True | _ ⇒ False)"

lemma soundness:
assumes "check_eqv r s" shows "lang r = lang s"
proof -
let ?nr = "norm r" let ?ns = "norm s"
obtain R where 1: "closure ?as (?nr,?ns) = Some([],R)"
using assms by (auto simp: check_eqv_def Let_def split:option.splits list.splits)
then have "lang (norm r) = lang (norm s)"
by (rule closure_sound) (auto simp: set_add_atoms dest!: subsetD[OF atoms_norm])
thus "lang r = lang s" by simp
qed

text‹Test:›
lemma "check_eqv (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))"
by eval

end


# Theory Relation_Interpretation

section ‹Regular Expressions as Homogeneous Binary Relations›

theory Relation_Interpretation
imports Regular_Exp
begin

primrec rel :: "('a ⇒ ('b * 'b) set) ⇒ 'a rexp ⇒ ('b * 'b) set"
where
"rel v Zero = {}" |
"rel v One = Id" |
"rel v (Atom a) = v a" |
"rel v (Plus r s) = rel v r ∪ rel v s" |
"rel v (Times r s) = rel v r O rel v s" |
"rel v (Star r) = (rel v r)^*"

primrec word_rel :: "('a ⇒ ('b * 'b) set) ⇒ 'a list ⇒ ('b * 'b) set"
where
"word_rel v [] = Id"
| "word_rel v (a#as) = v a O word_rel v as"

lemma word_rel_append:
"word_rel v w O word_rel v w' = word_rel v (w @ w')"
by (rule sym) (induct w, auto)

lemma rel_word_rel: "rel v r = (⋃w∈lang r. word_rel v w)"
proof (induct r)
case Times thus ?case
by (auto simp: rel_def word_rel_append conc_def relcomp_UNION_distrib relcomp_UNION_distrib2)
next
case (Star r)
{ fix n
have "(rel v r) ^^ n = (⋃w ∈ lang r ^^ n. word_rel v w)"
proof (induct n)
case 0 show ?case by simp
next
case (Suc n) thus ?case
unfolding relpow.simps relpow_commute[symmetric]
by (auto simp add: Star conc_def word_rel_append
relcomp_UNION_distrib relcomp_UNION_distrib2)
qed }

thus ?case unfolding rel.simps
by (force simp: rtrancl_power star_def)
qed auto

text ‹Soundness:›

lemma soundness:
"lang r = lang s ⟹ rel v r = rel v s"
unfolding rel_word_rel by auto

end


# Theory Regexp_Method

section ‹Proving Relation (In)equalities via Regular Expressions›

theory Regexp_Method
imports Equivalence_Checking Relation_Interpretation
begin

primrec rel_of_regexp :: "('a * 'a) set list ⇒ nat rexp ⇒ ('a * 'a) set" where
"rel_of_regexp vs Zero  = {}" |
"rel_of_regexp vs One  = Id" |
"rel_of_regexp vs (Atom i)  = vs ! i" |
"rel_of_regexp vs (Plus r s)  = rel_of_regexp vs r  ∪ rel_of_regexp vs s " |
"rel_of_regexp vs (Times r s)  = rel_of_regexp vs r O rel_of_regexp vs s" |
"rel_of_regexp vs (Star r)  = (rel_of_regexp vs r)^*"

lemma rel_of_regexp_rel: "rel_of_regexp vs r = rel (λi. vs ! i) r"
by (induct r) auto

primrec rel_eq where
"rel_eq (r, s) vs = (rel_of_regexp vs r = rel_of_regexp vs s)"

lemma rel_eqI: "check_eqv r s ⟹ rel_eq (r, s) vs"
unfolding rel_eq.simps rel_of_regexp_rel
by (rule Relation_Interpretation.soundness)
(rule Equivalence_Checking.soundness)

lemmas regexp_reify = rel_of_regexp.simps rel_eq.simps
lemmas regexp_unfold = trancl_unfold_left subset_Un_eq

ML ‹
local

fun check_eqv (ct, b) = Thm.mk_binop @{cterm "Pure.eq :: bool ⇒ bool ⇒ prop"}
ct (if b then @{cterm True} else @{cterm False});

val (_, check_eqv_oracle) = Context.>>> (Context.map_theory_result

in

val regexp_conv =
@{computation_conv bool terms: check_eqv datatypes: "nat rexp"}
(fn _ => fn b => fn ct => check_eqv_oracle (ct, b))

end
›

method_setup regexp = ‹
Scan.succeed (fn ctxt =>
SIMPLE_METHOD' (
(TRY o eresolve_tac ctxt @{thms rev_subsetD})
THEN' (Subgoal.FOCUS_PARAMS (fn {context = ctxt', ...} =>
TRY (Local_Defs.unfold_tac ctxt' @{thms regexp_unfold})
THEN Reification.tac ctxt' @{thms regexp_reify} NONE 1
THEN resolve_tac ctxt' @{thms rel_eqI} 1
THEN CONVERSION (HOLogic.Trueprop_conv (regexp_conv ctxt')) 1
THEN resolve_tac ctxt' [TrueI] 1) ctxt)))
› ‹decide relation equalities via regular expressions›

hide_const (open) le_rexp nPlus nTimes norm nullable bisimilar is_bisimulation closure
pre_bisim add_atoms check_eqv rel word_rel rel_eq

text ‹Example:›

lemma "(r ∪ s^+)^* = (r ∪ s)^*"
by regexp

end


# Theory Regexp_Constructions

(*
File:     Regexp_Constructions.thy
Author:   Manuel Eberl <eberlm@in.tum.de>

Some simple constructions on regular expressions to illustrate closure properties of regular
languages: reversal, substitution, prefixes, suffixes, subwords ("fragments")
*)
section ‹Basic constructions on regular expressions›
theory Regexp_Constructions
imports
Main
"HOL-Library.Sublist"
Regular_Exp
begin

subsection ‹Reverse language›

lemma rev_conc [simp]: "rev  (A @@ B) = rev  B @@ rev  A"
unfolding conc_def image_def by force

lemma rev_compower [simp]: "rev  (A ^^ n) = (rev  A) ^^ n"
by (induction n) (simp_all add: conc_pow_comm)

lemma rev_star [simp]: "rev  star A = star (rev  A)"

subsection ‹Substituting characters in a language›

definition subst_word :: "('a ⇒ 'b list) ⇒ 'a list ⇒ 'b list" where
"subst_word f xs = concat (map f xs)"

lemma subst_word_Nil [simp]: "subst_word f [] = []"

lemma subst_word_singleton [simp]: "subst_word f [x] = f x"

lemma subst_word_append [simp]: "subst_word f (xs @ ys) = subst_word f xs @ subst_word f ys"

lemma subst_word_Cons [simp]: "subst_word f (x # xs) = f x @ subst_word f xs"

lemma subst_word_conc [simp]: "subst_word f  (A @@ B) = subst_word f  A @@ subst_word f  B"
unfolding conc_def image_def by force

lemma subst_word_compower [simp]: "subst_word f  (A ^^ n) = (subst_word f  A) ^^ n"
by (induction n) simp_all

lemma subst_word_star [simp]: "subst_word f  (star A) = star (subst_word f  A)"

text ‹Suffix language›

definition Suffixes :: "'a list set ⇒ 'a list set" where
"Suffixes A = {w. ∃q. q @ w ∈ A}"

lemma Suffixes_altdef [code]: "Suffixes A = (⋃w∈A. set (suffixes w))"
unfolding Suffixes_def set_suffixes_eq suffix_def by blast

lemma Nil_in_Suffixes_iff [simp]: "[] ∈ Suffixes A ⟷ A ≠ {}"
by (auto simp: Suffixes_def)

lemma Suffixes_empty [simp]: "Suffixes {} = {}"
by (auto simp: Suffixes_def)

lemma Suffixes_empty_iff [simp]: "Suffixes A = {} ⟷ A = {}"
by (auto simp: Suffixes_altdef)

lemma Suffixes_singleton [simp]: "Suffixes {xs} = set (suffixes xs)"
by (auto simp: Suffixes_altdef)

lemma Suffixes_insert: "Suffixes (insert xs A) = set (suffixes xs) ∪ Suffixes A"

lemma Suffixes_conc [simp]: "A ≠ {} ⟹ Suffixes (A @@ B) = Suffixes B ∪ (Suffixes A @@ B)"
unfolding Suffixes_altdef conc_def by (force simp: suffix_append)

lemma Suffixes_union [simp]: "Suffixes (A ∪ B) = Suffixes A ∪ Suffixes B"
by (auto simp: Suffixes_def)

lemma Suffixes_UNION [simp]: "Suffixes (⋃(f  A)) = ⋃((λx. Suffixes (f x))  A)"
by (auto simp: Suffixes_def)

lemma Suffixes_compower:
assumes "A ≠ {}"
shows   "Suffixes (A ^^ n) = insert [] (Suffixes A @@ (⋃k<n. A ^^ k))"
proof (induction n)
case (Suc n)
from Suc have "Suffixes (A ^^ Suc n) =
insert [] (Suffixes A @@ ((⋃k<n. A ^^ k) ∪ A ^^ n))"
also have "(⋃k<n. A ^^ k) ∪ A ^^ n = (⋃k∈insert n {..<n}. A ^^ k)"  by blast
also have "insert n {..<n} = {..<Suc n}" by auto
finally show ?case .
qed simp_all

lemma Suffixes_star [simp]:
assumes "A ≠ {}"
shows   "Suffixes (star A) = Suffixes A @@ star A"
proof -
have "star A = (⋃n. A ^^ n)" unfolding star_def ..
also have "Suffixes … = (⋃x. Suffixes (A ^^ x))" by simp
also have "… = (⋃n. insert [] (Suffixes A @@ (⋃k<n. A ^^ k)))"
using assms by (subst Suffixes_compower) auto
also have "… = insert [] (Suffixes A @@ (⋃n. (⋃k<n. A ^^ k)))"
also have "(⋃n. (⋃k<n. A ^^ k)) = (⋃n. A ^^ n)" by auto
also have "… = star A" unfolding star_def ..
also have "insert [] (Suffixes A @@ star A) = Suffixes A @@ star A"
using assms by auto
finally show ?thesis .
qed

text ‹Prefix language›

definition Prefixes :: "'a list set ⇒ 'a list set" where
"Prefixes A = {w. ∃q. w @ q ∈ A}"

lemma Prefixes_altdef [code]: "Prefixes A = (⋃w∈A. set (prefixes w))"
unfolding Prefixes_def set_prefixes_eq prefix_def by blast

lemma Nil_in_Prefixes_iff [simp]: "[] ∈ Prefixes A ⟷ A ≠ {}"
by (auto simp: Prefixes_def)

lemma Prefixes_empty [simp]: "Prefixes {} = {}"
by (auto simp: Prefixes_def)

lemma Prefixes_empty_iff [simp]: "Prefixes A = {} ⟷ A = {}"
by (auto simp: Prefixes_altdef)

lemma Prefixes_singleton [simp]: "Prefixes {xs} = set (prefixes xs)"
by (auto simp: Prefixes_altdef)

lemma Prefixes_insert: "Prefixes (insert xs A) = set (prefixes xs) ∪ Prefixes A"

lemma Prefixes_conc [simp]: "B ≠ {} ⟹ Prefixes (A @@ B) = Prefixes A ∪ (A @@ Prefixes B)"
unfolding Prefixes_altdef conc_def by (force simp: prefix_append)

lemma Prefixes_union [simp]: "Prefixes (A ∪ B) = Prefixes A ∪ Prefixes B"
by (auto simp: Prefixes_def)

lemma Prefixes_UNION [simp]: "Prefixes (⋃(f  A)) = ⋃((λx. Prefixes (f x))  A)"
by (auto simp: Prefixes_def)

lemma Prefixes_rev: "Prefixes (rev  A) = rev  Suffixes A"
by (auto simp: Prefixes_altdef prefixes_rev Suffixes_altdef)

lemma Suffixes_rev: "Suffixes (rev  A) = rev  Prefixes A"
by (auto simp: Prefixes_altdef suffixes_rev Suffixes_altdef)

lemma Prefixes_compower:
assumes "A ≠ {}"
shows   "Prefixes (A ^^ n) = insert [] ((⋃k<n. A ^^ k) @@ Prefixes A)"
proof -
have "A ^^ n = rev  ((rev  A) ^^ n)" by (simp add: image_image)
also have "Prefixes … = insert [] ((⋃k<n. A ^^ k) @@ Prefixes A)"
unfolding Prefixes_rev
by (subst Suffixes_compower) (simp_all add: image_UN image_image Suffixes_rev assms)
finally show ?thesis .
qed

lemma Prefixes_star [simp]:
assumes "A ≠ {}"
shows   "Prefixes (star A) = star A @@ Prefixes A"
proof -
have "star A = rev  star (rev  A)" by (simp add: image_image)
also have "Prefixes … = star A @@ Prefixes A"
unfolding Prefixes_rev
by (subst Suffixes_star) (simp_all add: assms image_image Suffixes_rev)
finally show ?thesis .
qed

subsection ‹Subword language›

text ‹
The language of all sub-words, i.e. all words that are a contiguous sublist of a word in
the original language.
›
definition Sublists :: "'a list set ⇒ 'a list set" where
"Sublists A = {w. ∃q∈A. sublist w q}"

lemma Sublists_altdef [code]: "Sublists A = (⋃w∈A. set (sublists w))"
by (auto simp: Sublists_def)

lemma Sublists_empty [simp]: "Sublists {} = {}"
by (auto simp: Sublists_def)

lemma Sublists_singleton [simp]: "Sublists {w} = set (sublists w)"
by (auto simp: Sublists_altdef)

lemma Sublists_insert: "Sublists (insert w A) = set (sublists w) ∪ Sublists A"
by (auto simp: Sublists_altdef)

lemma Sublists_Un [simp]: "Sublists (A ∪ B) = Sublists A ∪ Sublists B"
by (auto simp: Sublists_altdef)

lemma Sublists_UN [simp]: "Sublists (⋃(f  A)) = ⋃((λx. Sublists (f x))  A)"
by (auto simp: Sublists_altdef)

lemma Sublists_conv_Prefixes: "Sublists A = Prefixes (Suffixes A)"
by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)

lemma Sublists_conv_Suffixes: "Sublists A = Suffixes (Prefixes A)"
by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)

lemma Sublists_conc [simp]:
assumes "A ≠ {}" "B ≠ {}"
shows   "Sublists (A @@ B) = Sublists A ∪ Sublists B ∪ Suffixes A @@ Prefixes B"
using assms unfolding Sublists_conv_Suffixes by auto

lemma star_not_empty [simp]: "star A ≠ {}"
by auto

lemma Sublists_star:
"A ≠ {} ⟹ Sublists (star A) = Sublists A ∪ Suffixes A @@ star A @@ Prefixes A"

lemma Prefixes_subset_Sublists: "Prefixes A ⊆ Sublists A"
unfolding Prefixes_def Sublists_def by auto

lemma Suffixes_subset_Sublists: "Suffixes A ⊆ Sublists A"
unfolding Suffixes_def Sublists_def by auto

subsection ‹Fragment language›

text ‹
The following is the fragment language of a given language, i.e. the set of all words that
are (not necessarily contiguous) sub-sequences of a word in the original language.
›
definition Subseqs where "Subseqs A = (⋃w∈A. set (subseqs w))"

lemma Subseqs_empty [simp]: "Subseqs {} = {}"

lemma Subseqs_insert [simp]: "Subseqs (insert xs A) = set (subseqs xs) ∪ Subseqs A"

lemma Subseqs_singleton [simp]: "Subseqs {xs} = set (subseqs xs)"
by simp

lemma Subseqs_Un [simp]: "Subseqs (A ∪ B) = Subseqs A ∪ Subseqs B"

lemma Subseqs_UNION [simp]: "Subseqs (⋃(f  A)) = ⋃((λx. Subseqs (f x))  A)"

lemma Subseqs_conc [simp]: "Subseqs (A @@ B) = Subseqs A @@ Subseqs B"
proof safe
fix xs assume "xs ∈ Subseqs (A @@ B)"
then obtain ys zs where *: "ys ∈ A" "zs ∈ B" "subseq xs (ys @ zs)"
by (auto simp: Subseqs_def conc_def)
from *(3) obtain xs1 xs2 where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs"
by (rule subseq_appendE)
with *(1,2) show "xs ∈ Subseqs A @@ Subseqs B" by (auto simp: Subseqs_def set_subseqs_eq)
next
fix xs assume "xs ∈ Subseqs A @@ Subseqs B"
then obtain xs1 xs2 ys zs
where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs" "ys ∈ A" "zs ∈ B"
by (auto simp: conc_def Subseqs_def)
thus "xs ∈ Subseqs (A @@ B)" by (force simp: Subseqs_def conc_def intro: list_emb_append_mono)
qed

lemma Subseqs_compower [simp]: "Subseqs (A ^^ n) = Subseqs A ^^ n"
by (induction n) simp_all

lemma Subseqs_star [simp]: "Subseqs (star A) = star (Subseqs A)"

lemma Sublists_subset_Subseqs: "Sublists A ⊆ Subseqs A"
by (auto simp: Sublists_def Subseqs_def dest!: sublist_imp_subseq)

subsection ‹Various regular expression constructions›

text ‹A construction for language reversal of a regular expression:›

primrec rexp_rev where
"rexp_rev Zero = Zero"
| "rexp_rev One = One"
| "rexp_rev (Atom x) = Atom x"
| "rexp_rev (Plus r s) = Plus (rexp_rev r) (rexp_rev s)"
| "rexp_rev (Times r s) = Times (rexp_rev s) (rexp_rev r)"
| "rexp_rev (Star r) = Star (rexp_rev r)"

lemma lang_rexp_rev [simp]: "lang (rexp_rev r) = rev  lang r"
by (induction r) (simp_all add: image_Un)

text ‹The obvious construction for a singleton-language regular expression:›

fun rexp_of_word where
"rexp_of_word [] = One"
| "rexp_of_word [x] = Atom x"
| "rexp_of_word (x#xs) = Times (Atom x) (rexp_of_word xs)"

lemma lang_rexp_of_word [simp]: "lang (rexp_of_word xs) = {xs}"
by (induction xs rule: rexp_of_word.induct) (simp_all add: conc_def)

lemma size_rexp_of_word [simp]: "size (rexp_of_word xs) = Suc (2 * (length xs - 1))"
by (induction xs rule: rexp_of_word.induct) auto

text ‹Character substitution in a regular expression:›

primrec rexp_subst where
"rexp_subst f Zero = Zero"
| "rexp_subst f One = One"
| "rexp_subst f (Atom x) = rexp_of_word (f x)"
| "rexp_subst f (Plus r s) = Plus (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Times r s) = Times (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Star r) = Star (rexp_subst f r)"

lemma lang_rexp_subst: "lang (rexp_subst f r) = subst_word f  lang r"
by (induction r) (simp_all add: image_Un)

text ‹Suffix language of a regular expression:›

primrec suffix_rexp :: "'a rexp ⇒ 'a rexp" where
"suffix_rexp Zero = Zero"
| "suffix_rexp One = One"
| "suffix_rexp (Atom a) = Plus (Atom a) One"
| "suffix_rexp (Plus r s) = Plus (suffix_rexp r) (suffix_rexp s)"
| "suffix_rexp (Times r s) =
(if rexp_empty r then Zero else Plus (Times (suffix_rexp r) s) (suffix_rexp s))"
| "suffix_rexp (Star r) =
(if rexp_empty r then One else Times (suffix_rexp r) (Star r))"

theorem lang_suffix_rexp [simp]:
"lang (suffix_rexp r) = Suffixes (lang r)"
by (induction r) (auto simp: rexp_empty_iff)

text ‹Prefix language of a regular expression:›

primrec prefix_rexp :: "'a rexp ⇒ 'a rexp" where
"prefix_rexp Zero = Zero"
| "prefix_rexp One = One"
| "prefix_rexp (Atom a) = Plus (Atom a) One"
| "prefix_rexp (Plus r s) = Plus (prefix_rexp r) (prefix_rexp s)"
| "prefix_rexp (Times r s) =
(if rexp_empty s then Zero else Plus (Times r (prefix_rexp s)) (prefix_rexp r))"
| "prefix_rexp (Star r) =
(if rexp_empty r then One else Times (Star r) (prefix_rexp r))"

theorem lang_prefix_rexp [simp]:
"lang (prefix_rexp r) = Prefixes (lang r)"
by (induction r) (auto simp: rexp_empty_iff)

text ‹Sub-word language of a regular expression›

primrec sublist_rexp :: "'a rexp ⇒ 'a rexp" where
"sublist_rexp Zero = Zero"
| "sublist_rexp One = One"
| "sublist_rexp (Atom a) = Plus (Atom a) One"
| "sublist_rexp (Plus r s) = Plus (sublist_rexp r) (sublist_rexp s)"
| "sublist_rexp (Times r s) =
(if rexp_empty r ∨ rexp_empty s then Zero else
Plus (sublist_rexp r) (Plus (sublist_rexp s) (Times (suffix_rexp r) (prefix_rexp s))))"
| "sublist_rexp (Star r) =
(if rexp_empty r then One else
Plus (sublist_rexp r) (Times (suffix_rexp r) (Times (Star r) (prefix_rexp r))))"

theorem lang_sublist_rexp [simp]:
"lang (sublist_rexp r) = Sublists (lang r)"
by (induction r) (auto simp: rexp_empty_iff Sublists_star)

text ‹Fragment language of a regular expression:›

primrec subseqs_rexp :: "'a rexp ⇒ 'a rexp" where
"subseqs_rexp Zero = Zero"
| "subseqs_rexp One = One"
| "subseqs_rexp (Atom x) = Plus (Atom x) One"
| "subseqs_rexp (Plus r s) = Plus (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Times r s) = Times (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Star r) = Star (subseqs_rexp r)"

lemma lang_subseqs_rexp [simp]: "lang (subseqs_rexp r) = Subseqs (lang r)"
by (induction r) auto

text ‹Subword language of a regular expression›

end


# Theory Derivatives

section "Derivatives of regular expressions"

(* Author: Christian Urban *)

theory Derivatives
imports Regular_Exp
begin

text‹This theory is based on work by Brozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}.›

subsection ‹Brzozowski's derivatives of regular expressions›

primrec
deriv :: "'a ⇒ 'a rexp ⇒ 'a rexp"
where
"deriv c (Zero) = Zero"
| "deriv c (One) = Zero"
| "deriv c (Atom c') = (if c = c' then One else Zero)"
| "deriv c (Plus r1 r2) = Plus (deriv c r1) (deriv c r2)"
| "deriv c (Times r1 r2) =
(if nullable r1 then Plus (Times (deriv c r1) r2) (deriv c r2) else Times (deriv c r1) r2)"
| "deriv c (Star r) = Times (deriv c r) (Star r)"

primrec
derivs :: "'a list ⇒ 'a rexp ⇒ 'a rexp"
where
"derivs [] r = r"
| "derivs (c # s) r = derivs s (deriv c r)"

lemma atoms_deriv_subset: "atoms (deriv x r) ⊆ atoms r"
by (induction r) (auto)

lemma atoms_derivs_subset: "atoms (derivs w r) ⊆ atoms r"
by (induction w arbitrary: r) (auto dest: atoms_deriv_subset[THEN subsetD])

lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)"
by (induct r) (simp_all add: nullable_iff)

lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)"
by (induct s arbitrary: r) (simp_all add: lang_deriv)

text ‹A regular expression matcher:›

definition matcher :: "'a rexp ⇒ 'a list ⇒ bool" where
"matcher r s = nullable (derivs s r)"

lemma matcher_correctness: "matcher r s ⟷ s ∈ lang r"
by (induct s arbitrary: r)
(simp_all add: nullable_iff lang_deriv matcher_def Deriv_def)

subsection ‹Antimirov's partial derivatives›

abbreviation
"Timess rs r ≡ (⋃r' ∈ rs. {Times r' r})"

lemma Timess_eq_image:
"Timess rs r = (λr'. Times r' r)  rs"
by auto

primrec
pderiv :: "'a ⇒ 'a rexp ⇒ 'a rexp set"
where
"pderiv c Zero = {}"
| "pderiv c One = {}"
| "pderiv c (Atom c') = (if c = c' then {One} else {})"
| "pderiv c (Plus r1 r2) = (pderiv c r1) ∪ (pderiv c r2)"
| "pderiv c (Times r1 r2) =
(if nullable r1 then Timess (pderiv c r1) r2 ∪ pderiv c r2 else Timess (pderiv c r1) r2)"
| "pderiv c (Star r) = Timess (pderiv c r) (Star r)"

primrec
pderivs :: "'a list ⇒ 'a rexp ⇒ ('a rexp) set"
where
"pderivs [] r = {r}"
| "pderivs (c # s) r = ⋃ (pderivs s  pderiv c r)"

abbreviation
pderiv_set :: "'a ⇒ 'a rexp set ⇒ 'a rexp set"
where
"pderiv_set c rs ≡ ⋃ (pderiv c  rs)"

abbreviation
pderivs_set :: "'a list ⇒ 'a rexp set ⇒ 'a rexp set"
where
"pderivs_set s rs ≡ ⋃ (pderivs s  rs)"

lemma pderivs_append:
"pderivs (s1 @ s2) r = ⋃ (pderivs s2  pderivs s1 r)"
by (induct s1 arbitrary: r) (simp_all)

lemma pderivs_snoc:
shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"

lemma pderivs_simps [simp]:
shows "pderivs s Zero = (if s = [] then {Zero} else {})"
and   "pderivs s One = (if s = [] then {One} else {})"
and   "pderivs s (Plus r1 r2) = (if s = [] then {Plus r1 r2} else (pderivs s r1) ∪ (pderivs s r2))"
by (induct s) (simp_all)

lemma pderivs_Atom:
shows "pderivs s (Atom c) ⊆ {Atom c, One}"
by (induct s) (simp_all)

subsection ‹Relating left-quotients and partial derivatives›

lemma Deriv_pderiv:
shows "Deriv c (lang r) = ⋃ (lang  pderiv c r)"
by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)

lemma Derivs_pderivs:
shows "Derivs s (lang r) = ⋃ (lang  pderivs s r)"
proof (induct s arbitrary: r)
case (Cons c s)
have ih: "⋀r. Derivs s (lang r) = ⋃ (lang  pderivs s r)" by fact
have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
also have "… = Derivs s (⋃ (lang  pderiv c r))" by (simp add: Deriv_pderiv)
also have "… = Derivss s (lang  (pderiv c r))"
also have "… = ⋃ (lang  (pderivs_set s (pderiv c r)))"
using ih by auto
also have "… = ⋃ (lang  (pderivs (c # s) r))" by simp
finally show "Derivs (c # s) (lang r) = ⋃ (lang  pderivs (c # s) r)" .

subsection ‹Relating derivatives and partial derivatives›

lemma deriv_pderiv:
shows "⋃ (lang  (pderiv c r)) = lang (deriv c r)"
unfolding lang_deriv Deriv_pderiv by simp

lemma derivs_pderivs:
shows "⋃ (lang  (pderivs s r)) = lang (derivs s r)"
unfolding lang_derivs Derivs_pderivs by simp

subsection ‹Finiteness property of partial derivatives›

definition
pderivs_lang :: "'a lang ⇒ 'a rexp ⇒ 'a rexp set"
where
"pderivs_lang A r ≡ ⋃x ∈ A. pderivs x r"

lemma pderivs_lang_subsetI:
assumes "⋀s. s ∈ A ⟹ pderivs s r ⊆ C"
shows "pderivs_lang A r ⊆ C"
using assms unfolding pderivs_lang_def by (rule UN_least)

lemma pderivs_lang_union:
shows "pderivs_lang (A ∪ B) r = (pderivs_lang A r ∪ pderivs_lang B r)"

lemma pderivs_lang_subset:
shows "A ⊆ B ⟹ pderivs_lang A r ⊆ pderivs_lang B r"

definition
"UNIV1 ≡ UNIV - {[]}"

lemma pderivs_lang_Zero [simp]:
shows "pderivs_lang UNIV1 Zero = {}"
unfolding UNIV1_def pderivs_lang_def by auto

lemma pderivs_lang_One [simp]:
shows "pderivs_lang UNIV1 One = {}"
unfolding UNIV1_def pderivs_lang_def by (auto split: if_splits)

lemma pderivs_lang_Atom [simp]:
shows "pderivs_lang UNIV1 (Atom c) = {One}"
unfolding UNIV1_def pderivs_lang_def
apply(auto)
apply(frule rev_subsetD)
apply(rule pderivs_Atom)
apply(simp)
apply(case_tac xa)
apply(auto split: if_splits)
done

lemma pderivs_lang_Plus [simp]:
shows "pderivs_lang UNIV1 (Plus r1 r2) = pderivs_lang UNIV1 r1 ∪ pderivs_lang UNIV1 r2"
unfolding UNIV1_def pderivs_lang_def by auto

text ‹Non-empty suffixes of a string (needed for the cases of @{const Times} and @{const Star} below)›

definition
"PSuf s ≡ {v. v ≠ [] ∧ (∃u. u @ v = s)}"

lemma PSuf_snoc:
shows "PSuf (s @ [c]) = (PSuf s) @@ {[c]} ∪ {[c]}"
unfolding PSuf_def conc_def
by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv)

lemma PSuf_Union:
shows "(⋃v ∈ PSuf s @@ {[c]}. f v) = (⋃v ∈ PSuf s. f (v @ [c]))"

lemma pderivs_lang_snoc:
shows "pderivs_lang (PSuf s @@ {[c]}) r = (pderiv_set c (pderivs_lang (PSuf s) r))"
unfolding pderivs_lang_def

lemma pderivs_Times:
shows "pderivs s (Times r1 r2) ⊆ Timess (pderivs s r1) r2 ∪ (pderivs_lang (PSuf s) r2)"
proof (induct s rule: rev_induct)
case (snoc c s)
have ih: "pderivs s (Times r1 r2) ⊆ Timess (pderivs s r1) r2 ∪ (pderivs_lang (PSuf s) r2)"
by fact
have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))"
also have "… ⊆ pderiv_set c (Timess (pderivs s r1) r2 ∪ (pderivs_lang (PSuf s) r2))"
using ih by fastforce
also have "… = pderiv_set c (Timess (pderivs s r1) r2) ∪ pderiv_set c (pderivs_lang (PSuf s) r2)"
by (simp)
also have "… = pderiv_set c (Timess (pderivs s r1) r2) ∪ pderivs_lang (PSuf s @@ {[c]}) r2"
also
have "… ⊆ pderiv_set c (Timess (pderivs s r1) r2) ∪ pderiv c r2 ∪ pderivs_lang (PSuf s @@ {[c]}) r2"
by auto
also
have "… ⊆ Timess (pderiv_set c (pderivs s r1)) r2 ∪ pderiv c r2 ∪ pderivs_lang (PSuf s @@ {[c]}) r2"
also have "… = Timess (pderivs (s @ [c]) r1) r2 ∪ pderiv c r2 ∪ pderivs_lang (PSuf s @@ {[c]}) r2"
also have "… ⊆ Timess (pderivs (s @ [c]) r1) r2 ∪ pderivs_lang (PSuf (s @ [c])) r2"
unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)
finally show ?case .
qed (simp)

lemma pderivs_lang_Times_aux1:
assumes a: "s ∈ UNIV1"
shows "pderivs_lang (PSuf s) r ⊆ pderivs_lang UNIV1 r"
using a unfolding UNIV1_def PSuf_def pderivs_lang_def by auto

lemma pderivs_lang_Times_aux2:
assumes a: "s ∈ UNIV1"
shows "Timess (pderivs s r1) r2 ⊆ Timess (pderivs_lang UNIV1 r1) r2"
using a unfolding pderivs_lang_def by auto

lemma pderivs_lang_Times:
shows "pderivs_lang UNIV1 (Times r1 r2) ⊆ Timess (pderivs_lang UNIV1 r1) r2 ∪ pderivs_lang UNIV1 r2"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Times)
using pderivs_lang_Times_aux1 pderivs_lang_Times_aux2
apply auto
apply blast
done

lemma pderivs_Star:
assumes a: "s ≠ []"
shows "pderivs s (Star r) ⊆ Timess (pderivs_lang (PSuf s) r) (Star r)"
using a
proof (induct s rule: rev_induct)
case (snoc c s)
have ih: "s ≠ [] ⟹ pderivs s (Star r) ⊆ Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
{ assume asm: "s ≠ []"
have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
also have "… ⊆ pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
using ih[OF asm] by fast
also have "… ⊆ Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) ∪ pderiv c (Star r)"
by (auto split: if_splits)
also have "… ⊆ Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) ∪ (Timess (pderiv c r) (Star r))"
by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
also have "… = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
finally have ?case .
}
moreover
{ assume asm: "s = []"
then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
}
ultimately show ?case by blast
qed (simp)

lemma pderivs_lang_Star:
shows "pderivs_lang UNIV1 (Star r) ⊆ Timess (pderivs_lang UNIV1 r) (Star r)"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Star)
done

lemma finite_Timess [simp]:
assumes a: "finite A"
shows "finite (Timess A r)"
using a by auto

lemma finite_pderivs_lang_UNIV1:
shows "finite (pderivs_lang UNIV1 r)"
apply(induct r)
finite_subset[OF pderivs_lang_Times]
finite_subset[OF pderivs_lang_Star])
done

lemma pderivs_lang_UNIV:
shows "pderivs_lang UNIV r = pderivs [] r ∪ pderivs_lang UNIV1 r"
unfolding UNIV1_def pderivs_lang_def
by blast

lemma finite_pderivs_lang_UNIV:
shows "finite (pderivs_lang UNIV r)"
unfolding pderivs_lang_UNIV

lemma finite_pderivs_lang:
shows "finite (pderivs_lang A r)"
by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)

text‹The following relationship between the alphabetic width of regular expressions
(called ‹awidth› below) and the number of partial derivatives was proved
by Antimirov~\cite{Antimirov95} and formalized by Max Haslbeck.›

fun awidth :: "'a rexp ⇒ nat" where
"awidth Zero = 0" |
"awidth One = 0" |
"awidth (Atom a) = 1" |
"awidth (Plus r1 r2) = awidth r1 + awidth r2" |
"awidth (Times r1 r2) = awidth r1 + awidth r2" |
"awidth (Star r1) = awidth r1"

lemma card_Timess_pderivs_lang_le:
"card (Timess (pderivs_lang A r) s) ≤ card (pderivs_lang A r)"
using finite_pderivs_lang unfolding Timess_eq_image by (rule card_image_le)

lemma card_pderivs_lang_UNIV1_le_awidth: "card (pderivs_lang UNIV1 r) ≤ awidth r"
proof (induction r)
case (Plus r1 r2)
have "card (pderivs_lang UNIV1 (Plus r1 r2)) = card (pderivs_lang UNIV1 r1 ∪ pderivs_lang UNIV1 r2)" by simp
also have "… ≤ card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
also have "… ≤ awidth (Plus r1 r2)" using Plus.IH by simp
finally show ?case .
next
case (Times r1 r2)
have "card (pderivs_lang UNIV1 (Times r1 r2)) ≤ card (Timess (pderivs_lang UNIV1 r1) r2 ∪ pderivs_lang UNIV1 r2)"
by (simp add: card_mono finite_pderivs_lang pderivs_lang_Times)
also have "… ≤ card (Timess (pderivs_lang UNIV1 r1) r2) + card (pderivs_lang UNIV1 r2)"
also have "… ≤ card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
also have "… ≤ awidth (Times r1 r2)" using Times.IH by simp
finally show ?case .
next
case (Star r)
have "card (pderivs_lang UNIV1 (Star r)) ≤ card (Timess (pderivs_lang UNIV1 r) (Star r))"
by (simp add: card_mono finite_pderivs_lang pderivs_lang_Star)
also have "… ≤ card (pderivs_lang UNIV1 r)" by (rule card_Timess_pderivs_lang_le)
also have "… ≤ awidth (Star r)" by (simp add: Star.IH)
finally show ?case .
qed (auto)

text‹Antimirov's Theorem 3.4:›
theorem card_pderivs_lang_UNIV_le_awidth: "card (pderivs_lang UNIV r) ≤ awidth r + 1"
proof -
have "card (insert r (pderivs_lang UNIV1 r)) ≤ Suc (card (pderivs_lang UNIV1 r))"
by(auto simp: card_insert_if[OF finite_pderivs_lang_UNIV1])
also have "… ≤ Suc (awidth r)" by(simp add: card_pderivs_lang_UNIV1_le_awidth)
finally show ?thesis by(simp add: pderivs_lang_UNIV)
qed

text‹Antimirov's Corollary 3.5:›
corollary card_pderivs_lang_le_awidth: "card (pderivs_lang A r) ≤ awidth r + 1"
by(rule order_trans[OF
card_mono[OF finite_pderivs_lang_UNIV pderivs_lang_subset[OF subset_UNIV]]
card_pderivs_lang_UNIV_le_awidth])

end


# Theory pEquivalence_Checking

section ‹Deciding Regular Expression Equivalence (2)›

theory pEquivalence_Checking
imports Equivalence_Checking Derivatives
begin

text‹\noindent Similar to theory @{theory "Regular-Sets.Equivalence_Checking"}, but
with partial derivatives instead of derivatives.›

text‹Lifting many notions to sets:›

definition "Lang R == UN r:R. lang r"
definition "Nullable R == EX r:R. nullable r"
definition "Pderiv a R == UN r:R. pderiv a r"
definition "Atoms R == UN r:R. atoms r"

(* FIXME: rename pderiv_set in Derivatives to Pderiv and drop the one above *)

lemma Atoms_pderiv: "Atoms(pderiv a r) ⊆ atoms r"
apply (induct r)
apply (auto simp: Atoms_def UN_subset_iff)
apply (fastforce)+
done

lemma Atoms_Pderiv: "Atoms(Pderiv a R) ⊆ Atoms R"
using Atoms_pderiv by (fastforce simp: Atoms_def Pderiv_def)

lemma pderiv_no_occurrence:
"x ∉ atoms r ⟹ pderiv x r = {}"
by (induct r) auto

lemma Pderiv_no_occurrence:
"x ∉ Atoms R ⟹ Pderiv x R = {}"
by(auto simp:pderiv_no_occurrence Atoms_def Pderiv_def)

lemma Deriv_Lang: "Deriv c (Lang R) = Lang (Pderiv c R)"
by(auto simp: Deriv_pderiv Pderiv_def Lang_def)

lemma Nullable_pderiv[simp]: "Nullable(pderivs w r) = (w : lang r)"
apply(induction w arbitrary: r)
apply (simp add: Nullable_def nullable_iff singleton_iff)
using eqset_imp_iff[OF Deriv_pderiv[where 'a = 'a]]
done

type_synonym 'a Rexp_pair = "'a rexp set * 'a rexp set"
type_synonym 'a Rexp_pairs = "'a Rexp_pair list"

definition is_Bisimulation :: "'a list ⇒ 'a Rexp_pairs ⇒ bool"
where
"is_Bisimulation as ps =
(∀(R,S)∈ set ps. Atoms R ∪ Atoms S ⊆ set as ∧
(Nullable R ⟷ Nullable S) ∧
(∀a∈set as. (Pderiv a R, Pderiv a S) ∈ set ps))"

lemma Bisim_Lang_eq:
assumes Bisim: "is_Bisimulation as ps"
assumes "(R, S) ∈ set ps"
shows "Lang R = Lang S"
proof -
define ps' where "ps' = ({}, {}) # ps"
from Bisim have Bisim': "is_Bisimulation as ps'"
by (fastforce simp: ps'_def is_Bisimulation_def UN_subset_iff Pderiv_def Atoms_def)
let ?R = "λK L. (∃(R,S)∈set ps'. K = Lang R ∧ L = Lang S)"
show ?thesis
proof (rule language_coinduct[where R="?R"])
from ‹(R,S) ∈ set ps›
have "(R,S) ∈ set ps'" by (auto simp: ps'_def)
thus "?R (Lang R) (Lang S)" by auto
next
fix K L assume "?R K L"
then obtain R S where rs: "(R, S) ∈ set ps'"
and KL: "K = Lang R" "L = Lang S" by auto
with Bisim' have "Nullable R ⟷ Nullable S"
by (auto simp: is_Bisimulation_def)
thus "[] ∈ K ⟷ [] ∈ L"
by (auto simp: nullable_iff KL Nullable_def Lang_def)
fix a
show "?R (Deriv a K) (Deriv a L)"
proof cases
assume "a ∈ set as"
with rs Bisim'
have "(Pderiv a R, Pderiv a S) ∈ set ps'"
by (auto simp: is_Bisimulation_def)
thus ?thesis by (fastforce simp: KL Deriv_Lang)
next
assume "a ∉ set as"
with Bisim' rs
have "a ∉ Atoms R ∪ Atoms S"
by (fastforce simp: is_Bisimulation_def UN_subset_iff)
then have "Pderiv a R = {}" "Pderiv a S = {}"
by (metis Pderiv_no_occurrence Un_iff)+
then have "Deriv a K = Lang {}" "Deriv a L = Lang {}"
unfolding KL Deriv_Lang by auto
thus ?thesis by (auto simp: ps'_def)
qed
qed
qed

subsection ‹Closure computation›

fun test :: "'a Rexp_pairs * 'a Rexp_pairs ⇒ bool" where
"test (ws, ps) = (case ws of [] ⇒  False | (R,S)#_ ⇒ Nullable R = Nullable S)"

fun step :: "'a list ⇒
'a Rexp_pairs * 'a Rexp_pairs ⇒ 'a Rexp_pairs * 'a Rexp_pairs"
where "step as (ws,ps) =
(let
(R,S) = hd ws;
ps' = (R,S) # ps;
succs = map (λa. (Pderiv a R, Pderiv a S)) as;
new = filter (λp. p ∉ set ps ∪ set ws) succs
in (remdups new @ tl ws, ps'))"

definition closure ::
"'a list ⇒ 'a Rexp_pairs * 'a Rexp_pairs
⇒ ('a Rexp_pairs * 'a Rexp_pairs) option" where
"closure as = while_option test (step as)"

definition pre_Bisim :: "'a list ⇒ 'a rexp set ⇒ 'a rexp set ⇒
'a Rexp_pairs * 'a Rexp_pairs ⇒ bool"
where
"pre_Bisim as R S = (λ(ws,ps).
((R,S) ∈ set ws ∪ set ps) ∧
(∀(R,S)∈ set ws ∪ set ps. Atoms R ∪ Atoms S ⊆ set as) ∧
(∀(R,S)∈ set ps. (Nullable R ⟷ Nullable S) ∧
(∀a∈set as. (Pderiv a R, Pderiv a S) ∈ set ps ∪ set ws)))"

lemma step_set_eq: "⟦ test (ws,ps); step as (ws,ps) = (ws',ps') ⟧
⟹ set ws' ∪ set ps' =
set ws ∪ set ps
∪ (⋃a∈set as. {(Pderiv a (fst(hd ws)), Pderiv a (snd(hd ws)))})"
by(auto split: list.splits)

theorem closure_sound:
assumes result: "closure as ([(R,S)],[]) = Some([],ps)"
and atoms: "Atoms R ∪ Atoms S ⊆ set as"
shows "Lang R = Lang S"
proof-
{ fix st
have "pre_Bisim as R S st ⟹ test st ⟹ pre_Bisim as R S (step as st)"
unfolding pre_Bisim_def
proof(split prod.splits, elim case_prodE conjE, intro allI impI conjI, goal_cases)
case 1 thus ?case by(auto split: list.splits)
next
case prems: (2 ws ps ws' ps')
note prems(2)[simp]
from ‹test st› obtain wstl R S where [simp]: "ws = (R,S)#wstl"
by (auto split: list.splits)
from ‹step as st = (ws',ps')› obtain P where [simp]: "ps' = (R,S) # ps"
and [simp]: "ws' = remdups(filter P (map (λa. (Pderiv a R, Pderiv a S)) as)) @ wstl"
by auto
have "∀(R',S')∈set wstl ∪ set ps'. Atoms R' ∪ Atoms S' ⊆ set as"
using prems(4) by auto
moreover
have "∀a∈set as. Atoms(Pderiv a R) ∪ Atoms(Pderiv a S) ⊆ set as"
using prems(4) by simp (metis (lifting) Atoms_Pderiv order_trans)
ultimately show ?case by simp blast
next
case 3 thus ?case
apply (clarsimp simp: image_iff split: prod.splits list.splits)
by hypsubst_thin metis
qed
}
moreover
from atoms
have "pre_Bisim as R S ([(R,S)],[])" by (simp add: pre_Bisim_def)
ultimately have pre_Bisim_ps: "pre_Bisim as R S ([],ps)"
by (rule while_option_rule[OF _ result[unfolded closure_def]])
then have "is_Bisimulation as ps" "(R,S) ∈ set ps"
by (auto simp: pre_Bisim_def is_Bisimulation_def)
thus "Lang R = Lang S" by (rule Bisim_Lang_eq)
qed

subsection ‹The overall procedure›

definition check_eqv :: "'a rexp ⇒ 'a rexp ⇒ bool"
where
"check_eqv r s =
Some([],_) ⇒ True | _ ⇒ False)"

lemma soundness: assumes "check_eqv r s" shows "lang r = lang s"
proof -
obtain ps where 1: "closure ?as ([({r},{s})],[]) = Some([],ps)"
using assms by (auto simp: check_eqv_def split:option.splits list.splits)
then have "lang r = lang s"
by(rule closure_sound[of _ "{r}" "{s}", simplified Lang_def, simplified])
thus "lang r = lang s" by simp
qed

text‹Test:›
lemma "check_eqv
(Plus One (Times (Atom 0) (Star(Atom 0))))
(Star(Atom(0::nat)))"
by eval

subsection "Termination and Completeness"

definition PDERIVS :: "'a rexp set => 'a rexp set" where
"PDERIVS R = (UN r:R. pderivs_lang UNIV r)"

lemma PDERIVS_incr[simp]: "R ⊆ PDERIVS R"
by (metis pderivs.simps(1) insertI1)

lemma Pderiv_PDERIVS: assumes "R' ⊆ PDERIVS R" shows "Pderiv a R' ⊆ PDERIVS R"
proof
fix r assume "r : Pderiv a R'"
then obtain r' where "r' : R'" "r : pderiv a r'" by(auto simp: Pderiv_def)
from ‹r' : R'› ‹R' ⊆ PDERIVS R› obtain s w where "s : R" "r' : pderivs w s"
by(auto simp: PDERIVS_def pderivs_lang_def)
hence "r ∈ pderivs (w @ [a]) s" using ‹r : pderiv a r'› by(auto simp add:pderivs_snoc)
thus "r : PDERIVS R" using ‹s : R› by(auto simp: PDERIVS_def pderivs_lang_def)
qed

lemma finite_PDERIVS: "finite R ⟹ finite(PDERIVS R)"

lemma closure_Some: assumes "finite R0" "finite S0" shows "∃p. closure as ([(R0,S0)],[]) = Some p"
proof(unfold closure_def)
let ?Inv = "%(ws,bs).  distinct ws ∧ (ALL (R,S) : set ws. R ⊆ PDERIVS R0 ∧ S ⊆ PDERIVS S0 ∧ (R,S) ∉ set bs)"
let ?m1 = "%bs. Pow(PDERIVS R0) × Pow(PDERIVS S0) - set bs"
let ?m2 = "%(ws,bs). card(?m1 bs)"
have Inv0: "?Inv ([(R0, S0)], [])" by simp
{ fix s assume "test s" "?Inv s"
obtain ws bs where [simp]: "s = (ws,bs)" by fastforce
from ‹test s› obtain R S ws' where [simp]: "ws = (R,S)#ws'"
by(auto split: prod.splits list.splits)
let ?bs' = "(R,S) # bs"
let ?succs = "map (λa. (Pderiv a R, Pderiv a S)) as"
let ?new = "filter (λp. p ∉ set bs ∪ set ws) ?succs"
let ?ws' = "remdups ?new @ ws'"
have *: "?Inv (step as s)"
proof -
from ‹?Inv s› have "distinct ?ws'" by auto
have "ALL (R,S) : set ?ws'. R ⊆ PDERIVS R0 ∧ S ⊆ PDERIVS S0 ∧ (R,S) ∉ set ?bs'" using ‹?Inv s›
by(simp add: Ball_def image_iff) (metis Pderiv_PDERIVS)
with ‹distinct ?ws'› show ?thesis by(simp)
qed
have "?m2(step as s) < ?m2 s"
proof -
have "finite(?m1 bs)"
by(metis assms finite_Diff finite_PDERIVS finite_cartesian_product finite_Pow_iff)
moreover have "?m2(step as s) < ?m2 s" using ‹?Inv s›
by(auto intro: psubset_card_mono[OF ‹finite(?m1 bs)›])
then show ?thesis using ‹?Inv s› by simp
qed
note * and this
} note step = this
show "∃p. while_option test (step as) ([(R0, S0)], []) = Some p"
by(rule measure_while_option_Some [where P = ?Inv and f = ?m2, OF _ Inv0])(simp add: step)
qed

theorem closure_Some_Inv: assumes "closure as ([({r},{s})],[]) = Some p"
shows "∀(R,S)∈set(fst p). ∃w. R = pderivs w r ∧ S = pderivs w s" (is "?Inv p")
proof-
from assms have 1: "while_option test (step as) ([({r},{s})],[]) = Some p"
have Inv0: "?Inv ([({r},{s})],[])" by simp (metis pderivs.simps(1))
{ fix p assume "?Inv p" "test p"
obtain ws bs where [simp]: "p = (ws,bs)" by fastforce
from ‹test p› obtain R S ws' where [simp]: "ws = (R,S)#ws'"
by(auto split: prod.splits list.splits)
let ?succs = "map (λa. (Pderiv a R, Pderiv a S)) as"
let ?new = "filter (λp. p ∉ set bs ∪ set ws) ?succs"
let ?ws' = "remdups ?new @ ws'"
from ‹?Inv p› obtain w where [simp]: "R = pderivs w r" "S = pderivs w s"
by auto
{ fix x assume "x : set as"
have "EX w. Pderiv x R = pderivs w r ∧ Pderiv x S = pderivs w s"
by(rule_tac x="w@[x]" in exI)(simp add: pderivs_append Pderiv_def)
}
with ‹?Inv p› have "?Inv (step as p)" by auto
} note Inv_step = this
show ?thesis
apply(rule while_option_rule[OF _ 1])
apply(erule (1) Inv_step)
apply(rule Inv0)
done
qed

lemma closure_complete: assumes "lang r = lang s"
shows "EX bs. closure as ([({r},{s})],[]) = Some([],bs)" (is ?C)
proof(rule ccontr)
assume "¬ ?C"
then obtain R S ws bs
where cl: "closure as ([({r},{s})],[]) = Some((R,S)#ws,bs)"
using closure_Some[of "{r}" "{s}", simplified]
by (metis (hide_lams, no_types) list.exhaust prod.exhaust)
from assms closure_Some_Inv[OF this]
while_option_stop[OF cl[unfolded closure_def]]
show "False" by auto
qed

corollary completeness: "lang r = lang s ⟹ check_eqv r s"
by(auto simp add: check_eqv_def dest!: closure_complete
split: option.split list.split)

end


# Theory Regular_Exp2

(* Author: Tobias Nipkow *)

section "Extended Regular Expressions"

theory Regular_Exp2
imports Regular_Set
begin

datatype (atoms: 'a) rexp =
is_Zero: Zero |
is_One: One |
Atom 'a |
Plus "('a rexp)" "('a rexp)" |
Times "('a rexp)" "('a rexp)" |
Star "('a rexp)" |
Not "('a rexp)" |
Inter "('a rexp)" "('a rexp)"

context
fixes S :: "'a set"
begin

primrec lang :: "'a rexp => 'a lang" where
"lang Zero = {}" |
"lang One = {[]}" |
"lang (Atom a) = {[a]}" |
"lang (Plus r s) = (lang r) Un (lang s)" |
"lang (Times r s) = conc (lang r) (lang s)" |
"lang (Star r) = star(lang r)" |
"lang (Not r) = lists S - lang r" |
"lang (Inter r s) = (lang r Int lang s)"

end

lemma lang_subset_lists: "atoms r ⊆ S ⟹ lang S r ⊆ lists S"
by(induction r)(auto simp: conc_subset_lists star_subset_lists)

primrec nullable :: "'a rexp ⇒ bool" where
"nullable Zero = False" |
"nullable One = True" |
"nullable (Atom c) = False" |
"nullable (Plus r1 r2) = (nullable r1 ∨ nullable r2)" |
"nullable (Times r1 r2) = (nullable r1 ∧ nullable r2)" |
"nullable (Star r) = True" |
"nullable (Not r) = (¬ (nullable r))" |
"nullable (Inter r s) = (nullable r ∧ nullable s)"

lemma nullable_iff: "nullable r ⟷ [] ∈ lang S r"
by (induct r) (auto simp add: conc_def split: if_splits)

end


# Theory Equivalence_Checking2

section ‹Deciding Equivalence of Extended Regular Expressions›

theory Equivalence_Checking2
imports Regular_Exp2 "HOL-Library.While_Combinator"
begin

subsection ‹Term ordering›

fun le_rexp :: "nat rexp ⇒ nat rexp ⇒ bool"
where
"le_rexp Zero _ = True"
| "le_rexp _ Zero = False"
| "le_rexp One _ = True"
| "le_rexp _ One = False"
| "le_rexp (Atom a) (Atom b) = (a <= b)"
| "le_rexp (Atom _) _ = True"
| "le_rexp _ (Atom _) = False"
| "le_rexp (Star r) (Star s) = le_rexp r s"
| "le_rexp (Star _) _ = True"
| "le_rexp _ (Star _) = False"
| "le_rexp (Not r) (Not s) = le_rexp r s"
| "le_rexp (Not _) _ = True"
| "le_rexp _ (Not _) = False"
| "le_rexp (Plus r r') (Plus s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Plus _ _) _ = True"
| "le_rexp _ (Plus _ _) = False"
| "le_rexp (Times r r') (Times s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Times _ _) _ = True"
| "le_rexp _ (Times _ _) = False"
| "le_rexp (Inter r r') (Inter s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"

subsection ‹Normalizing operations›

text ‹associativity, commutativity, idempotence, zero›

fun nPlus :: "nat rexp ⇒ nat rexp ⇒ nat rexp"
where
"nPlus Zero r = r"
| "nPlus r Zero = r"
| "nPlus (Plus r s) t = nPlus r (nPlus s t)"
| "nPlus r (Plus s t) =
(if r = s then (Plus s t)
else if le_rexp r s then Plus r (Plus s t)
else Plus s (nPlus r t))"
| "nPlus r s =
(if r = s then r
else if le_rexp r s then Plus r s
else Plus s r)"

lemma lang_nPlus[simp]: "lang S (nPlus r s) = lang S (Plus r s)"
by (induct r s rule: nPlus.induct) auto

text ‹associativity, zero, one›

fun nTimes :: "nat rexp ⇒ nat rexp ⇒ nat rexp"
where
"nTimes Zero _ = Zero"
| "nTimes _ Zero = Zero"
| "nTimes One r = r"
| "nTimes r One = r"
| "nTimes (Times r s) t = Times r (nTimes s t)"
| "nTimes r s = Times r s"

lemma lang_nTimes[simp]: "lang S (nTimes r s) = lang S (Times r s)"
by (induct r s rule: nTimes.induct) (auto simp: conc_assoc)

text ‹more optimisations:›

fun nInter :: "nat rexp ⇒ nat rexp ⇒ nat rexp"
where
"nInter Zero _ = Zero"
| "nInter _ Zero = Zero"
| "nInter r s = Inter r s"

lemma lang_nInter[simp]: "lang S (nInter r s) = lang S (Inter r s)"
by (induct r s rule: nInter.induct) (auto)

primrec norm :: "nat rexp ⇒ nat rexp"
where
"norm Zero = Zero"
| "norm One = One"
| "norm (Atom a) = Atom a"
| "norm (Plus r s) = nPlus (norm r) (norm s)"
| "norm (Times r s) = nTimes (norm r) (norm s)"
| "norm (Star r) = Star (norm r)"
| "norm (Not r) = Not (norm r)"
| "norm (Inter r1 r2) = nInter (norm r1) (norm r2)"

lemma lang_norm[simp]: "lang S (norm r) = lang S r"
by (induct r) auto

subsection ‹Derivative›

primrec nderiv :: "nat ⇒ nat rexp ⇒ nat rexp"
where
"nderiv _ Zero = Zero"
| "nderiv _ One = Zero"
| "nderiv a (Atom b) = (if a = b then One else Zero)"
| "nderiv a (Plus r s) = nPlus (nderiv a r) (nderiv a s)"
| "nderiv a (Times r s) =
(let r's = nTimes (nderiv a r) s
in if nullable r then nPlus r's (nderiv a s) else r's)"
| "nderiv a (Star r) = nTimes (nderiv a r) (Star r)"
| "nderiv a (Not r) = Not (nderiv a r)"
| "nderiv a (Inter r1 r2) = nInter (nderiv a r1) (nderiv a r2)"

lemma lang_nderiv: "a:S ⟹ lang S (nderiv a r) = Deriv a (lang S r)"
by (induct r) (auto simp: Let_def nullable_iff[where S=S])

lemma atoms_nPlus[simp]: "atoms (nPlus r s) = atoms r ∪ atoms s"
by (induct r s rule: nPlus.induct) auto

lemma atoms_nTimes: "atoms (nTimes r s) ⊆ atoms r ∪ atoms s"
by (induct r s rule: nTimes.induct) auto

lemma atoms_nInter: "atoms (nInter r s) ⊆ atoms r ∪ atoms s"
by (induct r s rule: nInter.induct) auto

lemma atoms_norm: "atoms (norm r) ⊆ atoms r"
by (induct r) (auto dest!:subsetD[OF atoms_nTimes]subsetD[OF atoms_nInter])

lemma atoms_nderiv: "atoms (nderiv a r) ⊆ atoms r"
by (induct r) (auto simp: Let_def dest!:subsetD[OF atoms_nTimes]subsetD[OF atoms_nInter])

subsection ‹Bisimulation between languages and regular expressions›

context
fixes S :: "'a set"
begin

coinductive bisimilar :: "'a lang ⇒ 'a lang ⇒ bool" where
"K ⊆ lists S ⟹ L ⊆ lists S
⟹ ([] ∈ K ⟷ [] ∈ L)
⟹ (⋀x. x:S ⟹ bisimilar (Deriv x K) (Deriv x L))
⟹ bisimilar K L"

lemma equal_if_bisimilar:
assumes "K ⊆ lists S" "L ⊆ lists S" "bisimilar K L" shows "K = L"
proof (rule set_eqI)
fix w
from assms show "w ∈ K ⟷ w ∈ L"
proof (induction w arbitrary: K L)
case Nil thus ?case by (auto elim: bisimilar.cases)
next
case (Cons a w K L)
show ?case
proof cases
assume "a : S"
with ‹bisimilar K L› have "bisimilar (Deriv a K) (Deriv a L)"
by (auto elim: bisimilar.cases)
then have "w ∈ Deriv a K ⟷ w ∈ Deriv a L"
by (metis Cons.IH bisimilar.cases)
thus ?case by (auto simp: Deriv_def)
next
assume "a ∉ S"
thus ?case using Cons.prems by auto
qed
qed
qed

lemma language_coinduct:
fixes R (infixl "∼" 50)
assumes "⋀K L. K ∼ L ⟹ K ⊆ lists S ∧ L ⊆ lists S"
assumes "K ∼ L"
assumes "⋀K L. K ∼ L ⟹ ([] ∈ K ⟷ [] ∈ L)"
assumes "⋀K L x. K ∼ L ⟹ x : S ⟹ Deriv x K ∼ Deriv x L"
shows "K = L"
apply (rule equal_if_bisimilar)
apply (metis assms(1) assms(2))
apply (metis assms(1) assms(2))
apply (rule bisimilar.coinduct[of R, OF ‹K ∼ L›])
apply (auto simp: assms)
done

end

type_synonym rexp_pair = "nat rexp * nat rexp"
type_synonym rexp_pairs = "rexp_pair list"

definition is_bisimulation :: "nat list ⇒ rexp_pairs ⇒ bool"
where
"is_bisimulation as ps =
(∀(r,s)∈ set ps. (atoms r ∪ atoms s ⊆ set as) ∧ (nullable r ⟷ nullable s) ∧
(∀a∈set as. (nderiv a r, nderiv a s) ∈ set ps))"

lemma bisim_lang_eq:
assumes bisim: "is_bisimulation as ps"
assumes "(r, s) ∈ set ps"
shows "lang (set as) r = lang (set as) s"
proof -
let ?R = "λK L. (∃(r,s)∈set ps. K = lang (set as) r ∧ L = lang (set as) s)"
show ?thesis
proof (rule language_coinduct[where R="?R" and S = "set as"])
from ‹(r, s) ∈ set ps› show "?R (lang (set as) r) (lang (set as) s)"
by auto
next
fix K L assume "?R K L"
then obtain r s where rs: "(r, s) ∈ set ps"
and KL: "K = lang (set as) r" "L = lang (set as) s" by auto
with bisim have "nullable r ⟷ nullable s"
by (auto simp: is_bisimulation_def)
thus "[] ∈ K ⟷ [] ∈ L" by (auto simp: nullable_iff[where S="set as"] KL)
txt‹next case, but shared context›
from bisim rs KL lang_subset_lists[of _ "set as"]
show "K ⊆ lists (set as) ∧ L ⊆ lists (set as)"
unfolding is_bisimulation_def by blast
txt‹next case, but shared context›
fix a assume "a ∈ set as"
with rs bisim
have "(nderiv a r, nderiv a s) ∈ set ps"
by (auto simp: is_bisimulation_def)
thus "?R (Deriv a K) (Deriv a L)" using ‹a ∈ set as›
by (force simp: KL lang_nderiv)
qed
qed

subsection ‹Closure computation›

fun test :: "rexp_pairs * rexp_pairs ⇒ bool"
where "test (ws, ps) = (case ws of [] ⇒  False | (p,q)#_ ⇒ nullable p = nullable q)"

fun step :: "nat list ⇒ rexp_pairs * rexp_pairs ⇒ rexp_pairs * rexp_pairs"
where "step as (ws,ps) =
(let
(r, s) = hd ws;
ps' = (r, s) # ps;
succs = map (λa. (nderiv a r, nderiv a s)) as;
new = filter (λp. p ∉ set ps' ∪ set ws) succs
in (new @ tl ws, ps'))"

definition closure ::
"nat list ⇒ rexp_pairs * rexp_pairs
⇒ (rexp_pairs * rexp_pairs) option" where
"closure as = while_option test (step as)"

definition pre_bisim :: "nat list ⇒ nat rexp ⇒ nat rexp ⇒
rexp_pairs * rexp_pairs ⇒ bool"
where
"pre_bisim as r s = (λ(ws,ps).
((r, s) ∈ set ws ∪ set ps) ∧
(∀(r,s)∈ set ws ∪ set ps. atoms r ∪ atoms s ⊆ set as) ∧
(∀(r,s)∈ set ps. (nullable r ⟷ nullable s) ∧
(∀a∈set as. (nderiv a r, nderiv a s) ∈ set ps ∪ set ws)))"

theorem closure_sound:
assumes result: "closure as ([(r,s)],[]) = Some([],ps)"
and atoms: "atoms r ∪ atoms s ⊆ set as"
shows "lang (set as) r = lang (set as) s"
proof-
{ fix st have "pre_bisim as r s st ⟹ test st ⟹ pre_bisim as r s (step as st)"
unfolding pre_bisim_def
by (cases st) (auto simp: split_def split: list.splits prod.splits
dest!: subsetD[OF atoms_nderiv]) }
moreover
from atoms
have "pre_bisim as r s ([(r,s)],[])" by (simp add: pre_bisim_def)
ultimately have pre_bisim_ps: "pre_bisim as r s ([],ps)"
by (rule while_option_rule[OF _ result[unfolded closure_def]])
then have "is_bisimulation as ps" "(r, s) ∈ set ps"
by (auto simp: pre_bisim_def is_bisimulation_def)
thus "lang (set as) r = lang (set as) s" by (rule bisim_lang_eq)
qed

subsection ‹The overall procedure›

primrec add_atoms :: "nat rexp ⇒ nat list ⇒ nat list"
where
| "add_atoms (Atom a) = List.insert a"

lemma set_add_atoms: "set (add_atoms r as) = atoms r ∪ set as"
by (induct r arbitrary: as) auto

definition check_eqv :: "nat list ⇒ nat rexp ⇒ nat rexp ⇒ bool"
where
"check_eqv as r s ⟷ set(add_atoms r (add_atoms s [])) ⊆ set as ∧
(case closure as ([(norm r, norm s)], []) of
Some([],_) ⇒ True | _ ⇒ False)"

lemma soundness:
assumes "check_eqv as r s" shows "lang (set as) r = lang (set as) s"
proof -
obtain ps where cl: "closure as ([(norm r,norm s)],[]) = Some([],ps)"
and at: "atoms r ∪ atoms s ⊆ set as"
using assms
by (auto simp: check_eqv_def set_add_atoms split:option.splits list.splits)
hence "atoms(norm r) ∪ atoms(norm s) ⊆ set as"
using atoms_norm by blast
hence "lang (set as) (norm r) = lang (set as) (norm s)"
by (rule closure_sound[OF cl])
thus "lang (set as) r = lang (set as) s" by simp
qed

lemma "check_eqv [0] (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))"
by eval

lemma "check_eqv [0,1] (Not(Atom 0))
(Plus One (Times (Plus (Atom 1) (Times (Atom 0) (Plus (Atom 0) (Atom 1))))
(Star(Plus (Atom 0) (Atom 1)))))"
by eval

lemma "check_eqv [0] (Atom 0) (Inter (Star (Atom 0)) (Atom 0))"
by eval

end