Session Regular-Sets

Theory Regular_Set

(*  Author: Tobias Nipkow, Alex Krauss, Christian Urban  *)

section "Regular sets"

theory Regular_Set
imports Main
begin

type_synonym 'a lang = "'a list set"

definition conc :: "'a lang  'a lang  'a lang" (infixr "@@" 75) where
"A @@ B = {xs@ys | xs ys. xs:A & ys:B}"

text ‹checks the code preprocessor for set comprehensions›
export_code conc checking SML

overloading lang_pow == "compow :: nat  'a lang  'a lang"
begin
  primrec lang_pow :: "nat  'a lang  'a lang" where
  "lang_pow 0 A = {[]}" |
  "lang_pow (Suc n) A = A @@ (lang_pow n A)"
end

text ‹for code generation›

definition lang_pow :: "nat  'a lang  'a lang" where
  lang_pow_code_def [code_abbrev]: "lang_pow = compow"

lemma [code]:
  "lang_pow (Suc n) A = A @@ (lang_pow n A)"
  "lang_pow 0 A = {[]}"
  by (simp_all add: lang_pow_code_def)

hide_const (open) lang_pow

definition star :: "'a lang  'a lang" where
"star A = (n. A ^^ n)"


subsection@{term "(@@)"}

lemma concI[simp,intro]: "u : A  v : B  u@v : A @@ B"
by (auto simp add: conc_def)

lemma concE[elim]: 
assumes "w  A @@ B"
obtains u v where "u  A" "v  B" "w = u@v"
using assms by (auto simp: conc_def)

lemma conc_mono: "A  C  B  D  A @@ B  C @@ D"
by (auto simp: conc_def) 

lemma conc_empty[simp]: shows "{} @@ A = {}" and "A @@ {} = {}"
by auto

lemma conc_epsilon[simp]: shows "{[]} @@ A = A" and "A @@ {[]} = A"
by (simp_all add:conc_def)

lemma conc_assoc: "(A @@ B) @@ C = A @@ (B @@ C)"
by (auto elim!: concE) (simp only: append_assoc[symmetric] concI)

lemma conc_Un_distrib:
shows "A @@ (B  C) = A @@ B  A @@ C"
and   "(A  B) @@ C = A @@ C  B @@ C"
by auto

lemma conc_UNION_distrib:
shows "A @@ (M ` I) = ((%i. A @@ M i) ` I)"
and   "(M ` I) @@ A = ((%i. M i @@ A) ` I)"
by auto

lemma conc_subset_lists: "A  lists S  B  lists S  A @@ B  lists S"
by(fastforce simp: conc_def in_lists_conv_set)

lemma Nil_in_conc[simp]: "[]  A @@ B  []  A  []  B"
by (metis append_is_Nil_conv concE concI)

lemma concI_if_Nil1: "[]  A  xs : B  xs  A @@ B"
by (metis append_Nil concI)

lemma conc_Diff_if_Nil1: "[]  A  A @@ B = (A - {[]}) @@ B  B"
by (fastforce elim: concI_if_Nil1)

lemma concI_if_Nil2: "[]  B  xs : A  xs  A @@ B"
by (metis append_Nil2 concI)

lemma conc_Diff_if_Nil2: "[]  B  A @@ B = A @@ (B - {[]})  A"
by (fastforce elim: concI_if_Nil2)

lemma singleton_in_conc:
  "[x] : A @@ B  [x] : A  [] : B  [] : A  [x] : B"
by (fastforce simp: Cons_eq_append_conv append_eq_Cons_conv
       conc_Diff_if_Nil1 conc_Diff_if_Nil2)


subsection@{term "A ^^ n"}

lemma lang_pow_add: "A ^^ (n + m) = A ^^ n @@ A ^^ m"
by (induct n) (auto simp: conc_assoc)

lemma lang_pow_empty: "{} ^^ n = (if n = 0 then {[]} else {})"
by (induct n) auto

lemma lang_pow_empty_Suc[simp]: "({}::'a lang) ^^ Suc n = {}"
by (simp add: lang_pow_empty)

lemma conc_pow_comm:
  shows "A @@ (A ^^ n) = (A ^^ n) @@ A"
by (induct n) (simp_all add: conc_assoc[symmetric])

lemma length_lang_pow_ub:
  "w  A. length w  k  w : A^^n  length w  k*n"
by(induct n arbitrary: w) (fastforce simp: conc_def)+

lemma length_lang_pow_lb:
  "w  A. length w  k  w : A^^n  length w  k*n"
by(induct n arbitrary: w) (fastforce simp: conc_def)+

lemma lang_pow_subset_lists: "A  lists S  A ^^ n  lists S"
by(induct n)(auto simp: conc_subset_lists)


subsection@{const star}

lemma star_subset_lists: "A  lists S  star A  lists S"
unfolding star_def by(blast dest: lang_pow_subset_lists)

lemma star_if_lang_pow[simp]: "w : A ^^ n  w : star A"
by (auto simp: star_def)

lemma Nil_in_star[iff]: "[] : star A"
proof (rule star_if_lang_pow)
  show "[] : A ^^ 0" by simp
qed

lemma star_if_lang[simp]: assumes "w : A" shows "w : star A"
proof (rule star_if_lang_pow)
  show "w : A ^^ 1" using w : A by simp
qed

lemma append_in_starI[simp]:
assumes "u : star A" and "v : star A" shows "u@v : star A"
proof -
  from u : star A obtain m where "u : A ^^ m" by (auto simp: star_def)
  moreover
  from v : star A obtain n where "v : A ^^ n" by (auto simp: star_def)
  ultimately have "u@v : A ^^ (m+n)" by (simp add: lang_pow_add)
  thus ?thesis by simp
qed

lemma conc_star_star: "star A @@ star A = star A"
by (auto simp: conc_def)

lemma conc_star_comm:
  shows "A @@ star A = star A @@ A"
unfolding star_def conc_pow_comm conc_UNION_distrib
by simp

lemma star_induct[consumes 1, case_names Nil append, induct set: star]:
assumes "w : star A"
  and "P []"
  and step: "!!u v. u : A  v : star A  P v  P (u@v)"
shows "P w"
proof -
  { fix n have "w : A ^^ n  P w"
    by (induct n arbitrary: w) (auto intro: P [] step star_if_lang_pow) }
  with w : star A show "P w" by (auto simp: star_def)
qed

lemma star_empty[simp]: "star {} = {[]}"
by (auto elim: star_induct)

lemma star_epsilon[simp]: "star {[]} = {[]}"
by (auto elim: star_induct)

lemma star_idemp[simp]: "star (star A) = star A"
by (auto elim: star_induct)

lemma star_unfold_left: "star A = A @@ star A  {[]}" (is "?L = ?R")
proof
  show "?L  ?R" by (rule, erule star_induct) auto
qed auto

lemma concat_in_star: "set ws  A  concat ws : star A"
by (induct ws) simp_all

lemma in_star_iff_concat:
  "w  star A = (ws. set ws  A  w = concat ws)"
  (is "_ = (ws. ?R w ws)")
proof
  assume "w : star A" thus "ws. ?R w ws"
  proof induct
    case Nil have "?R [] []" by simp
    thus ?case ..
  next
    case (append u v)
    then obtain ws where "set ws  A  v = concat ws" by blast
    with append have "?R (u@v) (u#ws)" by auto
    thus ?case ..
  qed
next
  assume "us. ?R w us" thus "w : star A"
  by (auto simp: concat_in_star)
qed

lemma star_conv_concat: "star A = {concat ws|ws. set ws  A}"
by (fastforce simp: in_star_iff_concat)

lemma star_insert_eps[simp]: "star (insert [] A) = star(A)"
proof-
  { fix us
    have "set us  insert [] A  vs. concat us = concat vs  set vs  A"
      (is "?P  vs. ?Q vs")
    proof
      let ?vs = "filter (%u. u  []) us"
      show "?P  ?Q ?vs" by (induct us) auto
    qed
  } thus ?thesis by (auto simp: star_conv_concat)
qed

lemma star_unfold_left_Nil: "star A = (A - {[]}) @@ (star A)  {[]}"
by (metis insert_Diff_single star_insert_eps star_unfold_left)

lemma star_Diff_Nil_fold: "(A - {[]}) @@ star A = star A - {[]}"
proof -
  have "[]  (A - {[]}) @@ star A" by simp
  thus ?thesis using star_unfold_left_Nil by blast
qed

lemma star_decom: 
  assumes a: "x  star A" "x  []"
  shows "a b. x = a @ b  a  []  a  A  b  star A"
using a by (induct rule: star_induct) (blast)+


subsection ‹Left-Quotients of languages›

definition Deriv :: "'a  'a lang  'a lang"
  where "Deriv x A = { xs. x#xs  A }"
    
definition Derivs :: "'a list  'a lang  'a lang"
where "Derivs xs A = { ys. xs @ ys  A }"

abbreviation 
  Derivss :: "'a list  'a lang set  'a lang"
where
  "Derivss s As   (Derivs s ` As)"


lemma Deriv_empty[simp]:   "Deriv a {} = {}"
  and Deriv_epsilon[simp]: "Deriv a {[]} = {}"
  and Deriv_char[simp]:    "Deriv a {[b]} = (if a = b then {[]} else {})"
  and Deriv_union[simp]:   "Deriv a (A  B) = Deriv a A  Deriv a B"
  and Deriv_inter[simp]:   "Deriv a (A  B) = Deriv a A  Deriv a B"
  and Deriv_compl[simp]:   "Deriv a (-A) = - Deriv a A"
  and Deriv_Union[simp]:   "Deriv a (Union M) = Union(Deriv a ` M)"
  and Deriv_UN[simp]:      "Deriv a (UN x:I. S x) = (UN x:I. Deriv a (S x))"
by (auto simp: Deriv_def)

lemma Der_conc [simp]: 
  shows "Deriv c (A @@ B) = (Deriv c A) @@ B  (if []  A then Deriv c B else {})"
unfolding Deriv_def conc_def
by (auto simp add: Cons_eq_append_conv)

lemma Deriv_star [simp]: 
  shows "Deriv c (star A) = (Deriv c A) @@ star A"
proof -
  have "Deriv c (star A) = Deriv c ({[]}  A @@ star A)"
    by (metis star_unfold_left sup.commute)
  also have "... = Deriv c (A @@ star A)"
    unfolding Deriv_union by (simp)
  also have "... = (Deriv c A) @@ (star A)  (if []  A then Deriv c (star A) else {})"
    by simp
  also have "... =  (Deriv c A) @@ star A"
    unfolding conc_def Deriv_def
    using star_decom by (force simp add: Cons_eq_append_conv)
  finally show "Deriv c (star A) = (Deriv c A) @@ star A" .
qed

lemma Deriv_diff[simp]:   
  shows "Deriv c (A - B) = Deriv c A - Deriv c B"
by(auto simp add: Deriv_def)

lemma Deriv_lists[simp]: "c : S  Deriv c (lists S) = lists S"
by(auto simp add: Deriv_def)

lemma Derivs_simps [simp]:
  shows "Derivs [] A = A"
  and   "Derivs (c # s) A = Derivs s (Deriv c A)"
  and   "Derivs (s1 @ s2) A = Derivs s2 (Derivs s1 A)"
unfolding Derivs_def Deriv_def by auto

lemma in_fold_Deriv: "v  fold Deriv w L  w @ v  L"
  by (induct w arbitrary: L) (simp_all add: Deriv_def)

lemma Derivs_alt_def [code]: "Derivs w L = fold Deriv w L"
  by (induct w arbitrary: L) simp_all

lemma Deriv_code [code]: 
  "Deriv x A = tl ` Set.filter (λxs. case xs of x' # _  x = x' | _  False) A"
  by (auto simp: Deriv_def Set.filter_def image_iff tl_def split: list.splits)

subsection ‹Shuffle product›

definition Shuffle (infixr "" 80) where
  "Shuffle A B = {shuffles xs ys | xs ys. xs  A  ys  B}"

lemma Deriv_Shuffle[simp]:
  "Deriv a (A  B) = Deriv a A  B  A  Deriv a B"
  unfolding Shuffle_def Deriv_def by (fastforce simp: Cons_in_shuffles_iff neq_Nil_conv)

lemma shuffle_subset_lists:
  assumes "A  lists S" "B  lists S"
  shows "A  B  lists S"
unfolding Shuffle_def proof safe
  fix x and zs xs ys :: "'a list"
  assume zs: "zs  shuffles xs ys" "x  set zs" and "xs  A" "ys  B"
  with assms have "xs  lists S" "ys  lists S" by auto
  with zs show "x  S" by (induct xs ys arbitrary: zs rule: shuffles.induct) auto
qed

lemma Nil_in_Shuffle[simp]: "[]  A  B  []  A  []  B"
  unfolding Shuffle_def by force

lemma shuffle_Un_distrib:
shows "A  (B  C) = A  B  A  C"
and   "A  (B  C) = A  B  A  C"
unfolding Shuffle_def by fast+

lemma shuffle_UNION_distrib:
shows "A  (M ` I) = ((%i. A  M i) ` I)"
and   "(M ` I)  A = ((%i. M i  A) ` I)"
unfolding Shuffle_def by fast+

lemma Shuffle_empty[simp]:
  "A  {} = {}"
  "{}  B = {}"
  unfolding Shuffle_def by auto

lemma Shuffle_eps[simp]:
  "A  {[]} = A"
  "{[]}  B = B"
  unfolding Shuffle_def by auto


subsection ‹Arden's Lemma›

lemma arden_helper:
  assumes eq: "X = A @@ X  B"
  shows "X = (A ^^ Suc n) @@ X  (mn. (A ^^ m) @@ B)"
proof (induct n)
  case 0 
  show "X = (A ^^ Suc 0) @@ X  (m0. (A ^^ m) @@ B)"
    using eq by simp
next
  case (Suc n)
  have ih: "X = (A ^^ Suc n) @@ X  (mn. (A ^^ m) @@ B)" by fact
  also have " = (A ^^ Suc n) @@ (A @@ X  B)  (mn. (A ^^ m) @@ B)" using eq by simp
  also have " = (A ^^ Suc (Suc n)) @@ X  ((A ^^ Suc n) @@ B)  (mn. (A ^^ m) @@ B)"
    by (simp add: conc_Un_distrib conc_assoc[symmetric] conc_pow_comm)
  also have " = (A ^^ Suc (Suc n)) @@ X  (mSuc n. (A ^^ m) @@ B)"
    by (auto simp add: atMost_Suc)
  finally show "X = (A ^^ Suc (Suc n)) @@ X  (mSuc n. (A ^^ m) @@ B)" .
qed

lemma Arden:
  assumes "[]  A" 
  shows "X = A @@ X  B  X = star A @@ B"
proof
  assume eq: "X = A @@ X  B"
  { fix w assume "w : X"
    let ?n = "size w"
    from []  A have "u  A. length u  1"
      by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq)
    hence "u  A^^(?n+1). length u  ?n+1"
      by (metis length_lang_pow_lb nat_mult_1)
    hence "u  A^^(?n+1)@@X. length u  ?n+1"
      by(auto simp only: conc_def length_append)
    hence "w  A^^(?n+1)@@X" by auto
    hence "w : star A @@ B" using w : X using arden_helper[OF eq, where n="?n"]
      by (auto simp add: star_def conc_UNION_distrib)
  } moreover
  { fix w assume "w : star A @@ B"
    hence "n. w  A^^n @@ B" by(auto simp: conc_def star_def)
    hence "w : X" using arden_helper[OF eq] by blast
  } ultimately show "X = star A @@ B" by blast 
next
  assume eq: "X = star A @@ B"
  have "star A = A @@ star A  {[]}"
    by (rule star_unfold_left)
  then have "star A @@ B = (A @@ star A  {[]}) @@ B"
    by metis
  also have " = (A @@ star A) @@ B  B"
    unfolding conc_Un_distrib by simp
  also have " = A @@ (star A @@ B)  B" 
    by (simp only: conc_assoc)
  finally show "X = A @@ X  B" 
    using eq by blast 
qed


lemma reversed_arden_helper:
  assumes eq: "X = X @@ A  B"
  shows "X = X @@ (A ^^ Suc n)  (mn. B @@ (A ^^ m))"
proof (induct n)
  case 0 
  show "X = X @@ (A ^^ Suc 0)  (m0. B @@ (A ^^ m))"
    using eq by simp
next
  case (Suc n)
  have ih: "X = X @@ (A ^^ Suc n)  (mn. B @@ (A ^^ m))" by fact
  also have " = (X @@ A  B) @@ (A ^^ Suc n)  (mn. B @@ (A ^^ m))" using eq by simp
  also have " = X @@ (A ^^ Suc (Suc n))  (B @@ (A ^^ Suc n))  (mn. B @@ (A ^^ m))"
    by (simp add: conc_Un_distrib conc_assoc)
  also have " = X @@ (A ^^ Suc (Suc n))  (mSuc n. B @@ (A ^^ m))"
    by (auto simp add: atMost_Suc)
  finally show "X = X @@ (A ^^ Suc (Suc n))  (mSuc n. B @@ (A ^^ m))" .
qed

theorem reversed_Arden:
  assumes nemp: "[]  A"
  shows "X = X @@ A  B  X = B @@ star A"
proof
 assume eq: "X = X @@ A  B"
  { fix w assume "w : X"
    let ?n = "size w"
    from []  A have "u  A. length u  1"
      by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq)
    hence "u  A^^(?n+1). length u  ?n+1"
      by (metis length_lang_pow_lb nat_mult_1)
    hence "u  X @@ A^^(?n+1). length u  ?n+1"
      by(auto simp only: conc_def length_append)
    hence "w  X @@ A^^(?n+1)" by auto
    hence "w : B @@ star A" using w : X using reversed_arden_helper[OF eq, where n="?n"]
      by (auto simp add: star_def conc_UNION_distrib)
  } moreover
  { fix w assume "w : B @@ star A"
    hence "n. w  B @@ A^^n" by (auto simp: conc_def star_def)
    hence "w : X" using reversed_arden_helper[OF eq] by blast
  } ultimately show "X = B @@ star A" by blast 
next 
  assume eq: "X = B @@ star A"
  have "star A = {[]}  star A @@ A" 
    unfolding conc_star_comm[symmetric]
    by(metis Un_commute star_unfold_left)
  then have "B @@ star A = B @@ ({[]}  star A @@ A)"
    by metis
  also have " = B  B @@ (star A @@ A)"
    unfolding conc_Un_distrib by simp
  also have " = B  (B @@ star A) @@ A" 
    by (simp only: conc_assoc)
  finally show "X = X @@ A  B" 
    using eq by blast 
qed

end

Theory Regular_Exp

(*  Author: Tobias Nipkow *)

section "Regular expressions"

theory Regular_Exp
imports Regular_Set
begin

datatype (atoms: 'a) rexp =
  is_Zero: Zero |
  is_One: One |
  Atom 'a |
  Plus "('a rexp)" "('a rexp)" |
  Times "('a rexp)" "('a rexp)" |
  Star "('a rexp)"

primrec lang :: "'a rexp => 'a lang" where
"lang Zero = {}" |
"lang One = {[]}" |
"lang (Atom a) = {[a]}" |
"lang (Plus r s) = (lang r) Un (lang s)" |
"lang (Times r s) = conc (lang r) (lang s)" |
"lang (Star r) = star(lang r)"

abbreviation (input) regular_lang where "regular_lang A  (r. lang r = A)"

primrec nullable :: "'a rexp  bool" where
"nullable Zero = False" |
"nullable One = True" |
"nullable (Atom c) = False" |
"nullable (Plus r1 r2) = (nullable r1  nullable r2)" |
"nullable (Times r1 r2) = (nullable r1  nullable r2)" |
"nullable (Star r) = True"

lemma nullable_iff [code_abbrev]: "nullable r  []  lang r"
  by (induct r) (auto simp add: conc_def split: if_splits)

primrec rexp_empty where
  "rexp_empty Zero  True"
| "rexp_empty One  False"
| "rexp_empty (Atom a)  False"
| "rexp_empty (Plus r s)  rexp_empty r  rexp_empty s"
| "rexp_empty (Times r s)  rexp_empty r  rexp_empty s"
| "rexp_empty (Star r)  False"

(* TODO Fixme: This code_abbrev rule does not work. Why? *)
lemma rexp_empty_iff [code_abbrev]: "rexp_empty r  lang r = {}"
  by (induction r) auto



text‹Composition on rhs usually complicates matters:›
lemma map_map_rexp:
  "map_rexp f (map_rexp g r) = map_rexp (λr. f (g r)) r"
  unfolding rexp.map_comp o_def ..

lemma map_rexp_ident[simp]: "map_rexp (λx. x) = (λr. r)"
  unfolding id_def[symmetric] fun_eq_iff rexp.map_id id_apply by (intro allI refl)

lemma atoms_lang: "w : lang r  set w  atoms r"
proof(induction r arbitrary: w)
  case Times thus ?case by fastforce
next
  case Star thus ?case by (fastforce simp add: star_conv_concat)
qed auto

lemma lang_eq_ext: "(lang r = lang s) =
  (w  lists(atoms r  atoms s). w  lang r  w  lang s)"
  by (auto simp: atoms_lang[unfolded subset_iff])

lemma lang_eq_ext_Nil_fold_Deriv:
  fixes r s
  defines "𝔅  {(fold Deriv w (lang r), fold Deriv w (lang s))| w. wlists (atoms r  atoms s)}"
  shows "lang r = lang s  ((K, L)  𝔅. []  K  []  L)"
  unfolding lang_eq_ext 𝔅_def by (subst (1 2) in_fold_Deriv[of "[]", simplified, symmetric]) auto


subsection ‹Term ordering›

instantiation rexp :: (order) "{order}"
begin

fun le_rexp :: "('a::order) rexp  ('a::order) rexp  bool"
where
  "le_rexp Zero _ = True"
| "le_rexp _ Zero = False"
| "le_rexp One _ = True"
| "le_rexp _ One = False"
| "le_rexp (Atom a) (Atom b) = (a <= b)"
| "le_rexp (Atom _) _ = True"
| "le_rexp _ (Atom _) = False"
| "le_rexp (Star r) (Star s) = le_rexp r s"
| "le_rexp (Star _) _ = True"
| "le_rexp _ (Star _) = False"
| "le_rexp (Plus r r') (Plus s s') =
    (if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Plus _ _) _ = True"
| "le_rexp _ (Plus _ _) = False"
| "le_rexp (Times r r') (Times s s') =
    (if r = s then le_rexp r' s' else le_rexp r s)"

(* The class instance stuff is by Dmitriy Traytel *)

definition less_eq_rexp where "r  s  le_rexp r s"
definition less_rexp where "r < s  le_rexp r s  r  s"

lemma le_rexp_Zero: "le_rexp r Zero  r = Zero"
by (induction r) auto

lemma le_rexp_refl: "le_rexp r r"
by (induction r) auto

lemma le_rexp_antisym: "le_rexp r s; le_rexp s r  r = s"
by (induction r s rule: le_rexp.induct) (auto dest: le_rexp_Zero)

lemma le_rexp_trans: "le_rexp r s; le_rexp s t  le_rexp r t"
proof (induction r s arbitrary: t rule: le_rexp.induct)
  fix v t assume "le_rexp (Atom v) t" thus "le_rexp One t" by (cases t) auto
next
  fix s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
  fix s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
  fix s t assume "le_rexp (Star s) t" thus "le_rexp One t" by (cases t) auto
next
  fix v u t assume "le_rexp (Atom v) (Atom u)" "le_rexp (Atom u) t"
  thus "le_rexp (Atom v) t" by (cases t) auto
next
  fix v s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
  fix v s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
  fix v s t assume "le_rexp (Star s) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
  fix r s t
  assume IH: "t. le_rexp r s  le_rexp s t  le_rexp r t"
    and "le_rexp (Star r) (Star s)" "le_rexp (Star s) t"
  thus "le_rexp (Star r) t" by (cases t) auto
next
  fix r s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
  fix r s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
  fix r1 r2 s1 s2 t
  assume "t. r1 = s1  le_rexp r2 s2  le_rexp s2 t  le_rexp r2 t"
         "t. r1  s1  le_rexp r1 s1  le_rexp s1 t  le_rexp r1 t"
         "le_rexp (Plus r1 r2) (Plus s1 s2)" "le_rexp (Plus s1 s2) t"
  thus "le_rexp (Plus r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
next
  fix r1 r2 s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Plus r1 r2) t" by (cases t) auto
next
  fix r1 r2 s1 s2 t
  assume "t. r1 = s1  le_rexp r2 s2  le_rexp s2 t  le_rexp r2 t"
         "t. r1  s1  le_rexp r1 s1  le_rexp s1 t  le_rexp r1 t"
         "le_rexp (Times r1 r2) (Times s1 s2)" "le_rexp (Times s1 s2) t"
  thus "le_rexp (Times r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
qed auto

instance proof
qed (auto simp add: less_eq_rexp_def less_rexp_def
       intro: le_rexp_refl le_rexp_antisym le_rexp_trans)

end

instantiation rexp :: (linorder) "{linorder}"
begin

lemma le_rexp_total: "le_rexp (r :: 'a :: linorder rexp) s  le_rexp s r"
by (induction r s rule: le_rexp.induct) auto

instance proof
qed (unfold less_eq_rexp_def less_rexp_def, rule le_rexp_total)

end

end

Theory NDerivative

section ‹Normalizing Derivative›

theory NDerivative
imports
  Regular_Exp
begin

subsection ‹Normalizing operations›

text ‹associativity, commutativity, idempotence, zero›

fun nPlus :: "'a::order rexp  'a rexp  'a rexp"
where
  "nPlus Zero r = r"
| "nPlus r Zero = r"
| "nPlus (Plus r s) t = nPlus r (nPlus s t)"
| "nPlus r (Plus s t) =
     (if r = s then (Plus s t)
     else if le_rexp r s then Plus r (Plus s t)
     else Plus s (nPlus r t))"
| "nPlus r s =
     (if r = s then r
      else if le_rexp r s then Plus r s
      else Plus s r)"

lemma lang_nPlus[simp]: "lang (nPlus r s) = lang (Plus r s)"
by (induction r s rule: nPlus.induct) auto

text ‹associativity, zero, one›

fun nTimes :: "'a::order rexp  'a rexp  'a rexp"
where
  "nTimes Zero _ = Zero"
| "nTimes _ Zero = Zero"
| "nTimes One r = r"
| "nTimes r One = r"
| "nTimes (Times r s) t = Times r (nTimes s t)"
| "nTimes r s = Times r s"

lemma lang_nTimes[simp]: "lang (nTimes r s) = lang (Times r s)"
by (induction r s rule: nTimes.induct) (auto simp: conc_assoc)

primrec norm :: "'a::order rexp  'a rexp"
where
  "norm Zero = Zero"
| "norm One = One"
| "norm (Atom a) = Atom a"
| "norm (Plus r s) = nPlus (norm r) (norm s)"
| "norm (Times r s) = nTimes (norm r) (norm s)"
| "norm (Star r) = Star (norm r)"

lemma lang_norm[simp]: "lang (norm r) = lang r"
by (induct r) auto

primrec nderiv :: "'a::order  'a rexp  'a rexp"
where
  "nderiv _ Zero = Zero"
| "nderiv _ One = Zero"
| "nderiv a (Atom b) = (if a = b then One else Zero)"
| "nderiv a (Plus r s) = nPlus (nderiv a r) (nderiv a s)"
| "nderiv a (Times r s) =
    (let r's = nTimes (nderiv a r) s
     in if nullable r then nPlus r's (nderiv a s) else r's)"
| "nderiv a (Star r) = nTimes (nderiv a r) (Star r)"

lemma lang_nderiv: "lang (nderiv a r) = Deriv a (lang r)"
by (induction r) (auto simp: Let_def nullable_iff)

lemma deriv_no_occurrence: 
  "x  atoms r  nderiv x r = Zero"
by (induction r) auto

lemma atoms_nPlus[simp]: "atoms (nPlus r s) = atoms r  atoms s"
by (induction r s rule: nPlus.induct) auto

lemma atoms_nTimes: "atoms (nTimes r s)  atoms r  atoms s"
by (induction r s rule: nTimes.induct) auto

lemma atoms_norm: "atoms (norm r)  atoms r"
by (induction r) (auto dest!:subsetD[OF atoms_nTimes])

lemma atoms_nderiv: "atoms (nderiv a r)  atoms r"
by (induction r) (auto simp: Let_def dest!:subsetD[OF atoms_nTimes])

end

Theory Equivalence_Checking

section ‹Deciding Regular Expression Equivalence›

theory Equivalence_Checking
imports
  NDerivative
  "HOL-Library.While_Combinator"
begin


subsection ‹Bisimulation between languages and regular expressions›

coinductive bisimilar :: "'a lang  'a lang  bool" where
"([]  K  []  L) 
  (x. bisimilar (Deriv x K) (Deriv x L))
  bisimilar K L"

lemma equal_if_bisimilar:
assumes "bisimilar K L" shows "K = L"
proof (rule set_eqI)
  fix w
  from ‹bisimilar K L show "w  K  w  L"
  proof (induct w arbitrary: K L)
    case Nil thus ?case by (auto elim: bisimilar.cases)
  next
    case (Cons a w K L)
    from ‹bisimilar K L have "bisimilar (Deriv a K) (Deriv a L)"
      by (auto elim: bisimilar.cases)
    then have "w  Deriv a K  w  Deriv a L" by (rule Cons(1))
    thus ?case by (auto simp: Deriv_def)
  qed
qed

lemma language_coinduct:
fixes R (infixl "" 50)
assumes "K  L"
assumes "K L. K  L  ([]  K  []  L)"
assumes "K L x. K  L  Deriv x K  Deriv x L"
shows "K = L"
apply (rule equal_if_bisimilar)
apply (rule bisimilar.coinduct[of R, OF K  L])
apply (auto simp: assms)
done

type_synonym 'a rexp_pair = "'a rexp * 'a rexp"
type_synonym 'a rexp_pairs = "'a rexp_pair list"

definition is_bisimulation ::  "'a::order list  'a rexp_pair set  bool"
where
"is_bisimulation as R =
  ((r,s) R. (atoms r  atoms s  set as)  (nullable r  nullable s) 
    (aset as. (nderiv a r, nderiv a s)  R))"

lemma bisim_lang_eq:
assumes bisim: "is_bisimulation as ps"
assumes "(r, s)  ps"
shows "lang r = lang s"
proof -
  define ps' where "ps' = insert (Zero, Zero) ps"
  from bisim have bisim': "is_bisimulation as ps'"
    by (auto simp: ps'_def is_bisimulation_def)
  let ?R = "λK L. ((r,s)ps'. K = lang r  L = lang s)"
  show ?thesis
  proof (rule language_coinduct[where R="?R"])
    from (r, s)  ps 
    have "(r, s)  ps'" by (auto simp: ps'_def)
    thus "?R (lang r) (lang s)" by auto
  next
    fix K L assume "?R K L"
    then obtain r s where rs: "(r, s)  ps'"
      and KL: "K = lang r" "L = lang s" by auto
    with bisim' have "nullable r  nullable s"
      by (auto simp: is_bisimulation_def)
    thus "[]  K  []  L" by (auto simp: nullable_iff KL)
    fix a
    show "?R (Deriv a K) (Deriv a L)"
    proof cases
      assume "a  set as"
      with rs bisim'
      have "(nderiv a r, nderiv a s)  ps'"
        by (auto simp: is_bisimulation_def)
      thus ?thesis by (force simp: KL lang_nderiv)
    next
      assume "a  set as"
      with bisim' rs
      have "a  atoms r" "a  atoms s" by (auto simp: is_bisimulation_def)
      then have "nderiv a r = Zero" "nderiv a s = Zero"
        by (auto intro: deriv_no_occurrence)
      then have "Deriv a K = lang Zero" 
        "Deriv a L = lang Zero" 
        unfolding KL lang_nderiv[symmetric] by auto
      thus ?thesis by (auto simp: ps'_def)
    qed
  qed  
qed

subsection ‹Closure computation›

definition closure ::
  "'a::order list  'a rexp_pair  ('a rexp_pairs * 'a rexp_pair set) option"
where
"closure as = rtrancl_while (%(r,s). nullable r = nullable s)
  (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)"

definition pre_bisim :: "'a::order list  'a rexp  'a rexp 
 'a rexp_pairs * 'a rexp_pair set  bool"
where
"pre_bisim as r s = (λ(ws,R).
 (r,s)  R  set ws  R 
 ((r,s) R. atoms r  atoms s  set as) 
 ((r,s) R - set ws. (nullable r  nullable s) 
   (aset as. (nderiv a r, nderiv a s)  R)))"

theorem closure_sound:
assumes result: "closure as (r,s) = Some([],R)"
and atoms: "atoms r  atoms s  set as"
shows "lang r = lang s"
proof-
  let ?test = "While_Combinator.rtrancl_while_test (%(r,s). nullable r = nullable s)"
  let ?step = "While_Combinator.rtrancl_while_step (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)"
  { fix st assume inv: "pre_bisim as r s st" and test: "?test st"
    have "pre_bisim as r s (?step st)"
    proof (cases st)
      fix ws R assume "st = (ws, R)"
      with test obtain r s t where st: "st = ((r, s) # t, R)" and "nullable r = nullable s"
        by (cases ws) auto
      with inv show ?thesis using atoms_nderiv[of _ r] atoms_nderiv[of _ s]
        unfolding st rtrancl_while_test.simps rtrancl_while_step.simps pre_bisim_def Ball_def
        by simp_all blast+
    qed
 }
  moreover
  from atoms
  have "pre_bisim as r s ([(r,s)],{(r,s)})" by (simp add: pre_bisim_def)
  ultimately have pre_bisim_ps: "pre_bisim as r s ([],R)"
    by (rule while_option_rule[OF _ result[unfolded closure_def rtrancl_while_def]])
  then have "is_bisimulation as R" "(r, s)  R"
    by (auto simp: pre_bisim_def is_bisimulation_def)
  thus "lang r = lang s" by (rule bisim_lang_eq)
qed

subsection ‹Bisimulation-free proof of closure computation›

text‹The equivalence check can be viewed as the product construction
of two automata. The state space is the reflexive transitive closure of
the pair of next-state functions, i.e. derivatives.›

lemma rtrancl_nderiv_nderivs: defines "nderivs == foldl (%r a. nderiv a r)"
shows "{((r,s),(nderiv a r,nderiv a s))| r s a. a : A}^* =
       {((r,s),(nderivs r w,nderivs s w))| r s w. w : lists A}" (is "?L = ?R")
proof-
  note [simp] = nderivs_def
  { fix r s r' s'
    have "((r,s),(r',s')) : ?L  ((r,s),(r',s')) : ?R"
    proof(induction rule: converse_rtrancl_induct2)
      case refl show ?case by (force intro!: foldl.simps(1)[symmetric])
    next
      case step thus ?case by(force intro!: foldl.simps(2)[symmetric])
    qed
  } moreover
  { fix r s r' s'
    { fix w have "xset w. x  A  ((r, s), nderivs r w, nderivs s w) :?L"
      proof(induction w rule: rev_induct)
        case Nil show ?case by simp
      next
        case snoc thus ?case by (auto elim!: rtrancl_into_rtrancl)
      qed
    } 
    hence "((r,s),(r',s')) : ?R  ((r,s),(r',s')) : ?L" by auto
  } ultimately show ?thesis by (auto simp: in_lists_conv_set) blast
qed

lemma nullable_nderivs:
  "nullable (foldl (%r a. nderiv a r) r w) = (w : lang r)"
by (induct w arbitrary: r) (simp_all add: nullable_iff lang_nderiv Deriv_def)

theorem closure_sound_complete:
assumes result: "closure as (r,s) = Some(ws,R)"
and atoms: "set as = atoms r  atoms s"
shows "ws = []  lang r = lang s"
proof -
  have leq: "(lang r = lang s) =
  ((r',s')  {((r0,s0),(nderiv a r0,nderiv a s0))| r0 s0 a. a : set as}^* `` {(r,s)}.
    nullable r' = nullable s')"
    by(simp add: atoms rtrancl_nderiv_nderivs Ball_def lang_eq_ext imp_ex nullable_nderivs
         del:Un_iff)
  have "{(x,y). y  set ((λ(p,q). map (λa. (nderiv a p, nderiv a q)) as) x)} =
    {((r,s), nderiv a r, nderiv a s) |r s a. a  set as}"
    by auto
  with atoms rtrancl_while_Some[OF result[unfolded closure_def]]
  show ?thesis by (auto simp add: leq Ball_def split: if_splits)
qed

subsection ‹The overall procedure›

primrec add_atoms :: "'a rexp  'a list  'a list"
where
  "add_atoms Zero = id"
| "add_atoms One = id"
| "add_atoms (Atom a) = List.insert a"
| "add_atoms (Plus r s) = add_atoms s o add_atoms r"
| "add_atoms (Times r s) = add_atoms s o add_atoms r"
| "add_atoms (Star r) = add_atoms r"

lemma set_add_atoms: "set (add_atoms r as) = atoms r  set as"
by (induct r arbitrary: as) auto


definition check_eqv :: "nat rexp  nat rexp  bool" where
"check_eqv r s =
  (let nr = norm r; ns = norm s; as = add_atoms nr (add_atoms ns [])
   in case closure as (nr, ns) of
     Some([],_)  True | _  False)"

lemma soundness: 
assumes "check_eqv r s" shows "lang r = lang s"
proof -
  let ?nr = "norm r" let ?ns = "norm s"
  let ?as = "add_atoms ?nr (add_atoms ?ns [])"
  obtain R where 1: "closure ?as (?nr,?ns) = Some([],R)"
    using assms by (auto simp: check_eqv_def Let_def split:option.splits list.splits)
  then have "lang (norm r) = lang (norm s)"
    by (rule closure_sound) (auto simp: set_add_atoms dest!: subsetD[OF atoms_norm])
  thus "lang r = lang s" by simp
qed

text‹Test:›
lemma "check_eqv (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))"
by eval

end

Theory Relation_Interpretation

section ‹Regular Expressions as Homogeneous Binary Relations›

theory Relation_Interpretation
imports Regular_Exp
begin

primrec rel :: "('a  ('b * 'b) set)  'a rexp  ('b * 'b) set"
where
  "rel v Zero = {}" |
  "rel v One = Id" |
  "rel v (Atom a) = v a" |
  "rel v (Plus r s) = rel v r  rel v s" |
  "rel v (Times r s) = rel v r O rel v s" |
  "rel v (Star r) = (rel v r)^*"

primrec word_rel :: "('a  ('b * 'b) set)  'a list  ('b * 'b) set"
where
  "word_rel v [] = Id"
| "word_rel v (a#as) = v a O word_rel v as"

lemma word_rel_append: 
  "word_rel v w O word_rel v w' = word_rel v (w @ w')"
by (rule sym) (induct w, auto)

lemma rel_word_rel: "rel v r = (wlang r. word_rel v w)"
proof (induct r)
  case Times thus ?case 
    by (auto simp: rel_def word_rel_append conc_def relcomp_UNION_distrib relcomp_UNION_distrib2)
next
  case (Star r)
  { fix n
    have "(rel v r) ^^ n = (w  lang r ^^ n. word_rel v w)"
    proof (induct n)
      case 0 show ?case by simp
    next
      case (Suc n) thus ?case
        unfolding relpow.simps relpow_commute[symmetric]
        by (auto simp add: Star conc_def word_rel_append
          relcomp_UNION_distrib relcomp_UNION_distrib2)
    qed }

  thus ?case unfolding rel.simps
    by (force simp: rtrancl_power star_def)
qed auto


text ‹Soundness:›

lemma soundness:
 "lang r = lang s  rel v r = rel v s"
unfolding rel_word_rel by auto

end

Theory Regexp_Method

section ‹Proving Relation (In)equalities via Regular Expressions›

theory Regexp_Method
imports Equivalence_Checking Relation_Interpretation
begin

primrec rel_of_regexp :: "('a * 'a) set list  nat rexp  ('a * 'a) set" where
"rel_of_regexp vs Zero  = {}" |
"rel_of_regexp vs One  = Id" |
"rel_of_regexp vs (Atom i)  = vs ! i" |
"rel_of_regexp vs (Plus r s)  = rel_of_regexp vs r   rel_of_regexp vs s " |
"rel_of_regexp vs (Times r s)  = rel_of_regexp vs r O rel_of_regexp vs s" |
"rel_of_regexp vs (Star r)  = (rel_of_regexp vs r)^*"

lemma rel_of_regexp_rel: "rel_of_regexp vs r = rel (λi. vs ! i) r"
by (induct r) auto

primrec rel_eq where
"rel_eq (r, s) vs = (rel_of_regexp vs r = rel_of_regexp vs s)"

lemma rel_eqI: "check_eqv r s  rel_eq (r, s) vs"
unfolding rel_eq.simps rel_of_regexp_rel
by (rule Relation_Interpretation.soundness)
 (rule Equivalence_Checking.soundness)

lemmas regexp_reify = rel_of_regexp.simps rel_eq.simps
lemmas regexp_unfold = trancl_unfold_left subset_Un_eq

ML local

fun check_eqv (ct, b) = Thm.mk_binop @{cterm "Pure.eq :: bool  bool  prop"}
  ct (if b then @{cterm True} else @{cterm False});

val (_, check_eqv_oracle) = Context.>>> (Context.map_theory_result
  (Thm.add_oracle (@{binding check_eqv}, check_eqv)));

in

val regexp_conv =
  @{computation_conv bool terms: check_eqv datatypes: "nat rexp"}
  (fn _ => fn b => fn ct => check_eqv_oracle (ct, b))

end
  
method_setup regexp = ‹
  Scan.succeed (fn ctxt =>
    SIMPLE_METHOD' (
      (TRY o eresolve_tac ctxt @{thms rev_subsetD})
      THEN' (Subgoal.FOCUS_PARAMS (fn {context = ctxt', ...} =>
        TRY (Local_Defs.unfold_tac ctxt' @{thms regexp_unfold})
        THEN Reification.tac ctxt' @{thms regexp_reify} NONE 1
        THEN resolve_tac ctxt' @{thms rel_eqI} 1
        THEN CONVERSION (HOLogic.Trueprop_conv (regexp_conv ctxt')) 1
        THEN resolve_tac ctxt' [TrueI] 1) ctxt))) ‹decide relation equalities via regular expressions›

hide_const (open) le_rexp nPlus nTimes norm nullable bisimilar is_bisimulation closure
  pre_bisim add_atoms check_eqv rel word_rel rel_eq

text ‹Example:›

lemma "(r  s^+)^* = (r  s)^*"
  by regexp

end

Theory Regexp_Constructions

(*
  File:     Regexp_Constructions.thy
  Author:   Manuel Eberl <eberlm@in.tum.de>

  Some simple constructions on regular expressions to illustrate closure properties of regular
  languages: reversal, substitution, prefixes, suffixes, subwords ("fragments")
*)
section ‹Basic constructions on regular expressions›
theory Regexp_Constructions
imports
  Main
  "HOL-Library.Sublist"
  Regular_Exp
begin

subsection ‹Reverse language›

lemma rev_conc [simp]: "rev ` (A @@ B) = rev ` B @@ rev ` A"
  unfolding conc_def image_def by force

lemma rev_compower [simp]: "rev ` (A ^^ n) = (rev ` A) ^^ n"
  by (induction n) (simp_all add: conc_pow_comm)

lemma rev_star [simp]: "rev ` star A = star (rev ` A)"
  by (simp add: star_def image_UN)


subsection ‹Substituting characters in a language›    

definition subst_word :: "('a  'b list)  'a list  'b list" where
  "subst_word f xs = concat (map f xs)"
  
lemma subst_word_Nil [simp]: "subst_word f [] = []"
  by (simp add: subst_word_def)
    
lemma subst_word_singleton [simp]: "subst_word f [x] = f x"
  by (simp add: subst_word_def)
    
lemma subst_word_append [simp]: "subst_word f (xs @ ys) = subst_word f xs @ subst_word f ys"
  by (simp add: subst_word_def)
    
lemma subst_word_Cons [simp]: "subst_word f (x # xs) = f x @ subst_word f xs"
  by (simp add: subst_word_def)
    
lemma subst_word_conc [simp]: "subst_word f ` (A @@ B) = subst_word f ` A @@ subst_word f ` B"
  unfolding conc_def image_def by force 

lemma subst_word_compower [simp]: "subst_word f ` (A ^^ n) = (subst_word f ` A) ^^ n"
  by (induction n) simp_all
    
lemma subst_word_star [simp]: "subst_word f ` (star A) = star (subst_word f ` A)"
  by (simp add: star_def image_UN)
    

text ‹Suffix language›

definition Suffixes :: "'a list set  'a list set" where
  "Suffixes A = {w. q. q @ w  A}"

lemma Suffixes_altdef [code]: "Suffixes A = (wA. set (suffixes w))"
  unfolding Suffixes_def set_suffixes_eq suffix_def by blast

lemma Nil_in_Suffixes_iff [simp]: "[]  Suffixes A  A  {}"
  by (auto simp: Suffixes_def)

lemma Suffixes_empty [simp]: "Suffixes {} = {}"
  by (auto simp: Suffixes_def)
    
lemma Suffixes_empty_iff [simp]: "Suffixes A = {}  A = {}"
  by (auto simp: Suffixes_altdef)
    
lemma Suffixes_singleton [simp]: "Suffixes {xs} = set (suffixes xs)"
  by (auto simp: Suffixes_altdef)
    
lemma Suffixes_insert: "Suffixes (insert xs A) = set (suffixes xs)  Suffixes A"
  by (simp add: Suffixes_altdef)

lemma Suffixes_conc [simp]: "A  {}  Suffixes (A @@ B) = Suffixes B  (Suffixes A @@ B)"
  unfolding Suffixes_altdef conc_def by (force simp: suffix_append)
    
lemma Suffixes_union [simp]: "Suffixes (A  B) = Suffixes A  Suffixes B"
  by (auto simp: Suffixes_def)
  
lemma Suffixes_UNION [simp]: "Suffixes ((f ` A)) = ((λx. Suffixes (f x)) ` A)"
  by (auto simp: Suffixes_def)

lemma Suffixes_compower: 
  assumes "A  {}"
  shows   "Suffixes (A ^^ n) = insert [] (Suffixes A @@ (k<n. A ^^ k))"
proof (induction n)
  case (Suc n)
  from Suc have "Suffixes (A ^^ Suc n) = 
                   insert [] (Suffixes A @@ ((k<n. A ^^ k)  A ^^ n))"
    by (simp_all add: assms conc_Un_distrib)
  also have "(k<n. A ^^ k)  A ^^ n = (kinsert n {..<n}. A ^^ k)"  by blast
  also have "insert n {..<n} = {..<Suc n}" by auto
  finally show ?case .
qed simp_all

lemma Suffixes_star [simp]: 
  assumes "A  {}"
  shows   "Suffixes (star A) = Suffixes A @@ star A"
proof -
  have "star A = (n. A ^^ n)" unfolding star_def ..
  also have "Suffixes  = (x. Suffixes (A ^^ x))" by simp
  also have " = (n. insert [] (Suffixes A @@ (k<n. A ^^ k)))"
    using assms by (subst Suffixes_compower) auto
  also have " = insert [] (Suffixes A @@ (n. (k<n. A ^^ k)))"
    by (simp_all add: conc_UNION_distrib)
  also have "(n. (k<n. A ^^ k)) = (n. A ^^ n)" by auto
  also have " = star A" unfolding star_def ..
  also have "insert [] (Suffixes A @@ star A) = Suffixes A @@ star A" 
    using assms by auto
  finally show ?thesis .
qed
  
text ‹Prefix language›

definition Prefixes :: "'a list set  'a list set" where
  "Prefixes A = {w. q. w @ q  A}"

lemma Prefixes_altdef [code]: "Prefixes A = (wA. set (prefixes w))"
  unfolding Prefixes_def set_prefixes_eq prefix_def by blast

lemma Nil_in_Prefixes_iff [simp]: "[]  Prefixes A  A  {}"
  by (auto simp: Prefixes_def)

lemma Prefixes_empty [simp]: "Prefixes {} = {}"
  by (auto simp: Prefixes_def)
    
lemma Prefixes_empty_iff [simp]: "Prefixes A = {}  A = {}"
  by (auto simp: Prefixes_altdef)
    
lemma Prefixes_singleton [simp]: "Prefixes {xs} = set (prefixes xs)"
  by (auto simp: Prefixes_altdef)
    
lemma Prefixes_insert: "Prefixes (insert xs A) = set (prefixes xs)  Prefixes A"
  by (simp add: Prefixes_altdef)

lemma Prefixes_conc [simp]: "B  {}  Prefixes (A @@ B) = Prefixes A  (A @@ Prefixes B)"
  unfolding Prefixes_altdef conc_def by (force simp: prefix_append)
    
lemma Prefixes_union [simp]: "Prefixes (A  B) = Prefixes A  Prefixes B"
  by (auto simp: Prefixes_def)
  
lemma Prefixes_UNION [simp]: "Prefixes ((f ` A)) = ((λx. Prefixes (f x)) ` A)"
  by (auto simp: Prefixes_def)


lemma Prefixes_rev: "Prefixes (rev ` A) = rev ` Suffixes A"
  by (auto simp: Prefixes_altdef prefixes_rev Suffixes_altdef)

lemma Suffixes_rev: "Suffixes (rev ` A) = rev ` Prefixes A"
  by (auto simp: Prefixes_altdef suffixes_rev Suffixes_altdef)    


lemma Prefixes_compower:
  assumes "A  {}"
  shows   "Prefixes (A ^^ n) = insert [] ((k<n. A ^^ k) @@ Prefixes A)"
proof -
  have "A ^^ n = rev ` ((rev ` A) ^^ n)" by (simp add: image_image)
  also have "Prefixes  = insert [] ((k<n. A ^^ k) @@ Prefixes A)"
    unfolding Prefixes_rev 
    by (subst Suffixes_compower) (simp_all add: image_UN image_image Suffixes_rev assms)
  finally show ?thesis .
qed

lemma Prefixes_star [simp]:
  assumes "A  {}"
  shows   "Prefixes (star A) = star A @@ Prefixes A"
proof -
  have "star A = rev ` star (rev ` A)" by (simp add: image_image)
  also have "Prefixes  = star A @@ Prefixes A"
    unfolding Prefixes_rev
    by (subst Suffixes_star) (simp_all add: assms image_image Suffixes_rev)
  finally show ?thesis .
qed
  

subsection ‹Subword language›

text ‹
  The language of all sub-words, i.e. all words that are a contiguous sublist of a word in
  the original language.
›
definition Sublists :: "'a list set  'a list set" where
  "Sublists A = {w. qA. sublist w q}"

lemma Sublists_altdef [code]: "Sublists A = (wA. set (sublists w))"
  by (auto simp: Sublists_def)

lemma Sublists_empty [simp]: "Sublists {} = {}"
  by (auto simp: Sublists_def)
    
lemma Sublists_singleton [simp]: "Sublists {w} = set (sublists w)"
  by (auto simp: Sublists_altdef)

lemma Sublists_insert: "Sublists (insert w A) = set (sublists w)  Sublists A"
  by (auto simp: Sublists_altdef)
    
lemma Sublists_Un [simp]: "Sublists (A  B) = Sublists A  Sublists B"
  by (auto simp: Sublists_altdef)

lemma Sublists_UN [simp]: "Sublists ((f ` A)) = ((λx. Sublists (f x)) ` A)"
  by (auto simp: Sublists_altdef)

lemma Sublists_conv_Prefixes: "Sublists A = Prefixes (Suffixes A)"
  by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)
    
lemma Sublists_conv_Suffixes: "Sublists A = Suffixes (Prefixes A)"
  by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)

lemma Sublists_conc [simp]: 
  assumes "A  {}" "B  {}"
  shows   "Sublists (A @@ B) = Sublists A  Sublists B  Suffixes A @@ Prefixes B"
  using assms unfolding Sublists_conv_Suffixes by auto

lemma star_not_empty [simp]: "star A  {}"
  by auto
    
lemma Sublists_star:
  "A  {}  Sublists (star A) = Sublists A  Suffixes A @@ star A @@ Prefixes A"
  by (simp add: Sublists_conv_Prefixes)

lemma Prefixes_subset_Sublists: "Prefixes A  Sublists A"
  unfolding Prefixes_def Sublists_def by auto

lemma Suffixes_subset_Sublists: "Suffixes A  Sublists A"
  unfolding Suffixes_def Sublists_def by auto


subsection ‹Fragment language›
  
text ‹
  The following is the fragment language of a given language, i.e. the set of all words that
  are (not necessarily contiguous) sub-sequences of a word in the original language.
›
definition Subseqs where "Subseqs A = (wA. set (subseqs w))"

lemma Subseqs_empty [simp]: "Subseqs {} = {}"
  by (simp add: Subseqs_def)

lemma Subseqs_insert [simp]: "Subseqs (insert xs A) = set (subseqs xs)  Subseqs A"
  by (simp add: Subseqs_def)
    
lemma Subseqs_singleton [simp]: "Subseqs {xs} = set (subseqs xs)"
  by simp
    
lemma Subseqs_Un [simp]: "Subseqs (A  B) = Subseqs A  Subseqs B"
  by (simp add: Subseqs_def)
    
lemma Subseqs_UNION [simp]: "Subseqs ((f ` A)) = ((λx. Subseqs (f x)) ` A)"
  by (simp add: Subseqs_def)
  
lemma Subseqs_conc [simp]: "Subseqs (A @@ B) = Subseqs A @@ Subseqs B"
proof safe
  fix xs assume "xs  Subseqs (A @@ B)"
  then obtain ys zs where *: "ys  A" "zs  B" "subseq xs (ys @ zs)" 
    by (auto simp: Subseqs_def conc_def)
  from *(3) obtain xs1 xs2 where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs"
    by (rule subseq_appendE)
  with *(1,2) show "xs  Subseqs A @@ Subseqs B" by (auto simp: Subseqs_def set_subseqs_eq)
next
  fix xs assume "xs  Subseqs A @@ Subseqs B"
  then obtain xs1 xs2 ys zs 
    where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs" "ys  A" "zs  B"
    by (auto simp: conc_def Subseqs_def)
  thus "xs  Subseqs (A @@ B)" by (force simp: Subseqs_def conc_def intro: list_emb_append_mono)
qed

lemma Subseqs_compower [simp]: "Subseqs (A ^^ n) = Subseqs A ^^ n"
  by (induction n) simp_all
    
lemma Subseqs_star [simp]: "Subseqs (star A) = star (Subseqs A)"
  by (simp add: star_def)
    
lemma Sublists_subset_Subseqs: "Sublists A  Subseqs A"
  by (auto simp: Sublists_def Subseqs_def dest!: sublist_imp_subseq)


subsection ‹Various regular expression constructions›

text ‹A construction for language reversal of a regular expression:›

primrec rexp_rev where
  "rexp_rev Zero = Zero"
| "rexp_rev One = One"
| "rexp_rev (Atom x) = Atom x"
| "rexp_rev (Plus r s) = Plus (rexp_rev r) (rexp_rev s)"
| "rexp_rev (Times r s) = Times (rexp_rev s) (rexp_rev r)"
| "rexp_rev (Star r) = Star (rexp_rev r)"

lemma lang_rexp_rev [simp]: "lang (rexp_rev r) = rev ` lang r"
  by (induction r) (simp_all add: image_Un)  
    

text ‹The obvious construction for a singleton-language regular expression:›

fun rexp_of_word where
  "rexp_of_word [] = One"
| "rexp_of_word [x] = Atom x"
| "rexp_of_word (x#xs) = Times (Atom x) (rexp_of_word xs)"
  
lemma lang_rexp_of_word [simp]: "lang (rexp_of_word xs) = {xs}"
  by (induction xs rule: rexp_of_word.induct) (simp_all add: conc_def)

lemma size_rexp_of_word [simp]: "size (rexp_of_word xs) = Suc (2 * (length xs - 1))"
  by (induction xs rule: rexp_of_word.induct) auto


text ‹Character substitution in a regular expression:›

primrec rexp_subst where
  "rexp_subst f Zero = Zero"
| "rexp_subst f One = One"
| "rexp_subst f (Atom x) = rexp_of_word (f x)"
| "rexp_subst f (Plus r s) = Plus (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Times r s) = Times (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Star r) = Star (rexp_subst f r)"

lemma lang_rexp_subst: "lang (rexp_subst f r) = subst_word f ` lang r"
  by (induction r) (simp_all add: image_Un)


text ‹Suffix language of a regular expression:›

primrec suffix_rexp :: "'a rexp  'a rexp" where
  "suffix_rexp Zero = Zero"
| "suffix_rexp One = One"
| "suffix_rexp (Atom a) = Plus (Atom a) One"
| "suffix_rexp (Plus r s) = Plus (suffix_rexp r) (suffix_rexp s)"
| "suffix_rexp (Times r s) =
    (if rexp_empty r then Zero else Plus (Times (suffix_rexp r) s) (suffix_rexp s))"
| "suffix_rexp (Star r) =
    (if rexp_empty r then One else Times (suffix_rexp r) (Star r))"

theorem lang_suffix_rexp [simp]:
  "lang (suffix_rexp r) = Suffixes (lang r)"
  by (induction r) (auto simp: rexp_empty_iff)


text ‹Prefix language of a regular expression:›

primrec prefix_rexp :: "'a rexp  'a rexp" where
  "prefix_rexp Zero = Zero"
| "prefix_rexp One = One"
| "prefix_rexp (Atom a) = Plus (Atom a) One"
| "prefix_rexp (Plus r s) = Plus (prefix_rexp r) (prefix_rexp s)"
| "prefix_rexp (Times r s) =
    (if rexp_empty s then Zero else Plus (Times r (prefix_rexp s)) (prefix_rexp r))"
| "prefix_rexp (Star r) =
    (if rexp_empty r then One else Times (Star r) (prefix_rexp r))"

theorem lang_prefix_rexp [simp]:
  "lang (prefix_rexp r) = Prefixes (lang r)"
  by (induction r) (auto simp: rexp_empty_iff)


text ‹Sub-word language of a regular expression›    

primrec sublist_rexp :: "'a rexp  'a rexp" where
  "sublist_rexp Zero = Zero"
| "sublist_rexp One = One"
| "sublist_rexp (Atom a) = Plus (Atom a) One"
| "sublist_rexp (Plus r s) = Plus (sublist_rexp r) (sublist_rexp s)"
| "sublist_rexp (Times r s) =
    (if rexp_empty r  rexp_empty s then Zero else 
       Plus (sublist_rexp r) (Plus (sublist_rexp s) (Times (suffix_rexp r) (prefix_rexp s))))"
| "sublist_rexp (Star r) =
    (if rexp_empty r then One else 
       Plus (sublist_rexp r) (Times (suffix_rexp r) (Times (Star r) (prefix_rexp r))))"

theorem lang_sublist_rexp [simp]:
  "lang (sublist_rexp r) = Sublists (lang r)"
  by (induction r) (auto simp: rexp_empty_iff Sublists_star)


text ‹Fragment language of a regular expression:›
  
primrec subseqs_rexp :: "'a rexp  'a rexp" where
  "subseqs_rexp Zero = Zero"
| "subseqs_rexp One = One"
| "subseqs_rexp (Atom x) = Plus (Atom x) One"
| "subseqs_rexp (Plus r s) = Plus (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Times r s) = Times (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Star r) = Star (subseqs_rexp r)"

lemma lang_subseqs_rexp [simp]: "lang (subseqs_rexp r) = Subseqs (lang r)"
  by (induction r) auto


text ‹Subword language of a regular expression›


end

Theory Derivatives

section "Derivatives of regular expressions"

(* Author: Christian Urban *)

theory Derivatives
imports Regular_Exp
begin

text‹This theory is based on work by Brozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}.›

subsection ‹Brzozowski's derivatives of regular expressions›

primrec
  deriv :: "'a  'a rexp  'a rexp"
where
  "deriv c (Zero) = Zero"
| "deriv c (One) = Zero"
| "deriv c (Atom c') = (if c = c' then One else Zero)"
| "deriv c (Plus r1 r2) = Plus (deriv c r1) (deriv c r2)"
| "deriv c (Times r1 r2) = 
    (if nullable r1 then Plus (Times (deriv c r1) r2) (deriv c r2) else Times (deriv c r1) r2)"
| "deriv c (Star r) = Times (deriv c r) (Star r)"

primrec 
  derivs :: "'a list  'a rexp  'a rexp"
where
  "derivs [] r = r"
| "derivs (c # s) r = derivs s (deriv c r)"


lemma atoms_deriv_subset: "atoms (deriv x r)  atoms r"
by (induction r) (auto)

lemma atoms_derivs_subset: "atoms (derivs w r)  atoms r"
by (induction w arbitrary: r) (auto dest: atoms_deriv_subset[THEN subsetD])

lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)"
by (induct r) (simp_all add: nullable_iff)

lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)"
by (induct s arbitrary: r) (simp_all add: lang_deriv)

text ‹A regular expression matcher:›

definition matcher :: "'a rexp  'a list  bool" where
"matcher r s = nullable (derivs s r)"

lemma matcher_correctness: "matcher r s  s  lang r"
by (induct s arbitrary: r)
   (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def)


subsection ‹Antimirov's partial derivatives›

abbreviation
  "Timess rs r  (r'  rs. {Times r' r})"

lemma Timess_eq_image:
  "Timess rs r = (λr'. Times r' r) ` rs"
  by auto

primrec
  pderiv :: "'a  'a rexp  'a rexp set"
where
  "pderiv c Zero = {}"
| "pderiv c One = {}"
| "pderiv c (Atom c') = (if c = c' then {One} else {})"
| "pderiv c (Plus r1 r2) = (pderiv c r1)  (pderiv c r2)"
| "pderiv c (Times r1 r2) = 
    (if nullable r1 then Timess (pderiv c r1) r2  pderiv c r2 else Timess (pderiv c r1) r2)"
| "pderiv c (Star r) = Timess (pderiv c r) (Star r)"

primrec
  pderivs :: "'a list  'a rexp  ('a rexp) set"
where
  "pderivs [] r = {r}"
| "pderivs (c # s) r =  (pderivs s ` pderiv c r)"

abbreviation
 pderiv_set :: "'a  'a rexp set  'a rexp set"
where
  "pderiv_set c rs   (pderiv c ` rs)"

abbreviation
  pderivs_set :: "'a list  'a rexp set  'a rexp set"
where
  "pderivs_set s rs   (pderivs s ` rs)"

lemma pderivs_append:
  "pderivs (s1 @ s2) r =  (pderivs s2 ` pderivs s1 r)"
by (induct s1 arbitrary: r) (simp_all)

lemma pderivs_snoc:
  shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"
by (simp add: pderivs_append)

lemma pderivs_simps [simp]:
  shows "pderivs s Zero = (if s = [] then {Zero} else {})"
  and   "pderivs s One = (if s = [] then {One} else {})"
  and   "pderivs s (Plus r1 r2) = (if s = [] then {Plus r1 r2} else (pderivs s r1)  (pderivs s r2))"
by (induct s) (simp_all)

lemma pderivs_Atom:
  shows "pderivs s (Atom c)  {Atom c, One}"
by (induct s) (simp_all)

subsection ‹Relating left-quotients and partial derivatives›

lemma Deriv_pderiv:
  shows "Deriv c (lang r) =  (lang ` pderiv c r)"
by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)

lemma Derivs_pderivs:
  shows "Derivs s (lang r) =  (lang ` pderivs s r)"
proof (induct s arbitrary: r)
  case (Cons c s)
  have ih: "r. Derivs s (lang r) =  (lang ` pderivs s r)" by fact
  have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
  also have " = Derivs s ( (lang ` pderiv c r))" by (simp add: Deriv_pderiv)
  also have " = Derivss s (lang ` (pderiv c r))"
    by (auto simp add:  Derivs_def)
  also have " =  (lang ` (pderivs_set s (pderiv c r)))"
    using ih by auto
  also have " =  (lang ` (pderivs (c # s) r))" by simp
  finally show "Derivs (c # s) (lang r) =  (lang ` pderivs (c # s) r)" .
qed (simp add: Derivs_def)

subsection ‹Relating derivatives and partial derivatives›

lemma deriv_pderiv:
  shows " (lang ` (pderiv c r)) = lang (deriv c r)"
unfolding lang_deriv Deriv_pderiv by simp

lemma derivs_pderivs:
  shows " (lang ` (pderivs s r)) = lang (derivs s r)"
unfolding lang_derivs Derivs_pderivs by simp


subsection ‹Finiteness property of partial derivatives›

definition
  pderivs_lang :: "'a lang  'a rexp  'a rexp set"
where
  "pderivs_lang A r  x  A. pderivs x r"

lemma pderivs_lang_subsetI:
  assumes "s. s  A  pderivs s r  C"
  shows "pderivs_lang A r  C"
using assms unfolding pderivs_lang_def by (rule UN_least)

lemma pderivs_lang_union:
  shows "pderivs_lang (A  B) r = (pderivs_lang A r  pderivs_lang B r)"
by (simp add: pderivs_lang_def)

lemma pderivs_lang_subset:
  shows "A  B  pderivs_lang A r  pderivs_lang B r"
by (auto simp add: pderivs_lang_def)

definition
  "UNIV1  UNIV - {[]}"

lemma pderivs_lang_Zero [simp]:
  shows "pderivs_lang UNIV1 Zero = {}"
unfolding UNIV1_def pderivs_lang_def by auto

lemma pderivs_lang_One [simp]:
  shows "pderivs_lang UNIV1 One = {}"
unfolding UNIV1_def pderivs_lang_def by (auto split: if_splits)

lemma pderivs_lang_Atom [simp]:
  shows "pderivs_lang UNIV1 (Atom c) = {One}"
unfolding UNIV1_def pderivs_lang_def 
apply(auto)
apply(frule rev_subsetD)
apply(rule pderivs_Atom)
apply(simp)
apply(case_tac xa)
apply(auto split: if_splits)
done

lemma pderivs_lang_Plus [simp]:
  shows "pderivs_lang UNIV1 (Plus r1 r2) = pderivs_lang UNIV1 r1  pderivs_lang UNIV1 r2"
unfolding UNIV1_def pderivs_lang_def by auto


text ‹Non-empty suffixes of a string (needed for the cases of @{const Times} and @{const Star} below)›

definition
  "PSuf s  {v. v  []  (u. u @ v = s)}"

lemma PSuf_snoc:
  shows "PSuf (s @ [c]) = (PSuf s) @@ {[c]}  {[c]}"
unfolding PSuf_def conc_def
by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv)

lemma PSuf_Union:
  shows "(v  PSuf s @@ {[c]}. f v) = (v  PSuf s. f (v @ [c]))"
by (auto simp add: conc_def)

lemma pderivs_lang_snoc:
  shows "pderivs_lang (PSuf s @@ {[c]}) r = (pderiv_set c (pderivs_lang (PSuf s) r))"
unfolding pderivs_lang_def
by (simp add: PSuf_Union pderivs_snoc)

lemma pderivs_Times:
  shows "pderivs s (Times r1 r2)  Timess (pderivs s r1) r2  (pderivs_lang (PSuf s) r2)"
proof (induct s rule: rev_induct)
  case (snoc c s)
  have ih: "pderivs s (Times r1 r2)  Timess (pderivs s r1) r2  (pderivs_lang (PSuf s) r2)" 
    by fact
  have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" 
    by (simp add: pderivs_snoc)
  also have "  pderiv_set c (Timess (pderivs s r1) r2  (pderivs_lang (PSuf s) r2))"
    using ih by fastforce
  also have " = pderiv_set c (Timess (pderivs s r1) r2)  pderiv_set c (pderivs_lang (PSuf s) r2)"
    by (simp)
  also have " = pderiv_set c (Timess (pderivs s r1) r2)  pderivs_lang (PSuf s @@ {[c]}) r2"
    by (simp add: pderivs_lang_snoc)
  also 
  have "  pderiv_set c (Timess (pderivs s r1) r2)  pderiv c r2  pderivs_lang (PSuf s @@ {[c]}) r2"
    by auto
  also 
  have "  Timess (pderiv_set c (pderivs s r1)) r2  pderiv c r2  pderivs_lang (PSuf s @@ {[c]}) r2"
    by (auto simp add: if_splits)
  also have " = Timess (pderivs (s @ [c]) r1) r2  pderiv c r2  pderivs_lang (PSuf s @@ {[c]}) r2"
    by (simp add: pderivs_snoc)
  also have "  Timess (pderivs (s @ [c]) r1) r2  pderivs_lang (PSuf (s @ [c])) r2"
    unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)  
  finally show ?case .
qed (simp) 

lemma pderivs_lang_Times_aux1:
  assumes a: "s  UNIV1"
  shows "pderivs_lang (PSuf s) r  pderivs_lang UNIV1 r"
using a unfolding UNIV1_def PSuf_def pderivs_lang_def by auto

lemma pderivs_lang_Times_aux2:
  assumes a: "s  UNIV1"
  shows "Timess (pderivs s r1) r2  Timess (pderivs_lang UNIV1 r1) r2"
using a unfolding pderivs_lang_def by auto

lemma pderivs_lang_Times:
  shows "pderivs_lang UNIV1 (Times r1 r2)  Timess (pderivs_lang UNIV1 r1) r2  pderivs_lang UNIV1 r2"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Times)
using pderivs_lang_Times_aux1 pderivs_lang_Times_aux2
apply auto
apply blast
done

lemma pderivs_Star:
  assumes a: "s  []"
  shows "pderivs s (Star r)  Timess (pderivs_lang (PSuf s) r) (Star r)"
using a
proof (induct s rule: rev_induct)
  case (snoc c s)
  have ih: "s  []  pderivs s (Star r)  Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
  { assume asm: "s  []"
    have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
    also have "  pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
      using ih[OF asm] by fast
    also have "  Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r)  pderiv c (Star r)"
      by (auto split: if_splits)
    also have "  Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)  (Timess (pderiv c r) (Star r))"
      by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
         (auto simp add: pderivs_lang_def)
    also have " = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
      by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
    finally have ?case .
  }
  moreover
  { assume asm: "s = []"
    then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
  }
  ultimately show ?case by blast
qed (simp)

lemma pderivs_lang_Star:
  shows "pderivs_lang UNIV1 (Star r)  Timess (pderivs_lang UNIV1 r) (Star r)"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Star)
apply(simp add: UNIV1_def)
apply(simp add: UNIV1_def PSuf_def)
apply(auto simp add: pderivs_lang_def)
done

lemma finite_Timess [simp]:
  assumes a: "finite A"
  shows "finite (Timess A r)"
using a by auto

lemma finite_pderivs_lang_UNIV1:
  shows "finite (pderivs_lang UNIV1 r)"
apply(induct r)
apply(simp_all add: 
  finite_subset[OF pderivs_lang_Times]
  finite_subset[OF pderivs_lang_Star])
done
    
lemma pderivs_lang_UNIV:
  shows "pderivs_lang UNIV r = pderivs [] r  pderivs_lang UNIV1 r"
unfolding UNIV1_def pderivs_lang_def
by blast

lemma finite_pderivs_lang_UNIV:
  shows "finite (pderivs_lang UNIV r)"
unfolding pderivs_lang_UNIV
by (simp add: finite_pderivs_lang_UNIV1)

lemma finite_pderivs_lang:
  shows "finite (pderivs_lang A r)"
by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)


text‹The following relationship between the alphabetic width of regular expressions
(called awidth› below) and the number of partial derivatives was proved
by Antimirov~\cite{Antimirov95} and formalized by Max Haslbeck.›

fun awidth :: "'a rexp  nat" where
"awidth Zero = 0" |
"awidth One = 0" |
"awidth (Atom a) = 1" |
"awidth (Plus r1 r2) = awidth r1 + awidth r2" |
"awidth (Times r1 r2) = awidth r1 + awidth r2" |
"awidth (Star r1) = awidth r1"

lemma card_Timess_pderivs_lang_le:
  "card (Timess (pderivs_lang A r) s)  card (pderivs_lang A r)"
  using finite_pderivs_lang unfolding Timess_eq_image by (rule card_image_le)

lemma card_pderivs_lang_UNIV1_le_awidth: "card (pderivs_lang UNIV1 r)  awidth r"
proof (induction r)
  case (Plus r1 r2)
  have "card (pderivs_lang UNIV1 (Plus r1 r2)) = card (pderivs_lang UNIV1 r1  pderivs_lang UNIV1 r2)" by simp
  also have "  card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
    by(simp add: card_Un_le)
  also have "  awidth (Plus r1 r2)" using Plus.IH by simp
  finally show ?case .
next
  case (Times r1 r2)
  have "card (pderivs_lang UNIV1 (Times r1 r2))  card (Timess (pderivs_lang UNIV1 r1) r2  pderivs_lang UNIV1 r2)"
    by (simp add: card_mono finite_pderivs_lang pderivs_lang_Times)
  also have "  card (Timess (pderivs_lang UNIV1 r1) r2) + card (pderivs_lang UNIV1 r2)"
    by (simp add: card_Un_le)
  also have "  card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
    by (simp add: card_Timess_pderivs_lang_le)
  also have "  awidth (Times r1 r2)" using Times.IH by simp
  finally show ?case .
next
  case (Star r)
  have "card (pderivs_lang UNIV1 (Star r))  card (Timess (pderivs_lang UNIV1 r) (Star r))"
    by (simp add: card_mono finite_pderivs_lang pderivs_lang_Star)
  also have "  card (pderivs_lang UNIV1 r)" by (rule card_Timess_pderivs_lang_le)
  also have "  awidth (Star r)" by (simp add: Star.IH)
  finally show ?case .
qed (auto)

text‹Antimirov's Theorem 3.4:›
theorem card_pderivs_lang_UNIV_le_awidth: "card (pderivs_lang UNIV r)  awidth r + 1"
proof -
  have "card (insert r (pderivs_lang UNIV1 r))  Suc (card (pderivs_lang UNIV1 r))"
    by(auto simp: card_insert_if[OF finite_pderivs_lang_UNIV1])
  also have "  Suc (awidth r)" by(simp add: card_pderivs_lang_UNIV1_le_awidth)
  finally show ?thesis by(simp add: pderivs_lang_UNIV)
qed 

text‹Antimirov's Corollary 3.5:›
corollary card_pderivs_lang_le_awidth: "card (pderivs_lang A r)  awidth r + 1"
by(rule order_trans[OF
  card_mono[OF finite_pderivs_lang_UNIV pderivs_lang_subset[OF subset_UNIV]]
  card_pderivs_lang_UNIV_le_awidth])

end

Theory pEquivalence_Checking

section ‹Deciding Regular Expression Equivalence (2)›

theory pEquivalence_Checking
imports Equivalence_Checking Derivatives
begin

text‹\noindent Similar to theory @{theory "Regular-Sets.Equivalence_Checking"}, but
with partial derivatives instead of derivatives.›

text‹Lifting many notions to sets:›

definition "Lang R == UN r:R. lang r"
definition "Nullable R == EX r:R. nullable r"
definition "Pderiv a R == UN r:R. pderiv a r"
definition "Atoms R == UN r:R. atoms r"

(* FIXME: rename pderiv_set in Derivatives to Pderiv and drop the one above *)

lemma Atoms_pderiv: "Atoms(pderiv a r)  atoms r"
apply (induct r)
apply (auto simp: Atoms_def UN_subset_iff)
apply (fastforce)+
done

lemma Atoms_Pderiv: "Atoms(Pderiv a R)  Atoms R"
using Atoms_pderiv by (fastforce simp: Atoms_def Pderiv_def)

lemma pderiv_no_occurrence: 
  "x  atoms r  pderiv x r = {}"
by (induct r) auto

lemma Pderiv_no_occurrence: 
  "x  Atoms R  Pderiv x R = {}"
by(auto simp:pderiv_no_occurrence Atoms_def Pderiv_def)

lemma Deriv_Lang: "Deriv c (Lang R) = Lang (Pderiv c R)"
by(auto simp: Deriv_pderiv Pderiv_def Lang_def)

lemma Nullable_pderiv[simp]: "Nullable(pderivs w r) = (w : lang r)"
apply(induction w arbitrary: r)
apply (simp add: Nullable_def nullable_iff singleton_iff)
using eqset_imp_iff[OF Deriv_pderiv[where 'a = 'a]]
apply (simp add: Nullable_def Deriv_def)
done


type_synonym 'a Rexp_pair = "'a rexp set * 'a rexp set"
type_synonym 'a Rexp_pairs = "'a Rexp_pair list"

definition is_Bisimulation :: "'a list  'a Rexp_pairs  bool"
where
"is_Bisimulation as ps =
  ((R,S) set ps. Atoms R  Atoms S  set as 
    (Nullable R  Nullable S) 
    (aset as. (Pderiv a R, Pderiv a S)  set ps))"

lemma Bisim_Lang_eq:
assumes Bisim: "is_Bisimulation as ps"
assumes "(R, S)  set ps"
shows "Lang R = Lang S"
proof -
  define ps' where "ps' = ({}, {}) # ps"
  from Bisim have Bisim': "is_Bisimulation as ps'"
    by (fastforce simp: ps'_def is_Bisimulation_def UN_subset_iff Pderiv_def Atoms_def)
  let ?R = "λK L. ((R,S)set ps'. K = Lang R  L = Lang S)"
  show ?thesis
  proof (rule language_coinduct[where R="?R"])
    from (R,S)  set ps
    have "(R,S)  set ps'" by (auto simp: ps'_def)
    thus "?R (Lang R) (Lang S)" by auto
  next
    fix K L assume "?R K L"
    then obtain R S where rs: "(R, S)  set ps'"
      and KL: "K = Lang R" "L = Lang S" by auto
    with Bisim' have "Nullable R  Nullable S"
      by (auto simp: is_Bisimulation_def)
    thus "[]  K  []  L"
      by (auto simp: nullable_iff KL Nullable_def Lang_def)
    fix a
    show "?R (Deriv a K) (Deriv a L)"
    proof cases
      assume "a  set as"
      with rs Bisim'
      have "(Pderiv a R, Pderiv a S)  set ps'"
        by (auto simp: is_Bisimulation_def)
      thus ?thesis by (fastforce simp: KL Deriv_Lang)
    next
      assume "a  set as"
      with Bisim' rs
      have "a  Atoms R  Atoms S"
        by (fastforce simp: is_Bisimulation_def UN_subset_iff)
      then have "Pderiv a R = {}" "Pderiv a S = {}"
        by (metis Pderiv_no_occurrence Un_iff)+
      then have "Deriv a K = Lang {}" "Deriv a L = Lang {}" 
        unfolding KL Deriv_Lang by auto
      thus ?thesis by (auto simp: ps'_def)
    qed
  qed
qed


subsection ‹Closure computation›

fun test :: "'a Rexp_pairs * 'a Rexp_pairs  bool" where
"test (ws, ps) = (case ws of []   False | (R,S)#_  Nullable R = Nullable S)"

fun step :: "'a list 
  'a Rexp_pairs * 'a Rexp_pairs  'a Rexp_pairs * 'a Rexp_pairs"
where "step as (ws,ps) =
    (let
      (R,S) = hd ws;
      ps' = (R,S) # ps;
      succs = map (λa. (Pderiv a R, Pderiv a S)) as;
      new = filter (λp. p  set ps  set ws) succs
    in (remdups new @ tl ws, ps'))"

definition closure ::
  "'a list  'a Rexp_pairs * 'a Rexp_pairs
    ('a Rexp_pairs * 'a Rexp_pairs) option" where
"closure as = while_option test (step as)"

definition pre_Bisim :: "'a list  'a rexp set  'a rexp set 
 'a Rexp_pairs * 'a Rexp_pairs  bool"
where
"pre_Bisim as R S = (λ(ws,ps).
 ((R,S)  set ws  set ps) 
 ((R,S) set ws  set ps. Atoms R  Atoms S  set as) 
 ((R,S) set ps. (Nullable R  Nullable S) 
   (aset as. (Pderiv a R, Pderiv a S)  set ps  set ws)))"

lemma step_set_eq: " test (ws,ps); step as (ws,ps) = (ws',ps') 
   set ws'  set ps' =
     set ws  set ps
      (aset as. {(Pderiv a (fst(hd ws)), Pderiv a (snd(hd ws)))})"
by(auto split: list.splits)

theorem closure_sound:
assumes result: "closure as ([(R,S)],[]) = Some([],ps)"
and atoms: "Atoms R  Atoms S  set as"
shows "Lang R = Lang S"
proof-
  { fix st
    have "pre_Bisim as R S st  test st  pre_Bisim as R S (step as st)"
    unfolding pre_Bisim_def
    proof(split prod.splits, elim case_prodE conjE, intro allI impI conjI, goal_cases)
      case 1 thus ?case by(auto split: list.splits)
    next
      case prems: (2 ws ps ws' ps')
      note prems(2)[simp]
      from ‹test st obtain wstl R S where [simp]: "ws = (R,S)#wstl"
        by (auto split: list.splits)
      from ‹step as st = (ws',ps') obtain P where [simp]: "ps' = (R,S) # ps"
        and [simp]: "ws' = remdups(filter P (map (λa. (Pderiv a R, Pderiv a S)) as)) @ wstl"
        by auto
      have "(R',S')set wstl  set ps'. Atoms R'  Atoms S'  set as"
        using prems(4) by auto
      moreover
      have "aset as. Atoms(Pderiv a R)  Atoms(Pderiv a S)  set as"
        using prems(4) by simp (metis (lifting) Atoms_Pderiv order_trans)
      ultimately show ?case by simp blast
    next
      case 3 thus ?case
        apply (clarsimp simp: image_iff split: prod.splits list.splits)
        by hypsubst_thin metis
    qed
  }
  moreover
  from atoms
  have "pre_Bisim as R S ([(R,S)],[])" by (simp add: pre_Bisim_def)
  ultimately have pre_Bisim_ps: "pre_Bisim as R S ([],ps)"
    by (rule while_option_rule[OF _ result[unfolded closure_def]])
  then have "is_Bisimulation as ps" "(R,S)  set ps"
    by (auto simp: pre_Bisim_def is_Bisimulation_def)
  thus "Lang R = Lang S" by (rule Bisim_Lang_eq)
qed


subsection ‹The overall procedure›

definition check_eqv :: "'a rexp  'a rexp  bool"
where
"check_eqv r s =
  (case closure (add_atoms r (add_atoms s [])) ([({r}, {s})], []) of
     Some([],_)  True | _  False)"

lemma soundness: assumes "check_eqv r s" shows "lang r = lang s"
proof -
  let ?as = "add_atoms r (add_atoms s [])"
  obtain ps where 1: "closure ?as ([({r},{s})],[]) = Some([],ps)"
    using assms by (auto simp: check_eqv_def split:option.splits list.splits)
  then have "lang r = lang s"
    by(rule closure_sound[of _ "{r}" "{s}", simplified Lang_def, simplified])
      (auto simp: set_add_atoms Atoms_def)
  thus "lang r = lang s" by simp
qed

text‹Test:›
lemma "check_eqv
  (Plus One (Times (Atom 0) (Star(Atom 0))))
  (Star(Atom(0::nat)))"
by eval


subsection "Termination and Completeness"

definition PDERIVS :: "'a rexp set => 'a rexp set" where
"PDERIVS R = (UN r:R. pderivs_lang UNIV r)"

lemma PDERIVS_incr[simp]: "R  PDERIVS R"
apply(auto simp add: PDERIVS_def pderivs_lang_def)
by (metis pderivs.simps(1) insertI1)

lemma Pderiv_PDERIVS: assumes "R'  PDERIVS R" shows "Pderiv a R'  PDERIVS R"
proof
  fix r assume "r : Pderiv a R'"
  then obtain r' where "r' : R'" "r : pderiv a r'" by(auto simp: Pderiv_def)
  from r' : R' R'  PDERIVS R obtain s w where "s : R" "r' : pderivs w s"
    by(auto simp: PDERIVS_def pderivs_lang_def)
  hence "r  pderivs (w @ [a]) s" using r : pderiv a r' by(auto simp add:pderivs_snoc)
  thus "r : PDERIVS R" using s : R by(auto simp: PDERIVS_def pderivs_lang_def)
qed

lemma finite_PDERIVS: "finite R  finite(PDERIVS R)"
by(simp add: PDERIVS_def finite_pderivs_lang_UNIV)

lemma closure_Some: assumes "finite R0" "finite S0" shows "p. closure as ([(R0,S0)],[]) = Some p"
proof(unfold closure_def)
  let ?Inv = "%(ws,bs).  distinct ws  (ALL (R,S) : set ws. R  PDERIVS R0  S  PDERIVS S0  (R,S)  set bs)"
  let ?m1 = "%bs. Pow(PDERIVS R0) × Pow(PDERIVS S0) - set bs"
  let ?m2 = "%(ws,bs). card(?m1 bs)"
  have Inv0: "?Inv ([(R0, S0)], [])" by simp
  { fix s assume "test s" "?Inv s"
    obtain ws bs where [simp]: "s = (ws,bs)" by fastforce
    from ‹test s obtain R S ws' where [simp]: "ws = (R,S)#ws'"
      by(auto split: prod.splits list.splits)
    let ?bs' = "(R,S) # bs"
    let ?succs = "map (λa. (Pderiv a R, Pderiv a S)) as"
    let ?new = "filter (λp. p  set bs  set ws) ?succs"
    let ?ws' = "remdups ?new @ ws'"
    have *: "?Inv (step as s)"
    proof -
      from ?Inv s have "distinct ?ws'" by auto
      have "ALL (R,S) : set ?ws'. R  PDERIVS R0  S  PDERIVS S0  (R,S)  set ?bs'" using ?Inv s
        by(simp add: Ball_def image_iff) (metis Pderiv_PDERIVS)
      with ‹distinct ?ws' show ?thesis by(simp)
    qed
    have "?m2(step as s) < ?m2 s"
    proof -
      have "finite(?m1 bs)"
        by(metis assms finite_Diff finite_PDERIVS finite_cartesian_product finite_Pow_iff)
      moreover have "?m2(step as s) < ?m2 s" using ?Inv s
        by(auto intro: psubset_card_mono[OF ‹finite(?m1 bs)])
      then show ?thesis using ?Inv s by simp
    qed
    note * and this
  } note step = this
  show "p. while_option test (step as) ([(R0, S0)], []) = Some p" 
    by(rule measure_while_option_Some [where P = ?Inv and f = ?m2, OF _ Inv0])(simp add: step)
qed

theorem closure_Some_Inv: assumes "closure as ([({r},{s})],[]) = Some p"
shows "(R,S)set(fst p). w. R = pderivs w r  S = pderivs w s" (is "?Inv p")
proof-
  from assms have 1: "while_option test (step as) ([({r},{s})],[]) = Some p"
    by(simp add: closure_def)
  have Inv0: "?Inv ([({r},{s})],[])" by simp (metis pderivs.simps(1))
  { fix p assume "?Inv p" "test p"
    obtain ws bs where [simp]: "p = (ws,bs)" by fastforce
    from ‹test p obtain R S ws' where [simp]: "ws = (R,S)#ws'"
      by(auto split: prod.splits list.splits)
    let ?succs = "map (λa. (Pderiv a R, Pderiv a S)) as"
    let ?new = "filter (λp. p  set bs  set ws) ?succs"
    let ?ws' = "remdups ?new @ ws'"
    from ?Inv p obtain w where [simp]: "R = pderivs w r" "S = pderivs w s"
      by auto
    { fix x assume "x : set as"
      have "EX w. Pderiv x R = pderivs w r  Pderiv x S = pderivs w s"
        by(rule_tac x="w@[x]" in exI)(simp add: pderivs_append Pderiv_def)
    }
    with ?Inv p have "?Inv (step as p)" by auto
  } note Inv_step = this
  show ?thesis
    apply(rule while_option_rule[OF _ 1])
    apply(erule (1) Inv_step)
    apply(rule Inv0)
    done
qed

lemma closure_complete: assumes "lang r = lang s"
 shows "EX bs. closure as ([({r},{s})],[]) = Some([],bs)" (is ?C)
proof(rule ccontr)
  assume "¬ ?C"
  then obtain R S ws bs
    where cl: "closure as ([({r},{s})],[]) = Some((R,S)#ws,bs)"
    using closure_Some[of "{r}" "{s}", simplified]
    by (metis (hide_lams, no_types) list.exhaust prod.exhaust)
  from assms closure_Some_Inv[OF this]
    while_option_stop[OF cl[unfolded closure_def]]
  show "False" by auto
qed

corollary completeness: "lang r = lang s  check_eqv r s"
by(auto simp add: check_eqv_def dest!: closure_complete
      split: option.split list.split)

end

Theory Regular_Exp2

(* Author: Tobias Nipkow *)

section "Extended Regular Expressions"

theory Regular_Exp2
imports Regular_Set
begin

datatype (atoms: 'a) rexp =
  is_Zero: Zero |
  is_One: One |
  Atom 'a |
  Plus "('a rexp)" "('a rexp)" |
  Times "('a rexp)" "('a rexp)" |
  Star "('a rexp)" |
  Not "('a rexp)" |
  Inter "('a rexp)" "('a rexp)"

context
fixes S :: "'a set"
begin

primrec lang :: "'a rexp => 'a lang" where
"lang Zero = {}" |
"lang One = {[]}" |
"lang (Atom a) = {[a]}" |
"lang (Plus r s) = (lang r) Un (lang s)" |
"lang (Times r s) = conc (lang r) (lang s)" |
"lang (Star r) = star(lang r)" |
"lang (Not r) = lists S - lang r" |
"lang (Inter r s) = (lang r Int lang s)"

end

lemma lang_subset_lists: "atoms r  S  lang S r  lists S"
by(induction r)(auto simp: conc_subset_lists star_subset_lists)

primrec nullable :: "'a rexp  bool" where
"nullable Zero = False" |
"nullable One = True" |
"nullable (Atom c) = False" |
"nullable (Plus r1 r2) = (nullable r1  nullable r2)" |
"nullable (Times r1 r2) = (nullable r1  nullable r2)" |
"nullable (Star r) = True" |
"nullable (Not r) = (¬ (nullable r))" |
"nullable (Inter r s) = (nullable r  nullable s)"

lemma nullable_iff: "nullable r  []  lang S r"
by (induct r) (auto simp add: conc_def split: if_splits)

end

Theory Equivalence_Checking2

section ‹Deciding Equivalence of Extended Regular Expressions›

theory Equivalence_Checking2
imports Regular_Exp2 "HOL-Library.While_Combinator"
begin

subsection ‹Term ordering›

fun le_rexp :: "nat rexp  nat rexp  bool"
where
  "le_rexp Zero _ = True"
| "le_rexp _ Zero = False"
| "le_rexp One _ = True"
| "le_rexp _ One = False"
| "le_rexp (Atom a) (Atom b) = (a <= b)"
| "le_rexp (Atom _) _ = True"
| "le_rexp _ (Atom _) = False"
| "le_rexp (Star r) (Star s) = le_rexp r s"
| "le_rexp (Star _) _ = True"
| "le_rexp _ (Star _) = False"
| "le_rexp (Not r) (Not s) = le_rexp r s"
| "le_rexp (Not _) _ = True"
| "le_rexp _ (Not _) = False"
| "le_rexp (Plus r r') (Plus s s') =
    (if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Plus _ _) _ = True"
| "le_rexp _ (Plus _ _) = False"
| "le_rexp (Times r r') (Times s s') =
    (if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Times _ _) _ = True"
| "le_rexp _ (Times _ _) = False"
| "le_rexp (Inter r r') (Inter s s') =
    (if r = s then le_rexp r' s' else le_rexp r s)"

subsection ‹Normalizing operations›

text ‹associativity, commutativity, idempotence, zero›

fun nPlus :: "nat rexp  nat rexp  nat rexp"
where
  "nPlus Zero r = r"
| "nPlus r Zero = r"
| "nPlus (Plus r s) t = nPlus r (nPlus s t)"
| "nPlus r (Plus s t) =
     (if r = s then (Plus s t)
     else if le_rexp r s then Plus r (Plus s t)
     else Plus s (nPlus r t))"
| "nPlus r s =
     (if r = s then r
      else if le_rexp r s then Plus r s
      else Plus s r)"

lemma lang_nPlus[simp]: "lang S (nPlus r s) = lang S (Plus r s)"
by (induct r s rule: nPlus.induct) auto

text ‹associativity, zero, one›

fun nTimes :: "nat rexp  nat rexp  nat rexp"
where
  "nTimes Zero _ = Zero"
| "nTimes _ Zero = Zero"
| "nTimes One r = r"
| "nTimes r One = r"
| "nTimes (Times r s) t = Times r (nTimes s t)"
| "nTimes r s = Times r s"

lemma lang_nTimes[simp]: "lang S (nTimes r s) = lang S (Times r s)"
by (induct r s rule: nTimes.induct) (auto simp: conc_assoc)

text ‹more optimisations:›

fun nInter :: "nat rexp  nat rexp  nat rexp"
where
  "nInter Zero _ = Zero"
| "nInter _ Zero = Zero"
| "nInter r s = Inter r s"

lemma lang_nInter[simp]: "lang S (nInter r s) = lang S (Inter r s)"
by (induct r s rule: nInter.induct) (auto)

primrec norm :: "nat rexp  nat rexp"
where
  "norm Zero = Zero"
| "norm One = One"
| "norm (Atom a) = Atom a"
| "norm (Plus r s) = nPlus (norm r) (norm s)"
| "norm (Times r s) = nTimes (norm r) (norm s)"
| "norm (Star r) = Star (norm r)"
| "norm (Not r) = Not (norm r)"
| "norm (Inter r1 r2) = nInter (norm r1) (norm r2)"

lemma lang_norm[simp]: "lang S (norm r) = lang S r"
by (induct r) auto


subsection ‹Derivative›

primrec nderiv :: "nat  nat rexp  nat rexp"
where
  "nderiv _ Zero = Zero"
| "nderiv _ One = Zero"
| "nderiv a (Atom b) = (if a = b then One else Zero)"
| "nderiv a (Plus r s) = nPlus (nderiv a r) (nderiv a s)"
| "nderiv a (Times r s) =
    (let r's = nTimes (nderiv a r) s
     in if nullable r then nPlus r's (nderiv a s) else r's)"
| "nderiv a (Star r) = nTimes (nderiv a r) (Star r)"
| "nderiv a (Not r) = Not (nderiv a r)"
| "nderiv a (Inter r1 r2) = nInter (nderiv a r1) (nderiv a r2)"

lemma lang_nderiv: "a:S  lang S (nderiv a r) = Deriv a (lang S r)"
by (induct r) (auto simp: Let_def nullable_iff[where S=S])

lemma atoms_nPlus[simp]: "atoms (nPlus r s) = atoms r  atoms s"
by (induct r s rule: nPlus.induct) auto

lemma atoms_nTimes: "atoms (nTimes r s)  atoms r  atoms s"
by (induct r s rule: nTimes.induct) auto

lemma atoms_nInter: "atoms (nInter r s)  atoms r  atoms s"
by (induct r s rule: nInter.induct) auto

lemma atoms_norm: "atoms (norm r)  atoms r"
by (induct r) (auto dest!:subsetD[OF atoms_nTimes]subsetD[OF atoms_nInter])

lemma atoms_nderiv: "atoms (nderiv a r)  atoms r"
by (induct r) (auto simp: Let_def dest!:subsetD[OF atoms_nTimes]subsetD[OF atoms_nInter])


subsection ‹Bisimulation between languages and regular expressions›

context
fixes S :: "'a set"
begin

coinductive bisimilar :: "'a lang  'a lang  bool" where
"K  lists S  L  lists S
  ([]  K  []  L) 
  (x. x:S  bisimilar (Deriv x K) (Deriv x L))
  bisimilar K L"

lemma equal_if_bisimilar:
assumes "K  lists S" "L  lists S" "bisimilar K L" shows "K = L"
proof (rule set_eqI)
  fix w
  from assms show "w  K  w  L"
  proof (induction w arbitrary: K L)
    case Nil thus ?case by (auto elim: bisimilar.cases)
  next
    case (Cons a w K L)
    show ?case
    proof cases
      assume "a : S"
      with ‹bisimilar K L have "bisimilar (Deriv a K) (Deriv a L)"
        by (auto elim: bisimilar.cases)
      then have "w  Deriv a K  w  Deriv a L"
        by (metis Cons.IH bisimilar.cases)
      thus ?case by (auto simp: Deriv_def)
    next
      assume "a  S"
      thus ?case using Cons.prems by auto
    qed
  qed
qed

lemma language_coinduct:
fixes R (infixl "" 50)
assumes "K L. K  L  K  lists S  L  lists S"
assumes "K  L"
assumes "K L. K  L  ([]  K  []  L)"
assumes "K L x. K  L  x : S  Deriv x K  Deriv x L"
shows "K = L"
apply (rule equal_if_bisimilar)
apply (metis assms(1) assms(2))
apply (metis assms(1) assms(2))
apply (rule bisimilar.coinduct[of R, OF K  L])
apply (auto simp: assms)
done

end

type_synonym rexp_pair = "nat rexp * nat rexp"
type_synonym rexp_pairs = "rexp_pair list"

definition is_bisimulation :: "nat list  rexp_pairs  bool"
where
"is_bisimulation as ps =
  ((r,s) set ps. (atoms r  atoms s  set as)  (nullable r  nullable s) 
    (aset as. (nderiv a r, nderiv a s)  set ps))"


lemma bisim_lang_eq:
assumes bisim: "is_bisimulation as ps"
assumes "(r, s)  set ps"
shows "lang (set as) r = lang (set as) s"
proof -
  let ?R = "λK L. ((r,s)set ps. K = lang (set as) r  L = lang (set as) s)"
  show ?thesis
  proof (rule language_coinduct[where R="?R" and S = "set as"])
    from (r, s)  set ps show "?R (lang (set as) r) (lang (set as) s)"
      by auto
  next
    fix K L assume "?R K L"
    then obtain r s where rs: "(r, s)  set ps"
      and KL: "K = lang (set as) r" "L = lang (set as) s" by auto
    with bisim have "nullable r  nullable s"
      by (auto simp: is_bisimulation_def)
    thus "[]  K  []  L" by (auto simp: nullable_iff[where S="set as"] KL)
  txt‹next case, but shared context›
    from bisim rs KL lang_subset_lists[of _ "set as"]
    show "K  lists (set as)  L  lists (set as)"
      unfolding is_bisimulation_def by blast
  txt‹next case, but shared context›
    fix a assume "a  set as"
    with rs bisim
    have "(nderiv a r, nderiv a s)  set ps"
      by (auto simp: is_bisimulation_def)
    thus "?R (Deriv a K) (Deriv a L)" using a  set as
      by (force simp: KL lang_nderiv)
  qed
qed

subsection ‹Closure computation›

fun test :: "rexp_pairs * rexp_pairs  bool"
where "test (ws, ps) = (case ws of []   False | (p,q)#_  nullable p = nullable q)"

fun step :: "nat list  rexp_pairs * rexp_pairs  rexp_pairs * rexp_pairs"
where "step as (ws,ps) =
    (let 
      (r, s) = hd ws;
      ps' = (r, s) # ps;
      succs = map (λa. (nderiv a r, nderiv a s)) as;
      new = filter (λp. p  set ps'  set ws) succs
    in (new @ tl ws, ps'))"

definition closure ::
  "nat list  rexp_pairs * rexp_pairs
    (rexp_pairs * rexp_pairs) option" where
"closure as = while_option test (step as)"

definition pre_bisim :: "nat list  nat rexp  nat rexp 
 rexp_pairs * rexp_pairs  bool"
where
"pre_bisim as r s = (λ(ws,ps).
 ((r, s)  set ws  set ps) 
 ((r,s) set ws  set ps. atoms r  atoms s  set as) 
 ((r,s) set ps. (nullable r  nullable s) 
   (aset as. (nderiv a r, nderiv a s)  set ps  set ws)))"

theorem closure_sound:
assumes result: "closure as ([(r,s)],[]) = Some([],ps)"
and atoms: "atoms r  atoms s  set as"
shows "lang (set as) r = lang (set as) s"
proof-
  { fix st have "pre_bisim as r s st  test st  pre_bisim as r s (step as st)"
      unfolding pre_bisim_def
      by (cases st) (auto simp: split_def split: list.splits prod.splits
        dest!: subsetD[OF atoms_nderiv]) }
  moreover
  from atoms
  have "pre_bisim as r s ([(r,s)],[])" by (simp add: pre_bisim_def)
  ultimately have pre_bisim_ps: "pre_bisim as r s ([],ps)"
    by (rule while_option_rule[OF _ result[unfolded closure_def]])
  then have "is_bisimulation as ps" "(r, s)  set ps"
    by (auto simp: pre_bisim_def is_bisimulation_def)
  thus "lang (set as) r = lang (set as) s" by (rule bisim_lang_eq)
qed


subsection ‹The overall procedure›

primrec add_atoms :: "nat rexp  nat list  nat list"
where
  "add_atoms Zero = id"
| "add_atoms One = id"
| "add_atoms (Atom a) = List.insert a"
| "add_atoms (Plus r s) = add_atoms s o add_atoms r"
| "add_atoms (Times r s) = add_atoms s o add_atoms r"
| "add_atoms (Not r) = add_atoms r"
| "add_atoms (Inter r s) = add_atoms s o add_atoms r"
| "add_atoms (Star r) = add_atoms r"

lemma set_add_atoms: "set (add_atoms r as) = atoms r  set as"
by (induct r arbitrary: as) auto

definition check_eqv :: "nat list  nat rexp  nat rexp  bool"
where
"check_eqv as r s  set(add_atoms r (add_atoms s []))  set as 
  (case closure as ([(norm r, norm s)], []) of
     Some([],_)  True | _  False)"

lemma soundness: 
assumes "check_eqv as r s" shows "lang (set as) r = lang (set as) s"
proof -
  obtain ps where cl: "closure as ([(norm r,norm s)],[]) = Some([],ps)"
    and at: "atoms r  atoms s  set as"
    using assms
    by (auto simp: check_eqv_def set_add_atoms split:option.splits list.splits)
  hence "atoms(norm r)  atoms(norm s)  set as"
    using atoms_norm by blast
  hence "lang (set as) (norm r) = lang (set as) (norm s)"
    by (rule closure_sound[OF cl])
  thus "lang (set as) r = lang (set as) s" by simp
qed

lemma "check_eqv [0] (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))"
by eval

lemma "check_eqv [0,1] (Not(Atom 0))
  (Plus One (Times (Plus (Atom 1) (Times (Atom 0) (Plus (Atom 0) (Atom 1))))
                   (Star(Plus (Atom 0) (Atom 1)))))"
by eval

lemma "check_eqv [0] (Atom 0) (Inter (Star (Atom 0)) (Atom 0))"
by eval

end