Session Myhill-Nerode

Theory Folds

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Folds
imports "Regular-Sets.Regular_Exp"
begin

section ‹``Summation'' for regular expressions›

text ‹
  To obtain equational system out of finite set of equivalence classes, a fold operation
  on finite sets folds› is defined. The use of SOME› makes folds›
  more robust than the fold› in the Isabelle library. The expression folds f›
  makes sense when f› is not associative› and commutitive›,
  while fold f› does not.  
›


definition 
  folds :: "('a  'b  'b)  'b  'a set  'b"
where
  "folds f z S  SOME x. fold_graph f z S x"

text ‹Plus-combination for a set of regular expressions›

abbreviation
  Setalt :: "'a rexp set  'a rexp" ("_" [1000] 999) 
where
  "A  folds Plus Zero A"

text ‹
  For finite sets, @{term Setalt} is preserved under @{term lang}.
›

lemma folds_plus_simp [simp]:
  fixes rs::"('a rexp) set"
  assumes a: "finite rs"
  shows "lang (rs) =  (lang ` rs)"
unfolding folds_def
apply(rule set_eqI)
apply(rule someI2_ex)
apply(rule_tac finite_imp_fold_graph[OF a])
apply(erule fold_graph.induct)
apply(auto)
done

end

Theory Myhill_1

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Myhill_1
imports Folds
        "HOL-Library.While_Combinator" 
begin

section ‹First direction of MN: finite partition ⇒ regular language›

notation 
  conc (infixr "" 100) and
  star ("_" [101] 102)

lemma Pair_Collect [simp]:
  shows "(x, y)  {(x, y). P x y}  P x y"
by simp

text ‹Myhill-Nerode relation›

definition
  str_eq :: "'a lang  ('a list × 'a list) set" ("_" [100] 100)
where
  "A  {(x, y).  (z. x @ z  A  y @ z  A)}"

abbreviation
  str_eq_applied :: "'a list  'a lang  'a list  bool" ("_ _ _")
where
  "x A y  (x, y)  A"

lemma str_eq_conv_Derivs:
  "str_eq A = {(u,v). Derivs u A = Derivs v A}"
  by (auto simp: str_eq_def Derivs_def)  

definition 
  finals :: "'a lang  'a lang set"
where
  "finals A  {A `` {s} | s . s  A}"

lemma lang_is_union_of_finals: 
  shows "A = (finals A)"
unfolding finals_def
unfolding Image_def
unfolding str_eq_def
by (auto) (metis append_Nil2)

lemma finals_in_partitions:
  shows "finals A  (UNIV // A)"
unfolding finals_def quotient_def
by auto

subsection ‹Equational systems›

text ‹The two kinds of terms in the rhs of equations.›

datatype 'a trm = 
   Lam "'a rexp"            (* Lambda-marker *)
 | Trn "'a lang" "'a rexp"     (* Transition *)

fun 
  lang_trm::"'a trm  'a lang"
where
  "lang_trm (Lam r) = lang r" 
| "lang_trm (Trn X r) = X  lang r"

fun 
  lang_rhs::"('a trm) set  'a lang"
where 
  "lang_rhs rhs =  (lang_trm ` rhs)"

lemma lang_rhs_set:
  shows "lang_rhs {Trn X r | r. P r} = {lang_trm (Trn X r) | r. P r}"
by (auto)

lemma lang_rhs_union_distrib:
  shows "lang_rhs A  lang_rhs B = lang_rhs (A  B)"
by simp


text ‹Transitions between equivalence classes›

definition 
  transition :: "'a lang  'a  'a lang  bool" ("_ __" [100,100,100] 100)
where
  "Y c X  Y  {[c]}  X"

text ‹Initial equational system›

definition
  "Init_rhs CS X   
      if ([]  X) then 
          {Lam One}  {Trn Y (Atom c) | Y c. Y  CS  Y c X}
      else 
          {Trn Y (Atom c)| Y c. Y  CS  Y c X}"

definition 
  "Init CS  {(X, Init_rhs CS X) | X.  X  CS}"


subsection ‹Arden Operation on equations›

fun 
  Append_rexp :: "'a rexp  'a trm  'a trm"
where
  "Append_rexp r (Lam rexp)   = Lam (Times rexp r)"
| "Append_rexp r (Trn X rexp) = Trn X (Times rexp r)"


definition
  "Append_rexp_rhs rhs rexp  (Append_rexp rexp) ` rhs"

definition 
  "Arden X rhs  
     Append_rexp_rhs (rhs - {Trn X r | r. Trn X r  rhs}) (Star ( {r. Trn X r  rhs}))"


subsection ‹Substitution Operation on equations›

definition 
  "Subst rhs X xrhs  
        (rhs - {Trn X r | r. Trn X r  rhs})  (Append_rexp_rhs xrhs ( {r. Trn X r  rhs}))"

definition
  Subst_all :: "('a lang × ('a trm) set) set  'a lang  ('a trm) set  ('a lang × ('a trm) set) set"
where
  "Subst_all ES X xrhs  {(Y, Subst yrhs X xrhs) | Y yrhs. (Y, yrhs)  ES}"

definition
  "Remove ES X xrhs  
      Subst_all  (ES - {(X, xrhs)}) X (Arden X xrhs)"


subsection ‹While-combinator and invariants›

definition 
  "Iter X ES  (let (Y, yrhs) = SOME (Y, yrhs). (Y, yrhs)  ES  X  Y
                in Remove ES Y yrhs)"

lemma IterI2:
  assumes "(Y, yrhs)  ES"
  and     "X  Y"
  and     "Y yrhs. (Y, yrhs)  ES; X  Y  Q (Remove ES Y yrhs)"
  shows "Q (Iter X ES)"
unfolding Iter_def using assms
by (rule_tac a="(Y, yrhs)" in someI2) (auto)

abbreviation
  "Cond ES  card ES  1"

definition 
  "Solve X ES  while Cond (Iter X) ES"


definition 
  "distinctness ES  
      X rhs rhs'. (X, rhs)  ES  (X, rhs')  ES  rhs = rhs'"

definition 
  "soundness ES  (X, rhs)  ES. X = lang_rhs rhs"

definition 
  "ardenable rhs  ( Y r. Trn Y r  rhs  []  lang r)"

definition 
  "ardenable_all ES  (X, rhs)  ES. ardenable rhs"

definition
  "finite_rhs ES  (X, rhs)  ES. finite rhs"

lemma finite_rhs_def2:
  "finite_rhs ES = ( X rhs. (X, rhs)  ES  finite rhs)"
unfolding finite_rhs_def by auto

definition 
  "rhss rhs  {X | X r. Trn X r  rhs}"

definition
  "lhss ES  {Y | Y yrhs. (Y, yrhs)  ES}"

definition 
  "validity ES  (X, rhs)  ES. rhss rhs  lhss ES"

lemma rhss_union_distrib:
  shows "rhss (A  B) = rhss A  rhss B"
by (auto simp add: rhss_def)

lemma lhss_union_distrib:
  shows "lhss (A  B) = lhss A  lhss B"
by (auto simp add: lhss_def)


definition 
  "invariant ES  finite ES
                 finite_rhs ES
                 soundness ES 
                 distinctness ES 
                 ardenable_all ES 
                 validity ES"


lemma invariantI:
  assumes "soundness ES" "finite ES" "distinctness ES" "ardenable_all ES" 
          "finite_rhs ES" "validity ES"
  shows "invariant ES"
using assms by (simp add: invariant_def)


declare [[simproc add: finite_Collect]]

lemma finite_Trn:
  assumes fin: "finite rhs"
  shows "finite {r. Trn Y r  rhs}"
using assms by (auto intro!: finite_vimageI simp add: inj_on_def)

lemma finite_Lam:
  assumes fin: "finite rhs"
  shows "finite {r. Lam r  rhs}"
using assms by (auto intro!: finite_vimageI simp add: inj_on_def)

lemma trm_soundness:
  assumes finite:"finite rhs"
  shows "lang_rhs ({Trn X r| r. Trn X r  rhs}) = X  (lang ({r. Trn X r  rhs}))"
proof -
  have "finite {r. Trn X r  rhs}" 
    by (rule finite_Trn[OF finite]) 
  then show "lang_rhs ({Trn X r| r. Trn X r  rhs}) = X  (lang ({r. Trn X r  rhs}))"
    by (simp only: lang_rhs_set lang_trm.simps) (auto simp add: conc_def)
qed

lemma lang_of_append_rexp:
  "lang_trm (Append_rexp r trm) = lang_trm trm  lang r"
by (induct rule: Append_rexp.induct)
   (auto simp add: conc_assoc)

lemma lang_of_append_rexp_rhs:
  "lang_rhs (Append_rexp_rhs rhs r) = lang_rhs rhs  lang r"
unfolding Append_rexp_rhs_def
by (auto simp add: conc_def lang_of_append_rexp)


subsection ‹Intial Equational Systems›

lemma defined_by_str:
  assumes "s  X" "X  UNIV // A" 
  shows "X = A `` {s}"
using assms
unfolding quotient_def Image_def str_eq_def 
by auto

lemma every_eqclass_has_transition:
  assumes has_str: "s @ [c]  X"
  and     in_CS:   "X  UNIV // A"
  obtains Y where "Y  UNIV // A" and "Y  {[c]}  X" and "s  Y"
proof -
  define Y where "Y = A `` {s}"
  have "Y  UNIV // A" 
    unfolding Y_def quotient_def by auto
  moreover
  have "X = A `` {s @ [c]}" 
    using has_str in_CS defined_by_str by blast
  then have "Y  {[c]}  X" 
    unfolding Y_def Image_def conc_def
    unfolding str_eq_def
    by clarsimp
  moreover
  have "s  Y" unfolding Y_def 
    unfolding Image_def str_eq_def by simp
  ultimately show thesis using that by blast
qed

lemma l_eq_r_in_eqs:
  assumes X_in_eqs: "(X, rhs)  Init (UNIV // A)"
  shows "X = lang_rhs rhs"
proof 
  show "X  lang_rhs rhs"
  proof
    fix x
    assume in_X: "x  X"
    { assume empty: "x = []"
      then have "x  lang_rhs rhs" using X_in_eqs in_X
        unfolding Init_def Init_rhs_def
        by auto
    }
    moreover
    { assume not_empty: "x  []"
      then obtain s c where decom: "x = s @ [c]"
        using rev_cases by blast
      have "X  UNIV // A" using X_in_eqs unfolding Init_def by auto
      then obtain Y where "Y  UNIV // A" "Y  {[c]}  X" "s  Y"
        using decom in_X every_eqclass_has_transition by metis
      then have "x  lang_rhs {Trn Y (Atom c)| Y c. Y  UNIV // A  Y c X}"
        unfolding transition_def
        using decom by (force simp add: conc_def)
      then have "x  lang_rhs rhs" using X_in_eqs in_X
        unfolding Init_def Init_rhs_def by simp
    }
    ultimately show "x  lang_rhs rhs" by blast
  qed
next
  show "lang_rhs rhs  X" using X_in_eqs
    unfolding Init_def Init_rhs_def transition_def
    by auto 
qed


lemma finite_Init_rhs: 
  fixes CS::"(('a::finite) lang) set"
  assumes finite: "finite CS"
  shows "finite (Init_rhs CS X)"
using assms unfolding Init_rhs_def transition_def by simp


lemma Init_ES_satisfies_invariant:
  fixes A::"(('a::finite) lang)"
  assumes finite_CS: "finite (UNIV // A)"
  shows "invariant (Init (UNIV // A))"
proof (rule invariantI)
  show "soundness (Init (UNIV // A))"
    unfolding soundness_def 
    using l_eq_r_in_eqs by auto
  show "finite (Init (UNIV // A))" using finite_CS
    unfolding Init_def by simp
  show "distinctness (Init (UNIV // A))"     
    unfolding distinctness_def Init_def by simp
  show "ardenable_all (Init (UNIV // A))"
    unfolding ardenable_all_def Init_def Init_rhs_def ardenable_def
   by auto 
  show "finite_rhs (Init (UNIV // A))"
    using finite_Init_rhs[OF finite_CS]
    unfolding finite_rhs_def Init_def by auto
  show "validity (Init (UNIV // A))"
    unfolding validity_def Init_def Init_rhs_def rhss_def lhss_def
    by auto
qed

subsection ‹Interations›

lemma Arden_preserves_soundness:
  assumes l_eq_r: "X = lang_rhs rhs"
  and not_empty: "ardenable rhs"
  and finite: "finite rhs"
  shows "X = lang_rhs (Arden X rhs)"
proof -
  define A where "A = lang ({r. Trn X r  rhs})"
  define b where "b = {Trn X r | r. Trn X r  rhs}"
  define B where "B = lang_rhs (rhs - b)"
  have not_empty2: "[]  A" 
    using finite_Trn[OF finite] not_empty
    unfolding A_def ardenable_def by simp
  have "X = lang_rhs rhs" using l_eq_r by simp
  also have " = lang_rhs (b  (rhs - b))" unfolding b_def by auto
  also have " = lang_rhs b  B" unfolding B_def by (simp only: lang_rhs_union_distrib)
  also have " = X  A  B"
    unfolding b_def
    unfolding trm_soundness[OF finite]
    unfolding A_def
    by blast
  finally have "X = X  A  B" . 
  then have "X = B  A"
    by (simp add: reversed_Arden[OF not_empty2])
  also have " = lang_rhs (Arden X rhs)"
    unfolding Arden_def A_def B_def b_def
    by (simp only: lang_of_append_rexp_rhs lang.simps)
  finally show "X = lang_rhs (Arden X rhs)" by simp
qed 

lemma Append_preserves_finite:
  "finite rhs  finite (Append_rexp_rhs rhs r)"
by (auto simp: Append_rexp_rhs_def)

lemma Arden_preserves_finite:
  "finite rhs  finite (Arden X rhs)"
by (auto simp: Arden_def Append_preserves_finite)

lemma Append_preserves_ardenable:
  "ardenable rhs  ardenable (Append_rexp_rhs rhs r)"
apply (auto simp: ardenable_def Append_rexp_rhs_def)
by (case_tac x, auto simp: conc_def)

lemma ardenable_set_sub:
  "ardenable rhs  ardenable (rhs - A)"
by (auto simp:ardenable_def)

lemma ardenable_set_union:
  "ardenable rhs; ardenable rhs'  ardenable (rhs  rhs')"
by (auto simp:ardenable_def)

lemma Arden_preserves_ardenable:
  "ardenable rhs  ardenable (Arden X rhs)"
by (simp only:Arden_def Append_preserves_ardenable ardenable_set_sub)


lemma Subst_preserves_ardenable:
  "ardenable rhs; ardenable xrhs  ardenable (Subst rhs X xrhs)"
by (simp only: Subst_def Append_preserves_ardenable ardenable_set_union ardenable_set_sub)

lemma Subst_preserves_soundness:
  assumes substor: "X = lang_rhs xrhs"
  and finite: "finite rhs"
  shows "lang_rhs (Subst rhs X xrhs) = lang_rhs rhs" (is "?Left = ?Right")
proof-
  define A where "A = lang_rhs (rhs - {Trn X r | r. Trn X r  rhs})"
  have "?Left = A  lang_rhs (Append_rexp_rhs xrhs ({r. Trn X r  rhs}))"
    unfolding Subst_def
    unfolding lang_rhs_union_distrib[symmetric]
    by (simp add: A_def)
  moreover have "?Right = A  lang_rhs {Trn X r | r. Trn X r  rhs}"
  proof-
    have "rhs = (rhs - {Trn X r | r. Trn X r  rhs})  ({Trn X r | r. Trn X r  rhs})" by auto
    thus ?thesis 
      unfolding A_def
      unfolding lang_rhs_union_distrib
      by simp
  qed
  moreover 
  have "lang_rhs (Append_rexp_rhs xrhs ({r. Trn X r  rhs})) = lang_rhs {Trn X r | r. Trn X r  rhs}" 
    using finite substor by (simp only: lang_of_append_rexp_rhs trm_soundness)
  ultimately show ?thesis by simp
qed

lemma Subst_preserves_finite_rhs:
  "finite rhs; finite yrhs  finite (Subst rhs Y yrhs)"
by (auto simp: Subst_def Append_preserves_finite)

lemma Subst_all_preserves_finite:
  assumes finite: "finite ES"
  shows "finite (Subst_all ES Y yrhs)"
using assms unfolding Subst_all_def by simp

declare [[simproc del: finite_Collect]]

lemma Subst_all_preserves_finite_rhs:
  "finite_rhs ES; finite yrhs  finite_rhs (Subst_all ES Y yrhs)"
by (auto intro:Subst_preserves_finite_rhs simp add:Subst_all_def finite_rhs_def)

lemma append_rhs_preserves_cls:
  "rhss (Append_rexp_rhs rhs r) = rhss rhs"
apply (auto simp: rhss_def Append_rexp_rhs_def)
apply (case_tac xa, auto simp: image_def)
by (rule_tac x = "Times ra r" in exI, rule_tac x = "Trn x ra" in bexI, simp+)

lemma Arden_removes_cl:
  "rhss (Arden Y yrhs) = rhss yrhs - {Y}"
apply (simp add:Arden_def append_rhs_preserves_cls)
by (auto simp: rhss_def)

lemma lhss_preserves_cls:
  "lhss (Subst_all ES Y yrhs) = lhss ES"
by (auto simp: lhss_def Subst_all_def)

lemma Subst_updates_cls:
  "X  rhss xrhs  
      rhss (Subst rhs X xrhs) = rhss rhs  rhss xrhs - {X}"
apply (simp only:Subst_def append_rhs_preserves_cls rhss_union_distrib)
by (auto simp: rhss_def)

lemma Subst_all_preserves_validity:
  assumes sc: "validity (ES  {(Y, yrhs)})"        (is "validity ?A")
  shows "validity (Subst_all ES Y (Arden Y yrhs))"  (is "validity ?B")
proof -
  { fix X xrhs'
    assume "(X, xrhs')  ?B"
    then obtain xrhs 
      where xrhs_xrhs': "xrhs' = Subst xrhs Y (Arden Y yrhs)"
      and X_in: "(X, xrhs)  ES" by (simp add:Subst_all_def, blast)    
    have "rhss xrhs'  lhss ?B"
    proof-
      have "lhss ?B = lhss ES" by (auto simp add:lhss_def Subst_all_def)
      moreover have "rhss xrhs'  lhss ES"
      proof-
        have "rhss xrhs'   rhss xrhs  rhss (Arden Y yrhs) - {Y}"
        proof -
          have "Y  rhss (Arden Y yrhs)" 
            using Arden_removes_cl by auto
          thus ?thesis using xrhs_xrhs' by (auto simp: Subst_updates_cls)
        qed
        moreover have "rhss xrhs  lhss ES  {Y}" using X_in sc
          apply (simp only:validity_def lhss_union_distrib)
          by (drule_tac x = "(X, xrhs)" in bspec, auto simp:lhss_def)
        moreover have "rhss (Arden Y yrhs)  lhss ES  {Y}" 
          using sc 
          by (auto simp add: Arden_removes_cl validity_def lhss_def)
        ultimately show ?thesis by auto
      qed
      ultimately show ?thesis by simp
    qed
  } thus ?thesis by (auto simp only:Subst_all_def validity_def)
qed

lemma Subst_all_satisfies_invariant:
  assumes invariant_ES: "invariant (ES  {(Y, yrhs)})"
  shows "invariant (Subst_all ES Y (Arden Y yrhs))"
proof (rule invariantI)
  have Y_eq_yrhs: "Y = lang_rhs yrhs" 
    using invariant_ES by (simp only:invariant_def soundness_def, blast)
   have finite_yrhs: "finite yrhs" 
    using invariant_ES by (auto simp:invariant_def finite_rhs_def)
  have ardenable_yrhs: "ardenable yrhs" 
    using invariant_ES by (auto simp:invariant_def ardenable_all_def)
  show "soundness (Subst_all ES Y (Arden Y yrhs))"
  proof -
    have "Y = lang_rhs (Arden Y yrhs)" 
      using Y_eq_yrhs invariant_ES finite_yrhs
      using finite_Trn[OF finite_yrhs]
      apply(rule_tac Arden_preserves_soundness)
      apply(simp_all)
      unfolding invariant_def ardenable_all_def ardenable_def
      apply(auto)
      done
    thus ?thesis using invariant_ES
      unfolding invariant_def finite_rhs_def2 soundness_def Subst_all_def
      by (auto simp add: Subst_preserves_soundness simp del: lang_rhs.simps)
  qed
  show "finite (Subst_all ES Y (Arden Y yrhs))" 
    using invariant_ES by (simp add:invariant_def Subst_all_preserves_finite)
  show "distinctness (Subst_all ES Y (Arden Y yrhs))" 
    using invariant_ES 
    unfolding distinctness_def Subst_all_def invariant_def by auto
  show "ardenable_all (Subst_all ES Y (Arden Y yrhs))"
  proof - 
    { fix X rhs
      assume "(X, rhs)  ES"
      hence "ardenable rhs"  using invariant_ES  
        by (auto simp add:invariant_def ardenable_all_def)
      with ardenable_yrhs 
      have "ardenable (Subst rhs Y (Arden Y yrhs))"
        by (simp add:ardenable_yrhs 
               Subst_preserves_ardenable Arden_preserves_ardenable)
    } thus ?thesis by (auto simp add:ardenable_all_def Subst_all_def)
  qed
  show "finite_rhs (Subst_all ES Y (Arden Y yrhs))"
  proof-
    have "finite_rhs ES" using invariant_ES 
      by (simp add:invariant_def finite_rhs_def)
    moreover have "finite (Arden Y yrhs)"
    proof -
      have "finite yrhs" using invariant_ES 
        by (auto simp:invariant_def finite_rhs_def)
      thus ?thesis using Arden_preserves_finite by auto
    qed
    ultimately show ?thesis 
      by (simp add:Subst_all_preserves_finite_rhs)
  qed
  show "validity (Subst_all ES Y (Arden Y yrhs))"
    using invariant_ES Subst_all_preserves_validity by (auto simp add: invariant_def)
qed

lemma Remove_in_card_measure:
  assumes finite: "finite ES"
  and     in_ES: "(X, rhs)  ES"
  shows "(Remove ES X rhs, ES)  measure card"
proof -
  define f where "f x = ((fst x)::'a lang, Subst (snd x) X (Arden X rhs))" for x
  define ES' where "ES' = ES - {(X, rhs)}"
  have "Subst_all ES' X (Arden X rhs) = f ` ES'" 
    apply (auto simp: Subst_all_def f_def image_def)
    by (rule_tac x = "(Y, yrhs)" in bexI, simp+)
  then have "card (Subst_all ES' X (Arden X rhs))  card ES'"
    unfolding ES'_def using finite by (auto intro: card_image_le)
  also have " < card ES" unfolding ES'_def 
    using in_ES finite by (rule_tac card_Diff1_less)
  finally show "(Remove ES X rhs, ES)  measure card" 
    unfolding Remove_def ES'_def by simp
qed
    

lemma Subst_all_cls_remains: 
  "(X, xrhs)  ES   xrhs'. (X, xrhs')  (Subst_all ES Y yrhs)"
by (auto simp: Subst_all_def)

lemma card_noteq_1_has_more:
  assumes card:"Cond ES"
  and e_in: "(X, xrhs)  ES"
  and finite: "finite ES"
  shows "(Y, yrhs)  ES. (X, xrhs)  (Y, yrhs)"
proof-
  have "card ES > 1" using card e_in finite 
    by (cases "card ES") (auto) 
  then have "card (ES - {(X, xrhs)}) > 0"
    using finite e_in by auto
  then have "(ES - {(X, xrhs)})  {}" using finite by (rule_tac notI, simp)
  then show "(Y, yrhs)  ES. (X, xrhs)  (Y, yrhs)"
    by auto
qed

lemma iteration_step_measure:
  assumes Inv_ES: "invariant ES"
  and    X_in_ES: "(X, xrhs)  ES"
  and    Cnd:     "Cond ES "
  shows "(Iter X ES, ES)  measure card"
proof -
  have fin: "finite ES" using Inv_ES unfolding invariant_def by simp
  then obtain Y yrhs 
    where Y_in_ES: "(Y, yrhs)  ES" and not_eq: "(X, xrhs)  (Y, yrhs)" 
    using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
  then have "(Y, yrhs)  ES " "X  Y"  
    using X_in_ES Inv_ES unfolding invariant_def distinctness_def
    by auto
  then show "(Iter X ES, ES)  measure card" 
  apply(rule IterI2)
  apply(rule Remove_in_card_measure)
  apply(simp_all add: fin)
  done
qed

lemma iteration_step_invariant:
  assumes Inv_ES: "invariant ES"
  and    X_in_ES: "(X, xrhs)  ES"
  and    Cnd: "Cond ES"
  shows "invariant (Iter X ES)"
proof -
  have finite_ES: "finite ES" using Inv_ES by (simp add: invariant_def)
  then obtain Y yrhs 
    where Y_in_ES: "(Y, yrhs)  ES" and not_eq: "(X, xrhs)  (Y, yrhs)" 
    using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
  then have "(Y, yrhs)  ES" "X  Y" 
    using X_in_ES Inv_ES unfolding invariant_def distinctness_def
    by auto
  then show "invariant (Iter X ES)" 
  proof(rule IterI2)
    fix Y yrhs
    assume h: "(Y, yrhs)  ES" "X  Y"
    then have "ES - {(Y, yrhs)}  {(Y, yrhs)} = ES" by auto
    then show "invariant (Remove ES Y yrhs)" unfolding Remove_def
      using Inv_ES
      by (rule_tac Subst_all_satisfies_invariant) (simp) 
  qed
qed

lemma iteration_step_ex:
  assumes Inv_ES: "invariant ES"
  and    X_in_ES: "(X, xrhs)  ES"
  and    Cnd: "Cond ES"
  shows "xrhs'. (X, xrhs')  (Iter X ES)"
proof -
  have finite_ES: "finite ES" using Inv_ES by (simp add: invariant_def)
  then obtain Y yrhs 
    where "(Y, yrhs)  ES" "(X, xrhs)  (Y, yrhs)" 
    using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto)
  then have "(Y, yrhs)  ES " "X  Y"  
    using X_in_ES Inv_ES unfolding invariant_def distinctness_def
    by auto
  then show "xrhs'. (X, xrhs')  (Iter X ES)" 
  apply(rule IterI2)
  unfolding Remove_def
  apply(rule Subst_all_cls_remains)
  using X_in_ES
  apply(auto)
  done
qed


subsection ‹The conclusion of the first direction›

lemma Solve:
  fixes A::"('a::finite) lang"
  assumes fin: "finite (UNIV // A)"
  and     X_in: "X  (UNIV // A)"
  shows "rhs. Solve X (Init (UNIV // A)) = {(X, rhs)}  invariant {(X, rhs)}"
proof -
  define Inv where "Inv ES  invariant ES  (rhs. (X, rhs)  ES)" for ES
  have "Inv (Init (UNIV // A))" unfolding Inv_def
      using fin X_in by (simp add: Init_ES_satisfies_invariant, simp add: Init_def)
  moreover
  { fix ES
    assume inv: "Inv ES" and crd: "Cond ES"
    then have "Inv (Iter X ES)"
      unfolding Inv_def
      by (auto simp add: iteration_step_invariant iteration_step_ex) }
  moreover
  { fix ES
    assume inv: "Inv ES" and not_crd: "¬Cond ES"
    from inv obtain rhs where "(X, rhs)  ES" unfolding Inv_def by auto
    moreover
    from not_crd have "card ES = 1" by simp
    ultimately 
    have "ES = {(X, rhs)}" by (auto simp add: card_Suc_eq) 
    then have "rhs'. ES = {(X, rhs')}  invariant {(X, rhs')}" using inv
      unfolding Inv_def by auto }
  moreover
    have "wf (measure card)" by simp
  moreover
  { fix ES
    assume inv: "Inv ES" and crd: "Cond ES"
    then have "(Iter X ES, ES)  measure card"
      unfolding Inv_def
      apply(clarify)
      apply(rule_tac iteration_step_measure)
      apply(auto)
      done }
  ultimately 
  show "rhs. Solve X (Init (UNIV // A)) = {(X, rhs)}  invariant {(X, rhs)}" 
    unfolding Solve_def by (rule while_rule)
qed

lemma every_eqcl_has_reg:
  fixes A::"('a::finite) lang"
  assumes finite_CS: "finite (UNIV // A)"
  and X_in_CS: "X  (UNIV // A)"
  shows "r. X = lang r" 
proof -
  from finite_CS X_in_CS 
  obtain xrhs where Inv_ES: "invariant {(X, xrhs)}"
    using Solve by metis

  define A where "A = Arden X xrhs"
  have "rhss xrhs  {X}" using Inv_ES 
    unfolding validity_def invariant_def rhss_def lhss_def
    by auto
  then have "rhss A = {}" unfolding A_def 
    by (simp add: Arden_removes_cl)
  then have eq: "{Lam r | r. Lam r  A} = A" unfolding rhss_def
    by (auto, case_tac x, auto)
  
  have "finite A" using Inv_ES unfolding A_def invariant_def finite_rhs_def
    using Arden_preserves_finite by auto
  then have fin: "finite {r. Lam r  A}" by (rule finite_Lam)

  have "X = lang_rhs xrhs" using Inv_ES unfolding invariant_def soundness_def
    by simp
  then have "X = lang_rhs A" using Inv_ES 
    unfolding A_def invariant_def ardenable_all_def finite_rhs_def 
    by (rule_tac Arden_preserves_soundness) (simp_all add: finite_Trn)
  then have "X = lang_rhs {Lam r | r. Lam r  A}" using eq by simp
  then have "X = lang ({r. Lam r  A})" using fin by auto
  then show "r. X = lang r" by blast
qed

lemma bchoice_finite_set:
  assumes a: "x  S. y. x = f y" 
  and     b: "finite S"
  shows "ys. ( S) = (f ` ys)  finite ys"
using bchoice[OF a] b
apply(erule_tac exE)
apply(rule_tac x="fa ` S" in exI)
apply(auto)
done

theorem Myhill_Nerode1:
  fixes A::"('a::finite) lang"
  assumes finite_CS: "finite (UNIV // A)"
  shows   "r. A = lang r"
proof -
  have fin: "finite (finals A)" 
    using finals_in_partitions finite_CS by (rule finite_subset)
  have "X  (UNIV // A). r. X = lang r" 
    using finite_CS every_eqcl_has_reg by blast
  then have a: "X  finals A. r. X = lang r"
    using finals_in_partitions by auto
  then obtain rs::"('a rexp) set" where " (finals A) = (lang ` rs)" "finite rs"
    using fin by (auto dest: bchoice_finite_set)
  then have "A = lang (rs)" 
    unfolding lang_is_union_of_finals[symmetric] by simp
  then show "r. A = lang r" by blast
qed 


end

Theory Myhill_2

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)
theory Myhill_2
  imports Myhill_1 "HOL-Library.Sublist"
begin

section ‹Second direction of MN: regular language ⇒ finite partition›

subsection ‹Tagging functions›

definition 
   tag_eq :: "('a list  'b)  ('a list × 'a list) set" ("=_=")
where
   "=tag=  {(x, y). tag x = tag y}"

abbreviation
   tag_eq_applied :: "'a list  ('a list  'b)  'a list  bool" ("_ =_= _")
where
   "x =tag= y  (x, y)  =tag="

lemma [simp]:
  shows "(A) `` {x} = (A) `` {y}  x A y"
unfolding str_eq_def by auto

lemma refined_intro:
  assumes "x y z. x =tag= y; x @ z  A  y @ z  A"
  shows "=tag=  A"
using assms unfolding str_eq_def tag_eq_def
apply(clarify, simp (no_asm_use))
by metis

lemma finite_eq_tag_rel:
  assumes rng_fnt: "finite (range tag)"
  shows "finite (UNIV // =tag=)"
proof -
  let "?f" =  "λX. tag ` X" and ?A = "(UNIV // =tag=)"
  have "finite (?f ` ?A)" 
  proof -
    have "range ?f  (Pow (range tag))" unfolding Pow_def by auto
    moreover 
    have "finite (Pow (range tag))" using rng_fnt by simp
    ultimately 
    have "finite (range ?f)" unfolding image_def by (blast intro: finite_subset)
    moreover
    have "?f ` ?A  range ?f" by auto
    ultimately show "finite (?f ` ?A)" by (rule rev_finite_subset) 
  qed
  moreover
  have "inj_on ?f ?A"
  proof -
    { fix X Y
      assume X_in: "X  ?A"
        and  Y_in: "Y  ?A"
        and  tag_eq: "?f X = ?f Y"
      then obtain x y 
        where "x  X" "y  Y" "tag x = tag y"
        unfolding quotient_def Image_def image_def tag_eq_def
        by (simp) (blast)
      with X_in Y_in 
      have "X = Y"
        unfolding quotient_def tag_eq_def by auto
    } 
    then show "inj_on ?f ?A" unfolding inj_on_def by auto
  qed
  ultimately show "finite (UNIV // =tag=)" by (rule finite_imageD)
qed

lemma refined_partition_finite:
  assumes fnt: "finite (UNIV // R1)"
  and refined: "R1  R2"
  and eq1: "equiv UNIV R1" and eq2: "equiv UNIV R2"
  shows "finite (UNIV // R2)"
proof -
  let ?f = "λX. {R1 `` {x} | x. x  X}" 
    and ?A = "UNIV // R2" and ?B = "UNIV // R1"
  have "?f ` ?A  Pow ?B"
    unfolding image_def Pow_def quotient_def by auto
  moreover
  have "finite (Pow ?B)" using fnt by simp
  ultimately  
  have "finite (?f ` ?A)" by (rule finite_subset)
  moreover
  have "inj_on ?f ?A"
  proof -
    { fix X Y
      assume X_in: "X  ?A" and Y_in: "Y  ?A" and eq_f: "?f X = ?f Y"
      from quotientE [OF X_in]
      obtain x where "X = R2 `` {x}" by blast
      with equiv_class_self[OF eq2] have x_in: "x  X" by simp
      then have "R1 ``{x}  ?f X" by auto
      with eq_f have "R1 `` {x}  ?f Y" by simp
      then obtain y 
        where y_in: "y  Y" and eq_r1_xy: "R1 `` {x} = R1 `` {y}" by auto
      with eq_equiv_class[OF _ eq1] 
      have "(x, y)  R1" by blast
      with refined have "(x, y)  R2" by auto
      with quotient_eqI [OF eq2 X_in Y_in x_in y_in]
      have "X = Y" .
    } 
    then show "inj_on ?f ?A" unfolding inj_on_def by blast 
  qed
  ultimately show "finite (UNIV // R2)" by (rule finite_imageD)
qed

lemma tag_finite_imageD:
  assumes rng_fnt: "finite (range tag)" 
  and     refined: "=tag=   A"
  shows "finite (UNIV // A)"
proof (rule_tac refined_partition_finite [of "=tag="])
  show "finite (UNIV // =tag=)" by (rule finite_eq_tag_rel[OF rng_fnt])
next
  show "=tag=  A" using refined .
next
  show "equiv UNIV =tag="
  and  "equiv UNIV (A)" 
    unfolding equiv_def str_eq_def tag_eq_def refl_on_def sym_def trans_def
    by auto
qed


subsection ‹Base cases: @{const Zero}, @{const One} and @{const Atom}

lemma quot_zero_eq:
  shows "UNIV // {} = {UNIV}"
unfolding quotient_def Image_def str_eq_def by auto

lemma quot_zero_finiteI [intro]:
  shows "finite (UNIV // {})"
unfolding quot_zero_eq by simp


lemma quot_one_subset:
  shows "UNIV // {[]}  {{[]}, UNIV - {[]}}"
proof
  fix x
  assume "x  UNIV // {[]}"
  then obtain y where h: "x = {z. y {[]} z}" 
    unfolding quotient_def Image_def by blast
  { assume "y = []"
    with h have "x = {[]}" by (auto simp: str_eq_def)
    then have "x  {{[]}, UNIV - {[]}}" by simp }
  moreover
  { assume "y  []"
    with h have "x = UNIV - {[]}" by (auto simp: str_eq_def)
    then have "x  {{[]}, UNIV - {[]}}" by simp }
  ultimately show "x  {{[]}, UNIV - {[]}}" by blast
qed

lemma quot_one_finiteI [intro]:
  shows "finite (UNIV // {[]})"
by (rule finite_subset[OF quot_one_subset]) (simp)


lemma quot_atom_subset:
  "UNIV // ({[c]})  {{[]},{[c]}, UNIV - {[], [c]}}"
proof 
  fix x 
  assume "x  UNIV // {[c]}"
  then obtain y where h: "x = {z. (y, z)  {[c]}}" 
    unfolding quotient_def Image_def by blast
  show "x  {{[]},{[c]}, UNIV - {[], [c]}}"
  proof -
    { assume "y = []" hence "x = {[]}" using h 
        by (auto simp: str_eq_def) } 
    moreover 
    { assume "y = [c]" hence "x = {[c]}" using h 
        by (auto dest!: spec[where x = "[]"] simp: str_eq_def) } 
    moreover 
    { assume "y  []" and "y  [c]"
      hence " z. (y @ z)  [c]" by (case_tac y, auto)
      moreover have " p. (p  []  p  [c]) = ( q. p @ q  [c])" 
        by (case_tac p, auto)
      ultimately have "x = UNIV - {[],[c]}" using h
        by (auto simp add: str_eq_def)
    } 
    ultimately show ?thesis by blast
  qed
qed

lemma quot_atom_finiteI [intro]:
  shows "finite (UNIV // {[c]})"
by (rule finite_subset[OF quot_atom_subset]) (simp)


subsection ‹Case for @{const Plus}

definition 
  tag_Plus :: "'a lang  'a lang  'a list  ('a lang × 'a lang)"
where
  "tag_Plus A B  λx. (A `` {x}, B `` {x})"

lemma quot_plus_finiteI [intro]:
  assumes finite1: "finite (UNIV // A)"
  and     finite2: "finite (UNIV // B)"
  shows "finite (UNIV // (A  B))"
proof (rule_tac tag = "tag_Plus A B" in tag_finite_imageD)
  have "finite ((UNIV // A) × (UNIV // B))" 
    using finite1 finite2 by auto
  then show "finite (range (tag_Plus A B))"
    unfolding tag_Plus_def quotient_def
    by (rule rev_finite_subset) (auto)
next
  show "=tag_Plus A B=  (A  B)"
    unfolding tag_eq_def tag_Plus_def str_eq_def by auto
qed


subsection ‹Case for Times›

definition
  "Partitions x  {(xp, xs). xp @ xs = x}"

lemma conc_partitions_elim:
  assumes "x  A  B"
  shows "(u, v)  Partitions x. u  A  v  B"
using assms unfolding conc_def Partitions_def
by auto

lemma conc_partitions_intro:
  assumes "(u, v)  Partitions x  u  A   v  B"
  shows "x  A  B"
using assms unfolding conc_def Partitions_def
by auto

lemma equiv_class_member:
  assumes "x  A"
  and "A `` {x} = A `` {y}" 
  shows "y  A"
using assms
apply(simp)
apply(simp add: str_eq_def)
apply(metis append_Nil2)
done

definition 
  tag_Times :: "'a lang  'a lang  'a list  'a lang × 'a lang set"
where
  "tag_Times A B  λx. (A `` {x}, {(B `` {xs}) | xp xs. xp  A  (xp, xs)  Partitions x})"

lemma tag_Times_injI:
  assumes a: "tag_Times A B x = tag_Times A B y"
  and     c: "x @ z  A  B"
  shows "y @ z  A  B"
proof -
  from c obtain u v where 
    h1: "(u, v)  Partitions (x @ z)" and
    h2: "u  A" and
    h3: "v  B" by (auto dest: conc_partitions_elim)
  from h1 have "x @ z = u @ v" unfolding Partitions_def by simp
  then obtain us 
    where "(x = u @ us  us @ z = v)  (x @ us = u  z = us @ v)"
    by (auto simp add: append_eq_append_conv2)
  moreover
  { assume eq: "x = u @ us" "us @ z = v"
    have "(B `` {us})  snd (tag_Times A B x)"
      unfolding Partitions_def tag_Times_def using h2 eq 
      by (auto simp add: str_eq_def)
    then have "(B `` {us})  snd (tag_Times A B y)"
      using a by simp
    then obtain u' us' where
      q1: "u'  A" and
      q2: "B `` {us} = B `` {us'}" and
      q3: "(u', us')  Partitions y" 
      unfolding tag_Times_def by auto
    from q2 h3 eq 
    have "us' @ z  B"
      unfolding Image_def str_eq_def by auto
    then have "y @ z  A  B" using q1 q3 
      unfolding Partitions_def by auto
  }
  moreover
  { assume eq: "x @ us = u" "z = us @ v"
    have "(A `` {x}) = fst (tag_Times A B x)" 
      by (simp add: tag_Times_def)
    then have "(A `` {x}) = fst (tag_Times A B y)"
      using a by simp
    then have "A `` {x} = A `` {y}" 
      by (simp add: tag_Times_def)
    moreover 
    have "x @ us  A" using h2 eq by simp
    ultimately 
    have "y @ us  A" using equiv_class_member 
      unfolding Image_def str_eq_def by blast
    then have "(y @ us) @ v  A  B" 
      using h3 unfolding conc_def by blast
    then have "y @ z  A  B" using eq by simp 
  }
  ultimately show "y @ z  A  B" by blast
qed

lemma quot_conc_finiteI [intro]:
  assumes fin1: "finite (UNIV // A)" 
  and     fin2: "finite (UNIV // B)" 
  shows "finite (UNIV // (A  B))"
proof (rule_tac tag = "tag_Times A B" in tag_finite_imageD)
  have "x y z. tag_Times A B x = tag_Times A B y; x @ z  A  B  y @ z  A  B"
    by (rule tag_Times_injI)
       (auto simp add: tag_Times_def tag_eq_def)
  then show "=tag_Times A B=  (A  B)"
    by (rule refined_intro)
       (auto simp add: tag_eq_def)
next
  have *: "finite ((UNIV // A) × (Pow (UNIV // B)))" 
    using fin1 fin2 by auto
  show "finite (range (tag_Times A B))" 
    unfolding tag_Times_def
    apply(rule finite_subset[OF _ *])
    unfolding quotient_def
    by auto
qed


subsection ‹Case for @{const "Star"}

lemma star_partitions_elim:
  assumes "x @ z  A" "x  []"
  shows "(u, v)  Partitions (x @ z). strict_prefix u x  u  A  v  A"
proof -
  have "([], x @ z)  Partitions (x @ z)" "strict_prefix [] x" "[]  A" "x @ z  A"
    using assms by (auto simp add: Partitions_def strict_prefix_def)
  then show "(u, v)  Partitions (x @ z). strict_prefix u x  u  A  v  A"
    by blast
qed

lemma finite_set_has_max2: 
  "finite A; A  {}   max  A.  a  A. length a  length max"
apply(induct rule:finite.induct)
apply(simp)
by (metis (no_types) all_not_in_conv insert_iff linorder_le_cases order_trans)

lemma finite_strict_prefix_set: 
  shows "finite {xa. strict_prefix xa (x::'a list)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. strict_prefix xa (xs @ [x])} = {xa. strict_prefix xa xs}  {xs}")
by (auto simp:strict_prefix_def)

lemma append_eq_cases:
  assumes a: "x @ y = m @ n" "m  []"  
  shows "prefix x m  strict_prefix m x"
unfolding prefix_def strict_prefix_def using a
by (auto simp add: append_eq_append_conv2)

lemma star_spartitions_elim2:
  assumes a: "x @ z  A" 
  and     b: "x  []"
  shows "(u, v)  Partitions x.  (u', v')  Partitions z. strict_prefix u x  u  A  v @ u'  A  v'  A"
proof -
  define S where "S = {u | u v. (u, v)  Partitions x  strict_prefix u x  u  A  v @ z  A}"
  have "finite {u. strict_prefix u x}" by (rule finite_strict_prefix_set)
  then have "finite S" unfolding S_def
    by (rule rev_finite_subset) (auto)
  moreover 
  have "S  {}" using a b unfolding S_def Partitions_def
    by (auto simp: strict_prefix_def)
  ultimately have " u_max  S.  u  S. length u  length u_max"  
    using finite_set_has_max2 by blast
  then obtain u_max v 
    where h0: "(u_max, v)  Partitions x"
    and h1: "strict_prefix u_max x" 
    and h2: "u_max  A" 
    and h3: "v @ z  A"  
    and h4: " u v. (u, v)  Partitions x  strict_prefix u x  u  A  v @ z  A  length u  length u_max"
    unfolding S_def Partitions_def by blast
  have q: "v  []" using h0 h1 b unfolding Partitions_def by auto
  from h3 obtain a b
    where i1: "(a, b)  Partitions (v @ z)"
    and   i2: "a  A"
    and   i3: "b  A"
    and   i4: "a  []"
    unfolding Partitions_def
    using q by (auto dest: star_decom)
  have "prefix v a"
  proof (rule ccontr)
    assume a: "¬(prefix v a)"
    from i1 have i1': "a @ b = v @ z" unfolding Partitions_def by simp
    then have "prefix a v  strict_prefix v a" using append_eq_cases q by blast
    then have q: "strict_prefix a v" using a unfolding strict_prefix_def prefix_def by auto
    then obtain as where eq: "a @ as = v" unfolding strict_prefix_def prefix_def by auto
    have "(u_max @ a, as)  Partitions x" using eq h0 unfolding Partitions_def by auto
    moreover
    have "strict_prefix (u_max @ a) x" using h0 eq q unfolding Partitions_def prefix_def strict_prefix_def by auto
    moreover
    have "u_max @ a  A" using i2 h2 by simp
    moreover
    have "as @ z  A" using i1' i2 i3 eq by auto
    ultimately have "length (u_max @ a)  length u_max" using h4 by blast
    with i4 show "False" by auto
  qed
  with i1 obtain za zb
    where k1: "v @ za = a"
    and   k2: "(za, zb)  Partitions z" 
    and   k4: "zb = b" 
    unfolding Partitions_def prefix_def
    by (auto simp add: append_eq_append_conv2)
  show " (u, v)  Partitions x.  (u', v')  Partitions z. strict_prefix u x  u  A  v @ u'  A  v'  A"
    using h0 h1 h2 i2 i3 k1 k2 k4 unfolding Partitions_def by blast
qed

definition 
  tag_Star :: "'a lang  'a list  ('a lang) set"
where
  "tag_Star A  λx. {A `` {v} | u v. strict_prefix u x  u  A  (u, v)  Partitions x}"

lemma tag_Star_non_empty_injI:
  assumes a: "tag_Star A x = tag_Star A y"
  and     c: "x @ z  A"
  and     d: "x  []"
  shows "y @ z  A"
proof -
  obtain u v u' v' 
    where a1: "(u,  v)  Partitions x" "(u', v') Partitions z"
    and   a2: "strict_prefix u x"
    and   a3: "u  A"
    and   a4: "v @ u'  A" 
    and   a5: "v'  A"
    using c d by (auto dest: star_spartitions_elim2)
  have "(A) `` {v}  tag_Star A x" 
    apply(simp add: tag_Star_def Partitions_def str_eq_def)
    using a1 a2 a3 by (auto simp add: Partitions_def)
  then have "(A) `` {v}  tag_Star A y" using a by simp
  then obtain u1 v1 
    where b1: "v A v1"
    and   b3: "u1  A"
    and   b4: "(u1, v1)  Partitions y"
    unfolding tag_Star_def by auto
  have c: "v1 @ u'  A" using b1 a4 unfolding str_eq_def by simp
  have "u1 @ (v1 @ u') @ v'  A"
    using b3 c a5 by (simp only: append_in_starI)
  then show "y @ z  A" using b4 a1 
    unfolding Partitions_def by auto
qed
    
lemma tag_Star_empty_injI:
  assumes a: "tag_Star A x = tag_Star A y"
  and     c: "x @ z  A"
  and     d: "x = []"
  shows "y @ z  A"
proof -
  from a have "{} = tag_Star A y" unfolding tag_Star_def using d by auto 
  then have "y = []"
    unfolding tag_Star_def Partitions_def strict_prefix_def prefix_def
    by (auto) (metis Nil_in_star append_self_conv2)
  then show "y @ z  A" using c d by simp
qed

lemma quot_star_finiteI [intro]:
  assumes finite1: "finite (UNIV // A)"
  shows "finite (UNIV // (A))"
proof (rule_tac tag = "tag_Star A" in tag_finite_imageD)
  have "x y z. tag_Star A x = tag_Star A y; x @ z  A  y @ z  A"
    by (case_tac "x = []") (blast intro: tag_Star_empty_injI tag_Star_non_empty_injI)+
  then show "=(tag_Star A)=  (A)"
    by (rule refined_intro) (auto simp add: tag_eq_def)
next
  have *: "finite (Pow (UNIV // A))" 
     using finite1 by auto
  show "finite (range (tag_Star A))"
    unfolding tag_Star_def 
    by (rule finite_subset[OF _ *])
       (auto simp add: quotient_def)
qed

subsection ‹The conclusion of the second direction›

lemma Myhill_Nerode2:
  fixes r::"'a rexp"
  shows "finite (UNIV // (lang r))"
by (induct r) (auto)

end

Theory Myhill

(* Author: Xingyuan Zhang, Chunhan Wu, Christian Urban *)

theory Myhill
  imports Myhill_2 "Regular-Sets.Derivatives"
begin

section ‹The theorem›

theorem Myhill_Nerode:
  fixes A::"('a::finite) lang"
  shows "(r. A = lang r)  finite (UNIV // A)"
using Myhill_Nerode1 Myhill_Nerode2 by auto


subsection ‹Second direction proved using partial derivatives›

text ‹
  An alternaive proof using the notion of partial derivatives for regular 
  expressions due to Antimirov \cite{Antimirov95}.
›

lemma MN_Rel_Derivs:
  shows "x A y  Derivs x A = Derivs y A"
unfolding Derivs_def str_eq_def
by auto

lemma Myhill_Nerode3:
  fixes r::"'a rexp"
  shows "finite (UNIV // (lang r))"
proof -
  have "finite (UNIV // =(λx. pderivs x r)=)"
  proof - 
    have "range (λx. pderivs x r)  Pow (pderivs_lang UNIV r)"
      unfolding pderivs_lang_def by auto
    moreover 
    have "finite (Pow (pderivs_lang UNIV r))" by (simp add: finite_pderivs_lang)
    ultimately
    have "finite (range (λx. pderivs x r))"
      by (simp add: finite_subset)
    then show "finite (UNIV // =(λx. pderivs x r)=)" 
      by (rule finite_eq_tag_rel)
  qed
  moreover 
  have "=(λx. pderivs x r)=  (lang r)"
    unfolding tag_eq_def
    by (auto simp add: MN_Rel_Derivs Derivs_pderivs)
  moreover 
  have "equiv UNIV =(λx. pderivs x r)="
  and  "equiv UNIV ((lang r))"
    unfolding equiv_def refl_on_def sym_def trans_def
    unfolding tag_eq_def str_eq_def
    by auto
  ultimately show "finite (UNIV // (lang r))" 
    by (rule refined_partition_finite)
qed

end

Theory Closures

(* Author: Christian Urban, Xingyuan Zhang, Chunhan Wu *)
theory Closures
imports Myhill "HOL-Library.Infinite_Set"
begin

section ‹Closure properties of regular languages›

abbreviation
  regular :: "'a lang  bool"
where
  "regular A  r. A = lang r"

subsection ‹Closure under ∪›, ⋅› and ⋆›

lemma closure_union [intro]:
  assumes "regular A" "regular B" 
  shows "regular (A  B)"
proof -
  from assms obtain r1 r2::"'a rexp" where "lang r1 = A" "lang r2 = B" by auto
  then have "A  B = lang (Plus r1 r2)" by simp
  then show "regular (A  B)" by blast
qed

lemma closure_seq [intro]:
  assumes "regular A" "regular B" 
  shows "regular (A  B)"
proof -
  from assms obtain r1 r2::"'a rexp" where "lang r1 = A" "lang r2 = B" by auto
  then have "A  B = lang (Times r1 r2)" by simp
  then show "regular (A  B)" by blast
qed

lemma closure_star [intro]:
  assumes "regular A"
  shows "regular (A)"
proof -
  from assms obtain r::"'a rexp" where "lang r = A" by auto
  then have "A = lang (Star r)" by simp
  then show "regular (A)" by blast
qed

subsection ‹Closure under complementation›

text ‹Closure under complementation is proved via the 
  Myhill-Nerode theorem›

lemma closure_complement [intro]:
  fixes A::"('a::finite) lang"
  assumes "regular A"
  shows "regular (- A)"
proof -
  from assms have "finite (UNIV // A)" by (simp add: Myhill_Nerode)
  then have "finite (UNIV // (-A))" by (simp add: str_eq_def)
  then show "regular (- A)" by (simp add: Myhill_Nerode)
qed

subsection ‹Closure under -› and ∩›

lemma closure_difference [intro]:
  fixes A::"('a::finite) lang"
  assumes "regular A" "regular B" 
  shows "regular (A - B)"
proof -
  have "A - B = - (- A  B)" by blast
  moreover
  have "regular (- (- A  B))" 
    using assms by blast
  ultimately show "regular (A - B)" by simp
qed

lemma closure_intersection [intro]:
  fixes A::"('a::finite) lang"
  assumes "regular A" "regular B" 
  shows "regular (A  B)"
proof -
  have "A  B = - (- A  - B)" by blast
  moreover
  have "regular (- (- A  - B))" 
    using assms by blast
  ultimately show "regular (A  B)" by simp
qed

subsection ‹Closure under string reversal›

fun
  Rev :: "'a rexp  'a rexp"
where
  "Rev Zero = Zero"
| "Rev One = One"
| "Rev (Atom c) = Atom c"
| "Rev (Plus r1 r2) = Plus (Rev r1) (Rev r2)"
| "Rev (Times r1 r2) = Times (Rev r2) (Rev r1)"
| "Rev (Star r) = Star (Rev r)"

lemma rev_seq[simp]:
  shows "rev ` (B  A) = (rev ` A)  (rev ` B)"
unfolding conc_def image_def
by (auto) (metis rev_append)+

lemma rev_star1:
  assumes a: "s  (rev ` A)"
  shows "s  rev ` (A)"
using a
proof(induct rule: star_induct)
  case (append s1 s2)
  have inj: "inj (rev::'a list  'a list)" unfolding inj_on_def by auto
  have "s1  rev ` A" "s2  rev ` (A)" by fact+
  then obtain x1 x2 where "x1  A" "x2  A" and eqs: "s1 = rev x1" "s2 = rev x2" by auto
  then have "x1  A" "x2  A" by (auto)
  then have "x2 @ x1  A" by (auto)
  then have "rev (x2 @ x1)  rev ` A" using inj by (simp only: inj_image_mem_iff)
  then show "s1 @ s2   rev ` A" using eqs by simp
qed (auto)

lemma rev_star2:
  assumes a: "s  A"
  shows "rev s  (rev ` A)"
using a
proof(induct rule: star_induct)
  case (append s1 s2)
  have inj: "inj (rev::'a list  'a list)" unfolding inj_on_def by auto
  have "s1  A"by fact
  then have "rev s1  rev ` A" using inj by (simp only: inj_image_mem_iff)
  then have "rev s1  (rev ` A)" by (auto)
  moreover
  have "rev s2  (rev ` A)" by fact
  ultimately show "rev (s1 @ s2)   (rev ` A)" by (auto)
qed (auto)

lemma rev_star [simp]:
  shows " rev ` (A) = (rev ` A)"
using rev_star1 rev_star2 by auto

lemma rev_lang:
  shows "rev ` (lang r) = lang (Rev r)"
by (induct r) (simp_all add: image_Un)

lemma closure_reversal [intro]:
  assumes "regular A"
  shows "regular (rev ` A)"
proof -
  from assms obtain r::"'a rexp" where "A = lang r" by auto
  then have "lang (Rev r) = rev ` A" by (simp add: rev_lang)
  then show "regular (rev` A)" by blast
qed
  
subsection ‹Closure under left-quotients›

abbreviation
  "Deriv_lang A B  x  A. Derivs x B"

lemma closure_left_quotient:
  assumes "regular A"
  shows "regular (Deriv_lang B A)"
proof -
  from assms obtain r::"'a rexp" where eq: "lang r = A" by auto
  have fin: "finite (pderivs_lang B r)" by (rule finite_pderivs_lang)
  
  have "Deriv_lang B (lang r) = ( (lang ` pderivs_lang B r))"
    by (simp add: Derivs_pderivs pderivs_lang_def)
  also have " = lang ((pderivs_lang B r))" using fin by simp
  finally have "Deriv_lang B A = lang ((pderivs_lang B r))" using eq
    by simp
  then show "regular (Deriv_lang B A)" by auto
qed

subsection ‹Finite and co-finite sets are regular›

lemma singleton_regular:
  shows "regular {s}"
proof (induct s)
  case Nil
  have "{[]} = lang (One)" by simp
  then show "regular {[]}" by blast
next
  case (Cons c s)
  have "regular {s}" by fact
  then obtain r where "{s} = lang r" by blast
  then have "{c # s} = lang (Times (Atom c) r)" 
    by (auto simp add: conc_def)
  then show "regular {c # s}" by blast
qed

lemma finite_regular:
  assumes "finite A"
  shows "regular A"
using assms
proof (induct)
  case empty
  have "{} = lang (Zero)" by simp
  then show "regular {}" by blast
next
  case (insert s A)
  have "regular {s}" by (simp add: singleton_regular)
  moreover
  have "regular A" by fact
  ultimately have "regular ({s}  A)" by (rule closure_union)
  then show "regular (insert s A)" by simp
qed

lemma cofinite_regular:
  fixes A::"'a::finite lang"
  assumes "finite (- A)"
  shows "regular A"
proof -
  from assms have "regular (- A)" by (simp add: finite_regular)
  then have "regular (-(- A))" by (rule closure_complement)
  then show "regular A" by simp
qed


subsection ‹Continuation lemma for showing non-regularity of languages›

lemma continuation_lemma:
  fixes A B::"'a::finite lang"
  assumes reg: "regular A"
  and     inf: "infinite B"
  shows "x  B. y  B. x  y  x A y"
proof -
  define eqfun where "eqfun = (λA x::('a::finite list). (A) `` {x})"
  have "finite (UNIV // A)" using reg by (simp add: Myhill_Nerode)
  moreover
  have "(eqfun A) ` B  UNIV // (A)"
    unfolding eqfun_def quotient_def by auto
  ultimately have "finite ((eqfun A) ` B)" by (rule rev_finite_subset)
  with inf have "a  B. infinite {b  B. eqfun A b = eqfun A a}"
    by (rule pigeonhole_infinite)
  then obtain a where in_a: "a  B" and "infinite {b  B. eqfun A b = eqfun A a}"
    by blast
  moreover 
  have "{b  B. eqfun A b = eqfun A a} = {b  B. b A a}"
    unfolding eqfun_def Image_def str_eq_def by auto
  ultimately have "infinite {b  B. b A a}" by simp
  then have "infinite ({b  B. b A a} - {a})" by simp
  moreover
  have "{b  B. b A a} - {a} = {b  B. b A a  b  a}" by auto
  ultimately have "infinite {b  B. b A a  b  a}" by simp
  then have "{b  B. b A a  b  a}  {}"
    by (metis finite.emptyI)
  then obtain b where "b  B" "b  a" "b A a" by blast
  with in_a show "x  B. y  B. x  y  x A y"
    by blast
qed


subsection ‹The language an bn is not regular›

abbreviation
  replicate_rev ("_ ^^^ _" [100, 100] 100)
where
  "a ^^^ n  replicate n a"

lemma an_bn_not_regular:
  shows "¬ regular (n. {CHR ''a'' ^^^ n @ CHR ''b'' ^^^ n})"
proof
  define A where "A = (n. {CHR ''a'' ^^^ n @ CHR ''b'' ^^^ n})"
  assume as: "regular A"
  define B where "B = (n. {CHR ''a'' ^^^ n})"

  have sameness: "i j. CHR ''a'' ^^^ i @ CHR ''b'' ^^^ j  A  i = j"
    unfolding A_def 
    apply auto
    apply(drule_tac f="λs. length (filter ((=) (CHR ''a'')) s) = length (filter ((=) (CHR ''b'')) s)" 
      in arg_cong)
    apply(simp)
    done

  have b: "infinite B"
    unfolding infinite_iff_countable_subset
    unfolding inj_on_def B_def
    by (rule_tac x="λn. CHR ''a'' ^^^ n" in exI) (auto)
  moreover
  have "x  B. y  B. x  y  ¬ (x A y)"
    apply(auto)
    unfolding B_def
    apply(auto)
    apply(simp add: str_eq_def)
    apply(drule_tac x="CHR ''b'' ^^^ xa" in spec)
    apply(simp add: sameness)
    done
  ultimately 
  show "False" using continuation_lemma[OF as] by blast
qed


end

Theory Closures2

theory Closures2
imports 
  Closures
  Well_Quasi_Orders.Well_Quasi_Orders
begin

section ‹Closure under SUBSEQ› and SUPSEQ›

text ‹Properties about the embedding relation›

lemma subseq_strict_length:
  assumes a: "subseq x y" "x  y" 
  shows "length x < length y"
using a
by (induct) (auto simp add: less_Suc_eq)

lemma subseq_wf:
  shows "wf {(x, y). subseq x y  x  y}"
proof -
  have "wf (measure length)" by simp
  moreover
  have "{(x, y). subseq x y  x  y}  measure length"
    unfolding measure_def by (auto simp add: subseq_strict_length)
  ultimately 
  show "wf {(x, y). subseq x y  x  y}" by (rule wf_subset)
qed

lemma subseq_good:
  shows "good subseq (f :: nat  ('a::finite) list)"
using wqo_on_imp_good[where f="f", OF wqo_on_lists_over_finite_sets]
by simp

lemma subseq_Higman_antichains:
  assumes a: "(x::('a::finite) list)  A. y  A. x  y  ¬(subseq x y)  ¬(subseq y x)"
  shows "finite A"
proof (rule ccontr)
  assume "infinite A"
  then obtain f::"nat  'a::finite list" where b: "inj f" and c: "range f  A"
    by (auto simp add: infinite_iff_countable_subset)
  from subseq_good[where f="f"] 
  obtain i j where d: "i < j" and e: "subseq (f i) (f j)  f i = f j" 
    unfolding good_def
    by auto
  have "f i  f j" using b d by (auto simp add: inj_on_def)
  moreover
  have "f i  A" using c by auto
  moreover
  have "f j  A" using c by auto
  ultimately have "¬(subseq (f i) (f j))" using a by simp
  with e show "False" by auto
qed

subsection ‹Sub- and Supersequences›

definition
 "SUBSEQ A  {x::('a::finite) list. y  A. subseq x y}"

definition
 "SUPSEQ A  {x::('a::finite) list. y  A. subseq y x}"

lemma SUPSEQ_simps [simp]:
  shows "SUPSEQ {} = {}"
  and   "SUPSEQ {[]} = UNIV"
unfolding SUPSEQ_def by auto

lemma SUPSEQ_atom [simp]:
  shows "SUPSEQ {[c]} = UNIV  {[c]}  UNIV"
unfolding SUPSEQ_def conc_def
by (auto dest: list_emb_ConsD)

lemma SUPSEQ_union [simp]:
  shows "SUPSEQ (A  B) = SUPSEQ A  SUPSEQ B"
unfolding SUPSEQ_def by auto

lemma SUPSEQ_conc [simp]:
  shows "SUPSEQ (A  B) = SUPSEQ A  SUPSEQ B"
unfolding SUPSEQ_def conc_def
apply(auto)
apply(drule list_emb_appendD)
apply(auto)
by (metis list_emb_append_mono)

lemma SUPSEQ_star [simp]:
  shows "SUPSEQ (A) = UNIV"
apply(subst star_unfold_left)
apply(simp only: SUPSEQ_union) 
apply(simp)
done

subsection ‹Regular expression that recognises every character›

definition
  Allreg :: "'a::finite rexp"
where
  "Allreg  (Atom ` UNIV)"

lemma Allreg_lang [simp]:
  shows "lang Allreg = (a. {[a]})"
unfolding Allreg_def by auto

lemma [simp]:
  shows "(a. {[a]}) = UNIV"
apply(auto)
apply(induct_tac x)
apply(auto)
apply(subgoal_tac "[a] @ list  (a. {[a]})")
apply(simp)
apply(rule append_in_starI)
apply(auto)
done

lemma Star_Allreg_lang [simp]:
  shows "lang (Star Allreg) = UNIV"
by simp

fun 
  UP :: "'a::finite rexp  'a rexp"
where
  "UP (Zero) = Zero"
| "UP (One) = Star Allreg"
| "UP (Atom c) = Times (Star Allreg) (Times (Atom c) (Star Allreg))"   
| "UP (Plus r1 r2) = Plus (UP r1) (UP r2)"
| "UP (Times r1 r2) = Times (UP r1) (UP r2)"
| "UP (Star r) = Star Allreg"

lemma lang_UP:
  fixes r::"'a::finite rexp"
  shows "lang (UP r) = SUPSEQ (lang r)"
by (induct r) (simp_all)

lemma SUPSEQ_regular: 
  fixes A::"'a::finite lang"
  assumes "regular A"
  shows "regular (SUPSEQ A)"
proof -
  from assms obtain r::"'a::finite rexp" where "lang r = A" by auto
  then have "lang (UP r) = SUPSEQ A" by (simp add: lang_UP)
  then show "regular (SUPSEQ A)" by auto
qed

lemma SUPSEQ_subset:
  fixes A::"'a::finite list set"
  shows "A  SUPSEQ A"
unfolding SUPSEQ_def by auto

lemma SUBSEQ_complement:
  shows "- (SUBSEQ A) = SUPSEQ (- (SUBSEQ A))"
proof -
  have "- (SUBSEQ A)  SUPSEQ (- (SUBSEQ A))"
    by (rule SUPSEQ_subset)
  moreover 
  have "SUPSEQ (- (SUBSEQ A))  - (SUBSEQ A)"
  proof (rule ccontr)
    assume "¬ (SUPSEQ (- (SUBSEQ A))  - (SUBSEQ A))"
    then obtain x where 
       a: "x  SUPSEQ (- (SUBSEQ A))" and 
       b: "x  - (SUBSEQ A)" by auto

    from a obtain y where c: "y  - (SUBSEQ A)" and d: "subseq y x"
      by (auto simp add: SUPSEQ_def)

    from b have "x  SUBSEQ A" by simp
    then obtain x' where f: "x'  A" and e: "subseq x x'"
      by (auto simp add: SUBSEQ_def)
    
    from d e have "subseq y x'"
      by (rule subseq_order.order_trans)
    then have "y  SUBSEQ A" using f
      by (auto simp add: SUBSEQ_def)
    with c show "False" by simp
  qed
  ultimately show "- (SUBSEQ A) = SUPSEQ (- (SUBSEQ A))" by simp
qed

definition
  minimal :: "'a::finite list  'a lang  bool"
where
  "minimal x A  (y  A. subseq y x  subseq x y)"

lemma main_lemma:
  shows "M. finite M  SUPSEQ A = SUPSEQ M"
proof -
  define M where "M = {x  A. minimal x A}"
  have "finite M"
    unfolding M_def minimal_def
    by (rule subseq_Higman_antichains) (auto simp add: subseq_order.antisym)
  moreover
  have "SUPSEQ A  SUPSEQ M"
  proof
    fix x
    assume "x  SUPSEQ A"
    then obtain y where "y  A" and "subseq y x" by (auto simp add: SUPSEQ_def)
    then have a: "y  {y'  A. subseq y' x}" by simp
    obtain z where b: "z  A" "subseq z x" and c: "y. subseq y z  y  z  y  {y'  A. subseq y' x}"
      using wfE_min[OF subseq_wf a] by auto
    then have "z  M"
      unfolding M_def minimal_def
      by (auto intro: subseq_order.order_trans)
    with b(2) show "x  SUPSEQ M"
      by (auto simp add: SUPSEQ_def)
  qed
  moreover
  have "SUPSEQ M  SUPSEQ A"
    by (auto simp add: SUPSEQ_def M_def)
  ultimately
  show "M. finite M  SUPSEQ A = SUPSEQ M" by blast
qed

subsection ‹Closure of @{const SUBSEQ} and @{const SUPSEQ}

lemma closure_SUPSEQ:
  fixes A::"'a::finite lang" 
  shows "regular (SUPSEQ A)"
proof -
  obtain M where a: "finite M" and b: "SUPSEQ A = SUPSEQ M"
    using main_lemma by blast
  have "regular M" using a by (rule finite_regular)
  then have "regular (SUPSEQ M)" by (rule SUPSEQ_regular)
  then show "regular (SUPSEQ A)" using b by simp
qed

lemma closure_SUBSEQ:
  fixes A::"'a::finite lang"
  shows "regular (SUBSEQ A)"
proof -
  have "regular (SUPSEQ (- SUBSEQ A))" by (rule closure_SUPSEQ)
  then have "regular (- SUBSEQ A)" by (subst SUBSEQ_complement) (simp)
  then have "regular (- (- (SUBSEQ A)))" by (rule closure_complement)
  then show "regular (SUBSEQ A)" by simp
qed

end

Theory Non_Regular_Languages

(* 
  File:     Non_Regular_Languages.thy
  Author:   Manuel Eberl <eberlm@in.tum.de>
  
  This file provides some tools for showing the non-regularity of a language, either 
  via an infinite set of equivalence classes or via the Pumping Lemma.
*)
section ‹Tools for showing non-regularity of a language›
theory Non_Regular_Languages
  imports Myhill
begin

subsection ‹Auxiliary material›

lemma bij_betw_image_quotient:
  "bij_betw (λy. f -` {y}) (f ` A) (A // {(a,b). f a = f b})"
  by (force simp: bij_betw_def inj_on_def image_image quotient_def)
   
lemma regular_Derivs_finite:
  fixes r :: "'a :: finite rexp"
  shows "finite (range (λw. Derivs w (lang r)))"
proof -
  have "?thesis  finite (UNIV // lang r)"
    unfolding str_eq_conv_Derivs by (rule bij_betw_finite bij_betw_image_quotient)+
  also have "" by (subst Myhill_Nerode [symmetric]) auto
  finally show ?thesis .
qed
  
lemma Nil_in_Derivs_iff: "[]  Derivs w A  w  A"
  by (auto simp: Derivs_def)

(* TODO: Move to distribution? *)
text ‹
  The following operation repeats a list $n$ times (usually written as $w ^ n$).
›
primrec repeat :: "nat  'a list  'a list" where
  "repeat 0 xs = []"
| "repeat (Suc n) xs = xs @ repeat n xs"
  
lemma repeat_Cons_left: "repeat (Suc n) xs = xs @ repeat n xs" by simp
    
lemma repeat_Cons_right: "repeat (Suc n) xs = repeat n xs @ xs"
  by (induction n) simp_all
    
lemma repeat_Cons_append_commute [simp]: "repeat n xs @ xs = xs @ repeat n xs"
  by (subst repeat_Cons_right [symmetric]) simp
    
lemma repeat_Cons_add [simp]: "repeat (m + n) xs = repeat m xs @ repeat n xs"
  by (induction m) simp_all
    
lemma repeat_Nil [simp]: "repeat n [] = []"
  by (induction n) simp_all
    
lemma repeat_conv_replicate: "repeat n xs = concat (replicate n xs)"
  by (induction n) simp_all

    
(* TODO: Move to distribution? *)
lemma nth_prefixes [simp]: "n  length xs  prefixes xs ! n = take n xs"
  by (induction xs arbitrary: n) (auto simp: nth_Cons split: nat.splits)
    
lemma nth_suffixes [simp]: "n  length xs  suffixes xs ! n = drop (length xs - n) xs"
  by (subst suffixes_conv_prefixes) (simp_all add: rev_take)

lemma length_take_prefixes:
  assumes "xs  set (take n (prefixes ys))"
  shows   "length xs < n"
proof (cases "n  Suc (length ys)")
  case True
  with assms obtain i where "i < n" "xs = take i ys"
    by (subst (asm) nth_image [symmetric]) auto
  thus ?thesis by simp
next
  case False
  with assms have "prefix xs ys" by simp
  hence "length xs  length ys" by (rule prefix_length_le)
  also from False have " < n" by simp
  finally show ?thesis .
qed


subsection ‹Non-regularity by giving an infinite set of equivalence classes›
  
text ‹
  Non-regularity can be shown by giving an infinite set of non-equivalent words (w.r.t. the 
  Myhill--Nerode relation).
›
lemma not_regular_langI:
  assumes "infinite B" "x y. x  B  y  B  x  y  w. ¬(x @ w  A  y @ w  A)"
  shows   "¬regular_lang (A :: 'a :: finite list set)"
proof -
  have "(λw. Derivs w A) ` B  range (λw. Derivs w A)" by blast
  moreover from assms(2) have "inj_on (λw. Derivs w A) B"
    by (auto simp: inj_on_def Derivs_def)
  with assms(1) have "infinite ((λw. Derivs w A) ` B)"
    by (blast dest: finite_imageD)
  ultimately have "infinite (range (λw. Derivs w A))" by (rule infinite_super)
  with regular_Derivs_finite show ?thesis by blast
qed

lemma not_regular_langI':
  assumes "infinite B" "x y. x  B  y  B  x  y  w. ¬(f x @ w  A  f y @ w  A)"
  shows   "¬regular_lang (A :: 'a :: finite list set)"
proof (rule not_regular_langI)
  from assms(2) have "inj_on f B" by (force simp: inj_on_def)
  with ‹infinite B show "infinite (f ` B)" by (simp add: finite_image_iff)
qed (insert assms, auto)

  
subsection ‹The Pumping Lemma›

text ‹
  The Pumping lemma can be shown very easily from the Myhill--Nerode theorem: if we have 
  a word whose length is more than the (finite) number of equivalence classes, then it must 
  have two different prefixes in the same class and the difference between these two 
  prefixes can then be ``pumped''.
›
lemma pumping_lemma_aux:
  fixes A :: "'a list set"
  defines "δ  λw. Derivs w A"
  defines "n  card (range δ)"
  assumes "z  A" "finite (range δ)" "length z  n"
  shows "u v w. z = u @ v @ w  length (u @ v)  n  v  []  (i. u @ repeat i v @ w  A)"
proof -
  define P where "P = set (take (Suc n) (prefixes z))"
  from ‹length z  n have [simp]: "card P = Suc n"
    unfolding P_def by (subst distinct_card) (auto intro!: distinct_take)
  have length_le: "length y  n" if "y  P" for y
    using length_take_prefixes[OF that [unfolded P_def]] by simp
      
  have "card (δ ` P)  card (range δ)" by (intro card_mono assms) auto
  also from assms have " < card P" by simp
  finally have "¬inj_on δ P" by (rule pigeonhole)
  then obtain a b where ab: "a  P" "b  P" "a  b" "Derivs a A = Derivs b A"
    by (auto simp: inj_on_def δ_def)
  from ab have prefix_ab: "prefix a z" "prefix b z" by (auto simp: P_def dest: in_set_takeD)
  from ab have length_ab: "length a  n" "length b  n"
    by (simp_all add: length_le)
      
  have *: ?thesis 
    if uz': "prefix u z'" "prefix z' z" "length z'  n" 
            "u  z'" "Derivs z' A = Derivs u A" for u z'
  proof -
    from ‹prefix u z' and u  z' 
      obtain v where v [simp]: "z' = u @ v" "v  []" 
      by (auto simp: prefix_def)
    from ‹prefix z' z obtain w where [simp]: "z = u @ v @ w" 
      by (auto simp: prefix_def)
      
    hence [simp]: "Derivs (repeat i v) (Derivs u A) = Derivs u A" for i
      by (induction i) (use uz' in simp_all)
    have "Derivs z A = Derivs (u @ repeat i v @ w) A" for i
      using uz' by simp
    with z  A and uz' have "i. u @ repeat i v @ w  A"
      by (simp add: Nil_in_Derivs_iff [of _ A, symmetric])
    moreover have "z = u @ v @ w" by simp
    moreover from ‹length z'  n have "length (u @ v)  n" by simp
    ultimately show ?thesis using v  [] by blast
  qed
  
  from prefix_ab have "prefix a b  prefix b a" by (rule prefix_same_cases)
  with *[of a b] and *[of b a] and ab and prefix_ab and length_ab show ?thesis by blast
qed

theorem pumping_lemma:
  fixes r :: "'a :: finite rexp"
  obtains n where 
    "z. z  lang r  length z  n  
            u v w. z = u @ v @ w  length (u @ v)  n  v  []  (i. u @ repeat i v @ w  lang r)"
proof -
  let ?n = "card (range (λw. Derivs w (lang r)))"
  have "u v w. z = u @ v @ w  length (u @ v)  ?n  v  []  (i. u @ repeat i v @ w  lang r)"
    if "z  lang r" and "length z  ?n" for z
    by (intro pumping_lemma_aux[of z] that regular_Derivs_finite)
  thus ?thesis by (rule that)
qed
  
corollary pumping_lemma_not_regular_lang:
  fixes A :: "'a :: finite list set"
  assumes "n. length (z n)  n" and "n. z n  A"
  assumes "n u v w. z n = u @ v @ w  length (u @ v)  n  v  []  
             u @ repeat (i n u v w) v @ w  A"
  shows   "¬regular_lang A"
proof
  assume "regular_lang A"
  then obtain r where r: "lang r = A" by blast
  from pumping_lemma[of r] guess n .
  from this[of "z n"] and assms[of n] obtain u v w 
    where "z n = u @ v @ w" and "length (u @ v)  n" and "v  []" and 
          "i. u @ repeat i v @ w  lang r" by (auto simp: r)
  with assms(3)[of n u v w] show False by (auto simp: r)
qed


subsection ‹Examples›

text ‹
  The language of all words containing the same number of $a$s and $b$s is not regular.
›
lemma "¬regular_lang {w. length (filter id w) = length (filter Not w)}" (is "¬regular_lang ?A")
proof (rule not_regular_langI')
  show "infinite (UNIV :: nat set)" by simp
next
  fix m n :: nat assume "m  n"
  hence "replicate m True @ replicate m False  ?A" and
        "replicate n True @ replicate m False  ?A" by simp_all
  thus "w. ¬(replicate m True @ w  ?A  replicate n True @ w  ?A)" by blast
qed

text ‹
  The language $\{a^i b^i\ |\ i \in \mathbb{N}\}$ is not regular.
›
lemma eq_replicate_iff:
  "xs = replicate n x  set xs  {x}  length xs = n"
  using replicate_length_same[of xs x] by (subst eq_commute) auto
    
lemma replicate_eq_appendE:
  assumes "xs @ ys = replicate n x"
  obtains i j where "n = i + j" "xs = replicate i x" "ys = replicate j x"
proof -
  have "n = length (replicate n x)" by simp
  also note assms [symmetric]
  finally have "n = length xs + length ys" by simp
  moreover have "xs = replicate (length xs) x" and "ys = replicate (length ys) x"
    using assms by (auto simp: eq_replicate_iff)
  ultimately show ?thesis using that[of "length xs" "length ys"] by auto
qed
  
lemma "¬regular_lang (range (λi. replicate i True @ replicate i False))" (is "¬regular_lang ?A")
proof (rule pumping_lemma_not_regular_lang)
  fix n :: nat
  show "length (replicate n True @ replicate n False)  n" by simp
  show "replicate n True @ replicate n False  ?A" by simp
next
  fix n :: nat and u v w :: "bool list"
  assume decomp: "replicate n True @ replicate n False = u @ v @ w"
     and length_le: "length (u @ v)  n" and v_ne: "v  []"
  define w1 w2 where "w1 = take (n - length (u@v)) w" and "w2 = drop (n - length (u@v)) w"
  have w_decomp: "w = w1 @ w2" by (simp add: w1_def w2_def)
  
  have "replicate n True = take n (replicate n True @ replicate n False)" by simp
  also note decomp
  also have "take n (u @ v @ w) = u @ v @ w1" using length_le by (simp add: w1_def)
  finally have "u @ v @ w1 = replicate n True" by simp
  then obtain i j k 
    where uvw1: "n = i + j + k" "u = replicate i True" "v = replicate j True" "w1 = replicate k True"
    by (elim replicate_eq_appendE) auto
  
  have "replicate n False = drop n (replicate n True @ replicate n False)" by simp
  also note decomp
  finally have [simp]: "w2 = replicate n False" using length_le by (simp add: w2_def)
      
  have "u @ repeat (Suc (Suc 0)) v @ w = replicate (n + j) True @ replicate n False"
    by (simp add: uvw1 w_decomp replicate_add [symmetric])
  also have "  ?A"
  proof safe
    fix m assume *: "replicate (n + j) True @ replicate n False = 
                       replicate m True @ replicate m False" (is "?lhs = ?rhs")
    have "n = length (filter Not ?lhs)" by simp
    also note *
    also have "length (filter Not ?rhs) = m" by simp
    finally have [simp]: "m = n" by simp
    from * have "v = []" by (simp add: uvw1)
    with v  [] show False by contradiction
  qed
  finally show "u @ repeat (Suc (Suc 0)) v @ w  ?A" .
qed
    
end