# Theory Lexer

(* Title: POSIX Lexing with Derivatives of Regular Expressions Authors: Fahad Ausaf <fahad.ausaf at icloud.com>, 2016 Roy Dyckhoff <roy.dyckhoff at st-andrews.ac.uk>, 2016 Christian Urban <christian.urban at kcl.ac.uk>, 2016 Maintainer: Christian Urban <christian.urban at kcl.ac.uk> *) theory Lexer imports "Regular-Sets.Derivatives" begin section ‹Values› datatype 'a val = Void | Atm 'a | Seq "'a val" "'a val" | Right "'a val" | Left "'a val" | Stars "('a val) list" section ‹The string behind a value› fun flat :: "'a val ⇒ 'a list" where "flat (Void) = []" | "flat (Atm c) = [c]" | "flat (Left v) = flat v" | "flat (Right v) = flat v" | "flat (Seq v1 v2) = (flat v1) @ (flat v2)" | "flat (Stars []) = []" | "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" lemma flat_Stars [simp]: "flat (Stars vs) = concat (map flat vs)" by (induct vs) (auto) section ‹Relation between values and regular expressions› inductive Prf :: "'a val ⇒ 'a rexp ⇒ bool" ("⊢ _ : _" [100, 100] 100) where "⟦⊢ v1 : r1; ⊢ v2 : r2⟧ ⟹ ⊢ Seq v1 v2 : Times r1 r2" | "⊢ v1 : r1 ⟹ ⊢ Left v1 : Plus r1 r2" | "⊢ v2 : r2 ⟹ ⊢ Right v2 : Plus r1 r2" | "⊢ Void : One" | "⊢ Atm c : Atom c" | "⊢ Stars [] : Star r" | "⟦⊢ v : r; ⊢ Stars vs : Star r⟧ ⟹ ⊢ Stars (v # vs) : Star r" inductive_cases Prf_elims: "⊢ v : Zero" "⊢ v : Times r1 r2" "⊢ v : Plus r1 r2" "⊢ v : One" "⊢ v : Atom c" (* "⊢ vs : Star r"*) lemma Prf_flat_lang: assumes "⊢ v : r" shows "flat v ∈ lang r" using assms by(induct v r rule: Prf.induct) (auto) lemma Prf_Stars: assumes "∀v ∈ set vs. ⊢ v : r" shows "⊢ Stars vs : Star r" using assms by(induct vs) (auto intro: Prf.intros) lemma Star_string: assumes "s ∈ star A" shows "∃ss. concat ss = s ∧ (∀s ∈ set ss. s ∈ A)" using assms by (metis in_star_iff_concat subsetD) lemma Star_val: assumes "∀s∈set ss. ∃v. s = flat v ∧ ⊢ v : r" shows "∃vs. concat (map flat vs) = concat ss ∧ (∀v∈set vs. ⊢ v : r)" using assms apply(induct ss) apply(auto) apply (metis empty_iff list.set(1)) by (metis concat.simps(2) list.simps(9) set_ConsD) lemma L_flat_Prf1: assumes "⊢ v : r" shows "flat v ∈ lang r" using assms by (induct)(auto) lemma L_flat_Prf2: assumes "s ∈ lang r" shows "∃v. ⊢ v : r ∧ flat v = s" using assms apply(induct r arbitrary: s) apply(auto intro: Prf.intros) using Prf.intros(2) flat.simps(3) apply blast using Prf.intros(3) flat.simps(4) apply blast apply (metis Prf.intros(1) concE flat.simps(5)) apply(subgoal_tac "∃vs::('a val) list. concat (map flat vs) = s ∧ (∀v ∈ set vs. ⊢ v : r)") apply(auto)[1] apply(rule_tac x="Stars vs" in exI) apply(simp) apply (simp add: Prf_Stars) apply(drule Star_string) apply(auto) apply(rule Star_val) apply(auto) done lemma L_flat_Prf: "lang r = {flat v | v. ⊢ v : r}" using L_flat_Prf1 L_flat_Prf2 by blast section ‹Sulzmann and Lu functions› fun mkeps :: "'a rexp ⇒ 'a val" where "mkeps(One) = Void" | "mkeps(Times r1 r2) = Seq (mkeps r1) (mkeps r2)" | "mkeps(Plus r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" | "mkeps(Star r) = Stars []" fun injval :: "'a rexp ⇒ 'a ⇒ 'a val ⇒ 'a val" where "injval (Atom d) c Void = Atm d" | "injval (Plus r1 r2) c (Left v1) = Left(injval r1 c v1)" | "injval (Plus r1 r2) c (Right v2) = Right(injval r2 c v2)" | "injval (Times r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2" | "injval (Times r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2" | "injval (Times r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" | "injval (Star r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" section ‹Mkeps, injval› lemma mkeps_nullable: assumes "nullable r" shows "⊢ mkeps r : r" using assms by (induct r) (auto intro: Prf.intros) lemma mkeps_flat: assumes "nullable r" shows "flat (mkeps r) = []" using assms by (induct r) (auto) lemma Prf_injval: assumes "⊢ v : deriv c r" shows "⊢ (injval r c v) : r" using assms apply(induct r arbitrary: c v rule: rexp.induct) apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) (* Star *) apply(rotate_tac 2) apply(erule Prf.cases) apply(simp_all)[7] apply(auto) apply (metis Prf.intros(6) Prf.intros(7)) by (metis Prf.intros(7)) lemma Prf_injval_flat: assumes "⊢ v : deriv c r" shows "flat (injval r c v) = c # (flat v)" using assms apply(induct r arbitrary: v c) apply(auto elim!: Prf_elims split: if_splits) apply(metis mkeps_flat) apply(rotate_tac 2) apply(erule Prf.cases) apply(simp_all)[7] done (* HERE *) section ‹Our Alternative Posix definition› inductive Posix :: "'a list ⇒ 'a rexp ⇒ 'a val ⇒ bool" ("_ ∈ _ → _" [100, 100, 100] 100) where Posix_One: "[] ∈ One → Void" | Posix_Atom: "[c] ∈ (Atom c) → (Atm c)" | Posix_Plus1: "s ∈ r1 → v ⟹ s ∈ (Plus r1 r2) → (Left v)" | Posix_Plus2: "⟦s ∈ r2 → v; s ∉ lang r1⟧ ⟹ s ∈ (Plus r1 r2) → (Right v)" | Posix_Times: "⟦s1 ∈ r1 → v1; s2 ∈ r2 → v2; ¬(∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ (s1 @ s⇩_{3}) ∈ lang r1 ∧ s⇩_{4}∈ lang r2)⟧ ⟹ (s1 @ s2) ∈ (Times r1 r2) → (Seq v1 v2)" | Posix_Star1: "⟦s1 ∈ r → v; s2 ∈ Star r → Stars vs; flat v ≠ []; ¬(∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ (s1 @ s⇩_{3}) ∈ lang r ∧ s⇩_{4}∈ lang (Star r))⟧ ⟹ (s1 @ s2) ∈ Star r → Stars (v # vs)" | Posix_Star2: "[] ∈ Star r → Stars []" inductive_cases Posix_elims: "s ∈ Zero → v" "s ∈ One → v" "s ∈ Atom c → v" "s ∈ Plus r1 r2 → v" "s ∈ Times r1 r2 → v" "s ∈ Star r → v" lemma Posix1: assumes "s ∈ r → v" shows "s ∈ lang r" "flat v = s" using assms by (induct s r v rule: Posix.induct) (auto) lemma Posix1a: assumes "s ∈ r → v" shows "⊢ v : r" using assms by (induct s r v rule: Posix.induct)(auto intro: Prf.intros) lemma Posix_mkeps: assumes "nullable r" shows "[] ∈ r → mkeps r" using assms apply(induct r) apply(auto intro: Posix.intros simp add: nullable_iff) apply(subst append.simps(1)[symmetric]) apply(rule Posix.intros) apply(auto) done lemma Posix_determ: assumes "s ∈ r → v1" "s ∈ r → v2" shows "v1 = v2" using assms proof (induct s r v1 arbitrary: v2 rule: Posix.induct) case (Posix_One v2) have "[] ∈ One → v2" by fact then show "Void = v2" by cases auto next case (Posix_Atom c v2) have "[c] ∈ Atom c → v2" by fact then show "Atm c = v2" by cases auto next case (Posix_Plus1 s r1 v r2 v2) have "s ∈ Plus r1 r2 → v2" by fact moreover have "s ∈ r1 → v" by fact then have "s ∈ lang r1" by (simp add: Posix1) ultimately obtain v' where eq: "v2 = Left v'" "s ∈ r1 → v'" by cases auto moreover have IH: "⋀v2. s ∈ r1 → v2 ⟹ v = v2" by fact ultimately have "v = v'" by simp then show "Left v = v2" using eq by simp next case (Posix_Plus2 s r2 v r1 v2) have "s ∈ Plus r1 r2 → v2" by fact moreover have "s ∉ lang r1" by fact ultimately obtain v' where eq: "v2 = Right v'" "s ∈ r2 → v'" by cases (auto simp add: Posix1) moreover have IH: "⋀v2. s ∈ r2 → v2 ⟹ v = v2" by fact ultimately have "v = v'" by simp then show "Right v = v2" using eq by simp next case (Posix_Times s1 r1 v1 s2 r2 v2 v') have "(s1 @ s2) ∈ Times r1 r2 → v'" "s1 ∈ r1 → v1" "s2 ∈ r2 → v2" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang r1 ∧ s⇩_{4}∈ lang r2)" by fact+ then obtain v1' v2' where "v' = Seq v1' v2'" "s1 ∈ r1 → v1'" "s2 ∈ r2 → v2'" apply(cases) apply (auto simp add: append_eq_append_conv2) using Posix1(1) by fastforce+ moreover have IHs: "⋀v1'. s1 ∈ r1 → v1' ⟹ v1 = v1'" "⋀v2'. s2 ∈ r2 → v2' ⟹ v2 = v2'" by fact+ ultimately show "Seq v1 v2 = v'" by simp next case (Posix_Star1 s1 r v s2 vs v2) have "(s1 @ s2) ∈ Star r → v2" "s1 ∈ r → v" "s2 ∈ Star r → Stars vs" "flat v ≠ []" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang r ∧ s⇩_{4}∈ lang (Star r))" by fact+ then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (Star r) → (Stars vs')" apply(cases) apply (auto simp add: append_eq_append_conv2) using Posix1(1) apply fastforce apply (metis Posix1(1) Posix_Star1.hyps(6) append_Nil append_Nil2) using Posix1(2) by blast moreover have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2" "⋀v2. s2 ∈ Star r → v2 ⟹ Stars vs = v2" by fact+ ultimately show "Stars (v # vs) = v2" by auto next case (Posix_Star2 r v2) have "[] ∈ Star r → v2" by fact then show "Stars [] = v2" by cases (auto simp add: Posix1) qed lemma Posix_injval: assumes "s ∈ (deriv c r) → v" shows "(c # s) ∈ r → (injval r c v)" using assms proof(induct r arbitrary: s v rule: rexp.induct) case Zero have "s ∈ deriv c Zero → v" by fact then have "s ∈ Zero → v" by simp then have "False" by cases then show "(c # s) ∈ Zero → (injval Zero c v)" by simp next case One have "s ∈ deriv c One → v" by fact then have "s ∈ Zero → v" by simp then have "False" by cases then show "(c # s) ∈ One → (injval One c v)" by simp next case (Atom d) consider (eq) "c = d" | (ineq) "c ≠ d" by blast then show "(c # s) ∈ (Atom d) → (injval (Atom d) c v)" proof (cases) case eq have "s ∈ deriv c (Atom d) → v" by fact then have "s ∈ One → v" using eq by simp then have eqs: "s = [] ∧ v = Void" by cases simp show "(c # s) ∈ Atom d → injval (Atom d) c v" using eq eqs by (auto intro: Posix.intros) next case ineq have "s ∈ deriv c (Atom d) → v" by fact then have "s ∈ Zero → v" using ineq by simp then have "False" by cases then show "(c # s) ∈ Atom d → injval (Atom d) c v" by simp qed next case (Plus r1 r2) have IH1: "⋀s v. s ∈ deriv c r1 → v ⟹ (c # s) ∈ r1 → injval r1 c v" by fact have IH2: "⋀s v. s ∈ deriv c r2 → v ⟹ (c # s) ∈ r2 → injval r2 c v" by fact have "s ∈ deriv c (Plus r1 r2) → v" by fact then have "s ∈ Plus (deriv c r1) (deriv c r2) → v" by simp then consider (left) v' where "v = Left v'" "s ∈ deriv c r1 → v'" | (right) v' where "v = Right v'" "s ∉ lang (deriv c r1)" "s ∈ deriv c r2 → v'" by cases auto then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v" proof (cases) case left have "s ∈ deriv c r1 → v'" by fact then have "(c # s) ∈ r1 → injval r1 c v'" using IH1 by simp then have "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c (Left v')" by (auto intro: Posix.intros) then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v" using left by simp next case right have "s ∉ lang (deriv c r1)" by fact then have "c # s ∉ lang r1" by (simp add: lang_deriv Deriv_def) moreover have "s ∈ deriv c r2 → v'" by fact then have "(c # s) ∈ r2 → injval r2 c v'" using IH2 by simp ultimately have "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c (Right v')" by (auto intro: Posix.intros) then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v" using right by simp qed next case (Times r1 r2) have IH1: "⋀s v. s ∈ deriv c r1 → v ⟹ (c # s) ∈ r1 → injval r1 c v" by fact have IH2: "⋀s v. s ∈ deriv c r2 → v ⟹ (c # s) ∈ r2 → injval r2 c v" by fact have "s ∈ deriv c (Times r1 r2) → v" by fact then consider (left_nullable) v1 v2 s1 s2 where "v = Left (Seq v1 v2)" "s = s1 @ s2" "s1 ∈ deriv c r1 → v1" "s2 ∈ r2 → v2" "nullable r1" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r1) ∧ s⇩_{4}∈ lang r2)" | (right_nullable) v1 s1 s2 where "v = Right v1" "s = s1 @ s2" "s ∈ deriv c r2 → v1" "nullable r1" "s1 @ s2 ∉ lang (Times (deriv c r1) r2)" | (not_nullable) v1 v2 s1 s2 where "v = Seq v1 v2" "s = s1 @ s2" "s1 ∈ deriv c r1 → v1" "s2 ∈ r2 → v2" "¬nullable r1" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r1) ∧ s⇩_{4}∈ lang r2)" by (force split: if_splits elim!: Posix_elims simp add: lang_deriv Deriv_def) then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" proof (cases) case left_nullable have "s1 ∈ deriv c r1 → v1" by fact then have "(c # s1) ∈ r1 → injval r1 c v1" using IH1 by simp moreover have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r1) ∧ s⇩_{4}∈ lang r2)" by fact then have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ (c # s1) @ s⇩_{3}∈ lang r1 ∧ s⇩_{4}∈ lang r2)" by (simp add: lang_deriv Deriv_def) ultimately have "((c # s1) @ s2) ∈ Times r1 r2 → Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros) then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using left_nullable by simp next case right_nullable have "nullable r1" by fact then have "[] ∈ r1 → (mkeps r1)" by (rule Posix_mkeps) moreover have "s ∈ deriv c r2 → v1" by fact then have "(c # s) ∈ r2 → (injval r2 c v1)" using IH2 by simp moreover have "s1 @ s2 ∉ lang (Times (deriv c r1) r2)" by fact then have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= c # s ∧ [] @ s⇩_{3}∈ lang r1 ∧ s⇩_{4}∈ lang r2)" using right_nullable apply (auto simp add: lang_deriv Deriv_def append_eq_Cons_conv) by (metis concI mem_Collect_eq) ultimately have "([] @ (c # s)) ∈ Times r1 r2 → Seq (mkeps r1) (injval r2 c v1)" by(rule Posix.intros) then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using right_nullable by simp next case not_nullable have "s1 ∈ deriv c r1 → v1" by fact then have "(c # s1) ∈ r1 → injval r1 c v1" using IH1 by simp moreover have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r1) ∧ s⇩_{4}∈ lang r2)" by fact then have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ (c # s1) @ s⇩_{3}∈ lang r1 ∧ s⇩_{4}∈ lang r2)" by (simp add: lang_deriv Deriv_def) ultimately have "((c # s1) @ s2) ∈ Times r1 r2 → Seq (injval r1 c v1) v2" using not_nullable by (rule_tac Posix.intros) (simp_all) then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using not_nullable by simp qed next case (Star r) have IH: "⋀s v. s ∈ deriv c r → v ⟹ (c # s) ∈ r → injval r c v" by fact have "s ∈ deriv c (Star r) → v" by fact then consider (cons) v1 vs s1 s2 where "v = Seq v1 (Stars vs)" "s = s1 @ s2" "s1 ∈ deriv c r → v1" "s2 ∈ (Star r) → (Stars vs)" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r) ∧ s⇩_{4}∈ lang (Star r))" apply(auto elim!: Posix_elims(1-5) simp add: lang_deriv Deriv_def intro: Posix.intros) apply(rotate_tac 3) apply(erule_tac Posix_elims(6)) apply (simp add: Posix.intros(6)) using Posix.intros(7) by blast then show "(c # s) ∈ Star r → injval (Star r) c v" proof (cases) case cons have "s1 ∈ deriv c r → v1" by fact then have "(c # s1) ∈ r → injval r c v1" using IH by simp moreover have "s2 ∈ Star r → Stars vs" by fact moreover have "(c # s1) ∈ r → injval r c v1" by fact then have "flat (injval r c v1) = (c # s1)" by (rule Posix1) then have "flat (injval r c v1) ≠ []" by simp moreover have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang (deriv c r) ∧ s⇩_{4}∈ lang (Star r))" by fact then have "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ (c # s1) @ s⇩_{3}∈ lang r ∧ s⇩_{4}∈ lang (Star r))" by (simp add: lang_deriv Deriv_def) ultimately have "((c # s1) @ s2) ∈ Star r → Stars (injval r c v1 # vs)" by (rule Posix.intros) then show "(c # s) ∈ Star r → injval (Star r) c v" using cons by(simp) qed qed section ‹The Lexer by Sulzmann and Lu› fun lexer :: "'a rexp ⇒ 'a list ⇒ ('a val) option" where "lexer r [] = (if nullable r then Some(mkeps r) else None)" | "lexer r (c#s) = (case (lexer (deriv c r) s) of None ⇒ None | Some(v) ⇒ Some(injval r c v))" lemma lexer_correct_None: shows "s ∉ lang r ⟷ lexer r s = None" apply(induct s arbitrary: r) apply(simp add: nullable_iff) apply(drule_tac x="deriv a r" in meta_spec) apply(auto simp add: lang_deriv Deriv_def) done lemma lexer_correct_Some: shows "s ∈ lang r ⟷ (∃v. lexer r s = Some(v) ∧ s ∈ r → v)" apply(induct s arbitrary: r) apply(auto simp add: Posix_mkeps nullable_iff)[1] apply(drule_tac x="deriv a r" in meta_spec) apply(simp add: lang_deriv Deriv_def) apply(rule iffI) apply(auto intro: Posix_injval simp add: Posix1(1)) done lemma lexer_correctness: shows "(lexer r s = Some v) ⟷ s ∈ r → v" and "(lexer r s = None) ⟷ ¬(∃v. s ∈ r → v)" apply(auto) using lexer_correct_None lexer_correct_Some apply fastforce using Posix1(1) Posix_determ lexer_correct_Some apply blast using Posix1(1) lexer_correct_None apply blast using lexer_correct_None lexer_correct_Some by blast end

# Theory Simplifying

(* Title: POSIX Lexing with Derivatives of Regular Expressions Authors: Fahad Ausaf <fahad.ausaf at icloud.com>, 2016 Roy Dyckhoff <roy.dyckhoff at st-andrews.ac.uk>, 2016 Christian Urban <christian.urban at kcl.ac.uk>, 2016 Maintainer: Christian Urban <christian.urban at kcl.ac.uk> *) theory Simplifying imports Lexer begin section ‹Lexer including simplifications› fun F_RIGHT where "F_RIGHT f v = Right (f v)" fun F_LEFT where "F_LEFT f v = Left (f v)" fun F_Plus where "F_Plus f⇩_{1}f⇩_{2}(Right v) = Right (f⇩_{2}v)" | "F_Plus f⇩_{1}f⇩_{2}(Left v) = Left (f⇩_{1}v)" | "F_Plus f1 f2 v = v" fun F_Times1 where "F_Times1 f⇩_{1}f⇩_{2}v = Seq (f⇩_{1}Void) (f⇩_{2}v)" fun F_Times2 where "F_Times2 f⇩_{1}f⇩_{2}v = Seq (f⇩_{1}v) (f⇩_{2}Void)" fun F_Times where "F_Times f⇩_{1}f⇩_{2}(Seq v⇩_{1}v⇩_{2}) = Seq (f⇩_{1}v⇩_{1}) (f⇩_{2}v⇩_{2})" | "F_Times f1 f2 v = v" fun simp_Plus where "simp_Plus (Zero, f⇩_{1}) (r⇩_{2}, f⇩_{2}) = (r⇩_{2}, F_RIGHT f⇩_{2})" | "simp_Plus (r⇩_{1}, f⇩_{1}) (Zero, f⇩_{2}) = (r⇩_{1}, F_LEFT f⇩_{1})" | "simp_Plus (r⇩_{1}, f⇩_{1}) (r⇩_{2}, f⇩_{2}) = (Plus r⇩_{1}r⇩_{2}, F_Plus f⇩_{1}f⇩_{2})" fun simp_Times where "simp_Times (One, f⇩_{1}) (r⇩_{2}, f⇩_{2}) = (r⇩_{2}, F_Times1 f⇩_{1}f⇩_{2})" | "simp_Times (r⇩_{1}, f⇩_{1}) (One, f⇩_{2}) = (r⇩_{1}, F_Times2 f⇩_{1}f⇩_{2})" | "simp_Times (r⇩_{1}, f⇩_{1}) (r⇩_{2}, f⇩_{2}) = (Times r⇩_{1}r⇩_{2}, F_Times f⇩_{1}f⇩_{2})" lemma simp_Times_simps[simp]: "simp_Times p1 p2 = (if (fst p1 = One) then (fst p2, F_Times1 (snd p1) (snd p2)) else (if (fst p2 = One) then (fst p1, F_Times2 (snd p1) (snd p2)) else (Times (fst p1) (fst p2), F_Times (snd p1) (snd p2))))" by (induct p1 p2 rule: simp_Times.induct) (auto) lemma simp_Plus_simps[simp]: "simp_Plus p1 p2 = (if (fst p1 = Zero) then (fst p2, F_RIGHT (snd p2)) else (if (fst p2 = Zero) then (fst p1, F_LEFT (snd p1)) else (Plus (fst p1) (fst p2), F_Plus (snd p1) (snd p2))))" by (induct p1 p2 rule: simp_Plus.induct) (auto) fun simp :: "'a rexp ⇒ 'a rexp * ('a val ⇒ 'a val)" where "simp (Plus r1 r2) = simp_Plus (simp r1) (simp r2)" | "simp (Times r1 r2) = simp_Times (simp r1) (simp r2)" | "simp r = (r, id)" fun slexer :: "'a rexp ⇒ 'a list ⇒ ('a val) option" where "slexer r [] = (if nullable r then Some(mkeps r) else None)" | "slexer r (c#s) = (let (rs, fr) = simp (deriv c r) in (case (slexer rs s) of None ⇒ None | Some(v) ⇒ Some(injval r c (fr v))))" lemma slexer_better_simp: "slexer r (c#s) = (case (slexer (fst (simp (deriv c r))) s) of None ⇒ None | Some(v) ⇒ Some(injval r c ((snd (simp (deriv c r))) v)))" by (auto split: prod.split option.split) lemma L_fst_simp: shows "lang r = lang (fst (simp r))" by (induct r) (auto) lemma Posix_simp: assumes "s ∈ (fst (simp r)) → v" shows "s ∈ r → ((snd (simp r)) v)" using assms proof(induct r arbitrary: s v rule: rexp.induct) case (Plus r1 r2 s v) have IH1: "⋀s v. s ∈ fst (simp r1) → v ⟹ s ∈ r1 → snd (simp r1) v" by fact have IH2: "⋀s v. s ∈ fst (simp r2) → v ⟹ s ∈ r2 → snd (simp r2) v" by fact have as: "s ∈ fst (simp (Plus r1 r2)) → v" by fact consider (Zero_Zero) "fst (simp r1) = Zero" "fst (simp r2) = Zero" | (Zero_NZero) "fst (simp r1) = Zero" "fst (simp r2) ≠ Zero" | (NZero_Zero) "fst (simp r1) ≠ Zero" "fst (simp r2) = Zero" | (NZero_NZero) "fst (simp r1) ≠ Zero" "fst (simp r2) ≠ Zero" by auto then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" proof(cases) case (Zero_Zero) with as have "s ∈ Zero → v" by simp then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" by (rule Posix_elims(1)) next case (Zero_NZero) with as have "s ∈ fst (simp r2) → v" by simp with IH2 have "s ∈ r2 → snd (simp r2) v" by simp moreover from Zero_NZero have "fst (simp r1) = Zero" by simp then have "lang (fst (simp r1)) = {}" by simp then have "lang r1 = {}" using L_fst_simp by auto then have "s ∉ lang r1" by simp ultimately have "s ∈ Plus r1 r2 → Right (snd (simp r2) v)" by (rule Posix_Plus2) then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using Zero_NZero by simp next case (NZero_Zero) with as have "s ∈ fst (simp r1) → v" by simp with IH1 have "s ∈ r1 → snd (simp r1) v" by simp then have "s ∈ Plus r1 r2 → Left (snd (simp r1) v)" by (rule Posix_Plus1) then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_Zero by simp next case (NZero_NZero) with as have "s ∈ Plus (fst (simp r1)) (fst (simp r2)) → v" by simp then consider (Left) v1 where "v = Left v1" "s ∈ (fst (simp r1)) → v1" | (Right) v2 where "v = Right v2" "s ∈ (fst (simp r2)) → v2" "s ∉ lang (fst (simp r1))" by (erule_tac Posix_elims(4)) then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" proof(cases) case (Left) then have "v = Left v1" "s ∈ r1 → (snd (simp r1) v1)" using IH1 by simp_all then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_NZero by (simp_all add: Posix_Plus1) next case (Right) then have "v = Right v2" "s ∈ r2 → (snd (simp r2) v2)" "s ∉ lang r1" using IH2 L_fst_simp by auto then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_NZero by (simp_all add: Posix_Plus2) qed qed next case (Times r1 r2 s v) have IH1: "⋀s v. s ∈ fst (simp r1) → v ⟹ s ∈ r1 → snd (simp r1) v" by fact have IH2: "⋀s v. s ∈ fst (simp r2) → v ⟹ s ∈ r2 → snd (simp r2) v" by fact have as: "s ∈ fst (simp (Times r1 r2)) → v" by fact consider (One_One) "fst (simp r1) = One" "fst (simp r2) = One" | (One_NOne) "fst (simp r1) = One" "fst (simp r2) ≠ One" | (NOne_One) "fst (simp r1) ≠ One" "fst (simp r2) = One" | (NOne_NOne) "fst (simp r1) ≠ One" "fst (simp r2) ≠ One" by auto then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" proof(cases) case (One_One) with as have b: "s ∈ One → v" by simp from b have "s ∈ r1 → snd (simp r1) v" using IH1 One_One by simp moreover from b have c: "s = []" "v = Void" using Posix_elims(2) by auto moreover have "[] ∈ One → Void" by (simp add: Posix_One) then have "[] ∈ fst (simp r2) → Void" using One_One by simp then have "[] ∈ r2 → snd (simp r2) Void" using IH2 by simp ultimately have "([] @ []) ∈ Times r1 r2 → Seq (snd (simp r1) Void) (snd (simp r2) Void)" using Posix_Times by blast then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using c One_One by simp next case (One_NOne) with as have b: "s ∈ fst (simp r2) → v" by simp from b have "s ∈ r2 → snd (simp r2) v" using IH2 One_NOne by simp moreover have "[] ∈ One → Void" by (simp add: Posix_One) then have "[] ∈ fst (simp r1) → Void" using One_NOne by simp then have "[] ∈ r1 → snd (simp r1) Void" using IH1 by simp moreover from One_NOne(1) have "lang (fst (simp r1)) = {[]}" by simp then have "lang r1 = {[]}" by (simp add: L_fst_simp[symmetric]) ultimately have "([] @ s) ∈ Times r1 r2 → Seq (snd (simp r1) Void) (snd (simp r2) v)" by(rule_tac Posix_Times) auto then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using One_NOne by simp next case (NOne_One) with as have "s ∈ fst (simp r1) → v" by simp with IH1 have "s ∈ r1 → snd (simp r1) v" by simp moreover have "[] ∈ One → Void" by (simp add: Posix_One) then have "[] ∈ fst (simp r2) → Void" using NOne_One by simp then have "[] ∈ r2 → snd (simp r2) Void" using IH2 by simp ultimately have "(s @ []) ∈ Times r1 r2 → Seq (snd (simp r1) v) (snd (simp r2) Void)" by(rule_tac Posix_Times) auto then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using NOne_One by simp next case (NOne_NOne) with as have "s ∈ Times (fst (simp r1)) (fst (simp r2)) → v" by simp then obtain s1 s2 v1 v2 where eqs: "s = s1 @ s2" "v = Seq v1 v2" "s1 ∈ (fst (simp r1)) → v1" "s2 ∈ (fst (simp r2)) → v2" "¬ (∃s⇩_{3}s⇩_{4}. s⇩_{3}≠ [] ∧ s⇩_{3}@ s⇩_{4}= s2 ∧ s1 @ s⇩_{3}∈ lang r1 ∧ s⇩_{4}∈ lang r2)" by (erule_tac Posix_elims(5)) (auto simp add: L_fst_simp[symmetric]) then have "s1 ∈ r1 → (snd (simp r1) v1)" "s2 ∈ r2 → (snd (simp r2) v2)" using IH1 IH2 by auto then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using eqs NOne_NOne by(auto intro: Posix_Times) qed qed (simp_all) lemma slexer_correctness: shows "slexer r s = lexer r s" proof(induct s arbitrary: r) case Nil show "slexer r [] = lexer r []" by simp next case (Cons c s r) have IH: "⋀r. slexer r s = lexer r s" by fact show "slexer r (c # s) = lexer r (c # s)" proof (cases "s ∈ lang (deriv c r)") case True assume a1: "s ∈ lang (deriv c r)" then obtain v1 where a2: "lexer (deriv c r) s = Some v1" "s ∈ deriv c r → v1" using lexer_correct_Some by auto from a1 have "s ∈ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto then obtain v2 where a3: "lexer (fst (simp (deriv c r))) s = Some v2" "s ∈ (fst (simp (deriv c r))) → v2" using lexer_correct_Some by auto then have a4: "slexer (fst (simp (deriv c r))) s = Some v2" using IH by simp from a3(2) have "s ∈ deriv c r → (snd (simp (deriv c r))) v2" using Posix_simp by auto with a2(2) have "v1 = (snd (simp (deriv c r))) v2" using Posix_determ by auto with a2(1) a4 show "slexer r (c # s) = lexer r (c # s)" by (auto split: prod.split) next case False assume b1: "s ∉ lang (deriv c r)" then have "lexer (deriv c r) s = None" using lexer_correct_None by auto moreover from b1 have "s ∉ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto then have "lexer (fst (simp (deriv c r))) s = None" using lexer_correct_None by auto then have "slexer (fst (simp (deriv c r))) s = None" using IH by simp ultimately show "slexer r (c # s) = lexer r (c # s)" by (simp del: slexer.simps add: slexer_better_simp) qed qed end