# Theory Lexer

(*  Title:       POSIX Lexing with Derivatives of Regular Expressions
Authors:     Fahad Ausaf <fahad.ausaf at icloud.com>, 2016
Roy Dyckhoff <roy.dyckhoff at st-andrews.ac.uk>, 2016
Christian Urban <christian.urban at kcl.ac.uk>, 2016
Maintainer:  Christian Urban <christian.urban at kcl.ac.uk>
*)

theory Lexer
imports "Regular-Sets.Derivatives"
begin

section ‹Values›

datatype 'a val =
Void
| Atm 'a
| Seq "'a val" "'a val"
| Right "'a val"
| Left "'a val"
| Stars "('a val) list"

section ‹The string behind a value›

fun
flat :: "'a val ⇒ 'a list"
where
"flat (Void) = []"
| "flat (Atm c) = [c]"
| "flat (Left v) = flat v"
| "flat (Right v) = flat v"
| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
| "flat (Stars []) = []"
| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))"

lemma flat_Stars [simp]:
"flat (Stars vs) = concat (map flat vs)"
by (induct vs) (auto)

section ‹Relation between values and regular expressions›

inductive
Prf :: "'a val ⇒ 'a rexp ⇒ bool" ("⊢ _ : _" [100, 100] 100)
where
"⟦⊢ v1 : r1; ⊢ v2 : r2⟧ ⟹ ⊢ Seq v1 v2 : Times r1 r2"
| "⊢ v1 : r1 ⟹ ⊢ Left v1 : Plus r1 r2"
| "⊢ v2 : r2 ⟹ ⊢ Right v2 : Plus r1 r2"
| "⊢ Void : One"
| "⊢ Atm c : Atom c"
| "⊢ Stars [] : Star r"
| "⟦⊢ v : r; ⊢ Stars vs : Star r⟧ ⟹ ⊢ Stars (v # vs) : Star r"

inductive_cases Prf_elims:
"⊢ v : Zero"
"⊢ v : Times r1 r2"
"⊢ v : Plus r1 r2"
"⊢ v : One"
"⊢ v : Atom c"
(*  "⊢ vs : Star r"*)

lemma Prf_flat_lang:
assumes "⊢ v : r" shows "flat v ∈ lang r"
using assms
by(induct v r rule: Prf.induct) (auto)

lemma Prf_Stars:
assumes "∀v ∈ set vs. ⊢ v : r"
shows "⊢ Stars vs : Star r"
using assms
by(induct vs) (auto intro: Prf.intros)

lemma Star_string:
assumes "s ∈ star A"
shows "∃ss. concat ss = s ∧ (∀s ∈ set ss. s ∈ A)"
using assms
by (metis in_star_iff_concat subsetD)

lemma Star_val:
assumes "∀s∈set ss. ∃v. s = flat v ∧ ⊢ v : r"
shows "∃vs. concat (map flat vs) = concat ss ∧ (∀v∈set vs. ⊢ v : r)"
using assms
apply(induct ss)
apply(auto)
apply (metis empty_iff list.set(1))
by (metis concat.simps(2) list.simps(9) set_ConsD)

lemma L_flat_Prf1:
assumes "⊢ v : r" shows "flat v ∈ lang r"
using assms
by (induct)(auto)

lemma L_flat_Prf2:
assumes "s ∈ lang r" shows "∃v. ⊢ v : r ∧ flat v = s"
using assms
apply(induct r arbitrary: s)
apply(auto intro: Prf.intros)
using Prf.intros(2) flat.simps(3) apply blast
using Prf.intros(3) flat.simps(4) apply blast
apply (metis Prf.intros(1) concE flat.simps(5))
apply(subgoal_tac "∃vs::('a val) list. concat (map flat vs) = s ∧ (∀v ∈ set vs. ⊢ v : r)")
apply(auto)
apply(rule_tac x="Stars vs" in exI)
apply(simp)
apply (simp add: Prf_Stars)
apply(drule Star_string)
apply(auto)
apply(rule Star_val)
apply(auto)
done

lemma L_flat_Prf:
"lang r = {flat v | v. ⊢ v : r}"
using L_flat_Prf1 L_flat_Prf2 by blast

section ‹Sulzmann and Lu functions›

fun
mkeps :: "'a rexp ⇒ 'a val"
where
"mkeps(One) = Void"
| "mkeps(Times r1 r2) = Seq (mkeps r1) (mkeps r2)"
| "mkeps(Plus r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
| "mkeps(Star r) = Stars []"

fun injval :: "'a rexp ⇒ 'a ⇒ 'a val ⇒ 'a val"
where
"injval (Atom d) c Void = Atm d"
| "injval (Plus r1 r2) c (Left v1) = Left(injval r1 c v1)"
| "injval (Plus r1 r2) c (Right v2) = Right(injval r2 c v2)"
| "injval (Times r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
| "injval (Times r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2"
| "injval (Times r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
| "injval (Star r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)"

section ‹Mkeps, injval›

lemma mkeps_nullable:
assumes "nullable r"
shows "⊢ mkeps r : r"
using assms
by (induct r)
(auto intro: Prf.intros)

lemma mkeps_flat:
assumes "nullable r"
shows "flat (mkeps r) = []"
using assms
by (induct r) (auto)

lemma Prf_injval:
assumes "⊢ v : deriv c r"
shows "⊢ (injval r c v) : r"
using assms
apply(induct r arbitrary: c v rule: rexp.induct)
apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits)
(* Star *)
apply(rotate_tac 2)
apply(erule Prf.cases)
apply(simp_all)
apply(auto)
apply (metis Prf.intros(6) Prf.intros(7))
by (metis Prf.intros(7))

lemma Prf_injval_flat:
assumes "⊢ v : deriv c r"
shows "flat (injval r c v) = c # (flat v)"
using assms
apply(induct r arbitrary: v c)
apply(auto elim!: Prf_elims split: if_splits)
apply(metis mkeps_flat)
apply(rotate_tac 2)
apply(erule Prf.cases)
apply(simp_all)
done

(* HERE *)

section ‹Our Alternative Posix definition›

inductive
Posix :: "'a list ⇒ 'a rexp ⇒ 'a val ⇒ bool" ("_ ∈ _ → _" [100, 100, 100] 100)
where
Posix_One: "[] ∈ One → Void"
| Posix_Atom: "[c] ∈ (Atom c) → (Atm c)"
| Posix_Plus1: "s ∈ r1 → v ⟹ s ∈ (Plus r1 r2) → (Left v)"
| Posix_Plus2: "⟦s ∈ r2 → v; s ∉ lang r1⟧ ⟹ s ∈ (Plus r1 r2) → (Right v)"
| Posix_Times: "⟦s1 ∈ r1 → v1; s2 ∈ r2 → v2;
¬(∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ (s1 @ s⇩3) ∈ lang r1 ∧ s⇩4 ∈ lang r2)⟧ ⟹
(s1 @ s2) ∈ (Times r1 r2) → (Seq v1 v2)"
| Posix_Star1: "⟦s1 ∈ r → v; s2 ∈ Star r → Stars vs; flat v ≠ [];
¬(∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ (s1 @ s⇩3) ∈ lang r ∧ s⇩4 ∈ lang (Star r))⟧
⟹ (s1 @ s2) ∈ Star r → Stars (v # vs)"
| Posix_Star2: "[] ∈ Star r → Stars []"

inductive_cases Posix_elims:
"s ∈ Zero → v"
"s ∈ One → v"
"s ∈ Atom c → v"
"s ∈ Plus r1 r2 → v"
"s ∈ Times r1 r2 → v"
"s ∈ Star r → v"

lemma Posix1:
assumes "s ∈ r → v"
shows "s ∈ lang r" "flat v = s"
using assms
by (induct s r v rule: Posix.induct) (auto)

lemma Posix1a:
assumes "s ∈ r → v"
shows "⊢ v : r"
using assms
by (induct s r v rule: Posix.induct)(auto intro: Prf.intros)

lemma Posix_mkeps:
assumes "nullable r"
shows "[] ∈ r → mkeps r"
using assms
apply(induct r)
apply(auto intro: Posix.intros simp add: nullable_iff)
apply(subst append.simps(1)[symmetric])
apply(rule Posix.intros)
apply(auto)
done

lemma Posix_determ:
assumes "s ∈ r → v1" "s ∈ r → v2"
shows "v1 = v2"
using assms
proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
case (Posix_One v2)
have "[] ∈ One → v2" by fact
then show "Void = v2" by cases auto
next
case (Posix_Atom c v2)
have "[c] ∈ Atom c → v2" by fact
then show "Atm c = v2" by cases auto
next
case (Posix_Plus1 s r1 v r2 v2)
have "s ∈ Plus r1 r2 → v2" by fact
moreover
have "s ∈ r1 → v" by fact
then have "s ∈ lang r1" by (simp add: Posix1)
ultimately obtain v' where eq: "v2 = Left v'" "s ∈ r1 → v'" by cases auto
moreover
have IH: "⋀v2. s ∈ r1 → v2 ⟹ v = v2" by fact
ultimately have "v = v'" by simp
then show "Left v = v2" using eq by simp
next
case (Posix_Plus2 s r2 v r1 v2)
have "s ∈ Plus r1 r2 → v2" by fact
moreover
have "s ∉ lang r1" by fact
ultimately obtain v' where eq: "v2 = Right v'" "s ∈ r2 → v'"
by cases (auto simp add: Posix1)
moreover
have IH: "⋀v2. s ∈ r2 → v2 ⟹ v = v2" by fact
ultimately have "v = v'" by simp
then show "Right v = v2" using eq by simp
next
case (Posix_Times s1 r1 v1 s2 r2 v2 v')
have "(s1 @ s2) ∈ Times r1 r2 → v'"
"s1 ∈ r1 → v1" "s2 ∈ r2 → v2"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang r1 ∧ s⇩4 ∈ lang r2)" by fact+
then obtain v1' v2' where "v' = Seq v1' v2'" "s1 ∈ r1 → v1'" "s2 ∈ r2 → v2'"
apply(cases) apply (auto simp add: append_eq_append_conv2)
using Posix1(1) by fastforce+
moreover
have IHs: "⋀v1'. s1 ∈ r1 → v1' ⟹ v1 = v1'"
"⋀v2'. s2 ∈ r2 → v2' ⟹ v2 = v2'" by fact+
ultimately show "Seq v1 v2 = v'" by simp
next
case (Posix_Star1 s1 r v s2 vs v2)
have "(s1 @ s2) ∈ Star r → v2"
"s1 ∈ r → v" "s2 ∈ Star r → Stars vs" "flat v ≠ []"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang r ∧ s⇩4 ∈ lang (Star r))" by fact+
then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (Star r) → (Stars vs')"
apply(cases) apply (auto simp add: append_eq_append_conv2)
using Posix1(1) apply fastforce
apply (metis Posix1(1) Posix_Star1.hyps(6) append_Nil append_Nil2)
using Posix1(2) by blast
moreover
have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
"⋀v2. s2 ∈ Star r → v2 ⟹ Stars vs = v2" by fact+
ultimately show "Stars (v # vs) = v2" by auto
next
case (Posix_Star2 r v2)
have "[] ∈ Star r → v2" by fact
then show "Stars [] = v2" by cases (auto simp add: Posix1)
qed

lemma Posix_injval:
assumes "s ∈ (deriv c r) → v"
shows "(c # s) ∈ r → (injval r c v)"
using assms
proof(induct r arbitrary: s v rule: rexp.induct)
case Zero
have "s ∈ deriv c Zero → v" by fact
then have "s ∈ Zero → v" by simp
then have "False" by cases
then show "(c # s) ∈ Zero → (injval Zero c v)" by simp
next
case One
have "s ∈ deriv c One → v" by fact
then have "s ∈ Zero → v" by simp
then have "False" by cases
then show "(c # s) ∈ One → (injval One c v)" by simp
next
case (Atom d)
consider (eq) "c = d" | (ineq) "c ≠ d" by blast
then show "(c # s) ∈ (Atom d) → (injval (Atom d) c v)"
proof (cases)
case eq
have "s ∈ deriv c (Atom d) → v" by fact
then have "s ∈ One → v" using eq by simp
then have eqs: "s = [] ∧ v = Void" by cases simp
show "(c # s) ∈ Atom d → injval (Atom d) c v" using eq eqs
by (auto intro: Posix.intros)
next
case ineq
have "s ∈ deriv c (Atom d) → v" by fact
then have "s ∈ Zero → v" using ineq by simp
then have "False" by cases
then show "(c # s) ∈ Atom d → injval (Atom d) c v" by simp
qed
next
case (Plus r1 r2)
have IH1: "⋀s v. s ∈ deriv c r1 → v ⟹ (c # s) ∈ r1 → injval r1 c v" by fact
have IH2: "⋀s v. s ∈ deriv c r2 → v ⟹ (c # s) ∈ r2 → injval r2 c v" by fact
have "s ∈ deriv c (Plus r1 r2) → v" by fact
then have "s ∈ Plus (deriv c r1) (deriv c r2) → v" by simp
then consider (left) v' where "v = Left v'" "s ∈ deriv c r1 → v'"
| (right) v' where "v = Right v'" "s ∉ lang (deriv c r1)" "s ∈ deriv c r2 → v'"
by cases auto
then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v"
proof (cases)
case left
have "s ∈ deriv c r1 → v'" by fact
then have "(c # s) ∈ r1 → injval r1 c v'" using IH1 by simp
then have "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c (Left v')" by (auto intro: Posix.intros)
then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v" using left by simp
next
case right
have "s ∉ lang (deriv c r1)" by fact
then have "c # s ∉ lang r1" by (simp add: lang_deriv Deriv_def)
moreover
have "s ∈ deriv c r2 → v'" by fact
then have "(c # s) ∈ r2 → injval r2 c v'" using IH2 by simp
ultimately have "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c (Right v')"
by (auto intro: Posix.intros)
then show "(c # s) ∈ Plus r1 r2 → injval (Plus r1 r2) c v" using right by simp
qed
next
case (Times r1 r2)
have IH1: "⋀s v. s ∈ deriv c r1 → v ⟹ (c # s) ∈ r1 → injval r1 c v" by fact
have IH2: "⋀s v. s ∈ deriv c r2 → v ⟹ (c # s) ∈ r2 → injval r2 c v" by fact
have "s ∈ deriv c (Times r1 r2) → v" by fact
then consider
(left_nullable) v1 v2 s1 s2 where
"v = Left (Seq v1 v2)"  "s = s1 @ s2"
"s1 ∈ deriv c r1 → v1" "s2 ∈ r2 → v2" "nullable r1"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r1) ∧ s⇩4 ∈ lang r2)"
| (right_nullable) v1 s1 s2 where
"v = Right v1" "s = s1 @ s2"
"s ∈ deriv c r2 → v1" "nullable r1" "s1 @ s2 ∉ lang (Times (deriv c r1) r2)"
| (not_nullable) v1 v2 s1 s2 where
"v = Seq v1 v2" "s = s1 @ s2"
"s1 ∈ deriv c r1 → v1" "s2 ∈ r2 → v2" "¬nullable r1"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r1) ∧ s⇩4 ∈ lang r2)"
by (force split: if_splits elim!: Posix_elims simp add: lang_deriv Deriv_def)
then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v"
proof (cases)
case left_nullable
have "s1 ∈ deriv c r1 → v1" by fact
then have "(c # s1) ∈ r1 → injval r1 c v1" using IH1 by simp
moreover
have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r1) ∧ s⇩4 ∈ lang r2)" by fact
then have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ (c # s1) @ s⇩3 ∈ lang r1 ∧ s⇩4 ∈ lang r2)"
by (simp add: lang_deriv Deriv_def)
ultimately have "((c # s1) @ s2) ∈ Times r1 r2 → Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros)
then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using left_nullable by simp
next
case right_nullable
have "nullable r1" by fact
then have "[] ∈ r1 → (mkeps r1)" by (rule Posix_mkeps)
moreover
have "s ∈ deriv c r2 → v1" by fact
then have "(c # s) ∈ r2 → (injval r2 c v1)" using IH2 by simp
moreover
have "s1 @ s2 ∉ lang (Times (deriv c r1) r2)" by fact
then have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = c # s ∧ [] @ s⇩3 ∈ lang r1 ∧ s⇩4 ∈ lang r2)"
using right_nullable
apply (auto simp add: lang_deriv Deriv_def append_eq_Cons_conv)
by (metis concI mem_Collect_eq)
ultimately have "([] @ (c # s)) ∈ Times r1 r2 → Seq (mkeps r1) (injval r2 c v1)"
by(rule Posix.intros)
then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using right_nullable by simp
next
case not_nullable
have "s1 ∈ deriv c r1 → v1" by fact
then have "(c # s1) ∈ r1 → injval r1 c v1" using IH1 by simp
moreover
have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r1) ∧ s⇩4 ∈ lang r2)" by fact
then have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ (c # s1) @ s⇩3 ∈ lang r1 ∧ s⇩4 ∈ lang r2)" by (simp add: lang_deriv Deriv_def)
ultimately have "((c # s1) @ s2) ∈ Times r1 r2 → Seq (injval r1 c v1) v2" using not_nullable
by (rule_tac Posix.intros) (simp_all)
then show "(c # s) ∈ Times r1 r2 → injval (Times r1 r2) c v" using not_nullable by simp
qed
next
case (Star r)
have IH: "⋀s v. s ∈ deriv c r → v ⟹ (c # s) ∈ r → injval r c v" by fact
have "s ∈ deriv c (Star r) → v" by fact
then consider
(cons) v1 vs s1 s2 where
"v = Seq v1 (Stars vs)" "s = s1 @ s2"
"s1 ∈ deriv c r → v1" "s2 ∈ (Star r) → (Stars vs)"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r) ∧ s⇩4 ∈ lang (Star r))"
apply(auto elim!: Posix_elims(1-5) simp add: lang_deriv Deriv_def intro: Posix.intros)
apply(rotate_tac 3)
apply(erule_tac Posix_elims(6))
apply (simp add: Posix.intros(6))
using Posix.intros(7) by blast
then show "(c # s) ∈ Star r → injval (Star r) c v"
proof (cases)
case cons
have "s1 ∈ deriv c r → v1" by fact
then have "(c # s1) ∈ r → injval r c v1" using IH by simp
moreover
have "s2 ∈ Star r → Stars vs" by fact
moreover
have "(c # s1) ∈ r → injval r c v1" by fact
then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
then have "flat (injval r c v1) ≠ []" by simp
moreover
have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang (deriv c r) ∧ s⇩4 ∈ lang (Star r))" by fact
then have "¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ (c # s1) @ s⇩3 ∈ lang r ∧ s⇩4 ∈ lang (Star r))"
by (simp add: lang_deriv Deriv_def)
ultimately
have "((c # s1) @ s2) ∈ Star r → Stars (injval r c v1 # vs)" by (rule Posix.intros)
then show "(c # s) ∈ Star r → injval (Star r) c v" using cons by(simp)
qed
qed

section ‹The Lexer by Sulzmann and Lu›

fun
lexer :: "'a rexp ⇒ 'a list ⇒ ('a val) option"
where
"lexer r [] = (if nullable r then Some(mkeps r) else None)"
| "lexer r (c#s) = (case (lexer (deriv c r) s) of
None ⇒ None
| Some(v) ⇒ Some(injval r c v))"

lemma lexer_correct_None:
shows "s ∉ lang r ⟷ lexer r s = None"
apply(induct s arbitrary: r)
apply(drule_tac x="deriv a r" in meta_spec)
apply(auto simp add: lang_deriv Deriv_def)
done

lemma lexer_correct_Some:
shows "s ∈ lang r ⟷ (∃v. lexer r s = Some(v) ∧ s ∈ r → v)"
apply(induct s arbitrary: r)
apply(auto simp add: Posix_mkeps nullable_iff)
apply(drule_tac x="deriv a r" in meta_spec)
apply(simp add: lang_deriv Deriv_def)
apply(rule iffI)
apply(auto intro: Posix_injval simp add: Posix1(1))
done

lemma lexer_correctness:
shows "(lexer r s = Some v) ⟷ s ∈ r → v"
and   "(lexer r s = None) ⟷ ¬(∃v. s ∈ r → v)"
apply(auto)
using lexer_correct_None lexer_correct_Some apply fastforce
using Posix1(1) Posix_determ lexer_correct_Some apply blast
using Posix1(1) lexer_correct_None apply blast
using lexer_correct_None lexer_correct_Some by blast

end


# Theory Simplifying

(*  Title:       POSIX Lexing with Derivatives of Regular Expressions
Authors:     Fahad Ausaf <fahad.ausaf at icloud.com>, 2016
Roy Dyckhoff <roy.dyckhoff at st-andrews.ac.uk>, 2016
Christian Urban <christian.urban at kcl.ac.uk>, 2016
Maintainer:  Christian Urban <christian.urban at kcl.ac.uk>
*)

theory Simplifying
imports Lexer
begin

section ‹Lexer including simplifications›

fun F_RIGHT where
"F_RIGHT f v = Right (f v)"

fun F_LEFT where
"F_LEFT f v = Left (f v)"

fun F_Plus where
"F_Plus f⇩1 f⇩2 (Right v) = Right (f⇩2 v)"
| "F_Plus f⇩1 f⇩2 (Left v) = Left (f⇩1 v)"
| "F_Plus f1 f2 v = v"

fun F_Times1 where
"F_Times1 f⇩1 f⇩2 v = Seq (f⇩1 Void) (f⇩2 v)"

fun F_Times2 where
"F_Times2 f⇩1 f⇩2 v = Seq (f⇩1 v) (f⇩2 Void)"

fun F_Times where
"F_Times f⇩1 f⇩2 (Seq v⇩1 v⇩2) = Seq (f⇩1 v⇩1) (f⇩2 v⇩2)"
| "F_Times f1 f2 v = v"

fun simp_Plus where
"simp_Plus (Zero, f⇩1) (r⇩2, f⇩2) = (r⇩2, F_RIGHT f⇩2)"
| "simp_Plus (r⇩1, f⇩1) (Zero, f⇩2) = (r⇩1, F_LEFT f⇩1)"
| "simp_Plus (r⇩1, f⇩1) (r⇩2, f⇩2) = (Plus r⇩1 r⇩2, F_Plus f⇩1 f⇩2)"

fun simp_Times where
"simp_Times (One, f⇩1) (r⇩2, f⇩2) = (r⇩2, F_Times1 f⇩1 f⇩2)"
| "simp_Times (r⇩1, f⇩1) (One, f⇩2) = (r⇩1, F_Times2 f⇩1 f⇩2)"
| "simp_Times (r⇩1, f⇩1) (r⇩2, f⇩2) = (Times r⇩1 r⇩2, F_Times f⇩1 f⇩2)"

lemma simp_Times_simps[simp]:
"simp_Times p1 p2 = (if (fst p1 = One) then (fst p2, F_Times1 (snd p1) (snd p2))
else (if (fst p2 = One) then (fst p1, F_Times2 (snd p1) (snd p2))
else (Times (fst p1) (fst p2), F_Times (snd p1) (snd p2))))"
by (induct p1 p2 rule: simp_Times.induct) (auto)

lemma simp_Plus_simps[simp]:
"simp_Plus p1 p2 = (if (fst p1 = Zero) then (fst p2, F_RIGHT (snd p2))
else (if (fst p2 = Zero) then (fst p1, F_LEFT (snd p1))
else (Plus (fst p1) (fst p2), F_Plus (snd p1) (snd p2))))"
by (induct p1 p2 rule: simp_Plus.induct) (auto)

fun
simp :: "'a rexp ⇒ 'a rexp * ('a val ⇒ 'a val)"
where
"simp (Plus r1 r2) = simp_Plus (simp r1) (simp r2)"
| "simp (Times r1 r2) = simp_Times (simp r1) (simp r2)"
| "simp r = (r, id)"

fun
slexer :: "'a rexp ⇒ 'a list ⇒ ('a val) option"
where
"slexer r [] = (if nullable r then Some(mkeps r) else None)"
| "slexer r (c#s) = (let (rs, fr) = simp (deriv c r) in
(case (slexer rs s) of
None ⇒ None
| Some(v) ⇒ Some(injval r c (fr v))))"

lemma slexer_better_simp:
"slexer r (c#s) = (case (slexer (fst (simp (deriv c r))) s) of
None ⇒ None
| Some(v) ⇒ Some(injval r c ((snd (simp (deriv c r))) v)))"
by (auto split: prod.split option.split)

lemma L_fst_simp:
shows "lang r = lang (fst (simp r))"
by (induct r) (auto)

lemma Posix_simp:
assumes "s ∈ (fst (simp r)) → v"
shows "s ∈ r → ((snd (simp r)) v)"
using assms
proof(induct r arbitrary: s v rule: rexp.induct)
case (Plus r1 r2 s v)
have IH1: "⋀s v. s ∈ fst (simp r1) → v ⟹ s ∈ r1 → snd (simp r1) v" by fact
have IH2: "⋀s v. s ∈ fst (simp r2) → v ⟹ s ∈ r2 → snd (simp r2) v" by fact
have as: "s ∈ fst (simp (Plus r1 r2)) → v" by fact
consider (Zero_Zero) "fst (simp r1) = Zero" "fst (simp r2) = Zero"
| (Zero_NZero) "fst (simp r1) = Zero" "fst (simp r2) ≠ Zero"
| (NZero_Zero) "fst (simp r1) ≠ Zero" "fst (simp r2) = Zero"
| (NZero_NZero) "fst (simp r1) ≠ Zero" "fst (simp r2) ≠ Zero" by auto
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v"
proof(cases)
case (Zero_Zero)
with as have "s ∈ Zero → v" by simp
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" by (rule Posix_elims(1))
next
case (Zero_NZero)
with as have "s ∈ fst (simp r2) → v" by simp
with IH2 have "s ∈ r2 → snd (simp r2) v" by simp
moreover
from Zero_NZero have "fst (simp r1) = Zero" by simp
then have "lang (fst (simp r1)) = {}" by simp
then have "lang r1 = {}" using L_fst_simp by auto
then have "s ∉ lang r1" by simp
ultimately have "s ∈ Plus r1 r2 → Right (snd (simp r2) v)" by (rule Posix_Plus2)
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v"
using Zero_NZero by simp
next
case (NZero_Zero)
with as have "s ∈ fst (simp r1) → v" by simp
with IH1 have "s ∈ r1 → snd (simp r1) v" by simp
then have "s ∈ Plus r1 r2 → Left (snd (simp r1) v)" by (rule Posix_Plus1)
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_Zero by simp
next
case (NZero_NZero)
with as have "s ∈ Plus (fst (simp r1)) (fst (simp r2)) → v" by simp
then consider (Left) v1 where "v = Left v1" "s ∈ (fst (simp r1)) → v1"
| (Right) v2 where "v = Right v2" "s ∈ (fst (simp r2)) → v2" "s ∉ lang (fst (simp r1))"
by (erule_tac Posix_elims(4))
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v"
proof(cases)
case (Left)
then have "v = Left v1" "s ∈ r1 → (snd (simp r1) v1)" using IH1 by simp_all
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_NZero
by (simp_all add: Posix_Plus1)
next
case (Right)
then have "v = Right v2" "s ∈ r2 → (snd (simp r2) v2)" "s ∉ lang r1" using IH2 L_fst_simp by auto
then show "s ∈ Plus r1 r2 → snd (simp (Plus r1 r2)) v" using NZero_NZero
by (simp_all add: Posix_Plus2)
qed
qed
next
case (Times r1 r2 s v)
have IH1: "⋀s v. s ∈ fst (simp r1) → v ⟹ s ∈ r1 → snd (simp r1) v" by fact
have IH2: "⋀s v. s ∈ fst (simp r2) → v ⟹ s ∈ r2 → snd (simp r2) v" by fact
have as: "s ∈ fst (simp (Times r1 r2)) → v" by fact
consider (One_One) "fst (simp r1) = One" "fst (simp r2) = One"
| (One_NOne) "fst (simp r1) = One" "fst (simp r2) ≠ One"
| (NOne_One) "fst (simp r1) ≠ One" "fst (simp r2) = One"
| (NOne_NOne) "fst (simp r1) ≠ One" "fst (simp r2) ≠ One" by auto
then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v"
proof(cases)
case (One_One)
with as have b: "s ∈ One → v" by simp
from b have "s ∈ r1 → snd (simp r1) v" using IH1 One_One by simp
moreover
from b have c: "s = []" "v = Void" using Posix_elims(2) by auto
moreover
have "[] ∈ One → Void" by (simp add: Posix_One)
then have "[] ∈ fst (simp r2) → Void" using One_One by simp
then have "[] ∈ r2 → snd (simp r2) Void" using IH2 by simp
ultimately have "([] @ []) ∈ Times r1 r2 → Seq (snd (simp r1) Void) (snd (simp r2) Void)"
using Posix_Times by blast
then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using c One_One by simp
next
case (One_NOne)
with as have b: "s ∈ fst (simp r2) → v" by simp
from b have "s ∈ r2 → snd (simp r2) v" using IH2 One_NOne by simp
moreover
have "[] ∈ One → Void" by (simp add: Posix_One)
then have "[] ∈ fst (simp r1) → Void" using One_NOne by simp
then have "[] ∈ r1 → snd (simp r1) Void" using IH1 by simp
moreover
from One_NOne(1) have "lang (fst (simp r1)) = {[]}" by simp
then have "lang r1 = {[]}" by (simp add: L_fst_simp[symmetric])
ultimately have "([] @ s) ∈ Times r1 r2 → Seq (snd (simp r1) Void) (snd (simp r2) v)"
by(rule_tac Posix_Times) auto
then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using One_NOne by simp
next
case (NOne_One)
with as have "s ∈ fst (simp r1) → v" by simp
with IH1 have "s ∈ r1 → snd (simp r1) v" by simp
moreover
have "[] ∈ One → Void" by (simp add: Posix_One)
then have "[] ∈ fst (simp r2) → Void" using NOne_One by simp
then have "[] ∈ r2 → snd (simp r2) Void" using IH2 by simp
ultimately have "(s @ []) ∈ Times r1 r2 → Seq (snd (simp r1) v) (snd (simp r2) Void)"
by(rule_tac Posix_Times) auto
then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using NOne_One by simp
next
case (NOne_NOne)
with as have "s ∈ Times (fst (simp r1)) (fst (simp r2)) → v" by simp
then obtain s1 s2 v1 v2 where eqs: "s = s1 @ s2" "v = Seq v1 v2"
"s1 ∈ (fst (simp r1)) → v1" "s2 ∈ (fst (simp r2)) → v2"
"¬ (∃s⇩3 s⇩4. s⇩3 ≠ [] ∧ s⇩3 @ s⇩4 = s2 ∧ s1 @ s⇩3 ∈ lang r1 ∧ s⇩4 ∈ lang r2)"
by (erule_tac Posix_elims(5)) (auto simp add: L_fst_simp[symmetric])
then have "s1 ∈ r1 → (snd (simp r1) v1)" "s2 ∈ r2 → (snd (simp r2) v2)"
using IH1 IH2 by auto
then show "s ∈ Times r1 r2 → snd (simp (Times r1 r2)) v" using eqs NOne_NOne
by(auto intro: Posix_Times)
qed
qed (simp_all)

lemma slexer_correctness:
shows "slexer r s = lexer r s"
proof(induct s arbitrary: r)
case Nil
show "slexer r [] = lexer r []" by simp
next
case (Cons c s r)
have IH: "⋀r. slexer r s = lexer r s" by fact
show "slexer r (c # s) = lexer r (c # s)"
proof (cases "s ∈ lang (deriv c r)")
case True
assume a1: "s ∈ lang (deriv c r)"
then obtain v1 where a2: "lexer (deriv c r) s = Some v1" "s ∈ deriv c r → v1"
using lexer_correct_Some by auto
from a1 have "s ∈ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto
then obtain v2 where a3: "lexer (fst (simp (deriv c r))) s = Some v2" "s ∈ (fst (simp (deriv c r))) → v2"
using lexer_correct_Some by auto
then have a4: "slexer (fst (simp (deriv c r))) s = Some v2" using IH by simp
from a3(2) have "s ∈ deriv c r → (snd (simp (deriv c r))) v2" using Posix_simp by auto
with a2(2) have "v1 = (snd (simp (deriv c r))) v2" using Posix_determ by auto
with a2(1) a4 show "slexer r (c # s) = lexer r (c # s)" by (auto split: prod.split)
next
case False
assume b1: "s ∉ lang (deriv c r)"
then have "lexer (deriv c r) s = None" using lexer_correct_None by auto
moreover
from b1 have "s ∉ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto
then have "lexer (fst (simp (deriv c r))) s = None" using lexer_correct_None by auto
then have "slexer (fst (simp (deriv c r))) s = None" using IH by simp
ultimately show "slexer r (c # s) = lexer r (c # s)"
by (simp del: slexer.simps add: slexer_better_simp)
qed
qed

end