Session X86_Semantics

Theory BitByte

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Bit and byte-level theorems"

theory BitByte
  imports Main "Word_Lib.Syntax_Bundles" "Word_Lib.Bit_Shifts_Infix_Syntax" "Word_Lib.Bitwise"
begin

subsection ‹Basics›

unbundle bit_operations_syntax
unbundle bit_projection_infix_syntax

definition take_bits :: "nat  nat  'a::len word  'a::len word" ("_,__" 51) ― ‹little-endian›
  where "take_bits l h w  (w >> l) AND mask (h-l)"

text @{term "take_bits l h w"} takes a subset of bits from word @{term w}, from low (inclusive) to high (exclusive).
  For example, @{term [show_types] "take_bits 2 5 (28::8word) = 7"}.
›

definition take_byte :: "nat  'a::len word  8word" ― ‹little-endian›
  where "take_byte n w  ucast (n*8,n*8+8w)"

text @{term "take_byte n w"} takes the nth byte from word @{term w}.
  For example, @{term [show_types] "take_byte 1 (42 << 8::16word) = 42"}.
›

definition overwrite :: "nat  nat  'a::len word  'a::len word  'a::len word"
  where "overwrite l h w0 w1  ((h,LENGTH('a)w0) << h) OR ((l,hw1) << l) OR (0,lw0)"

text @{term "overwrite l h w0 w1"} overwrites low (inclusive) to high (exclusive) bits in
  word @{term w0} with bits from word @{term w1}.
  For example, @{term [show_types] "(overwrite 2 4 28 227 :: 8word) = 16"}.
›


text ‹We prove some theorems about taking the nth bit/byte of an operation. These are useful to prove
      equaltiy between different words, by first applying rule @{thm word_eqI}.›

lemma bit_take_bits_iff [bit_simps]:
  (l,hw) !! n  n < LENGTH('a)  n < h - l  w !! (n + l) for w :: 'a::len word›
  by (simp add: take_bits_def bit_simps ac_simps)

lemma bit_take_byte_iff [bit_simps]:
  ‹take_byte m w !! n  n < LENGTH('a)  n < 8  w !! (n + m * 8) for w :: 'a::len word›
  by (auto simp add: take_byte_def bit_simps)

lemma bit_overwrite_iff [bit_simps]:
  ‹overwrite l h w0 w1 !! n  n < LENGTH('a) 
    (if l  n  n < h then w1 else w0) !! n
    for w0 w1 :: 'a::len word›
  by (auto simp add: overwrite_def bit_simps)

lemma nth_takebits:
  fixes w :: "'a::len word"
  shows "(l,hw) !! n = (if n < LENGTH('a)  n < h - l then w !! (n + l) else False)"
  by (auto simp add: bit_simps)

lemma nth_takebyte:
  fixes w :: "'a::len word"
  shows "take_byte (n div 8) w !! (n mod 8) = (if n mod 8 < LENGTH('a) then w!!n else False )"
  by (simp add: bit_simps)

lemma nth_take_byte_overwrite:
  fixes v v' :: "'a::len word"
  shows "take_byte n (overwrite l h v v') !! i = (if i + n * 8 < l  i + n * 8  h then take_byte n v !! i else take_byte n v' !! i)"
  by (auto simp add: bit_simps dest: bit_imp_le_length)

lemma nth_bitNOT:
  fixes a :: "'a::len word"
  shows "(NOT a) !! n  (if n < LENGTH('a)  then ¬(a !! n) else False)"
  by (simp add: bit_simps)


text ‹Various simplification rules›

lemma ucast_take_bits:
  fixes w :: "'a::len word"
  assumes "h = LENGTH('b)"
      and "LENGTH('b)  LENGTH('a)"
    shows "ucast (0,hw) = (ucast w ::'b :: len word)"
  apply (rule bit_word_eqI)
  using assms
  apply (simp add: bit_simps)
  done

lemma take_bits_ucast:
  fixes w :: "'b::len word"
  assumes "h = LENGTH('b)"
  shows "(0,h (ucast w ::'a :: len word)) = (ucast w ::'a :: len word)"
  apply (rule bit_word_eqI)
  using assms
  apply (auto simp add: bit_simps dest: bit_imp_le_length)
  done

lemma take_bits_take_bits:
  fixes w :: "'a::len word"
  shows "(l,h(l',h'w)) = (if min LENGTH('a) h  h' - l' then l+l',h'w else (l+l',l'+min LENGTH('a) hw))"
  apply (rule bit_word_eqI)
  apply (simp add: bit_simps ac_simps)
  apply auto
  done

lemma take_bits_overwrite:
  shows "l,h(overwrite l h w0 w1) = l,hw1"
  apply (rule bit_word_eqI)
  apply (simp add: bit_simps ac_simps)
  apply (auto dest: bit_imp_le_length)
  done

lemma overwrite_0_take_bits_0:
  shows "overwrite 0 h (0,hw0) w1 = 0,hw1"
  apply (rule bit_word_eqI)
  apply (simp add: bit_simps ac_simps)
  done

lemma take_byte_shiftlr_256:
  fixes v :: "256 word"
  assumes "m  n"
  shows "take_byte n (v << m*8) = (if (n+1)*8  256 then take_byte (n-m) v else 0)"
  apply (rule bit_word_eqI)
  using assms
  apply (simp add: bit_simps)
  apply (simp add: algebra_simps)
  done


subsection ‹ Take\_Bits and arithmetic›

text ‹This definition is based on @{thm to_bl_plus_carry}, which formulates addition as bitwise operations using @{term xor3} and @{term carry}.›

definition bitwise_add :: "(bool × bool) list  bool  bool list"
  where "bitwise_add x c  foldr (λ(x, y) res car. xor3 x y car # res (carry x y car)) x (λ_. []) c"

lemma length_foldr_bitwise_add:
  shows "length (bitwise_add x c) = length x"
  unfolding bitwise_add_def
  by(induct x arbitrary: c) auto

text ‹This is the "heart" of the proof: bitwise addition of two appended zipped lists can be expressed as
      two consecutive bitwise additions.
      Here, I need to make the assumption that the final carry is False.
 ›
lemma bitwise_add_append:
  assumes "x = []  ¬carry (fst (last x)) (snd (last x)) True"
  shows "bitwise_add (x @ y) (x[]  c) = bitwise_add x (x[]  c) @ bitwise_add y False"
  using assms
  unfolding bitwise_add_def
  by(induct x arbitrary: c) (auto simp add: case_prod_unfold xor3_def carry_def split: if_split_asm)

lemma bitwise_add_take_append:
  shows "take (length x) (bitwise_add (x @ y) c) = bitwise_add x c"
  unfolding bitwise_add_def
  by(induct x arbitrary: c) (auto simp add: case_prod_unfold xor3_def carry_def split: if_split_asm)

lemma bitwise_add_zero:
  shows "bitwise_add (replicate n (False, False)) False = replicate n False "
  unfolding bitwise_add_def
  by(induct n) (auto simp add: xor3_def carry_def)

lemma bitwise_add_take:
  shows "take n (bitwise_add x c) = bitwise_add (take n x) c"
  unfolding bitwise_add_def
  by (induct n arbitrary: x c,auto)
     (metis append_take_drop_id bitwise_add_def bitwise_add_take_append diff_is_0_eq' length_foldr_bitwise_add length_take nat_le_linear rev_min_pm1 take_all)


lemma fst_hd_drop_zip:
  assumes "n < length x"
      and "length x = length y"
  shows "fst (hd (drop n (zip x y))) = hd (drop n x)"
  using assms
  by (induct x arbitrary: n y,auto)
     (metis (no_types, lifting) Cons_nth_drop_Suc drop_zip fst_conv length_Cons list.sel(1) zip_Cons_Cons)

lemma snd_hd_drop_zip:
  assumes "n < length x"
      and "length x = length y"
  shows "snd (hd (drop n (zip x y))) = hd (drop n y)"
  using assms
  by (induct x arbitrary: n y,auto)
     (metis (no_types, lifting) Cons_nth_drop_Suc drop_zip snd_conv length_Cons list.sel(1) zip_Cons_Cons)

text ‹
  Ucasting of @{term "a+b"} can be rewritten to taking bits of @{term a} and @{term b}.
›

lemma ucast_plus:
  fixes a b :: "'a::len word"
  assumes "LENGTH('a) > LENGTH('b)"
  shows "(ucast (a + b) ::'b::len word) = (ucast a + ucast b::'b::len word)"
proof-
  have "to_bl (ucast (a + b) ::'b::len word) = to_bl (ucast a + ucast b::'b::len word)"
    using assms
    apply (auto simp add: to_bl_ucast to_bl_plus_carry word_rep_drop length_foldr_bitwise_add drop_zip[symmetric] rev_drop bitwise_add_def simp del: foldr_replicate foldr_append)
    apply (simp only: bitwise_add_def[symmetric] length_foldr_bitwise_add)
    by (auto simp add: drop_take bitwise_add_take[symmetric] rev_take length_foldr_bitwise_add)
  thus ?thesis
    using word_bl.Rep_eqD
    by blast
qed

lemma ucast_uminus:
  fixes a b :: "'a::len word"
assumes "LENGTH('a) > LENGTH('b)"
  shows "ucast (- a) = (- ucast a :: 'b::len word)"
  apply (subst twos_complement)+
  apply (subst word_succ_p1)+
  apply (subst ucast_plus)
  apply (rule assms)
   apply simp
   apply (rule word_eqI)
   apply (auto simp add: word_size nth_ucast nth_bitNOT)
   using assms order.strict_trans
   by blast

lemma ucast_minus:
  fixes a b :: "'a::len word"
  assumes "LENGTH('a) > LENGTH('b)"
  shows "(ucast (a - b) ::'b::len word) = (ucast a - ucast b::'b::len word)"
  using ucast_plus[OF assms,of a "-b"] ucast_uminus[OF assms,of b]
  by auto

lemma to_bl_takebits:
  fixes a :: "'a::len word"
  shows "to_bl (0,ha) = replicate (LENGTH('a) - h) False @ drop (LENGTH('a) - h) (to_bl a)"
  apply (auto simp add: take_bits_def bl_word_and to_bl_mask)
  apply (rule nth_equalityI)
  by (auto simp add: min_def nth_append)


text ‹ All simplification rules that are used during symbolic execution.›
lemmas BitByte_simps = ucast_plus ucast_minus ucast_uminus take_bits_overwrite take_bits_take_bits
  ucast_take_bits overwrite_0_take_bits_0 mask_eq_exp_minus_1
  ucast_down_ucast_id is_down take_bits_ucast ucast_up_ucast_id is_up

text ‹Simplification for immediate (numeral) values.›
lemmas take_bits_numeral[simp] = take_bits_def[of _ _ "numeral n"] for n
lemmas take_bits_num0[simp] = take_bits_def[of _ _ "0"] for n
lemmas take_bits_num1[simp] = take_bits_def[of _ _ "1"] for n
lemmas overwrite_numeral_numeral[simp] = overwrite_def[of _ _ "numeral n" "numeral m"] for n m
lemmas overwrite_num0_numeral[simp] = overwrite_def[of _ _ 0 "numeral m"] for n m
lemmas overwrite_numeral_num0[simp] = overwrite_def[of _ _ "numeral m" 0] for n m
lemmas overwrite_numeral_00[simp] = overwrite_def[of _ _ 0 0]

end

Theory Memory

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Memory-related theorems"

theory Memory
  imports BitByte
begin

context
  fixes dummy_type :: "'a::len"
begin

primrec read_bytes :: "('a word  8 word)  'a word  nat  8word list"
  where "read_bytes m a 0   = []"
  | "read_bytes m a (Suc n) = m (a + of_nat n) # read_bytes m a n"

text ‹Read bytes from memory. Memory is represented by a term of @{typ "64 word  8 word"}.
      Given an address @{term [show_types] "a::64 word"} and a size @{term n}, retrieve the bytes in
      the order they are stored in memory.›

definition region_addresses :: "'a word  nat  'a word set"
  where "region_addresses a si  {a' .  i < si . a' = a + of_nat (si - i - 1)}"

text ‹The set of addresses belonging to a region starting at address @{term "a::64 word"} of @{term si} bytes.›

definition region_overflow :: "'a word  nat  bool"
  where "region_overflow a si  unat a + si  2^LENGTH('a)"

text ‹An overflow occurs if the address plus the size is greater equal @{term "2^64"}

definition enclosed :: "'a word  nat  'a word  nat  bool"
  where "enclosed a' si' a si  unat a + si < 2^LENGTH('a)  unat a  unat a'  unat a' + si'  unat a + si"

text ‹A region is enclosed in another if its @{const region_addresses} is a subset of the other.›

definition separate :: "'a word  nat  'a word  nat  bool"
  where "separate a si a' si'  si 0  si'  0  region_addresses a si  region_addresses a' si' = {}"

text ‹A region is separate from another if they do not overlap.›



lemma region_addresses_iff: "a'  region_addresses a si  unat (a' - a) < si"
  apply (auto simp add: region_addresses_def unsigned_of_nat)
   apply (metis diff_Suc_less le_less_trans less_imp_Suc_add take_bit_nat_less_eq_self zero_less_Suc)
  by (smt (z3) add.commute add_Suc_right add_diff_cancel_left' diff_add_cancel less_add_Suc2 less_imp_Suc_add word_unat.Rep_inverse)

lemma notin_region_addresses:
  assumes "x  region_addresses a si"
  shows "unat x < unat a  unat a + si  unat x"
  by (metis assms add.commute less_diff_conv2 not_le_imp_less region_addresses_iff unat_sub_if')

lemma notin_region_addresses_sub:
  assumes "x  region_addresses a si"
  shows "unat (x - a') < unat (a - a')  unat (a - a') + si  unat (x - a')"
  using assms notin_region_addresses region_addresses_iff by auto

lemma region_addresses_eq_empty_iff: "region_addresses a si = {}  si = 0"
  by (metis region_addresses_iff add_diff_cancel_left' ex_in_conv neq0_conv not_less0 unsigned_0)

lemma length_read_bytes:
  shows "length (read_bytes m a si) = si"
  by (induct si,auto)

lemma nth_read_bytes:
assumes "n < si"
shows "read_bytes m a si ! n = m (a + of_nat (si - 1 - n))"
  using assms
  apply (induct si arbitrary: n,auto)
  subgoal for si n
    by(cases n,auto)
  done

text ‹Writing to memory occurs via function @{const override_on}.
      In case of enclosure, reading bytes from memory overridden on a set of region addresses can
      be simplified to reading bytes from the overwritten memory only.
      In case of separation, reading bytes from overridden memory can be simplified to reading
      from the original memory.›

lemma read_bytes_override_on_enclosed:
assumes "offset'  offset"
    and "si'  si"
    and "unat offset + si'  si + unat offset'"
  shows "read_bytes (override_on m m' (region_addresses (a - offset) si)) (a - offset') si' = read_bytes m' (a - offset') si'"
proof-
  {
    fix i
    assume 1: "i < si'"
    let ?i = "(si + i + unat offset') - unat offset - si'"

    have "i + unat offset' < si' + unat offset"
      using 1 assms(1)
      by unat_arith
    hence 2: "si + i + unat offset' - (unat offset + si') < si"
      using diff_less[of "(si' + unat offset - i) - unat offset'" si] assms(3)
      by auto
    moreover
    have "of_nat (si' - Suc i) - offset' = of_nat (si - Suc ?i) - offset"
      using assms 1 2 by (auto simp add: of_nat_diff)
    ultimately
    have " i' < si. of_nat (si' - Suc i) - offset' = of_nat (si - Suc i') - offset"
      by auto
  }
  note 1 = this
  show ?thesis
    apply (rule nth_equalityI)
    using 1
    by (auto simp add: length_read_bytes nth_read_bytes override_on_def region_addresses_def)
qed

lemmas read_bytes_override_on = read_bytes_override_on_enclosed[where offset=0 and offset'=0,simplified]

lemma read_bytes_override_on_enclosed_plus:
  assumes "unat offset + si'  si"
      and "si  2^LENGTH('a)"
  shows "read_bytes (override_on m m' (region_addresses a si)) (offset+a) si' = read_bytes m' (offset+a) si'"
proof-
  {
    fix i
    have "i < si'   i' < si. offset + (of_nat (si' - Suc i)::'a word) = of_nat (si - Suc i')"
      apply (rule exI[of _"si - si' + i - unat offset"])
      using assms by (auto simp add: of_nat_diff)
  }
  note 1 = this
  show ?thesis
    apply (rule nth_equalityI)
    using assms 1
    by (auto simp add: override_on_def length_read_bytes nth_read_bytes region_addresses_def)
qed


lemma read_bytes_override_on_separate:
assumes "separate a si a' si'"
  shows "read_bytes (override_on m m' (region_addresses a si)) a' si' = read_bytes m a' si'"
  apply (rule nth_equalityI)
  using assms
  by (auto simp add: length_read_bytes nth_read_bytes override_on_def separate_def region_addresses_def)


text‹Bytes are are written to memory one-by-one, then read by @{const read_bytes} producing a list of bytes.
    That list is concatenated again using @{const word_rcat}. Writing @{term si} bytes of word @{term w} into
    memory, reading the byte-list and concatenating again produces @{term si} bytes of the original word.›


lemma word_rcat_read_bytes_enclosed:
  fixes w :: "'b::len word"
  assumes "LENGTH('b)  2^LENGTH('a)"
    and "unat offset + si  2^LENGTH('a)"
  shows "word_rcat (read_bytes (λa'. take_byte (unat (a' - a)) w) (a + offset) si) = unat offset * 8,(unat offset + si) * 8w"
  apply (rule word_eqI)
  using assms
  apply (auto simp add: test_bit_rcat word_size length_read_bytes rev_nth nth_read_bytes unat_of_nat nth_takebyte unat_word_ariths )
    apply (auto simp add: take_byte_def nth_ucast nth_takebits take_bit_eq_mod split: if_split_asm)[1]
   apply (auto simp add: nth_takebits split: if_split_asm)[1]
  apply (auto simp add: take_byte_def nth_ucast nth_takebits split: if_split_asm)[1]
  by (auto simp add: rev_nth length_read_bytes take_byte_def nth_ucast nth_takebits nth_read_bytes unat_word_ariths unat_of_nat take_bit_eq_mod split: if_split_asm)

lemmas word_rcat_read_bytes = word_rcat_read_bytes_enclosed[where offset=0,simplified]



text ‹The following theorems allow reasoning over enclosure and separation, for example as linear arithmetic.›

lemma enclosed_spec:
  assumes enclosed: "enclosed a' si' a si"
      and x_in: "x  region_addresses a' si'"
    shows "x  region_addresses a si"
proof -
  from x_in have "unat (x - a') < si'"
    using region_addresses_iff by blast
  with enclosed have "unat (x - a) < si"
    unfolding enclosed_def by (auto simp add: unat_sub_if' split: if_split_asm)
  then show ?thesis
    using region_addresses_iff by blast
qed

lemma address_in_enclosed_region_as_linarith:
  assumes "enclosed a' si' a si"
      and "x  region_addresses a' si'"
    shows "a  x  a'  x  x < a' + of_nat si'  x < a + of_nat si"
  using assms
  by (auto simp add: enclosed_def region_addresses_def word_le_nat_alt word_less_nat_alt unat_of_nat unat_word_ariths unat_sub_if' take_bit_eq_mod)


lemma address_of_enclosed_region_ge:
  assumes "enclosed a' si' a si"
    shows "a'  a"
  using assms word_le_nat_alt
  by (auto simp add: enclosed_def)

lemma address_in_enclosed_region:
  assumes "enclosed a' si' a si"
      and "x  region_addresses a' si'"
    shows "unat (x - a)  unat (a' - a)  unat (a' - a) + si' > unat (x - a)  unat (x - a) < si"
  by (smt (z3) address_in_enclosed_region_as_linarith add_diff_cancel_left' address_of_enclosed_region_ge assms(1) assms(2) diff_diff_add enclosed_spec le_iff_add nat_add_left_cancel_less region_addresses_iff unat_sub_if' word_le_minus_mono_left word_unat.Rep_inverse word_unat_less_le)

lemma enclosed_minus_minus:
  fixes a :: "'a word"
  assumes "offset  offset'"
      and "unat offset - si  unat offset' - si'"
      and "unat offset'  si'"
      and "unat offset  si"
      and "a  offset"
    shows "enclosed (a - offset') si' (a - offset) si"
proof-
  have "unat offset'  unat a"
    using assms(1,5)
    by unat_arith
  thus ?thesis
    using assms
    apply (auto simp add: enclosed_def unat_sub_if_size word_size)
    apply unat_arith
    using diff_le_mono2[of "unat offset - si" "unat offset' - si'" "unat a"]
    apply (auto simp add: enclosed_def unat_sub_if_size word_size)
     by unat_arith+
qed

lemma enclosed_plus:
  fixes a :: "'a word"
  assumes "si' < si"
      and "unat a + si < 2^LENGTH('a)"
    shows "enclosed a si' a si"
  using assms
  by (auto simp add: enclosed_def)

lemma separate_symm: "separate a si a' si' = separate a' si' a si"
  by (metis inf.commute separate_def)

lemma separate_iff: "separate a si a' si'  si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
proof
  assume assm: "separate a si a' si'"
  have "unat (a' - a)  si" if "separate a si a' si'" for a si a' si'
  proof (rule ccontr)
    assume "¬ unat (a' - a)  si"
    then have "a'  region_addresses a si"
      by (simp add: region_addresses_iff)
    moreover from that have "a'  region_addresses a' si'"
      using region_addresses_iff separate_def by auto
    ultimately have "¬separate a si a' si'"
      by (meson disjoint_iff separate_def)
    with that show False
      by blast
  qed
  with assm have "unat (a' - a)  si" and "unat (a - a')  si'"
    using separate_symm by auto
  with assm show "si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
    using separate_def by auto
next
  assume assm: "si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
  then have "unat (x - a')  si'" if "unat (x - a) < si" for x
    using that apply (auto simp add: unat_sub_if' split: if_split_asm)
     apply (meson Nat.le_diff_conv2 add_increasing le_less_trans less_imp_le_nat unsigned_greater_eq unsigned_less)
    by (smt (z3) Nat.le_diff_conv2 add_leD2 le_less_trans linorder_not_less nat_le_linear unat_lt2p)
  then have "region_addresses a si  region_addresses a' si' = {}"
    by (simp add: region_addresses_iff disjoint_iff leD)
  with assm show "separate a si a' si'"
    by (simp add: separate_def)
qed

lemma separate_as_linarith:
assumes "¬region_overflow a si"
    and "¬region_overflow a' si'"
  shows "separate a si a' si'  0 < si  0 < si'  (a + of_nat si  a'  a' + of_nat si'  a)"
    (is "?lhs  ?rhs")
proof
  assume ?lhs then show ?rhs
   by (meson separate_iff le_less le_plus not_le_imp_less word_of_nat_le)
next
  have *: "separate a si a' si'"
    if "si > 0" and "si' > 0" and "a + of_nat si  a'"
      and "¬region_overflow a si" and "¬region_overflow a' si'"
    for a si a' si'
  proof -
    from that have "unat a + si < 2^LENGTH('a)" and "unat a' + si' < 2^LENGTH('a)"
      by (meson not_le region_overflow_def)+
    have "x < a + of_nat si" if "x  region_addresses a si" for x
      by (smt (z3) Abs_fnat_hom_add ‹unat a + si < 2 ^ LENGTH('a) add.commute dual_order.trans le_add1 less_diff_conv2 not_less region_addresses_iff that unat_of_nat_len unat_sub_if' word_less_iff_unsigned word_unat.Rep_inverse)
    moreover have "x  a'" if "x  region_addresses a' si'" for x
      using address_in_enclosed_region_as_linarith enclosed_def ‹unat a' + si' < 2 ^ LENGTH('a) that by blast
    ultimately show ?thesis
      using separate_def that by fastforce
  qed
  assume ?rhs then show ?lhs
    by (meson "*" assms separate_symm)
qed

text ‹Compute separation in case the addresses and sizes are immediate values.›
lemmas separate_as_linarith_numeral [simp] =
  separate_as_linarith [of "numeral a::'a word" "numeral si" "numeral a'::'a word" "numeral si'"] for a si a' si'
lemmas separate_as_linarith_numeral_1 [simp] =
  separate_as_linarith [of "numeral a::'a word" "numeral si" "numeral a'::'a word" "Suc 0"] for a si a'
lemmas separate_as_linarith_numeral1_ [simp] =
  separate_as_linarith [of "numeral a::'a word" "Suc 0" "numeral a'::'a word" "numeral si'"] for a a' si'
lemmas separate_as_linarith_numeral11 [simp] =
  separate_as_linarith [of "numeral a::'a word" "Suc 0" "numeral a'::'a word" "Suc 0"] for a a'
lemmas region_overflow_numeral[simp] =
  region_overflow_def [of "numeral a::'a word" "numeral si"] for a si
lemmas region_overflow_numeral1[simp] =
  region_overflow_def [of "numeral a::'a word" "Suc 0"] for a


lemma separate_plus_none:
  assumes "si'  unat offset"
      and "0 < si"
      and "0 < si'"
      and "unat offset + si  2^LENGTH('a)"
    shows "separate (offset + a) si a si'"
  using assms apply (auto simp add: separate_iff)
  by (smt (z3) Nat.diff_diff_right add.commute add_leD1 diff_0 diff_is_0_eq diff_zero not_gr_zero unat_sub_if' unsigned_0)

lemmas unat_minus = unat_sub_if'[of 0,simplified]

lemma separate_minus_minus':
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "unat offset'  si'"
      and "unat offset - si  unat offset'"
    shows "separate (a - offset) si (a - offset') si'"
  using assms apply (auto simp add: separate_iff)
   apply (metis Nat.le_diff_conv2 add.commute add_leD2 unat_sub_if')
  by (smt (z3) add.commute add_diff_cancel_right' diff_add_cancel diff_le_self diff_less less_le_trans nat_le_linear not_le unat_sub_if')

lemma separate_minus_minus:
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "unat offset'  si'"
      and "unat offset - si  unat offset'  unat offset' - si'  unat offset"
    shows "separate (a - offset) si (a - offset') si'"
  by (meson assms separate_minus_minus' separate_symm)

lemma separate_minus_none:
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "si'  2^LENGTH('a) - unat offset"
    shows "separate (a - offset) si a si'"
proof -
  have "0 < si" and "0 < si'"
    using assms(1,2) by blast+
  moreover have "¬ offset  0"
    using assms(1) assms(3) by fastforce
  ultimately show ?thesis
    by (smt (z3) add_diff_cancel_left' assms(3,4) diff_diff_eq2 diff_zero le_add_diff_inverse less_or_eq_imp_le separate_iff unat_sub_if' unsigned_0 unsigned_less word_less_eq_iff_unsigned)
qed


text ‹The following theorems are used during symbolic execution to determine whether two regions are separate.›

lemmas separate_simps = separate_plus_none separate_minus_none separate_minus_minus

end
end

Theory State

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Concrete state and instructions"

theory State
  imports Main Memory
begin

text ‹A state consists of registers, memory, flags and a rip. Some design considerations here:
\begin{itemize}
\item All register values are 256 bits. We could also distinguish 64 bits registers, 128 registers etc.
      That would increase complexity in proofs and datastructures. The cost of using 256 everywhere is
      that a goal typically will have some casted 64 bits values.
\item The instruction pointer RIP is a special 64-bit register outside of the normal register set.
\item Strings are used for registers and flags. We would prefer an enumerative datatype, however, that would be
      extremely slow since there are roughly 100 register names.
\end{itemize}
›

record state =
  regs  :: "string   256word"
  mem   :: "64 word  8 word"
  flags :: "string   bool"
  rip   :: "64 word"

definition real_reg :: "string  bool × string × nat × nat"
  where "real_reg reg 
  ― ‹TODO: xmm, ymm, etc.›
  case reg of
  ― ‹rip›
    ''rip''    (True,  ''rip'', 0,64)
  ― ‹rax,rbx,rcx,rdx›
  | ''rax''    (True,  ''rax'', 0,64)
  | ''eax''    (True,  ''rax'', 0,32)
  | ''ax''     (False, ''rax'', 0,16)
  | ''ah''     (False, ''rax'', 8,16)
  | ''al''     (False, ''rax'', 0,8)
  | ''rbx''    (True,  ''rbx'', 0,64)
  | ''ebx''    (True,  ''rbx'', 0,32)
  | ''bx''     (False, ''rbx'', 0,16)
  | ''bh''     (False, ''rbx'', 8,16)
  | ''bl''     (False, ''rbx'', 0,8)
  | ''rcx''    (True,  ''rcx'', 0,64)
  | ''ecx''    (True,  ''rcx'', 0,32)
  | ''cx''     (False, ''rcx'', 0,16)
  | ''ch''     (False, ''rcx'', 8,16)
  | ''cl''     (False, ''rcx'', 0,8)
  | ''rdx''    (True,  ''rdx'', 0,64)
  | ''edx''    (True,  ''rdx'', 0,32)
  | ''dx''     (False, ''rdx'', 0,16)
  | ''dh''     (False, ''rdx'', 8,16)
  | ''dl''     (False, ''rdx'', 0,8)
  ― ‹RBP, RSP›
  | ''rbp''    (True,  ''rbp'', 0,64)
  | ''ebp''    (True,  ''rbp'', 0,32)
  | ''bp''     (False, ''rbp'', 0,16)
  | ''bpl''    (False, ''rbp'', 0,8)
  | ''rsp''    (True,  ''rsp'', 0,64)
  | ''esp''    (True,  ''rsp'', 0,32)
  | ''sp''     (False, ''rsp'', 0,16)
  | ''spl''    (False, ''rsp'', 0,8)
  ― ‹ RDI, RSI, R8 to R15›
  | ''rdi''    (True,  ''rdi'', 0,64)
  | ''edi''    (True,  ''rdi'', 0,32)
  | ''di''     (False, ''rdi'', 0,16)
  | ''dil''    (False, ''rdi'', 0,8)
  | ''rsi''    (True,  ''rsi'', 0,64)
  | ''esi''    (True,  ''rsi'', 0,32)
  | ''si''     (False, ''rsi'', 0,16)
  | ''sil''    (False, ''rsi'', 0,8)
  | ''r15''    (True,  ''r15'', 0,64)
  | ''r15d''   (True,  ''r15'', 0,32)
  | ''r15w''   (False, ''r15'', 0,16)
  | ''r15b''   (False, ''r15'', 0,8)
  | ''r14''    (True,  ''r14'', 0,64)
  | ''r14d''   (True,  ''r14'', 0,32)
  | ''r14w''   (False, ''r14'', 0,16)
  | ''r14b''   (False, ''r14'', 0,8)
  | ''r13''    (True,  ''r13'', 0,64)
  | ''r13d''   (True,  ''r13'', 0,32)
  | ''r13w''   (False, ''r13'', 0,16)
  | ''r13b''   (False, ''r13'', 0,8)
  | ''r12''    (True,  ''r12'', 0,64)
  | ''r12d''   (True,  ''r12'', 0,32)
  | ''r12w''   (False, ''r12'', 0,16)
  | ''r12b''   (False, ''r12'', 0,8)
  | ''r11''    (True,  ''r11'', 0,64)
  | ''r11d''   (True,  ''r11'', 0,32)
  | ''r11w''   (False, ''r11'', 0,16)
  | ''r11b''   (False, ''r11'', 0,8)
  | ''r10''    (True,  ''r10'', 0,64)
  | ''r10d''   (True,  ''r10'', 0,32)
  | ''r10w''   (False, ''r10'', 0,16)
  | ''r10b''   (False, ''r10'', 0,8)
  | ''r9''     (True,  ''r9'' , 0,64)
  | ''r9d''    (True,  ''r9'' , 0,32)
  | ''r9w''    (False, ''r9'' , 0,16)
  | ''r9b''    (False, ''r9'' , 0,8)
  | ''r8''     (True,  ''r8'' , 0,64)
  | ''r8d''    (True,  ''r8'' , 0,32)
  | ''r8w''    (False, ''r8'' , 0,16)
  | ''r8b''    (False, ''r8'' , 0,8)
  ― ‹xmm›
  | ''xmm0''   (True, ''xmm0'' , 0,128)
  | ''xmm1''   (True, ''xmm1'' , 0,128)
  | ''xmm2''   (True, ''xmm2'' , 0,128)
  | ''xmm3''   (True, ''xmm3'' , 0,128)
  | ''xmm4''   (True, ''xmm4'' , 0,128)
  | ''xmm5''   (True, ''xmm5'' , 0,128)
  | ''xmm6''   (True, ''xmm6'' , 0,128)
  | ''xmm7''   (True, ''xmm7'' , 0,128)
  | ''xmm8''   (True, ''xmm8'' , 0,128)
  | ''xmm9''   (True, ''xmm9'' , 0,128)
  | ''xmm10''   (True, ''xmm10'' , 0,128)
  | ''xmm11''   (True, ''xmm11'' , 0,128)
  | ''xmm12''   (True, ''xmm12'' , 0,128)
  | ''xmm13''   (True, ''xmm13'' , 0,128)
  | ''xmm14''   (True, ''xmm14'' , 0,128)
  | ''xmm15''   (True, ''xmm15'' , 0,128)
  "


text ‹x86 has register aliassing. For example, register EAX is the lower 32 bits of register RAX.
      This function map register aliasses to the ``real'' register. For example:

      @{term "real_reg ''ah'' = (False, ''rax'', 8,16)"}.

      This means that register AH is the second byte (bits 8 to 16) of register RAX.
      The bool @{const False} indicates that writing to AH does not overwrite the remainder of RAX.

      @{term "real_reg ''eax'' = (True, ''rax'', 0,32)"}.

      Register EAX is the lower 4 bytes of RAX. Writing to EAX means overwritting the remainder of RAX
      with zeroes.
›

definition reg_size :: "string  nat" ― ‹in bytes›
  where "reg_size reg  let (_,_,l,h) = real_reg reg in (h - l) div 8"


text‹ We now define functions for reading and writing from state.›

definition reg_read :: "state  string  256 word"
  where "reg_read σ reg 
      if reg = ''rip'' then ucast (rip σ) else
      if reg = '''' then 0 else ― ‹happens if no base register is used in an address›
      let (_,r,l,h) = real_reg reg in
        l,h(regs σ r)"

primrec fromBool :: "bool  'a :: len word"
  where
    "fromBool True = 1"
  | "fromBool False = 0"

definition flag_read :: "state  string  256 word"
  where "flag_read σ flag  fromBool (flags σ flag)"

definition mem_read :: "state  64 word  nat  256 word"
  where "mem_read σ a si  word_rcat (read_bytes (mem σ) a si)"


text ‹Doing state-updates occur through a tiny deeply embedded language of state updates. This allows us
      to reason over state updates through theorems.›

datatype StateUpdate =
    RegUpdate string "256 word"         ― ‹Write value to register›
  | FlagUpdate "string  bool"         ― ‹Update all flags at once›
  | RipUpdate "64 word"                 ― ‹Update instruction pointer with address›
  | MemUpdate "64 word" nat "256 word"  ― ‹Write a number of bytes of a value to the address›

primrec state_update
  where
    "state_update (RegUpdate reg val)  = (λ σ . σregs := (regs σ)(reg := val))"
  | "state_update (FlagUpdate  val)    = (λ σ . σflags := val)"
  | "state_update (RipUpdate a)        = (λ σ . σrip := a)"
  | "state_update (MemUpdate a si val) = (λ σ .
        let new = (λ a' . take_byte (unat (a' - a)) val) in
         σmem := override_on (mem σ) new (region_addresses a si))"

abbreviation RegUpdateSyntax ("_ :=r _" 30)
  where "RegUpdateSyntax reg val  RegUpdate reg val"
abbreviation MemUpdateSyntax ("_,_ :=m _" 30)
  where "MemUpdateSyntax a si val  MemUpdate a si val"
abbreviation FlagUpdateSyntax ("setFlags")
  where "FlagUpdateSyntax val  FlagUpdate val"
abbreviation RipUpdateSyntax ("setRip")
  where "RipUpdateSyntax val  RipUpdate val"

text ‹Executes a write to a register in terms of the tiny deeply embedded language above.›
definition reg_write
  where "reg_write reg val σ 
      let (b,r,l,h)  = real_reg reg;
          curr_val   = reg_read σ r;
          new_val    = if b then val else overwrite l h curr_val val in
        state_update (RegUpdate r new_val) σ"


text ‹A datatype for operands of instructions.›

datatype Operand =
    Imm "256 word"
  | Reg string
  | Flag string
  | Mem  nat    "64 word"   string    "string"      nat
      ― ‹size   offset      base-reg  index-reg    scale›

abbreviation mem_op_no_offset_no_index :: "string  (64 word × string × string × nat)" ("[_]1" 40)
  where "mem_op_no_offset_no_index r  (0,r,[],0)"

abbreviation mem_op_no_index :: "64 word  string  (64 word × string × string × nat)" ("[_ + _]2" 40)
  where "mem_op_no_index offset r  (offset,r,[],0)"

abbreviation mem_op :: "64 word  string  string  nat  (64 word × string × string × nat)" ("[_ + _ + _ * _]3" 40)
  where "mem_op offset r index scale (offset,r,index,scale)"

definition ymm_ptr ("YMMWORD PTR _")
  where "YMMWORD PTR x  case x of (offset,base,index,scale)  Mem 32 offset base index scale"

definition xmm_ptr ("XMMWORD PTR _")
  where "XMMWORD PTR x  case x of (offset,base,index,scale)  Mem 16 offset base index scale"

definition qword_ptr ("QWORD PTR _")
  where "QWORD PTR x  case x of (offset,base,index,scale)  Mem 8 offset base index scale"

definition dword_ptr ("DWORD PTR _")
  where "DWORD PTR x  case x of (offset,base,index,scale)  Mem 4 offset base index scale"

definition word_ptr ("WORD PTR _")
  where "WORD PTR x  case x of (offset,base,index,scale)  Mem 2 offset base index scale"

definition byte_ptr ("BYTE PTR _")
  where "BYTE PTR x  case x of (offset,base,index,scale)  Mem 1 offset base index scale"


primrec (nonexhaustive) operand_size :: "Operand  nat" ― ‹in bytes›
  where
    "operand_size (Reg r) = reg_size r"
  | "operand_size (Mem si _ _ _ _) = si"

fun resolve_address :: "state  64 word  char list  char list  nat  64 word"
  where "resolve_address σ offset base index scale =
      (let i = ucast (reg_read σ index);
           b = ucast (reg_read σ base) in
         offset + b + of_nat scale*i)"

primrec operand_read :: "state  Operand  256 word"
  where
    "operand_read σ (Imm i)  = i"
  | "operand_read σ (Reg r)  = reg_read σ r"
  | "operand_read σ (Flag f) = flag_read σ f"
  | "operand_read σ (Mem si offset base index scale) =
      (let a = resolve_address σ offset base index scale in
        mem_read σ a si
      )"


primrec state_with_updates :: "state  StateUpdate list  state" (infixl "with" 66)
  where
    "σ with [] = σ"
  | "(σ with (f#fs)) = state_update f (σ with fs)"

primrec (nonexhaustive) operand_write :: "Operand  256word  state  state"
  where
    "operand_write (Reg r)  v σ = reg_write r v σ"
  | "operand_write (Mem si offset base index scale) v σ =
      (let i = ucast (reg_read σ index);
           b = ucast (reg_read σ base);
           a = offset + b + of_nat scale*i in
        σ with [a,si :=m v]
      )"





text ‹ The following theorems simplify reading from state parts after doing updates to other state parts.›

lemma regs_reg_write:
  shows "regs (σ with ((r :=r w)#updates)) r' = (if r=r' then w else regs (σ with updates) r')"
  by (induct updates arbitrary: σ, auto simp add: case_prod_unfold Let_def)

lemma regs_mem_write:
  shows "regs (σ with ((a,si :=m v)#updates)) r = regs (σ with updates) r"
  by (induct updates arbitrary: σ, auto)

lemma regs_flag_write:
  shows "regs (σ with ((setFlags v)#updates)) r = regs (σ with updates) r"
  by (induct updates arbitrary: σ, auto)

lemma regs_rip_write:
  shows "regs (σ with ((setRip a)#updates)) f = regs (σ with updates) f"
  by (auto)


lemma flag_read_reg_write:
  shows "flag_read (σ with ((r :=r w)#updates)) f = flag_read (σ with updates) f"
  by (induct updates arbitrary: σ, auto simp add: flag_read_def)

lemma flag_read_mem_write:
  shows "flag_read (σ with ((a,si :=m v)#updates)) f = flag_read (σ with updates) f"
  by (induct updates arbitrary: σ, auto simp add: flag_read_def)

lemma flag_read_flag_write:
  shows "flag_read (σ with ((setFlags v)#updates)) = fromBool o v"
  by (induct updates arbitrary: σ, auto simp add: flag_read_def)

lemma flag_read_rip_write:
  shows "flag_read (σ with ((setRip a)#updates)) f = flag_read (σ with updates) f"
  by (auto simp add: flag_read_def)

lemma mem_read_reg_write:
  shows "mem_read (σ with ((r :=r w)#updates)) a si = mem_read (σ with updates) a si"
  by (auto simp add: mem_read_def read_bytes_def)

lemma mem_read_flag_write:
  shows "mem_read (σ with ((setFlags v)#updates)) a si = mem_read (σ with updates) a si"
  by (auto simp add: mem_read_def read_bytes_def)

lemma mem_read_rip_write:
  shows "mem_read (σ with ((setRip a')#updates)) a si = mem_read (σ with updates) a si"
  by (auto simp add: mem_read_def read_bytes_def)

lemma mem_read_mem_write_alias:
  assumes "si'  si"
      and "si  2^64"
  shows "mem_read (σ with ((a,si :=m v)#updates)) a si' = 0,si'*8 v"
  using assms
  by (auto simp add: mem_read_def word_rcat_read_bytes read_bytes_override_on_enclosed[where offset=0 and offset'=0,simplified])

lemma mem_read_mem_write_separate:
  assumes "separate a si a' si'"
shows "mem_read (σ with ((a,si :=m v)#updates)) a' si' = mem_read (σ with updates) a' si'"
  using assms
  by (auto simp add: mem_read_def read_bytes_override_on_separate)

lemma mem_read_mem_write_enclosed_minus:
  assumes "offset'  offset"
    and "si'  si"
    and "unat (offset - offset') + si' < 2^64"
    and "unat offset + si'  si + unat offset'"
  shows "mem_read (σ with ((a - offset,si :=m v)#updates)) (a - offset') si' = unat (offset - offset') * 8,unat (offset - offset') * 8 + si' * 8v"
  using assms
  by (auto simp add: mem_read_def read_bytes_override_on_enclosed word_rcat_read_bytes_enclosed[of "offset - offset'" si' "a - offset" v,simplified])

lemma mem_read_mem_write_enclosed_plus:
assumes "unat offset + si'  si"
    and "si < 2 ^ 64"
  shows "mem_read (σ with ((a,si :=m v)#updates)) (offset + a) si' = unat offset * 8,(unat offset + si') * 8v"
  using assms
  apply (auto simp add: mem_read_def read_bytes_override_on_enclosed_plus)
  using word_rcat_read_bytes_enclosed[of offset si' a v]
  by auto (simp add: add.commute)

lemma mem_read_mem_write_enclosed_plus2:
assumes "unat offset + si'  si"
    and "si < 2 ^ 64"
  shows "mem_read (σ with ((a,si :=m v)#updates)) (a + offset) si' = unat offset * 8,(unat offset + si') * 8v"
  using mem_read_mem_write_enclosed_plus[OF assms]
  by (auto simp add: add.commute)

lemma mem_read_mem_write_enclosed_numeral[simp]:
assumes "unat (numeral a' - numeral a::64 word) + (numeral si'::nat)  numeral si"
    and "numeral a'  (numeral a::64 word)"
    and "numeral si < (2 ^ 64::nat)"
  shows "mem_read (σ with ((numeral a,numeral si :=m v)#updates)) (numeral a') (numeral si') = unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (numeral si')) * 8v"
proof-
  have 1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)"
    using assms(2) by (metis add.commute diff_add_cancel)
  thus ?thesis
    using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a" "numeral si'" "numeral si" σ "numeral a" v updates,OF assms(1,3)]
    by auto
qed

lemma mem_read_mem_write_enclosed_numeral1_[simp]:
assumes "unat (numeral a' - numeral a::64 word) + (numeral si'::nat)  Suc 0"
    and "numeral a'  (numeral a::64 word)"
  shows "mem_read (σ with ((numeral a,Suc 0 :=m v)#updates)) (numeral a') (numeral si') = unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (numeral si')) * 8v"
proof-
  have 1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)"
    using assms(2) by (metis add.commute diff_add_cancel)
  thus ?thesis
    using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a" "numeral si'" "Suc 0" σ "numeral a" v updates,OF assms(1)]
    by auto
qed

lemma mem_read_mem_write_enclosed_numeral_1[simp]:
assumes "unat (numeral a' - numeral a::64 word) + (Suc 0)  numeral si"
    and "numeral a'  (numeral a::64 word)"
    and "numeral si < (2 ^ 64::nat)"
  shows "mem_read (σ with ((numeral a,numeral si :=m v)#updates)) (numeral a') (Suc 0) = unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (Suc 0)) * 8v"
proof-
  have 1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)"
    using assms(2) by (metis add.commute diff_add_cancel)
  thus ?thesis
    using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a" "Suc 0" "numeral si" σ "numeral a" v updates,OF assms(1,3)]
    by auto
qed


lemma mem_read_mem_write_enclosed_numeral11[simp]:
assumes "unat (numeral a' - numeral a::64 word) + (Suc 0)  Suc 0"
    and "numeral a'  (numeral a::64 word)"
  shows "mem_read (σ with ((numeral a,Suc 0 :=m v)#updates)) (numeral a') (Suc 0) = unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (Suc 0)) * 8v"
proof-
  have 1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)"
    using assms(2) by (metis add.commute diff_add_cancel)
  thus ?thesis
    using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a" "Suc 0" "Suc 0" σ "numeral a" v updates,OF assms(1)]
    by auto
qed


lemma rip_reg_write[simp]:
  shows "rip (σ with ((r :=r v)#updates)) = rip (σ with updates)"
  by (auto simp add: case_prod_unfold Let_def)

lemma rip_flag_write[simp]:
  shows "rip (σ with ((setFlags v)#updates)) = rip (σ with updates)"
  by (auto)

lemma rip_mem_write[simp]:
  shows "rip (σ with ((a,si :=m v)#updates)) = rip (σ with updates)"
  by (auto)

lemma rip_rip_write[simp]:
 shows "rip (σ with ((setRip a)#updates)) = a"
  by (auto)


lemma with_with:
  shows "(σ with updates) with updates' = σ with (updates' @ updates)"
by (induct updates' arbitrary: σ,auto)

lemma add_state_update_to_list:
  shows "state_update upd (σ with updates) = σ with (upd#updates)"
  by auto

text ‹The updates performed to a state are ordered: memoery, registers, flags, rip.
      This function is basically insertion sort. Moreover, consecutive updates to the same register
      are removed.›

fun insert_state_update
  where
    "insert_state_update (setRip a) (setRip a'#updates) = insert_state_update (setRip a) updates"
  | "insert_state_update (setRip a) (setFlags v#updates) = setFlags v # (insert_state_update (setRip a) updates)"
  | "insert_state_update (setRip a) ((r :=r v)#updates) = (r :=r v) # (insert_state_update (setRip a) updates)"
  | "insert_state_update (setRip a) ((a',si :=m v)#updates) = (a',si :=m v) # (insert_state_update (setRip a) updates)"

  | "insert_state_update (setFlags v) (setFlags v'#updates) = insert_state_update (setFlags v) updates"
  | "insert_state_update (setFlags v) ((r :=r v')#updates) = (r :=r v') # insert_state_update (setFlags v) updates"
  | "insert_state_update (setFlags v) ((a',si :=m v')#updates) = (a',si :=m v') # insert_state_update (setFlags v) updates"

  | "insert_state_update ((r :=r v)) ((r' :=r v')#updates) = (if r = r' then insert_state_update (r :=r v) updates else (r' :=r v')#insert_state_update (r :=r v) updates)"
  | "insert_state_update ((r :=r v)) ((a',si :=m v')#updates) = (a',si :=m v') # insert_state_update (r :=r v) updates"

  | "insert_state_update upd updates = upd # updates"

fun clean
  where
    "clean [] = []"
  | "clean [upd] = [upd]"
  | "clean (upd#upd'#updates) =  insert_state_update upd (clean (upd'#updates))"

lemma insert_state_update:
  shows "σ with (insert_state_update upd updates) = σ with (upd # updates)"
  by (induct updates rule: insert_state_update.induct,auto simp add: fun_upd_twist)

lemma clean_state_updates:
  shows "σ with (clean updates) = σ with updates"
  by (induct updates rule: clean.induct,auto simp add: insert_state_update)



text ‹The set of simplification rules used during symbolic execution.›
lemmas state_simps =
      qword_ptr_def dword_ptr_def word_ptr_def byte_ptr_def reg_size_def
      reg_write_def real_reg_def reg_read_def

      regs_rip_write regs_mem_write regs_reg_write regs_flag_write
      flag_read_reg_write flag_read_mem_write flag_read_rip_write flag_read_flag_write
      mem_read_reg_write mem_read_flag_write mem_read_rip_write
      mem_read_mem_write_alias mem_read_mem_write_separate
      mem_read_mem_write_enclosed_minus mem_read_mem_write_enclosed_plus mem_read_mem_write_enclosed_plus2

      with_with add_state_update_to_list

declare state_with_updates.simps(2)[simp del]
declare state_update.simps[simp del]

end

Theory X86_InstructionSemantics

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Instruction Semantics"

theory X86_InstructionSemantics
  imports State
begin

text ‹A datatype for storing instructions. Note that we add a special kind of meta-instruction, called
      ExternalFunc. A call to an external function can manually be mapped to a manually supplied state 
      transformation function.›
datatype I = 
    Instr string "Operand option" "Operand option" "Operand option" "64 word"
  | ExternalFunc "state  state"

text ‹A datatype for the result of floating point comparisons.›
datatype FP_Order = FP_Unordered | FP_GT | FP_LT | FP_EQ


abbreviation "instr_next i  case i of (Instr _ _ _ _ a')  a'"

locale unknowns =
  fixes unknown_addsd     :: "64 word  64 word  64 word"
    and unknown_subsd     :: "64 word  64 word  64 word"
    and unknown_mulsd     :: "64 word  64 word  64 word"
    and unknown_divsd     :: "64 word  64 word  64 word"
    and unknown_ucomisd   :: "64 word  64 word  FP_Order"
    and unknown_semantics :: "I  state  state"
    and unknown_flags     :: "string  string  bool" 
begin

text ‹
  The semantics below are intended to be overapproximative and incomplete.
  This is achieved using locale ``unknowns''.
  Any place where semantics is \emph{not} modelled, it is mapped to a universally quantified uninterpreted function
  from that locale. We do not make use of @{const undefined}, since that could be used to prove that the semantics
  of two undefined behaviors are equivalent. 
  For example:
  \begin{itemize}
  \item Only a subset of instructions has semantics. In case of an unknown instruction $i$,
        the function @{term semantics} below will result in @{term "unknown_semantics i"}.
  \item Not all flags have been defined. In case a flag is read whose semantics is not defined below,
        the read will resolve to @{term "unknown_flags i f"}. Note that if the semantics of an instruction do
        not set flags, an overapproximative semantics such as below imply that the instruction indeed
        does not modify flags. In order words, if we were uncertain we would assign unknown values to flags.
  \item Not all operations have been defined. For example, floating points operations have no executable
        semantics, but are mapped to uninterpreted functions such as @{term unknown_addsd}.
  \end{itemize}
›



text ‹Moves›

definition semantics_MOV :: "Operand  Operand  state  state"
  where "semantics_MOV op1 op2 σ 
          let src = operand_read σ op2 in
             operand_write op1 src σ"

abbreviation MOV
  where "MOV op1 op2  Instr ''mov'' (Some op1) (Some op2) None"

abbreviation MOVABS
  where "MOVABS op1 op2  Instr ''movabs'' (Some op1) (Some op2) None"

abbreviation MOVAPS
  where "MOVAPS op1 op2  Instr ''movaps'' (Some op1) (Some op2) None"

abbreviation MOVZX
  where "MOVZX op1 op2  Instr ''movzx'' (Some op1) (Some op2) None"

abbreviation MOVDQU
  where "MOVDQU op1 op2  Instr ''movdqu'' (Some op1) (Some op2) None"

definition semantics_MOVD :: "Operand  Operand  state  state"
  where "semantics_MOVD op1 op2 σ 
          let src = ucast(operand_read σ op2)::32word in
             operand_write op1 (ucast src) σ"

abbreviation MOVD
  where "MOVD op1 op2  Instr ''movd'' (Some op1) (Some op2) None"

fun isXMM :: "Operand  bool"
  where "isXMM (Reg r) = (take 3 r = ''xmm'')"
  | "isXMM _ = False"

definition semantics_MOVSD :: "Operand  Operand  state  state"
  where "semantics_MOVSD op1 op2 σ 
    if isXMM op1  isXMM op2 then
      let src = 0,64operand_read σ op2;
          dst = 64,128operand_read σ op1 in
             operand_write op1 (overwrite 0 64 dst src) σ
     else
      let src = 0,64operand_read σ op2 in
         operand_write op1 src σ"

abbreviation MOVSD
  where "MOVSD op1 op2  Instr ''movsd'' (Some op1) (Some op2) None"

abbreviation MOVQ
  where "MOVQ op1 op2  Instr ''movq'' (Some op1) (Some op2) None"


text ‹ lea/push/pop/call/ret/leave ›

definition semantics_LEA :: "Operand  Operand  state  state"
  where "semantics_LEA op1 op2 σ 
          case op2 of Mem si offset base index scale 
            operand_write op1 (ucast (resolve_address σ offset base index scale)) σ"

abbreviation LEA
  where "LEA op1 op2  Instr ''lea'' (Some op1) (Some op2) None"

definition semantics_PUSH :: "Operand  state  state"
  where "semantics_PUSH op1 σ 
          let src = operand_read σ op1;
              si  = operand_size op1;
              rsp = ucast (ucast(reg_read σ ''rsp'') - of_nat si :: 64 word) in
             operand_write (QWORD PTR [''rsp'']1) src (operand_write (Reg ''rsp'') rsp σ)"

abbreviation PUSH
  where "PUSH op1  Instr ''push'' (Some op1) None None"

definition semantics_POP :: "Operand  state  state"
  where "semantics_POP op1 σ 
          let si  = operand_size op1;
              src = operand_read σ (QWORD PTR [''rsp'']1);
              rsp = ucast (ucast(reg_read σ ''rsp'') + of_nat si::64 word) in
             operand_write op1 src (operand_write (Reg ''rsp'') rsp σ)"

abbreviation POP
  where "POP op1  Instr ''pop'' (Some op1) None None"

definition semantics_CALL :: "Operand  state  state"
  where "semantics_CALL op1 σ 
        let src = ucast (operand_read σ op1) in
           (state_update (setRip src) o semantics_PUSH (Reg ''rip'')) σ"

definition semantics_RET :: "state  state"
  where "semantics_RET σ 
          let a   = ucast (operand_read σ (QWORD PTR [''rsp'']1));
              rsp = ucast (reg_read σ ''rsp'') + 8 :: 64 word in
             (state_update (setRip a) o operand_write (Reg ''rsp'') (ucast rsp)) σ"

abbreviation RET
  where "RET  Instr ''ret'' None None None"

definition semantics_LEAVE :: "state  state"
  where "semantics_LEAVE  semantics_POP (Reg ''rbp'') o semantics_MOV (Reg ''rsp'') (Reg ''rbp'')"

abbreviation LEAVE
  where "LEAVE op1  Instr ''pop'' (Some op1) None None"

text ‹Generic operators ›

definition unop :: "('a ::len word  'a::len word)  
                     ('a::len word   string  bool) 
                      Operand  state  state"
  where "unop f g op1 σ 
          let si  = operand_size op1;
              dst = ucast (operand_read σ op1)::'a::len word in
             operand_write op1 (ucast (f dst)) (σ with [setFlags (g dst)])"

definition binop :: "('a::len word  'a ::len word  'a::len word) 
                     ('a::len word  'a::len word   string  bool) 
                      Operand  Operand  state  state"
  where "binop f g op1 op2 σ 
          let dst = ucast (operand_read σ op1)::'a::len word;
              src = ucast (operand_read σ op2)::'a::len word in
             operand_write op1 (ucast (f dst src)) (σ with [setFlags (g dst src)])"

definition unop_no_flags :: "('a ::len word  'a::len word)  Operand  state  state"
  where "unop_no_flags f op1 σ 
          let dst = ucast (operand_read σ op1)::'a::len word in
             operand_write op1 (ucast (f dst)) σ"

definition binop_flags :: "('a::len word  'a::len word   string  bool) 
                      Operand  Operand  state  state"
  where "binop_flags g op1 op2 σ 
          let si  = operand_size op1;
              dst = ucast (operand_read σ op1)::'a::len word;
              src = ucast (operand_read σ op2)::'a::len word in
            σ with [setFlags (g dst src)]"

definition binop_no_flags :: "('a::len word  'a ::len word  'a::len word) 
                      Operand  Operand  state  state"
  where "binop_no_flags f op1 op2 σ 
          let si  = operand_size op1;
              dst = ucast (operand_read σ op1)::'a::len word;
              src = ucast (operand_read σ op2)::'a::len word in
            operand_write op1 (ucast (f dst src)) σ"

definition binop_XMM :: "(64 word  64 word  64 word)  Operand  Operand  state  state"
  where "binop_XMM f op1 op2 σ 
          let dst = ucast (operand_read σ op1)::64word;
              src = ucast (operand_read σ op2)::64word in
            operand_write op1 (ucast (overwrite 0 64 dst (f dst src))) σ"

text ‹Arithmetic›

definition ADD_flags :: "'a::len word  'a::len word  string  bool"
  where "ADD_flags w0 w1 flag  case flag of
    ''zf''  w0 + w1 = 0
  | ''cf''  unat w0 + unat w1  2^(LENGTH('a))
  | ''of''  (w0 <s 0  w1 <s 0)  ¬(w0 <s 0  w0+w1 <s 0)
  | ''sf''  w0 + w1 <s 0
  | f       unknown_flags ''ADD'' f"

definition semantics_ADD :: "Operand  Operand  state  state"
  where "semantics_ADD op1  
           if operand_size op1 = 32 then binop ((+)::256 word  _  _) ADD_flags op1
      else if operand_size op1 = 16 then binop ((+)::128 word  _  _) ADD_flags op1
      else if operand_size op1 = 8  then binop ((+)::64  word  _  _) ADD_flags op1
      else if operand_size op1 = 4  then binop ((+)::32  word  _  _) ADD_flags op1
      else if operand_size op1 = 2  then binop ((+)::16  word  _  _) ADD_flags op1
      else if operand_size op1 = 1  then binop ((+)::8   word  _  _) ADD_flags op1
      else undefined"

abbreviation ADD
  where "ADD op1 op2  Instr ''add'' (Some op1) (Some op2) None"


definition INC_flags :: "256 word  ('a::len word  string  bool)"
  where "INC_flags cf w0 flag  case flag of
    ''zf''  w0 + 1 = 0
  | ''cf''  cf  0
  | ''of''  0 <=s w0  w0+1 <s 0
  | ''sf''  w0 + 1 <s 0
  | f       unknown_flags ''INC'' f"

definition semantics_INC :: "Operand  state  state" 
  where "semantics_INC op1 σ  
    let cf = flag_read σ ''cf'' in
           if operand_size op1 = 32 then unop ((+) (1::256 word)) (INC_flags cf) op1 σ
      else if operand_size op1 = 16 then unop ((+) (1::128 word)) (INC_flags cf) op1 σ
      else if operand_size op1 = 8  then unop ((+) (1::64  word)) (INC_flags cf) op1 σ
      else if operand_size op1 = 4  then unop ((+) (1::32  word)) (INC_flags cf) op1 σ
      else if operand_size op1 = 2  then unop ((+) (1::16  word)) (INC_flags cf) op1 σ
      else if operand_size op1 = 1  then unop ((+) (1::8   word)) (INC_flags cf) op1 σ
      else undefined"

abbreviation INC
  where "INC op1  Instr ''inc'' (Some op1) None None"

definition DEC_flags :: "256 word  ('a::len word  string  bool)"
  where "DEC_flags cf w0 flag  case flag of
    ''zf''  w0 = 1
  | ''cf''  cf  0
  | ''of''  w0 <s 0  0 <=s w0 - 1
  | ''sf''  w0 - 1 <s 0
  | f       unknown_flags ''DEC'' f"

definition semantics_DEC :: "Operand  state  state" 
  where "semantics_DEC op1 σ  
    let cf = flag_read σ ''cf'' in
           if operand_size op1 = 32 then unop (λ w . w - 1::256 word) (DEC_flags cf) op1 σ
      else if operand_size op1 = 16 then unop (λ w . w - 1::128 word) (DEC_flags cf) op1 σ
      else if operand_size op1 = 8  then unop (λ w . w - 1::64  word) (DEC_flags cf) op1 σ
      else if operand_size op1 = 4  then unop (λ w . w - 1::32  word) (DEC_flags cf) op1 σ
      else if operand_size op1 = 2  then unop (λ w . w - 1::16  word) (DEC_flags cf) op1 σ
      else if operand_size op1 = 1  then unop (λ w . w - 1::8   word) (DEC_flags cf) op1 σ
      else undefined"

abbreviation DEC
  where "DEC op1  Instr ''dec'' (Some op1) None None"

definition NEG_flags :: "('a::len word  string  bool)"
  where "NEG_flags w0 flag  case flag of
    ''zf''  w0 = 0
  | ''cf''  w0  0
  | ''sf''  - w0 <s 0
  | ''of''  msb (- w0)  msb w0
  | f       unknown_flags ''NEG'' f"


definition semantics_NEG :: "Operand  state  state" 
  where "semantics_NEG op1 σ  
           if operand_size op1 = 32 then unop (λ w0 . - (w0::256 word)) NEG_flags op1 σ
      else if operand_size op1 = 16 then unop (λ w0 . - (w0::128 word)) NEG_flags op1 σ
      else if operand_size op1 = 8  then unop (λ w0 . - (w0::64  word)) NEG_flags op1 σ
      else if operand_size op1 = 4  then unop (λ w0 . - (w0::32  word)) NEG_flags op1 σ
      else if operand_size op1 = 2  then unop (λ w0 . - (w0::16  word)) NEG_flags op1 σ
      else if operand_size op1 = 1  then unop (λ w0 . - (w0::8   word)) NEG_flags op1 σ
      else undefined"

abbreviation NEG
  where "NEG op1  Instr ''neg'' (Some op1) None None"

definition SUB_flags :: "'a::len word  'a::len word  string   bool"
  where "SUB_flags w0 w1 flag  case flag of
    ''zf''  w0 = w1
  | ''cf''  w0 < w1
  | ''sf''  w0 - w1 <s 0
  | ''of''  (msb w0  msb w1)  (msb (w0 - w1) = msb w1)
  | f       unknown_flags ''SUB'' f"

definition semantics_SUB :: "Operand  Operand  state  state"
  where "semantics_SUB op1  
           if operand_size op1 = 32 then binop ((-)::256 word  _  _) SUB_flags op1
      else if operand_size op1 = 16 then binop ((-)::128 word  _  _) SUB_flags op1
      else if operand_size op1 = 8  then binop ((-)::64  word  _  _) SUB_flags op1
      else if operand_size op1 = 4  then binop ((-)::32  word  _  _) SUB_flags op1
      else if operand_size op1 = 2  then binop ((-)::16  word  _  _) SUB_flags op1
      else if operand_size op1 = 1  then binop ((-)::8   word  _  _) SUB_flags op1
      else undefined"

abbreviation SUB
  where "SUB op1 op2  Instr ''sub'' (Some op1) (Some op2) None"

definition sbb :: "'b::len word  'a::len word  'a word  'a word"
  where "sbb cf dst src  dst - (src + ucast cf)"

definition SBB_flags :: "'b::len word  'a::len word  'a::len word  string   bool"
  where "SBB_flags cf dst src flag  case flag of
    ''zf''  sbb cf dst src = 0
  | ''cf''  dst < src + ucast cf
  | ''sf''  sbb cf dst src <s 0
  | ''of''  (msb dst  msb (src + ucast cf))  (msb (sbb cf dst src) = msb (src + ucast cf))
  | f       unknown_flags ''SBB'' f"

definition semantics_SBB :: "Operand  Operand  state  state"
  where "semantics_SBB op1 op2 σ  
    let cf = flag_read σ ''cf'' in
           if operand_size op1 = 32 then binop (sbb cf::256 word  _  _) (SBB_flags cf) op1 op2 σ
      else if operand_size op1 = 16 then binop (sbb cf::128 word  _  _) (SBB_flags cf) op1 op2 σ
      else if operand_size op1 = 8  then binop (sbb cf::64  word  _  _) (SBB_flags cf) op1 op2 σ
      else if operand_size op1 = 4  then binop (sbb cf::32  word  _  _) (SBB_flags cf) op1 op2 σ
      else if operand_size op1 = 2  then binop (sbb cf::16  word  _  _) (SBB_flags cf) op1 op2 σ
      else if operand_size op1 = 1  then binop (sbb cf::8   word  _  _) (SBB_flags cf) op1 op2 σ
      else undefined"

abbreviation SBB
  where "SBB op1 op2  Instr ''sbb'' (Some op1) (Some op2) None"

definition adc :: "'b::len word  'a::len word  'a word  'a word"
  where "adc cf dst src  dst + (src + ucast cf)"

definition ADC_flags :: "'b::len word  'a::len word  'a::len word  string   bool"
  where "ADC_flags cf dst src flag  case flag of
    ''zf''  adc cf dst src = 0
  | ''cf''  unat dst + unat src + unat cf  2^(LENGTH('a))
  | ''of''  (dst <s 0  src + ucast cf <s 0)  ¬(dst <s 0  adc cf dst src <s 0)
  | ''sf''  adc cf dst src <s 0
  | f       unknown_flags ''ADC'' f"


definition semantics_ADC :: "Operand  Operand  state  state"
  where "semantics_ADC op1 op2 σ  
    let cf = flag_read σ ''cf'' in
           if operand_size op1 = 32 then binop (adc cf::256 word  _  _) (ADC_flags cf) op1 op2 σ
      else if operand_size op1 = 16 then binop (adc cf::128 word  _  _) (ADC_flags cf) op1 op2 σ
      else if operand_size op1 = 8  then binop (adc cf::64  word  _  _) (ADC_flags cf) op1 op2 σ
      else if operand_size op1 = 4  then binop (adc cf::32  word  _  _) (ADC_flags cf) op1 op2 σ
      else if operand_size op1 = 2  then binop (adc cf::16  word  _  _) (ADC_flags cf) op1 op2 σ
      else if operand_size op1 = 1  then binop (adc cf::8   word  _  _) (ADC_flags cf) op1 op2 σ
      else undefined"

abbreviation ADC
  where "ADC op1 op2  Instr ''adc'' (Some op1) (Some op2) None"


definition write_MUL_result :: "string  string  'a::len word  _  state  state"
  where "write_MUL_result rh rl result flgs σ  
        let si = LENGTH('a) div 2 in
          operand_write (Reg rh) (ucast (si,2*siresult))
            (operand_write (Reg rl) (ucast (0,siresult))
              (σ with [setFlags flgs]))"

definition MUL_flags :: "'a::len word  string  bool"
  where "MUL_flags result flag  case flag of
        ''cf''  (LENGTH('a) div 2,LENGTH('a)result)  0
      | ''of''  (LENGTH('a) div 2,LENGTH('a)result)  0
      | f       unknown_flags ''MUL'' f"


definition IMUL_flags :: "'a::len word  string  bool"
  where "IMUL_flags result flag  case flag of
        ''cf''  (LENGTH('a) div 2,LENGTH('a)result)  (if result !! (LENGTH('a) div 2 - 1) then 2^(LENGTH('a) div 2)-1 else 0)
      | ''of''  (LENGTH('a) div 2,LENGTH('a)result)  (if result !! (LENGTH('a) div 2 - 1) then 2^(LENGTH('a) div 2)-1 else 0)
      | f       unknown_flags ''IMUL'' f"


(*
  Assumes LENGTH('a) is twice the size of the operands, e.g.:
    unop_MUL TYPE(16) True ''al'' (Reg ''r15b'') σ
*)
definition unop_MUL :: "'a::len itself  bool  string  Operand  state  state"
  where "unop_MUL _ signd op1_reg op2 σ 
          let cast = (if signd then scast else ucast);
              dst  = cast (operand_read σ (Reg op1_reg))::'a::len word;
              src  = cast (operand_read σ op2)::'a::len word;
              prod = dst * src;
              flgs = (if signd then IMUL_flags else MUL_flags) prod in
            if LENGTH('a) = 16 then
              write_MUL_result ''ah'' op1_reg prod flgs σ
            else if LENGTH('a) = 32 then
              write_MUL_result ''dx'' op1_reg prod flgs σ
            else if LENGTH('a) = 64 then
              write_MUL_result ''edx'' op1_reg  prod flgs σ
            else if LENGTH('a) = 128 then
              write_MUL_result ''rdx'' op1_reg prod flgs σ
            else
              undefined"

definition semantics_MUL :: "Operand  state  state"
  where "semantics_MUL op2  
           if operand_size op2 = 8 then unop_MUL TYPE(128) False ''rax'' op2
      else if operand_size op2 = 4 then unop_MUL TYPE(64)  False ''eax'' op2
      else if operand_size op2 = 2 then unop_MUL TYPE(32)  False ''ax''  op2
      else if operand_size op2 = 1 then unop_MUL TYPE(16)  False ''al''  op2
      else undefined"

abbreviation MUL
  where "MUL op1  Instr ''mul'' (Some op1) None None"

definition semantics_IMUL1 :: "Operand  state  state"
  where "semantics_IMUL1 op2  
           if operand_size op2 = 8 then unop_MUL TYPE(128) True ''rax'' op2
      else if operand_size op2 = 4 then unop_MUL TYPE(64)  True ''eax'' op2
      else if operand_size op2 = 2 then unop_MUL TYPE(32)  True ''ax''  op2
      else if operand_size op2 = 1 then unop_MUL TYPE(16)  True ''al''  op2
      else undefined"

abbreviation IMUL1
  where "IMUL1 op1  Instr ''imul'' (Some op1) None None"

definition ternop_IMUL :: "'a::len itself  Operand  Operand  Operand  state  state"
  where "ternop_IMUL _ op1 op2 op3 σ 
          let src1 = scast (operand_read σ op2)::'a::len word;
              src2 = scast (operand_read σ op3)::'a::len word;
              prod = src1 * src2;
              flgs = IMUL_flags prod in
            (operand_write op1 (ucast (0,LENGTH('a) div 2prod))
                (σ with [setFlags flgs]))"

definition semantics_IMUL2 :: "Operand  Operand  state  state"
  where "semantics_IMUL2 op1 op2  
           if operand_size op1 = 8 then ternop_IMUL TYPE(128) op1 op1 op2
      else if operand_size op1 = 4 then ternop_IMUL TYPE(64)  op1 op1 op2
      else if operand_size op1 = 2 then ternop_IMUL TYPE(32)  op1 op1 op2
      else if operand_size op1 = 1 then ternop_IMUL TYPE(16)  op1 op1 op2
      else undefined"

abbreviation IMUL2
  where "IMUL2 op1 op2  Instr ''imul'' (Some op1) (Some op2) None"

definition semantics_IMUL3 :: "Operand  Operand  Operand  state  state"
  where "semantics_IMUL3 op1 op2 op3  
           if operand_size op1 = 8 then ternop_IMUL TYPE(128) op1 op2 op3
      else if operand_size op1 = 4 then ternop_IMUL TYPE(64)  op1 op2 op3
      else if operand_size op1 = 2 then ternop_IMUL TYPE(32)  op1 op2 op3
      else if operand_size op1 = 1 then ternop_IMUL TYPE(16)  op1 op2 op3
      else undefined"

abbreviation IMUL3
  where "IMUL3 op1 op2 op3  Instr ''imul'' (Some op1) (Some op2) (Some op3)"

definition SHL_flags :: "nat  ('a::len word  string  bool)"
  where "SHL_flags n dst flag  case flag of
        ''cf''  dst !! (LENGTH('a) - n) 
      | ''of''  dst !! (LENGTH('a) - n - 1)  dst !! (LENGTH('a) - n)
      | ''zf''  (dst << n) = 0
      | ''sf''  dst !! (LENGTH('a) - n - 1)
      | f       unknown_flags ''SHL'' f"

definition semantics_SHL :: "Operand  Operand  state  state" 
  where "semantics_SHL op1 op2 σ  
    let src = unat (operand_read σ op2) in
           if operand_size op1 = 32 then unop (λ w . w << src::256 word) (SHL_flags src) op1 σ
      else if operand_size op1 = 16 then unop (λ w . w << src::128 word) (SHL_flags src) op1 σ
      else if operand_size op1 = 8  then unop (λ w . w << src::64  word) (SHL_flags src) op1 σ
      else if operand_size op1 = 4  then unop (λ w . w << src::32  word) (SHL_flags src) op1 σ
      else if operand_size op1 = 2  then unop (λ w . w << src::16  word) (SHL_flags src) op1 σ
      else if operand_size op1 = 1  then unop (λ w . w << src::8   word) (SHL_flags src) op1 σ
      else undefined"

abbreviation SHL
  where "SHL op1 op2  Instr ''shl'' (Some op1) (Some op2) None"

abbreviation SAL
  where "SAL op1 op2  Instr ''sal'' (Some op1) (Some op2) None"

definition SHR_flags :: "nat  ('a::len word  string  bool)"
  where "SHR_flags n dst flag  case flag of
        ''cf''  dst !! (n - 1) 
      | ''of''  msb dst
      | ''zf''  (dst >> n) = 0
      | f       unknown_flags ''SHR'' f"

definition semantics_SHR :: "Operand  Operand  state  state" 
  where "semantics_SHR op1 op2 σ  
    let src = unat (operand_read σ op2) in
           if operand_size op1 = 32 then unop (λ w . w >> src::256 word) (SHR_flags src) op1 σ
      else if operand_size op1 = 16 then unop (λ w . w >> src::128 word) (SHR_flags src) op1 σ
      else if operand_size op1 = 8  then unop (λ w . w >> src::64  word) (SHR_flags src) op1 σ
      else if operand_size op1 = 4  then unop (λ w . w >> src::32  word) (SHR_flags src) op1 σ
      else if operand_size op1 = 2  then unop (λ w . w >> src::16  word) (SHR_flags src) op1 σ
      else if operand_size op1 = 1  then unop (λ w . w >> src::8   word) (SHR_flags src) op1 σ
      else undefined"

abbreviation SHR
  where "SHR op1 op2  Instr ''shr'' (Some op1) (Some op2) None"

definition SAR_flags :: "nat  ('a::len word  string  bool)"
  where "SAR_flags n dst flag  case flag of
        ''cf''  dst !! (n - 1) 
      | ''of''  False
      | ''zf''  (dst >>> n) = 0
      | f       unknown_flags ''SAR'' f"


definition semantics_SAR :: "Operand  Operand  state  state" 
  where "semantics_SAR op1 op2 σ  
    let src = unat (operand_read σ op2) in
           if operand_size op1 = 32 then unop (λ w . w >>> src::256 word) (SAR_flags src) op1 σ
      else if operand_size op1 = 16 then unop (λ w . w >>> src::128 word) (SAR_flags src) op1 σ
      else if operand_size op1 = 8  then unop (λ w . w >>> src::64  word) (SAR_flags src) op1 σ
      else if operand_size op1 = 4  then unop (λ w . w >>> src::32  word) (SAR_flags src) op1 σ
      else if operand_size op1 = 2  then unop (λ w . w >>> src::16  word) (SAR_flags src) op1 σ
      else if operand_size op1 = 1  then unop (λ w . w >>> src::8   word) (SAR_flags src) op1 σ
      else undefined"

abbreviation SAR
  where "SAR op1 op2  Instr ''sar'' (Some op1) (Some op2) None"


definition shld :: "'b::len itself  nat  'a::len word  'a word  'a word"
  where "shld _ n dst src 
    let dstsrc  = (ucast dst << LENGTH('a)) OR (ucast src :: 'b word);
        shifted = LENGTH('a),LENGTH('a)*2(dstsrc << n) in
      ucast shifted"

definition SHLD_flags :: "'b::len itself  nat  ('a::len word  'a::len word  string  bool)"
  where "SHLD_flags b n src dst flag  case flag of
        ''cf''  dst !! (LENGTH('a) - n) 
      | ''of''  dst !! (LENGTH('a) - n - 1)  dst !! (LENGTH('a) - n)
      | ''zf''  shld b n dst src = 0
      | ''sf''  dst !! (LENGTH('a) - n - 1) ― ‹msb (shld n dst src)›
      | f       unknown_flags ''SHLD'' f"

definition semantics_SHLD :: "Operand  Operand  Operand  state  state"
  where "semantics_SHLD op1 op2 op3 σ  
    let src2 = unat (operand_read σ op3) in
           if operand_size op1 = 32 then binop (shld (TYPE(512)) src2 ::256 word  _  _) (SHLD_flags (TYPE(512)) src2) op1 op2 σ
      else if operand_size op1 = 16 then binop (shld (TYPE(256)) src2 ::128 word  _  _) (SHLD_flags (TYPE(256)) src2) op1 op2 σ
      else if operand_size op1 = 8  then binop (shld (TYPE(128)) src2 ::64  word  _  _) (SHLD_flags (TYPE(128)) src2) op1 op2 σ
      else if operand_size op1 = 4  then binop (shld (TYPE(64))  src2 ::32  word  _  _) (SHLD_flags (TYPE(64))  src2) op1 op2 σ
      else if operand_size op1 = 2  then binop (shld (TYPE(32))  src2 ::16  word  _  _) (SHLD_flags (TYPE(32))  src2) op1 op2 σ
      else if operand_size op1 = 1  then binop (shld (TYPE(16))  src2 ::8   word  _  _) (SHLD_flags (TYPE(16))  src2) op1 op2 σ
      else undefined"



definition ROL_flags :: "nat  ('a::len word  string  bool)" (*TODO check for unaffected flags *)
  where "ROL_flags n dst flag  case flag of
        ''cf''  dst !! (LENGTH('a) - n) 
      | ''of''  dst !! (LENGTH('a) - n - 1)  dst !! (LENGTH('a) - n)
      | f       unknown_flags ''ROL'' f"

definition semantics_ROL :: "Operand  Operand  state  state" 
  where "semantics_ROL op1 op2 σ  
    let src = unat (operand_read σ op2) in
           if operand_size op1 = 32 then unop (word_rotl src::256 word_) (ROL_flags src) op1 σ
      else if operand_size op1 = 16 then unop (word_rotl src::128 word_) (ROL_flags src) op1 σ
      else if operand_size op1 = 8  then unop (word_rotl src::64  word_) (ROL_flags src) op1 σ
      else if operand_size op1 = 4  then unop (word_rotl src::32  word_) (ROL_flags src) op1 σ
      else if operand_size op1 = 2  then unop (word_rotl src::16  word_) (ROL_flags src) op1 σ
      else if operand_size op1 = 1  then unop (word_rotl src::8   word_) (ROL_flags src) op1 σ
      else undefined"

abbreviation ROL
  where "ROL op1 op2  Instr ''rol'' (Some op1) (Some op2) None"

definition ROR_flags :: "nat  ('a::len word  string  bool)"
  where "ROR_flags n dst flag  case flag of
        ''cf''  dst !! (n - 1) 
      | ''of''  msb (word_rotr n dst)  (word_rotr n dst !! (LENGTH('a)-2))
      | f       unknown_flags ''ROR'' f"

definition semantics_ROR :: "Operand  Operand  state  state" 
  where "semantics_ROR op1 op2 σ  
    let src = unat (operand_read σ op2) in
           if operand_size op1 = 32 then unop (word_rotr src::256 word_) (ROR_flags src) op1 σ
      else if operand_size op1 = 16 then unop (word_rotr src::128 word_) (ROR_flags src) op1 σ
      else if operand_size op1 = 8  then unop (word_rotr src::64  word_) (ROR_flags src) op1 σ
      else if operand_size op1 = 4  then unop (word_rotr src::32  word_) (ROR_flags src) op1 σ
      else if operand_size op1 = 2  then unop (word_rotr src::16  word_) (ROR_flags src) op1 σ
      else if operand_size op1 = 1  then unop (word_rotr src::8   word_) (ROR_flags src) op1 σ
      else undefined"

abbreviation ROR
  where "ROR op1 op2  Instr ''ror'' (Some op1) (Some op2) None"

text ‹ flag-related ›

definition semantics_CMP :: "Operand  Operand  state  state"
  where "semantics_CMP op1  
           if operand_size op1 = 32 then binop_flags (SUB_flags::256 word  _  _  _) op1
      else if operand_size op1 = 16 then binop_flags (SUB_flags::128 word  _  _  _) op1
      else if operand_size op1 = 8  then binop_flags (SUB_flags::64  word  _  _  _) op1
      else if operand_size op1 = 4  then binop_flags (SUB_flags::32  word  _  _  _) op1
      else if operand_size op1 = 2  then binop_flags (SUB_flags::16  word  _  _  _) op1
      else if operand_size op1 = 1  then binop_flags (SUB_flags::8   word  _  _  _) op1
      else undefined"

abbreviation CMP
  where "CMP op1 op2  Instr ''cmp'' (Some op1) (Some op2) None"

definition logic_flags :: "('a::len word  'a::len word  'a::len word)  'a::len word  'a::len word  string   bool"
  where "logic_flags logic_op w0 w1 flag  case flag of
    ''zf''  logic_op w0 w1 = 0
  | ''cf''  False
  | ''of''  False
  | ''sf''  msb (logic_op w0 w1)
  | f       unknown_flags ''logic'' f"

definition semantics_TEST :: "Operand  Operand  state  state"
  where "semantics_TEST op1  
           if operand_size op1 = 32 then binop_flags (logic_flags ((AND)::256 word  _  _)) op1
      else if operand_size op1 = 16 then binop_flags (logic_flags ((AND)::128 word  _  _)) op1
      else if operand_size op1 = 8  then binop_flags (logic_flags ((AND)::64  word  _  _)) op1
      else if operand_size op1 = 4  then binop_flags (logic_flags ((AND)::32  word  _  _)) op1
      else if operand_size op1 = 2  then binop_flags (logic_flags ((AND)::16  word  _  _)) op1
      else if operand_size op1 = 1  then binop_flags (logic_flags ((AND)::8   word  _  _)) op1
      else undefined"

abbreviation TEST
  where "TEST op1 op2  Instr ''test'' (Some op1) (Some op2) None"


text ‹ sign extension ›
definition mov_sign_extension :: "('a::len) itself  ('b::len) itself  Operand  Operand  state  state"
  where "mov_sign_extension _ _ op1 op2 σ 
          let src = ucast (operand_read σ op2)::'b word in
             operand_write op1 (ucast (scast src::'a word)) σ"

definition semantics_MOVSXD :: "Operand  Operand  state  state"
  where "semantics_MOVSXD op1 op2 
      if (operand_size op1, operand_size op2) = (8,4) then
         mov_sign_extension (TYPE(64)) (TYPE(32)) op1 op2
      else if (operand_size op1, operand_size op2) = (8,2) then
         mov_sign_extension (TYPE(64)) (TYPE(16)) op1 op2
      else if (operand_size op1, operand_size op2) = (8,1) then
         mov_sign_extension (TYPE(64)) (TYPE(8)) op1 op2
      else if (operand_size op1, operand_size op2) = (4,2) then
         mov_sign_extension (TYPE(32)) (TYPE(16)) op1 op2
      else if (operand_size op1, operand_size op2) = (4,1) then
         mov_sign_extension (TYPE(32)) (TYPE(8)) op1 op2
      else if (operand_size op1, operand_size op2) = (2,1) then
         mov_sign_extension (TYPE(16)) (TYPE(8)) op1 op2
      else
        undefined"

abbreviation MOVSXD
  where "MOVSXD op1 op2  Instr ''movsxd'' (Some op1) (Some op2) None"

abbreviation MOVSX
  where "MOVSX op1 op2  Instr ''movsx'' (Some op1) (Some op2) None"

definition semantics_CDQE :: "state  state"
  where "semantics_CDQE  semantics_MOVSXD (Reg ''rax'') (Reg ''eax'')"

abbreviation CDQE
  where "CDQE  Instr ''cdqe'' None None None"

definition semantics_CDQ :: "state  state"
  where "semantics_CDQ σ 
          let src = ucast (operand_read σ (Reg ''eax'')) :: 32 word in
             operand_write (Reg ''edx'') (ucast (32,64(scast src::64 word))) σ"

abbreviation CDQ
  where "CDQ  Instr ''cdq'' None None None"

definition semantics_CQO :: "state  state"
  where "semantics_CQO σ 
          let src = ucast (operand_read σ (Reg ''rax'')) :: 64 word in
             operand_write (Reg ''rdx'') (ucast (64,128(scast src::128 word))) σ"

abbreviation CQO
  where "CQO  Instr ''cqo'' None None None"


text ‹logic›
definition semantics_AND :: "Operand  Operand  state  state"
  where "semantics_AND op1 op2 σ  
           if operand_size op1 = 32 then binop ((AND)::256 word  _  _) (logic_flags ((AND)::256 word  _  _)) op1 op2 σ
      else if operand_size op1 = 16 then binop ((AND)::128 word  _  _) (logic_flags ((AND)::128 word  _  _)) op1 op2 σ
      else if operand_size op1 = 8  then binop ((AND)::64  word  _  _) (logic_flags ((AND)::64  word  _  _)) op1 op2 σ
      else if operand_size op1 = 4  then binop ((AND)::32  word  _  _) (logic_flags ((AND)::32  word  _  _)) op1 op2 σ
      else if operand_size op1 = 2  then binop ((AND)::16  word  _  _) (logic_flags ((AND)::16  word  _  _)) op1 op2 σ
      else if operand_size op1 = 1  then binop ((AND)::8   word  _  _) (logic_flags ((AND)::8   word  _  _)) op1 op2 σ
      else undefined"

abbreviation AND'
  where "AND' op1 op2  Instr ''and'' (Some op1) (Some op2) None"

definition semantics_OR :: "Operand  Operand  state  state"
  where "semantics_OR op1 op2 σ  
           if operand_size op1 = 32 then binop ((OR)::256 word  _  _) (logic_flags ((OR)::256 word  _  _)) op1 op2 σ
      else if operand_size op1 = 16 then binop ((OR)::128 word  _  _) (logic_flags ((OR)::128 word  _  _)) op1 op2 σ
      else if operand_size op1 = 8  then binop ((OR)::64  word  _  _) (logic_flags ((OR)::64  word  _  _)) op1 op2 σ
      else if operand_size op1 = 4  then binop ((OR)::32  word  _  _) (logic_flags ((OR)::32  word  _  _)) op1 op2 σ
      else if operand_size op1 = 2  then binop ((OR)::16  word  _  _) (logic_flags ((OR)::16  word  _  _)) op1 op2 σ
      else if operand_size op1 = 1  then binop ((OR)::8   word  _  _) (logic_flags ((OR)::8   word  _  _)) op1 op2 σ
      else undefined"

abbreviation OR'
  where "OR' op1 op2  Instr ''or'' (Some op1) (Some op2) None"

definition semantics_XOR :: "Operand  Operand  state  state"
  where "semantics_XOR op1 op2 σ  
           if operand_size op1 = 32 then binop ((XOR)::256 word  _  _) (logic_flags ((XOR)::256 word  _  _)) op1 op2 σ
      else if operand_size op1 = 16 then binop ((XOR)::128 word  _  _) (logic_flags ((XOR)::128 word  _  _)) op1 op2 σ
      else if operand_size op1 = 8  then binop ((XOR)::64  word  _  _) (logic_flags ((XOR)::64  word  _  _)) op1 op2 σ
      else if operand_size op1 = 4  then binop ((XOR)::32  word  _  _) (logic_flags ((XOR)::32  word  _  _)) op1 op2 σ
      else if operand_size op1 = 2  then binop ((XOR)::16  word  _  _) (logic_flags ((XOR)::16  word  _  _)) op1 op2 σ
      else if operand_size op1 = 1  then binop ((XOR)::8   word  _  _) (logic_flags ((XOR)::8   word  _  _)) op1 op2 σ
      else undefined"

abbreviation XOR'
  where "XOR' op1 op2  Instr ''xor'' (Some op1) (Some op2) None"

definition semantics_XORPS :: "Operand  Operand  state  state"
  where "semantics_XORPS op1  
           if operand_size op1 = 32 then binop_no_flags ((XOR)::256 word  _  _) op1
      else if operand_size op1 = 16 then binop_no_flags ((XOR)::128 word  _  _) op1
      else if operand_size op1 = 8  then binop_no_flags ((XOR)::64  word  _  _) op1
      else if operand_size op1 = 4  then binop_no_flags ((XOR)::32  word  _  _) op1
      else if operand_size op1 = 2  then binop_no_flags ((XOR)::16  word  _  _) op1
      else if operand_size op1 = 1  then binop_no_flags ((XOR)::8   word  _  _) op1
      else undefined"

abbreviation XORPS
  where "XORPS op1 op2  Instr ''xorps'' (Some op1) (Some op2) None"

definition semantics_NOT :: "Operand  state  state" 
  where "semantics_NOT op1 σ  
           if operand_size op1 = 32 then unop_no_flags (not::256 word_) op1 σ
      else if operand_size op1 = 16 then unop_no_flags (not::128 word_) op1 σ
      else if operand_size op1 = 8  then unop_no_flags (not::64  word_) op1 σ
      else if operand_size op1 = 4  then unop_no_flags (not::32  word_) op1 σ
      else if operand_size op1 = 2  then unop_no_flags (not::16  word_) op1 σ
      else if operand_size op1 = 1  then unop_no_flags (not::8   word_) op1 σ
      else undefined"

abbreviation NOT'
  where "NOT' op1  Instr ''not'' (Some op1) None None"

text ‹ jumps ›
datatype FlagExpr = Flag string | FE_NOT FlagExpr | FE_AND FlagExpr FlagExpr | FE_OR FlagExpr FlagExpr | FE_EQ FlagExpr FlagExpr

primrec readFlagExpr :: "FlagExpr  state  bool"
  where
    "readFlagExpr (Flag f) σ = (flag_read σ f = 1)"
  | "readFlagExpr (FE_NOT fe) σ = (¬readFlagExpr fe σ)"
  | "readFlagExpr (FE_AND fe0 fe1) σ = (readFlagExpr fe0 σ  readFlagExpr fe1 σ)"
  | "readFlagExpr (FE_OR fe0 fe1) σ = (readFlagExpr fe0 σ  readFlagExpr fe1 σ)"
  | "readFlagExpr (FE_EQ fe0 fe1) σ = (readFlagExpr fe0 σ  readFlagExpr fe1 σ)"

definition semantics_cond_jump :: "FlagExpr  64 word  state  state"
  where "semantics_cond_jump fe a σ 
          let fv = readFlagExpr fe σ in
             if fv then state_update (setRip a) σ else σ"

definition semantics_JMP :: "Operand  state  state"
  where "semantics_JMP op1 σ 
          let a = ucast (operand_read σ op1) in
            state_update (setRip a) σ"

abbreviation JMP
  where "JMP op1  Instr ''jmp'' (Some op1) None None"

definition semantics_JO :: "Operand  state  state"
  where "semantics_JO op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (Flag ''of'') a σ"

abbreviation JO
  where "JO op1  Instr ''jo'' (Some op1) None None"

definition semantics_JNO :: "Operand  state  state"
  where "semantics_JNO op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_NOT (Flag ''of'')) a σ"

abbreviation JNO
  where "JNO op1  Instr ''jno'' (Some op1) None None"

definition semantics_JS :: "Operand  state  state"
  where "semantics_JS op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (Flag ''sf'') a σ"

abbreviation JS
  where "JS op1  Instr ''js'' (Some op1) None None"

definition semantics_JNS :: "Operand  state  state"
  where "semantics_JNS op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_NOT (Flag ''sf'')) a σ"

abbreviation JNS
  where "JNS op1  Instr ''jns'' (Some op1) None None"

definition semantics_JE :: "Operand  state  state"
  where "semantics_JE op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (Flag ''zf'') a σ"

abbreviation JE
  where "JE op1  Instr ''je'' (Some op1) None None"

abbreviation JZ
  where "JZ op1  Instr ''jz'' (Some op1) None None"

definition semantics_JNE :: "Operand  state  state"
  where "semantics_JNE op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_NOT (Flag ''zf'')) a σ"

abbreviation JNE
  where "JNE op1  Instr ''jne'' (Some op1) None None"

abbreviation JNZ
  where "JNZ op1  Instr ''jnz'' (Some op1) None None"

definition semantics_JB :: "Operand  state  state"
  where "semantics_JB op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (Flag ''cf'') a σ"

abbreviation JB
  where "JB op1  Instr ''jb'' (Some op1) None None"

abbreviation JNAE
  where "JNAE op1  Instr ''jnae'' (Some op1) None None"

abbreviation JC
  where "JC op1  Instr ''jc'' (Some op1) None None"

definition semantics_JNB :: "Operand  state  state"
  where "semantics_JNB op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_NOT (Flag ''cf'')) a σ"

abbreviation JNB
  where "JNB op1  Instr ''jnb'' (Some op1) None None"

abbreviation JAE
  where "JAE op1  Instr ''jae'' (Some op1) None None"

abbreviation JNC
  where "JNC op1  Instr ''jnc'' (Some op1) None None"

definition semantics_JBE :: "Operand  state  state"
  where "semantics_JBE op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_OR (Flag ''cf'') (Flag ''zf'')) a σ"

abbreviation JBE
  where "JBE op1  Instr ''jbe'' (Some op1) None None"

abbreviation JNA
  where "JNA op1  Instr ''jna'' (Some op1) None None"

definition semantics_JA :: "Operand  state  state"
  where "semantics_JA op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_AND (FE_NOT (Flag ''cf'')) (FE_NOT (Flag ''zf''))) a σ"

abbreviation JA
  where "JA op1  Instr ''ja'' (Some op1) None None"

abbreviation JNBE
  where "JNBE op1  Instr ''jnbe'' (Some op1) None None"

definition semantics_JL :: "Operand  state  state"
  where "semantics_JL op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_NOT (FE_EQ (Flag ''sf'') (Flag ''of''))) a σ"

abbreviation JL
  where "JL op1  Instr ''jl'' (Some op1) None None"

abbreviation JNGE
  where "JNGE op1  Instr ''jnge'' (Some op1) None None"

definition semantics_JGE :: "Operand  state  state"
  where "semantics_JGE op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_EQ (Flag ''sf'') (Flag ''of'')) a σ"

abbreviation JGE
  where "JGE op1  Instr ''jge'' (Some op1) None None"

abbreviation JNL
  where "JNL op1  Instr ''jnl'' (Some op1) None None"

definition semantics_JLE :: "Operand  state  state"
  where "semantics_JLE op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_OR (Flag ''zf'') (FE_NOT (FE_EQ (Flag ''sf'') (Flag ''of'')))) a σ"

abbreviation JLE
  where "JLE op1  Instr ''jle'' (Some op1) None None"

abbreviation JNG
  where "JNG op1  Instr ''jng'' (Some op1) None None"

definition semantics_JG :: "Operand  state  state"
  where "semantics_JG op1 σ 
          let a = ucast (operand_read σ op1) in
             semantics_cond_jump (FE_AND (FE_NOT (Flag ''zf'')) (FE_EQ (Flag ''sf'') (Flag ''of''))) a σ"

abbreviation JG
  where "JG op1  Instr ''jg'' (Some op1) None None"

abbreviation JNLE
  where "JNLE op1  Instr ''jnle'' (Some op1) None None"


text ‹ setXX ›
definition semantics_setXX :: "FlagExpr  Operand  state  state"
  where "semantics_setXX fe op1 σ 
          let fv = readFlagExpr fe σ in
             operand_write op1 (fromBool fv) σ"

abbreviation semantics_SETO :: "Operand  state  state"
  where "semantics_SETO  semantics_setXX (Flag ''of'')"

abbreviation SETO
  where "SETO op1  Instr ''seto'' (Some op1) None None"

abbreviation semantics_SETNO :: "Operand  state  state"
  where "semantics_SETNO  semantics_setXX (FE_NOT (Flag ''of''))"

abbreviation SETNO
  where "SETNO op1  Instr ''setno'' (Some op1) None None"

abbreviation semantics_SETS :: "Operand  state  state"
  where "semantics_SETS  semantics_setXX (Flag ''sf'')"

abbreviation SETS
  where "SETS op1  Instr ''sets'' (Some op1) None None"

abbreviation semantics_SETNS :: "Operand  state  state"
  where "semantics_SETNS  semantics_setXX (FE_NOT (Flag ''sf''))"

abbreviation SETNS
  where "SETNS op1  Instr ''setns'' (Some op1) None None"

abbreviation semantics_SETE :: "Operand  state  state"
  where "semantics_SETE  semantics_setXX (Flag ''zf'')"

abbreviation SETE
  where "SETE op1  Instr ''sete'' (Some op1) None None"

abbreviation SETZ
  where "SETZ op1  Instr ''setz'' (Some op1) None None"

abbreviation semantics_SETNE :: "Operand  state  state"
  where "semantics_SETNE  semantics_setXX (FE_NOT (Flag ''zf''))"

abbreviation SETNE
  where "SETNE op1  Instr ''setne'' (Some op1) None None"

abbreviation SETNZ
  where "SETNZ op1  Instr ''setnz'' (Some op1) None None"

abbreviation semantics_SETB :: "Operand  state  state"
  where "semantics_SETB  semantics_setXX (Flag ''cf'')"

abbreviation SETB
  where "SETB op1  Instr ''setb'' (Some op1) None None"

abbreviation SETNAE
  where "SETNAE op1  Instr ''setnae'' (Some op1) None None"

abbreviation SETC
  where "SETC op1  Instr ''setc'' (Some op1) None None"

abbreviation semantics_SETNB :: "Operand  state  state"
  where "semantics_SETNB  semantics_setXX (FE_NOT (Flag ''cf''))"

abbreviation SETNB
  where "SETNB op1  Instr ''setnb'' (Some op1) None None"

abbreviation SETAE
  where "SETAE op1  Instr ''setae'' (Some op1) None None"

abbreviation SETNC
  where "SETNC op1  Instr ''setnc'' (Some op1) None None"

abbreviation semantics_SETBE :: "Operand  state  state"
  where "semantics_SETBE  semantics_setXX (FE_OR (Flag ''cf'') (Flag ''zf''))"

abbreviation SETBE
  where "SETBE op1  Instr ''setbe'' (Some op1) None None"

abbreviation SETNA
  where "SETNA op1  Instr ''setna'' (Some op1) None None"

abbreviation semantics_SETA :: "Operand  state  state"
  where "semantics_SETA  semantics_setXX (FE_AND (FE_NOT (Flag ''cf'')) (FE_NOT (Flag ''zf'')))"

abbreviation SETA
  where "SETA op1  Instr ''seta'' (Some op1) None None"

abbreviation SETNBE
  where "SETNBE op1  Instr ''setnbe'' (Some op1) None None"

abbreviation semantics_SETL :: "Operand  state  state"
  where "semantics_SETL  semantics_setXX (FE_NOT (FE_EQ (Flag ''sf'') (Flag ''of'')))"

abbreviation SETL
  where "SETL op1  Instr ''setl'' (Some op1) None None"

abbreviation SETNGE
  where "SETNGE op1  Instr ''setnge'' (Some op1) None None"

abbreviation semantics_SETGE :: "Operand  state  state"
  where "semantics_SETGE  semantics_setXX (FE_EQ (Flag ''sf'') (Flag ''of''))"

abbreviation SETGE
  where "SETGE op1  Instr ''setge'' (Some op1) None None"

abbreviation SETNL
  where "SETNL op1  Instr ''setnl'' (Some op1) None None"

abbreviation semantics_SETLE :: "Operand  state  state"
  where "semantics_SETLE  semantics_setXX (FE_OR (Flag ''zf'') (FE_NOT (FE_EQ (Flag ''sf'') (Flag ''of''))))"

abbreviation SETLE
  where "SETLE op1  Instr ''setle'' (Some op1) None None"

abbreviation SETNG
  where "SETNG op1  Instr ''setng'' (Some op1) None None"

abbreviation semantics_SETG :: "Operand  state  state"
  where "semantics_SETG  semantics_setXX (FE_AND (FE_NOT (Flag ''zf'')) (FE_EQ (Flag ''sf'') (Flag ''of'')))"

abbreviation SETG
  where "SETG op1  Instr ''setg'' (Some op1) None None"

abbreviation SETNLE
  where "SETNLE op1  Instr ''setnle'' (Some op1) None None"


text ‹ conditional moves ›

primrec cmov
  where 
    "cmov True dst src = src"
  | "cmov False dst src = dst"

definition semantics_CMOV :: "FlagExpr  Operand  Operand  state  state"
  where "semantics_CMOV fe op1 op2 σ 
          let fv = readFlagExpr fe σ;
              dst = operand_read σ op1;
              src = operand_read σ op2 in
            operand_write op1 (cmov fv dst src) σ"

abbreviation "semantics_CMOVNE  semantics_CMOV (FE_NOT (Flag ''zf''))"

abbreviation CMOVNE
  where "CMOVNE op1 op2  Instr ''movne'' (Some op1) (Some op2) None"

abbreviation "semantics_CMOVNS  semantics_CMOV (FE_NOT (Flag ''sf''))"

abbreviation CMOVNS
  where "CMOVNS op1 op2  Instr ''movns'' (Some op1) (Some op2) None"


text ‹Floating Point›
definition semantics_ADDSD :: "Operand  Operand  state  state"
  where "semantics_ADDSD  binop_XMM unknown_addsd"

definition semantics_SUBSD :: "Operand  Operand  state  state"
  where "semantics_SUBSD  binop_XMM unknown_subsd"

definition semantics_MULSD :: "Operand  Operand  state  state"
  where "semantics_MULSD  binop_XMM unknown_mulsd"

definition semantics_DIVSD :: "Operand  Operand  state  state"
  where "semantics_DIVSD  binop_XMM unknown_divsd"

definition UCOMISD_flags :: "64 word  64 word  string  bool"
  where "UCOMISD_flags w0 w1 f  
  if f  {''zf'',''pf'',''cf''} then case unknown_ucomisd w0 w1 of
      FP_Unordered  True
    | FP_GT         False
    | FP_LT         f = ''cf''
    | FP_EQ         f = ''zf''
  else
    unknown_flags ''UCOMISD'' f"

definition semantics_UCOMISD :: "Operand  Operand  state  state"
  where "semantics_UCOMISD  binop_flags UCOMISD_flags"


abbreviation ADDSD
  where "ADDSD op1 op2  Instr ''addsd'' (Some op1) (Some op2) None"

abbreviation SUBSD
  where "SUBSD op1 op2  Instr ''subsd'' (Some op1) (Some op2) None"

abbreviation MULSD
  where "MULSD op1 op2  Instr ''mulsd'' (Some op1) (Some op2) None"

abbreviation DIVSD
  where "DIVSD op1 op2  Instr ''divsd'' (Some op1) (Some op2) None"

abbreviation UCOMISD
  where "UCOMISD op1 op2  Instr ''ucomisd'' (Some op1) (Some op2) None"



(* SIMD *)

definition simd_32_128 :: "(32 word  32 word  32 word)  128 word  128 word  128 word" 
  where "simd_32_128 f dst src  
            ((ucast (0,32(f (ucast (96,128dst)) (ucast (96,128src))))) << 96) OR
            ((ucast (0,32(f (ucast (64,96dst))  (ucast (64,96src))))) << 64)  OR
            ((ucast (0,32(f (ucast (32,64dst))  (ucast (32,64src))))) << 32)  OR
             (ucast (0,32(f (ucast (0,32dst))   (ucast (0,32src)))))"

abbreviation semantics_PADDD :: "Operand  Operand  state  state"
  where "semantics_PADDD  binop_no_flags (simd_32_128 (+))"

abbreviation PADDD
  where "PADDD op1 op2  Instr ''paddd'' (Some op1) (Some op2) None"

definition pshufd :: "128 word  8 word  128 word"
  where "pshufd src n  ((0,32(src >> (unat (6,8n)*32))) << 96) OR
                        ((0,32(src >> (unat (4,6n)*32))) << 64) OR
                        ((0,32(src >> (unat (2,4n)*32))) << 32) OR
                        ((0,32(src >> (unat (0,2n)*32))))"

lemmas pshufd_numeral[simp] = pshufd_def[of "numeral n"] for n
lemmas pshufd_0[simp] = pshufd_def[of 0]
lemmas pshufd_1[simp] = pshufd_def[of 1]

definition semantics_PSHUFD :: "Operand  Operand  Operand  state  state"
  where "semantics_PSHUFD op1 op2 op3 σ  
    let src = ucast (operand_read σ op2);
        n   = ucast (operand_read σ op3) in
      operand_write op1 (ucast (pshufd src n)) σ"

abbreviation PSHUFD
  where "PSHUFD op1 op2 op3  Instr ''pshufd'' op1 op2 op3"

definition semantics_PEXTRD :: "Operand  Operand  Operand  state  state"
  where "semantics_PEXTRD op1 op2 op3 σ 
          let src = operand_read σ op2;
              n   = unat (operand_read σ op3) mod 4 in
             operand_write op1 (ucast ((0,32(src >> n*32)))) σ"

abbreviation PEXTRD
  where "PEXTRD op1 op2 op3  Instr ''pextrd'' op1 op2 op3"

definition semantics_PINSRD :: "Operand  Operand  Operand  state  state"
  where "semantics_PINSRD op1 op2 op3 σ 
          let dst = ucast (operand_read σ op1)::128 word;
              src = ucast (operand_read σ op2)::128 word;
              n   = unat (operand_read σ op3) mod 4;
              m   = 0xFFFFFFFF << (n * 32) :: 128 word;
              t   = (src << (n *32)) AND m in
             operand_write op1 (ucast ((dst AND NOT m) OR t)) σ"

abbreviation PINSRD
  where "PINSRD op1 op2 op3  Instr ''pinsrd'' op1 op2 op3"


(* remainder *)



definition bswap :: "32 word  32 word"
  where "bswap w  ((0,8w) << 24) OR ((8,16w) << 16) OR ((16,24w) << 8) OR (24,32w)"

lemmas bswap_numeral[simp] = bswap_def[of "numeral n"] for n
lemmas bswap_0[simp] = bswap_def[of 0]
lemmas bswap_1[simp] = bswap_def[of 1]

definition semantics_BSWAP :: "Operand  state  state"
  where "semantics_BSWAP  unop_no_flags bswap"

abbreviation BSWAP
  where "BSWAP op1  Instr ''bswap'' op1 None None"



definition semantics_NOP :: "state  state"
  where "semantics_NOP  id"

abbreviation NOP0
  where "NOP0  Instr ''nop'' None None None"

abbreviation NOP1
  where "NOP1 op1  Instr ''nop'' (Some op1) None None"

abbreviation NOP2
  where "NOP2 op1 op2  Instr ''nop'' (Some op1) (Some op2) None"

abbreviation NOP3
  where "NOP3 op1 op2 op3  Instr ''nop'' (Some op1) (Some op2) (Some op3)"



definition semantics
  where "semantics i 
  case i of
    (Instr ''mov''     (Some op1) (Some op2) _ _)           semantics_MOV op1 op2
  | (Instr ''movabs''  (Some op1) (Some op2) _ _)           semantics_MOV op1 op2
  | (Instr ''movaps''  (Some op1) (Some op2) _ _)           semantics_MOV op1 op2
  | (Instr ''movdqu''  (Some op1) (Some op2) _ _)           semantics_MOV op1 op2
  | (Instr ''movd''    (Some op1) (Some op2) _ _)           semantics_MOVD op1 op2
  | (Instr ''movzx''   (Some op1) (Some op2) _ _)           semantics_MOV op1 op2
  | (Instr ''movsd''   (Some op1) (Some op2) _ _)           semantics_MOVSD op1 op2
  | (Instr ''movq''    (Some op1) (Some op2) _ _)           semantics_MOVSD op1 op2
  | (Instr ''lea''     (Some op1) (Some op2) _ _)           semantics_LEA op1 op2
  | (Instr ''push''    (Some op1) _          _ _)           semantics_PUSH op1
  | (Instr ''pop''     (Some op1) _          _ _)           semantics_POP op1
  | (Instr ''ret''     _ _ _ _)                             semantics_RET
  | (Instr ''call''    (Some op1) _ _ _)                    semantics_CALL op1
  | (Instr ''leave''   _ _ _ _)                             semantics_LEAVE
  ― ‹arithmetic›                                           
  | (Instr ''add''     (Some op1) (Some op2)  _ _)          semantics_ADD op1 op2
  | (Instr ''inc''     (Some op1) _           _ _)          semantics_INC op1
  | (Instr ''dec''     (Some op1) _           _ _)          semantics_DEC op1
  | (Instr ''neg''     (Some op1) _           _ _)          semantics_NEG op1
  | (Instr ''sub''     (Some op1) (Some op2)  _ _)          semantics_SUB op1 op2
  | (Instr ''sbb''     (Some op1) (Some op2)  _ _)          semantics_SBB op1 op2
  | (Instr ''adc''     (Some op1) (Some op2)  _ _)          semantics_ADC op1 op2
  | (Instr ''mul''     (Some op1) _ _ _)                    semantics_MUL op1 
  | (Instr ''imul''    (Some op1) None _ _)                 semantics_IMUL1 op1 
  | (Instr ''imul''    (Some op1) (Some op2) None _)        semantics_IMUL2 op1 op2
  | (Instr ''imul''    (Some op1) (Some op2) (Some op3) _)  semantics_IMUL3 op1 op2 op3
  | (Instr ''shl''     (Some op1) (Some op2) None _)        semantics_SHL op1 op2
  | (Instr ''sal''     (Some op1) (Some op2) None _)        semantics_SHL op1 op2
  | (Instr ''shr''     (Some op1) (Some op2) None _)        semantics_SHR op1 op2
  | (Instr ''sar''     (Some op1) (Some op2) None _)        semantics_SAR op1 op2
  | (Instr ''shld''    (Some op1) (Some op2) (Some op3) _)  semantics_SHLD op1 op2 op3
  | (Instr ''rol''     (Some op1) (Some op2) None _)        semantics_ROL op1 op2
  | (Instr ''ror''     (Some op1) (Some op2) None _)        semantics_ROR op1 op2
  ― ‹flag-related›
  | (Instr ''cmp''     (Some op1) (Some op2)  _ _)          semantics_CMP op1 op2
  | (Instr ''test''    (Some op1) (Some op2)  _ _)          semantics_TEST op1 op2
  ― ‹sign-extension›                                       
  | (Instr ''movsxd''  (Some op1) (Some op2)  _ _)          semantics_MOVSXD op1 op2
  | (Instr ''movsx''   (Some op1) (Some op2)  _ _)          semantics_MOVSXD op1 op2
  | (Instr ''cdqe''    (Some op1) (Some op2)  _ _)          semantics_CDQE
  | (Instr ''cdq''     (Some op1) (Some op2)  _ _)          semantics_CDQ
  | (Instr ''cqo''     (Some op1) (Some op2)  _ _)          semantics_CQO
  ― ‹logic›
  | (Instr ''and''     (Some op1) (Some op2)  _ _)          semantics_AND op1 op2
  | (Instr ''or''      (Some op1) (Some op2)  _ _)          semantics_OR  op1 op2
  | (Instr ''xor''     (Some op1) (Some op2)  _ _)          semantics_XOR op1 op2
  | (Instr ''xorps''   (Some op1) (Some op2)  _ _)          semantics_XORPS op1 op2
  | (Instr ''not''     (Some op1) _  _ _)                   semantics_NOT op1
  ― ‹jumps›
  | (Instr ''jmp''     (Some op1) None  _ _)                semantics_JMP op1
  | (Instr ''jo''      (Some op1) None  _ _)                semantics_JO op1
  | (Instr ''jno''     (Some op1) None  _ _)                semantics_JNO op1
  | (Instr ''js''      (Some op1) None  _ _)                semantics_JS op1
  | (Instr ''jns''     (Some op1) None  _ _)                semantics_JNS op1
  | (Instr ''je''      (Some op1) None  _ _)                semantics_JE op1
  | (Instr ''jz''      (Some op1) None  _ _)                semantics_JE op1
  | (Instr ''jne''     (Some op1) None  _ _)                semantics_JNE op1
  | (Instr ''jnz''     (Some op1) None  _ _)                semantics_JNE op1
  | (Instr ''jb''      (Some op1) None  _ _)                semantics_JB op1
  | (Instr ''jnae''    (Some op1) None  _ _)                semantics_JB op1
  | (Instr ''jc''      (Some op1) None  _ _)                semantics_JB op1
  | (Instr ''jnb''     (Some op1) None  _ _)                semantics_JNB op1
  | (Instr ''jae''     (Some op1) None  _ _)                semantics_JNB op1
  | (Instr ''jnc''     (Some op1) None  _ _)                semantics_JNB op1
  | (Instr ''jbe''     (Some op1) None  _ _)                semantics_JBE op1
  | (Instr ''jna''     (Some op1) None  _ _)                semantics_JBE op1
  | (Instr ''ja''      (Some op1) None  _ _)                semantics_JA op1
  | (Instr ''jnbe''    (Some op1) None  _ _)                semantics_JA op1
  | (Instr ''jl''      (Some op1) None  _ _)                semantics_JL op1
  | (Instr ''jnge''    (Some op1) None  _ _)                semantics_JL op1
  | (Instr ''jge''     (Some op1) None  _ _)                semantics_JGE op1
  | (Instr ''jnl''     (Some op1) None  _ _)                semantics_JGE op1
  | (Instr ''jle''     (Some op1) None  _ _)                semantics_JLE op1
  | (Instr ''jng''     (Some op1) None  _ _)                semantics_JLE op1
  | (Instr ''jg''      (Some op1) None  _ _)                semantics_JG op1
  | (Instr ''jnle''    (Some op1) None  _ _)                semantics_JG op1
  ― ‹setXX›
  | (Instr ''seto''    (Some op1) None  _ _)                semantics_SETO op1
  | (Instr ''setno''   (Some op1) None  _ _)                semantics_SETNO op1
  | (Instr ''sets''    (Some op1) None  _ _)                semantics_SETS op1
  | (Instr ''setns''   (Some op1) None  _ _)                semantics_SETNS op1
  | (Instr ''sete''    (Some op1) None  _ _)                semantics_SETE op1
  | (Instr ''setz''    (Some op1) None  _ _)                semantics_SETE op1
  | (Instr ''setne''   (Some op1) None  _ _)                semantics_SETNE op1
  | (Instr ''setnz''   (Some op1) None  _ _)                semantics_SETNE op1
  | (Instr ''setb''    (Some op1) None  _ _)                semantics_SETB op1
  | (Instr ''setnae''  (Some op1) None  _ _)                semantics_SETB op1
  | (Instr ''setc''    (Some op1) None  _ _)                semantics_SETB op1
  | (Instr ''setnb''   (Some op1) None  _ _)                semantics_SETNB op1
  | (Instr ''setae''   (Some op1) None  _ _)                semantics_SETNB op1
  | (Instr ''setnc''   (Some op1) None  _ _)                semantics_SETNB op1
  | (Instr ''setbe''   (Some op1) None  _ _)                semantics_SETBE op1
  | (Instr ''setna''   (Some op1) None  _ _)                semantics_SETBE op1
  | (Instr ''seta''    (Some op1) None  _ _)                semantics_SETA op1
  | (Instr ''setnbe''  (Some op1) None  _ _)                semantics_SETA op1
  | (Instr ''setl''    (Some op1) None  _ _)                semantics_SETL op1
  | (Instr ''setnge''  (Some op1) None  _ _)                semantics_SETL op1
  | (Instr ''setge''   (Some op1) None  _ _)                semantics_SETGE op1
  | (Instr ''setnl''   (Some op1) None  _ _)                semantics_SETGE op1
  | (Instr ''setle''   (Some op1) None  _ _)                semantics_SETLE op1
  | (Instr ''setng''   (Some op1) None  _ _)                semantics_SETLE op1
  | (Instr ''setg''    (Some op1) None  _ _)                semantics_SETG op1
  | (Instr ''setnle''  (Some op1) None  _ _)                semantics_SETG op1
  ― ‹conditional moves›
  | (Instr ''cmovne''  (Some op1) (Some op2)  _ _)          semantics_CMOVNE op1 op2
  | (Instr ''cmovns''  (Some op1) (Some op2)  _ _)          semantics_CMOVNS op1 op2
  ― ‹floating point (double)›                                           
  | (Instr ''addsd''   (Some op1) (Some op2)  _ _)          semantics_ADDSD op1 op2
  | (Instr ''subsd''   (Some op1) (Some op2)  _ _)          semantics_SUBSD op1 op2
  | (Instr ''mulsd''   (Some op1) (Some op2)  _ _)          semantics_MULSD op1 op2
  | (Instr ''divsd''   (Some op1) (Some op2)  _ _)          semantics_DIVSD op1 op2
  | (Instr ''ucomisd'' (Some op1) (Some op2)  _ _)          semantics_UCOMISD op1 op2
  ― ‹simd›
  | (Instr ''paddd''   (Some op1) (Some op2) _ _)           semantics_PADDD op1 op2
  | (Instr ''pshufd''  (Some op1) (Some op2) (Some op3) _)  semantics_PSHUFD op1 op2 op3
  | (Instr ''pextrd''  (Some op1) (Some op2) (Some op3) _)  semantics_PEXTRD op1 op2 op3
  | (Instr ''pinsrd''  (Some op1) (Some op2) (Some op3) _)  semantics_PINSRD op1 op2 op3
  ― ‹remainder›
  | (Instr ''nop''     _ _ _ _)                             semantics_NOP
  | (Instr ''bswap''   (Some op1) _ _ _)                    semantics_BSWAP op1
  ― ‹external function›
  | (ExternalFunc f)  f
  | i  unknown_semantics i"


text ‹A step function. In X86. the RIP register is incremented before the instruction is executed.
      This is important, e.g., when RIP is used in a jump address.›
definition step :: "I  state  state" 
  where "step i σ  
    let σ' = σ with [setRip (instr_next i)] in
      semantics i σ'"


text ‹All simplification rules used during symbolic execution.›
lemmas semantics_simps = 
      Let_def unop_def unop_no_flags_def binop_def binop_flags_def binop_no_flags_def binop_XMM_def
      semantics_def mov_sign_extension_def simd_32_128_def
      write_MUL_result_def unop_MUL_def ternop_IMUL_def sbb_def adc_def shld_def

      semantics_MOV_def semantics_MOVSD_def semantics_MOVD_def semantics_CMOV_def
      semantics_LEA_def semantics_PUSH_def semantics_POP_def
      semantics_RET_def semantics_CALL_def semantics_LEAVE_def
      semantics_ADD_def semantics_INC_def semantics_DEC_def semantics_NEG_def semantics_SUB_def
      semantics_SBB_def semantics_ADC_def
      semantics_MUL_def semantics_IMUL1_def semantics_IMUL2_def semantics_IMUL3_def
      semantics_SHL_def semantics_SHR_def semantics_SAR_def semantics_SHLD_def
      semantics_ROL_def semantics_ROR_def
      semantics_CMP_def semantics_TEST_def
      semantics_MOVSXD_def semantics_CDQE_def semantics_CDQ_def semantics_CQO_def
      semantics_AND_def semantics_OR_def semantics_XOR_def semantics_XORPS_def semantics_NOT_def
      semantics_cond_jump_def semantics_JMP_def
      semantics_JO_def semantics_JNO_def semantics_JS_def semantics_JNS_def
      semantics_JE_def semantics_JNE_def semantics_JB_def semantics_JNB_def
      semantics_JBE_def semantics_JA_def semantics_JL_def semantics_JGE_def 
      semantics_JLE_def semantics_JG_def
      semantics_setXX_def 
      semantics_ADDSD_def semantics_SUBSD_def semantics_MULSD_def semantics_DIVSD_def semantics_UCOMISD_def
      semantics_NOP_def semantics_BSWAP_def semantics_PSHUFD_def semantics_PEXTRD_def semantics_PINSRD_def

      SUB_flags_def ADD_flags_def INC_flags_def DEC_flags_def NEG_flags_def MUL_flags_def IMUL_flags_def
      SHL_flags_def SHR_flags_def SAR_flags_def SHLD_flags_def logic_flags_def UCOMISD_flags_def


end
end

Theory StateCleanUp

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Removing superfluous memory writes"

theory StateCleanUp
  imports State "HOL-Eisbach.Eisbach"
begin


definition "assumptions  id"

text ‹We are going to make schematic theorems of the form:
      \[
        @{term assumptions} ?A \implies \ldots 
      \]
      The assumptions will be generated on-the-fly. The seemingly weird lemmas below achieves that.›


lemma assumptions_impI:
assumes "assumptions (P  A)"
  shows "P"
  using assms
  by (auto simp add: assumptions_def)

lemma assumptions_conjE:
  shows "assumptions (P  A)  P  assumptions A"
  by (auto simp add: assumptions_def)

lemma assumptionsI:
  shows "assumptions True"
  by (auto simp add: assumptions_def)



text ‹Consider two consecutive memory updates. If they write to the same memory locations, only one of these
      need to be kept. We formulate an Eisbach method to that end. ›


text ‹Returns true if two states are equal except for a specific memory region.›
definition eq_except_for_mem :: "state  state  64 word  nat  256 word  bool  bool"
  where "eq_except_for_mem σ σ' a si v b  σ with [a,si :=m v] = (if b then σ' else σ' with [a,si :=m v])"


lemma eefm_start:
  assumes "eq_except_for_mem (σ with updates) (σ with updates') a si v b"
  shows "(σ with ((a,si :=m v)#updates)) = (if b then σ with updates' else σ with ((a,si :=m v)#updates'))"
  using assms
  by (auto simp add: eq_except_for_mem_def region_addresses_def state_with_updates.simps(2) state_update.simps)

lemma eefm_clean_mem:
assumes "si'  si"
    and "eq_except_for_mem (σ with updates) (σ with updates') a si v b"
  shows "eq_except_for_mem (σ with ((a,si' :=m v')#updates)) (σ with updates') a si v b"
  using assms
  apply (auto simp add: eq_except_for_mem_def state_with_updates.simps(2) state_update.simps)
  subgoal
    apply (cases "σ with updates";cases "σ with updates'";cases "σ")
    by (auto simp add: override_on_def region_addresses_iff)
  apply (cases "σ with updates";cases "σ with updates'")
  apply (auto simp add: override_on_def region_addresses_iff)
  apply (rule ext)
  apply (auto split: if_splits)
  by meson

method eefm_clean_mem = (rule eefm_clean_mem, (simp (no_asm);fail))


lemma eefm_clean_mem_enclosed:
  assumes "si < 32" (* due to 256 words being written to memory *)
      and "enclosed a' si' a si"
      and "eq_except_for_mem (σ with updates) (σ with updates') a' si' v' b"
  shows "eq_except_for_mem (σ with ((a,si :=m v)#updates)) 
                           (σ with ((a,si :=m overwrite (8 * unat (a' - a)) (8 * unat (a' - a) + 8*si') v (v' << unat (a'-a)*8))#updates'))
                           a' si' v' True"
proof(cases b)
  case True
  thus ?thesis
    using assms(3)
    apply (auto simp add: eq_except_for_mem_def state_with_updates.simps(2) state_update.simps)
    apply (cases "σ with updates";cases "σ with updates'")
    apply (auto simp add: override_on_def)
    apply (rule ext)
    apply (auto)

    apply (rule word_eqI)
    subgoal premises prems for _ _ _ _ x n
    proof-
      have 1: "unat (x - a) - unat (a' - a) = unat(x - a')"
        using address_in_enclosed_region_as_linarith[OF assms(2),of x]
              address_of_enclosed_region_ge[OF assms(2)]
              word_le_nat_alt prems(4-5)
        by (auto simp add: unat_sub_if_size word_size)
      thus ?thesis
        using prems address_in_enclosed_region[of a' si' a si x,OF assms(2)] assms(1)
        by (auto simp add: word_size take_byte_shiftlr_256 nth_take_byte_overwrite)
    qed

    apply (rule word_eqI)
    subgoal for _ _ _ _ x n
      using notin_region_addresses_sub[of x a' si' a]
      by (auto simp add: word_size nth_take_byte_overwrite)
    done
next
  case False
  thus ?thesis
    using assms(3)
    apply (auto simp add: eq_except_for_mem_def state_with_updates.simps(2) state_update.simps)
    apply (cases "σ with updates";cases "σ with updates'")
    apply (auto simp add: override_on_def)
    apply (rule ext)
    using enclosed_spec[OF assms(2)]
    apply (auto)

    subgoal premises prems for _ _ _ _ _ x 
    proof-
      have 1: "unat (x - a) - unat (a' - a) = unat(x - a')"
        using address_in_enclosed_region_as_linarith[OF assms(2),of x]
              address_of_enclosed_region_ge[OF assms(2)]
              word_le_nat_alt prems(6)
        by (auto simp add: unat_sub_if_size word_size)
      show ?thesis
        apply (rule word_eqI)
        using 1 prems address_in_enclosed_region[of a' si' a si x,OF assms(2)] assms(1)
        by (auto simp add: word_size take_byte_shiftlr_256 nth_take_byte_overwrite)
    qed

     subgoal for _ _ _ _ _ x
       apply (rule word_eqI)
       using notin_region_addresses_sub[of x a' si' a]
       by (auto simp add: word_size nth_take_byte_overwrite)
     by meson
qed


lemmas eefm_clean_mem_enclosed_plus  = eefm_clean_mem_enclosed[OF _ enclosed_plus, simplified]
lemmas eefm_clean_mem_enclosed_minus_numeral = eefm_clean_mem_enclosed[OF _ enclosed_minus_minus, of _ "numeral n" "numeral m"] for n m

method eefm_clean_mem_enclosed_plus = 
        (rule eefm_clean_mem_enclosed_plus, (
            (simp (no_asm);fail), (simp (no_asm);fail),
            (
             (simp (no_asm_simp);fail) |
             (rule assumptions_impI[of "_ + _ < 18446744073709551616"],simp (no_asm_simp),subst (asm) assumptions_conjE))
            )
        )

method eefm_clean_mem_enclosed_minus_numeral = 
        (rule eefm_clean_mem_enclosed_minus_numeral, (
            (simp (no_asm);fail), (simp (no_asm);fail), (simp (no_asm);fail), (simp (no_asm);fail), (simp (no_asm);fail),
            (
             (simp (no_asm_simp);fail) |           
             (rule assumptions_impI[of "_  _"],simp (no_asm_simp),subst (asm) assumptions_conjE))
            ) 
        )

lemma eefm_next_mem:
assumes "separate a si a' si'"
   and "eq_except_for_mem (σ with updates) (σ with updates') a si v b"
 shows "eq_except_for_mem (σ with ((a',si' :=m v')#updates)) (σ with ((a',si' :=m v')#updates')) a si v b"
  using assms
  apply (auto simp add: eq_except_for_mem_def override_on_def separate_def state_with_updates.simps(2) state_update.simps)
  apply (cases "σ with updates";cases "σ with updates'")
  apply (auto simp add: override_on_def)
  apply (rule ext)
   apply (auto)
  apply (cases "σ with updates";cases "σ with updates'")
  apply (auto simp add: override_on_def)
  apply (rule ext)
  apply (auto)
  by (metis select_convs(2))

method eefm_next_mem =        
        (rule eefm_next_mem,
            (  (simp (no_asm_simp) add: separate_simps state_simps; fail) |
               (rule assumptions_impI[of "separate _ _ _ _ "],simp (no_asm_simp),subst (asm) assumptions_conjE))
        )

lemma eefm_end:
  shows "eq_except_for_mem (σ with updates) (σ with updates) a si v False"
  by (auto simp add: eq_except_for_mem_def)


text ‹We need a tactic exactly like ``subst'' but that applies to the outer most term.›

ML_file ‹MySubst.ML›


text ‹The following method takes a goal with state with symbolic state updates.
      It first applies @{thm eefm_start}, considering the outer-most memory update to some region $\llbracket a, si \rrbracket$.
      A list @{term updates'} is generated that produces a similar state except for region $\llbracket a, si \rrbracket$.
      The list thus can have fewer updates since any update to a region that is enclosed in region $\llbracket a, si \rrbracket$ can be removed.
      Consecutively, this method applies rules to determine whether a state update can be kept, merged or removed.
      It may add assumptions to the goal, that assume no overflow.
›

method clean_up_mem = (
    mysubst eefm_start,
    ( eefm_clean_mem | eefm_clean_mem_enclosed_plus | eefm_clean_mem_enclosed_minus_numeral | eefm_next_mem)+,
    rule eefm_end,
    simp (no_asm),
    ((match premises in A[thin]: "assumptions (?A  ?B)"  cut_tac A;subst (asm) assumptions_conjE, erule conjE ›)+)?
  )


text ‹The method above applies to one state update. This method can be repeated as long as modifies
      the goal, as it always makes the goal smaller. The method below repeats a given method until
      the goal is unchanged. In deterministic fashion (a la the REPEAT\_DETERM tactic).›

method_setup repeat_until_unchanged = Method.text_closure >>
    (fn text => fn ctxt => fn using => 
          ― ‹parse the method supplied as parameter›
      let val ctxt_tac = Method.evaluate_runtime text ctxt using
          fun repeat_until_unchanged (ctxt,st) = 
            case Seq.pull (ctxt_tac (ctxt,st)) of
                SOME (Seq.Result (ctxt',st'), _) => 
                  if Thm.eq_thm (st, st') then 
                    Seq.make_results (Seq.succeed (ctxt',st'))
                  else
                    repeat_until_unchanged (ctxt',st')
              | _ => Seq.make_results (Seq.succeed (ctxt,st))
      in
        repeat_until_unchanged 
      end)

method clean_up = repeat_until_unchanged clean_up_mem

end

File ‹MySubst.ML›

(* The original header of the file is:

    Title:      Tools/eqsubst.ML
    Author:     Lucas Dixon, University of Edinburgh

    Perform a substitution using an equation.


    We modified this so that subst applies to the outer most term. In order to do that, I copy-pasted
    all the code below and made one minor modification.
    Please do not use this in any other context, I can give no guarantees.
*)

signature EQMYSUBST =
sig
  type match =
    ((indexname * (sort * typ)) list (* type instantiations *)
      * (indexname * (typ * term)) list) (* term instantiations *)
    * (string * typ) list (* fake named type abs env *)
    * (string * typ) list (* type abs env *)
    * term (* outer term *)

  type searchinfo =
    Proof.context
    * int (* maxidx *)
    * Zipper.T (* focusterm to search under *)

  datatype 'a skipseq = SkipMore of int | SkipSeq of 'a Seq.seq Seq.seq

  val skip_first_asm_occs_search: ('a -> 'b -> 'c Seq.seq Seq.seq) -> 'a -> int -> 'b -> 'c skipseq
  val skip_first_occs_search: int -> ('a -> 'b -> 'c Seq.seq Seq.seq) -> 'a -> 'b -> 'c Seq.seq
  val skipto_skipseq: int -> 'a Seq.seq Seq.seq -> 'a skipseq

  (* tactics *)
  val eqsubst_tac: Proof.context -> bool -> 
    thm list -> int -> tactic
  val eqsubst_tac': Proof.context ->
    (searchinfo -> term -> match Seq.seq) (* search function *)
    -> thm (* equation theorem to rewrite with *)
    -> int (* subgoal number in goal theorem *)
    -> thm (* goal theorem *)
    -> thm Seq.seq (* rewritten goal theorem *)

  (* search for substitutions *)
  val valid_match_start: Zipper.T -> bool
  val search_lr_all: Zipper.T -> Zipper.T Seq.seq
  val search_lr_valid: (Zipper.T -> bool) -> Zipper.T -> Zipper.T Seq.seq
  val searchf_lr_unify_all: searchinfo -> term -> match Seq.seq Seq.seq
  val searchf_lr_unify_valid: searchinfo -> term -> match Seq.seq Seq.seq
  val searchf_bt_unify_valid: searchinfo -> term -> match Seq.seq Seq.seq
end;

structure EqMySubst: EQMYSUBST =
struct

(* changes object "=" to meta "==" which prepares a given rewrite rule *)
fun prep_meta_eq ctxt =
  Simplifier.mksimps ctxt #> map Drule.zero_var_indexes;

(* make free vars into schematic vars with index zero *)
fun unfix_frees frees =
   fold (K (Thm.forall_elim_var 0)) frees o Drule.forall_intr_list frees;


type match =
  ((indexname * (sort * typ)) list (* type instantiations *)
   * (indexname * (typ * term)) list) (* term instantiations *)
  * (string * typ) list (* fake named type abs env *)
  * (string * typ) list (* type abs env *)
  * term; (* outer term *)

type searchinfo =
  Proof.context
  * int (* maxidx *)
  * Zipper.T; (* focusterm to search under *)


(* skipping non-empty sub-sequences but when we reach the end
   of the seq, remembering how much we have left to skip. *)
datatype 'a skipseq =
  SkipMore of int |
  SkipSeq of 'a Seq.seq Seq.seq;

(* given a seqseq, skip the first m non-empty seq's, note deficit *)
fun skipto_skipseq m s =
  let
    fun skip_occs n sq =
      (case Seq.pull sq of
        NONE => SkipMore n
      | SOME (h, t) =>
        (case Seq.pull h of
          NONE => skip_occs n t
        | SOME _ => if n <= 1 then SkipSeq (Seq.cons h t) else skip_occs (n - 1) t))
  in skip_occs m s end;



(* note: outerterm is the taget with the match replaced by a bound
   variable : ie: "P lhs" beocmes "%x. P x"
   insts is the types of instantiations of vars in lhs
   and typinsts is the type instantiations of types in the lhs
   Note: Final rule is the rule lifted into the ontext of the
   taget thm. *)
fun mk_foo_match mkuptermfunc Ts t =
  let
    val ty = Term.type_of t
    val bigtype = rev (map snd Ts) ---> ty
    fun mk_foo 0 t = t
      | mk_foo i t = mk_foo (i - 1) (t $ (Bound (i - 1)))
    val num_of_bnds = length Ts
    (* foo_term = "fooabs y0 ... yn" where y's are local bounds *)
    val foo_term = mk_foo num_of_bnds (Bound num_of_bnds)
  in Abs ("fooabs", bigtype, mkuptermfunc foo_term) end;

(* T is outer bound vars, n is number of locally bound vars *)
(* THINK: is order of Ts correct...? or reversed? *)
fun mk_fake_bound_name n = ":b_" ^ n;
fun fakefree_badbounds Ts t =
  let val (FakeTs, Ts, newnames) =
    fold_rev (fn (n, ty) => fn (FakeTs, Ts, usednames) =>
      let
        val newname = singleton (Name.variant_list usednames) n
      in
        ((mk_fake_bound_name newname, ty) :: FakeTs,
          (newname, ty) :: Ts,
          newname :: usednames)
      end) Ts ([], [], [])
  in (FakeTs, Ts, Term.subst_bounds (map Free FakeTs, t)) end;

(* before matching we need to fake the bound vars that are missing an
   abstraction. In this function we additionally construct the
   abstraction environment, and an outer context term (with the focus
   abstracted out) for use in rewriting with RW_Inst.rw *)
fun prep_zipper_match z =
  let
    val t = Zipper.trm z
    val c = Zipper.ctxt z
    val Ts = Zipper.C.nty_ctxt c
    val (FakeTs', Ts', t') = fakefree_badbounds Ts t
    val absterm = mk_foo_match (Zipper.C.apply c) Ts' t'
  in
    (t', (FakeTs', Ts', absterm))
  end;

(* Unification with exception handled *)
(* given context, max var index, pat, tgt; returns Seq of instantiations *)
fun clean_unify ctxt ix (a as (pat, tgt)) =
  let
    (* type info will be re-derived, maybe this can be cached
       for efficiency? *)
    val pat_ty = Term.type_of pat;
    val tgt_ty = Term.type_of tgt;
    (* FIXME is it OK to ignore the type instantiation info?
       or should I be using it? *)
    val typs_unify =
      SOME (Sign.typ_unify (Proof_Context.theory_of ctxt) (pat_ty, tgt_ty) (Vartab.empty, ix))
        handle Type.TUNIFY => NONE;
  in
    (case typs_unify of
      SOME (typinsttab, ix2) =>
        let
          (* FIXME is it right to throw away the flexes?
             or should I be using them somehow? *)
          fun mk_insts env =
            (Vartab.dest (Envir.type_env env),
             Vartab.dest (Envir.term_env env));
          val initenv =
            Envir.Envir {maxidx = ix2, tenv = Vartab.empty, tyenv = typinsttab};
          val useq = Unify.smash_unifiers (Context.Proof ctxt) [a] initenv
            handle ListPair.UnequalLengths => Seq.empty
              | Term.TERM _ => Seq.empty;
          fun clean_unify' useq () =
            (case (Seq.pull useq) of
               NONE => NONE
             | SOME (h, t) => SOME (mk_insts h, Seq.make (clean_unify' t)))
            handle ListPair.UnequalLengths => NONE
              | Term.TERM _ => NONE;
        in
          (Seq.make (clean_unify' useq))
        end
    | NONE => Seq.empty)
  end;

(* Unification for zippers *)
(* Note: Ts is a modified version of the original names of the outer
   bound variables. New names have been introduced to make sure they are
   unique w.r.t all names in the term and each other. usednames' is
   oldnames + new names. *)
fun clean_unify_z ctxt maxidx pat z =
  let val (t, (FakeTs, Ts, absterm)) = prep_zipper_match z in
    Seq.map (fn insts => (insts, FakeTs, Ts, absterm))
      (clean_unify ctxt maxidx (t, pat))
  end;


fun bot_left_leaf_of (l $ _) = bot_left_leaf_of l
  | bot_left_leaf_of (Abs (_, _, t)) = bot_left_leaf_of t
  | bot_left_leaf_of x = x;

(* Avoid considering replacing terms which have a var at the head as
   they always succeed trivially, and uninterestingly. *)
fun valid_match_start z =
  (case bot_left_leaf_of (Zipper.trm z) of
    Var _ => false
  | _ => true);

(* search from top, left to right, then down *)
val search_lr_all = ZipperSearch.all_bl_ur;

(* search from top, left to right, then down *)
fun search_lr_valid validf =
  let
    fun sf_valid_td_lr z =
      let val here = if validf z then [Zipper.Here z] else [] in
        (case Zipper.trm z of
          _ $ _ =>
            [Zipper.LookIn (Zipper.move_down_left z)] @ here @
            [Zipper.LookIn (Zipper.move_down_right z)]
        | Abs _ => here @ [Zipper.LookIn (Zipper.move_down_abs z)]
        | _ => here)
      end;
  in Zipper.lzy_search sf_valid_td_lr end;

(* search from bottom to top, left to right *)
fun search_bt_valid validf =
  let
    fun sf_valid_td_lr z =
      let val here = if validf z then [Zipper.Here z] else [] in
        (case Zipper.trm z of
          _ $ _ =>
            [Zipper.LookIn (Zipper.move_down_left z),
             Zipper.LookIn (Zipper.move_down_right z)] @ here
        | Abs _ => [Zipper.LookIn (Zipper.move_down_abs z)] @ here
        | _ => here)
      end;
  in Zipper.lzy_search sf_valid_td_lr end;

fun searchf_unify_gen f (ctxt, maxidx, z) lhs =
  Seq.map (clean_unify_z ctxt maxidx lhs) (Zipper.limit_apply f z);

(* search all unifications *)
val searchf_lr_unify_all = searchf_unify_gen search_lr_all;

(* search only for 'valid' unifiers (non abs subterms and non vars) *)
val searchf_lr_unify_valid = searchf_unify_gen (search_lr_valid valid_match_start);

val searchf_bt_unify_valid = searchf_unify_gen (search_bt_valid valid_match_start);

(* apply a substitution in the conclusion of the theorem *)
(* cfvs are certified free var placeholders for goal params *)
(* conclthm is a theorem of for just the conclusion *)
(* m is instantiation/match information *)
(* rule is the equation for substitution *)
fun apply_subst_in_concl ctxt i st (cfvs, conclthm) rule m =
  RW_Inst.rw ctxt m rule conclthm
  |> unfix_frees cfvs
  |> Conv.fconv_rule Drule.beta_eta_conversion
  |> (fn r => resolve_tac ctxt [r] i st);

(* substitute within the conclusion of goal i of gth, using a meta
equation rule. Note that we assume rule has var indicies zero'd *)
fun prep_concl_subst ctxt i gth =
  let
    val th = Thm.incr_indexes 1 gth;
    val tgt_term = Thm.prop_of th;

    val (fixedbody, fvs) = IsaND.fix_alls_term ctxt i tgt_term;
    val cfvs = rev (map (Thm.cterm_of ctxt) fvs);

    val conclterm = Logic.strip_imp_concl fixedbody;
    val conclthm = Thm.trivial (Thm.cterm_of ctxt conclterm);
    val maxidx = Thm.maxidx_of th;
    val ft =
      (Zipper.move_down_right (* ==> *)
       o Zipper.move_down_left (* Trueprop *)
       o Zipper.mktop
       o Thm.prop_of) conclthm
  in
    ((cfvs, conclthm), (ctxt, maxidx, ft))
  end;

(* substitute using an object or meta level equality *)
fun eqsubst_tac' ctxt searchf instepthm i st =
  let
    val (cvfsconclthm, searchinfo) = prep_concl_subst ctxt i st;
    val stepthms = Seq.of_list (prep_meta_eq ctxt instepthm);
    fun rewrite_with_thm r =
      let val (lhs,_) = Logic.dest_equals (Thm.concl_of r) in
        searchf searchinfo lhs
        |> Seq.maps (apply_subst_in_concl ctxt i st cvfsconclthm r)
      end;
  in stepthms |> Seq.maps rewrite_with_thm end;


(* General substitution of multiple occurrences using one of
   the given theorems *)

fun skip_first_occs_search occ srchf sinfo lhs =
  (case skipto_skipseq occ (srchf sinfo lhs)  of
    SkipMore _ => Seq.empty
  | SkipSeq ss => Seq.flat ss);

(* The "occs" argument is a list of integers indicating which occurrence
w.r.t. the search order, to rewrite. Backtracking will also find later
occurrences, but all earlier ones are skipped. Thus you can use [0] to
just find all rewrites. *)

fun eqsubst_tac ctxt butlast thms i st =
  let val nprems = Thm.nprems_of st in
    if nprems < i then Seq.empty else
    let
      val thmseq = Seq.of_list thms;
      fun apply_occ occ st =
        thmseq |> Seq.maps (fn r =>
          eqsubst_tac' ctxt
            (skip_first_occs_search occ searchf_lr_unify_valid) r
            (i + (Thm.nprems_of st - nprems)) st);
      fun max_occ occ st = 
        case Seq.pull (apply_occ occ st) of
          NONE => occ - 1 
          | _ => max_occ (occ+1) st
      val sorted_occs = [max_occ 0 st - (if butlast then 1 else 0)];
    in
      Seq.maps distinct_subgoals_tac (Seq.EVERY (map apply_occ sorted_occs) st)
    end
  end;


fun skip_first_asm_occs_search searchf sinfo occ lhs =
  skipto_skipseq occ (searchf sinfo lhs);


(* combination method that takes a flag (true indicates that subst
   should be done to an assumption, false = apply to the conclusion of
   the goal) as well as the theorems to use *)
val _ =
  Theory.setup
    (Method.setup binding‹mysubst›
      (Scan.lift (Args.mode "butlast") --
        Attrib.thms >> (fn ((butlast), inthms) => fn ctxt =>
          SIMPLE_METHOD' (eqsubst_tac ctxt butlast inthms)))
      "single-step substitution");

end;

Theory SymbolicExecution

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "A symbolic execution engine"


theory SymbolicExecution
  imports X86_InstructionSemantics StateCleanUp
begin



definition eq (infixl "" 50)
  where "(≜)  (=)"

context unknowns
begin 


inductive run :: "64 word  (64 word  I)  state  state  bool"
  where 
    "rip σ = af  fetch (rip σ) = i  σ'  step i σ  run af fetch σ σ'"
  | "rip σ  af  fetch (rip σ) = i  run af fetch (step i σ) σ'  run af fetch σ σ'"


method fetch_and_execute = (
   ((rule run.intros(2),(simp (no_asm) add: state_simps;fail))
    | (rule run.intros(1),(simp (no_asm) add: state_simps;fail))),
   (simp (no_asm) add: state_simps),                                             ― ‹fetch instruction›
   (simp (no_asm_simp) add: step_def semantics_simps state_simps BitByte_simps), ― ‹simplification›
   (subst clean_state_updates[symmetric],simp (no_asm))                          ― ‹cleaning up›
 )

method resolve_mem_reads = (
  subst mem_read_mem_write_separate,
  ((simp (no_asm_simp) add: separate_simps state_simps; fail) 
   | (rule assumptions_impI,simp (no_asm_simp),subst (asm) assumptions_conjE, erule conjE)),
  (simp (no_asm_simp) add: semantics_simps state_simps BitByte_simps separate_simps)?
 )

method se_step = 
  fetch_and_execute,
  ((resolve_mem_reads)+;(subst clean_state_updates[symmetric],simp (no_asm)))?,
  clean_up

method se_step_no_clean = 
  fetch_and_execute,
  ((resolve_mem_reads)+;(subst clean_state_updates[symmetric],simp (no_asm)))?

end


abbreviation RSP0 
  where "RSP0 σ  regs σ ''rsp''"
abbreviation RBP0 
  where "RBP0 σ  regs σ ''rbp''"
abbreviation RAX0 
  where "RAX0 σ  regs σ ''rax''"
abbreviation RBX0 
  where "RBX0 σ  regs σ ''rbx''"
abbreviation RCX0 
  where "RCX0 σ  regs σ ''rcx''"
abbreviation RDX0 
  where "RDX0 σ  regs σ ''rdx''"
abbreviation RDI0 
  where "RDI0 σ  regs σ ''rdi''"
abbreviation RSI0 
  where "RSI0 σ  regs σ ''rsi''"
abbreviation R150 
  where "R150 σ  regs σ ''r15''"
abbreviation R140
  where "R140 σ  regs σ ''r14''"
abbreviation R130 
  where "R130 σ  regs σ ''r13''"
abbreviation R120 
  where "R120 σ  regs σ ''r12''"
abbreviation R110 
  where "R110 σ  regs σ ''r11''"
abbreviation R100 
  where "R100 σ  regs σ ''r10''"
abbreviation R90 
  where "R90 σ  regs σ ''r9''"
abbreviation R80 
  where "R80 σ  regs σ ''r8''"



text ‹Repeat a command up to n times, in deterministic fashion (a la the REPEAT\_DETERM tactic).›

method_setup repeat_n = ‹
  Scan.lift Parse.nat -- Method.text_closure >>
    (fn (n,text) => fn ctxt => fn using => 
      let
        val ctxt_tac = Method.evaluate_runtime text ctxt using;
        fun repeat_n 0 ctxt_st = Seq.make_results (Seq.succeed ctxt_st)
          | repeat_n i ctxt_st = case Seq.pull (ctxt_tac ctxt_st) of
                            SOME (Seq.Result ctxt_st', _) => repeat_n (i-1) ctxt_st'
                          | _ => Seq.make_results (Seq.succeed ctxt_st)
      in
        repeat_n n
      end)

end

Theory Examples

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Small examples"

theory Examples
  imports SymbolicExecution 
begin

context unknowns
begin
  
  text ‹
  A simple hand-crafted example showing some basic instructions. 
›


schematic_goal example1:
  assumes[simp]: "fetch 0x0  = PUSH (Reg ''rbp'') 1"
      and[simp]: "fetch 0x1  = SUB  (Reg ''rsp'') (Imm 30) 2"
      and[simp]: "fetch 0x2  = MOV  (QWORD PTR [22 + ''rsp'']2) (Imm 42) 3"
      and[simp]: "fetch 0x3  = MOV  (QWORD PTR [30 + ''rsp'']2) (Imm 43) 4"
      and[simp]: "fetch 0x4  = ADD  (Reg ''rsp'') (Imm 30) 5"
      and[simp]: "fetch 0x5  = POP  (Reg ''rbp'') 6"
      and[simp]: "fetch 0x6  = RET 1"
    shows "run 0x6 fetch (σ with [setRip 0x0]) ?σ'"
  apply se_step+
  apply (subst eq_def,simp)
  done

thm example1




text ‹Demonstrates little-endian memory and register-aliasing›

text‹
\begin{verbatim}
  RAX +   0    1    2     3     4     5     6     7
        | FF | EE | DD  | CC  | BB  | AA  | 00  | 00  |
  
  EDI := 0xCCDDEEFF
  EBX := 0xAABB
  RCX := 0xAABBCCDDAABB
\end{verbatim}
›

schematic_goal example2:
  assumes[simp]: "fetch 0x0   = MOV  (QWORD PTR [''rax'']1) (Imm 0xAABBCCDDEEFF) 1"
      and[simp]: "fetch 0x1   = MOV  (Reg ''edi'')          (DWORD PTR [''rax'']1) 2"
      and[simp]: "fetch 0x2   = MOV  (Reg ''ebx'')          (DWORD PTR [4 + ''rax'']2) 3"
      and[simp]: "fetch 0x3   = MOV  (Reg ''rcx'')          (QWORD PTR [''rax'']1) 4"
      and[simp]: "fetch 0x4   = MOV   (Reg ''cx'')          (WORD PTR  [4 + ''rax'']2) 5"
    shows "run 0x4 fetch (σ with [setRip 0x0]) ?σ'" 
  apply se_step+
  apply (subst eq_def,simp)
  done

thm example2

text ‹
  This example show how assumptions over regions are generated.
  Since no relation over rax and rbx is known in the initial state, they will be assumed to be
  separate by default.
›
schematic_goal example3:
  assumes[simp]: "fetch 0x0   = MOV  (QWORD PTR [''rax'']1) (Imm 0xAABBCCDDEEFF) 1"
      and[simp]: "fetch 0x1   = MOV  (QWORD PTR [''rbx'']1) (Imm 0x112233445566) 2"
      and[simp]: "fetch 0x2   = MOV  (Reg ''rcx'')          (DWORD PTR [2 + ''rax'']2) 3"
      and[simp]: "fetch 0x3   = MOV  (Reg ''cx'')           (WORD PTR  [4 + ''rbx'']2) 4"
      and[simp]: "fetch 0x4   = MOV  (Reg ''cl'')           (BYTE PTR  [''rax'']1) 5"
    shows "assumptions ?A  run 0x4 fetch (σ with [setRip 0x0]) ?σ'" 
  apply se_step+
  apply (subst eq_def,simp)
  done

thm example3

end







end

Theory X86_Parse

section ‹Parser›

theory X86_Parse
  imports X86_InstructionSemantics
  keywords "x86_64_parser" :: thy_decl
begin

ML_file ‹X86_Parse.ML›

end

File ‹X86_Parse.ML›


(* TODO: segment registers *)

datatype Operand =
           (*  size (in bytes)   segment           offset   base     index    scale *)
    Mem of (   int             * string option   * int    * string * string * int)
  | Reg of string
  | Imm of LargeInt.int

datatype Instr = Instr of (LargeInt.int * LargeInt.int * string * Operand option * Operand option * Operand option)

(* PRETTY PRINTING *)
fun pp_option NONE = ""
  | pp_option (SOME s) = s

fun
  pp_mem_size 1 = "BYTE PTR"
| pp_mem_size 2 = "WORD PTR"
| pp_mem_size 4 = "DWORD PTR"
| pp_mem_size 8 = "QWORD PTR"
| pp_mem_size 16 = "XMMWORD PTR"
| pp_mem_size n = "SIZEDIR " ^ Int.toString (n*8) ^ " PTR"

fun pp_operand (Mem (si,segment,offset,base, index, scale)) =
    pp_mem_size si ^ " " ^ pp_option segment ^ ":[" ^ Int.toString offset ^ " + " ^ base ^ " + " ^ index ^ " * " ^ Int.toString scale ^ "]"
| pp_operand (Reg r) = r
| pp_operand (Imm i) = Int.toString i

fun pp_operands [] = ""
  | pp_operands (NONE::_) = ""
  | pp_operands [SOME op1] = pp_operand op1
  | pp_operands [SOME op1,NONE] = pp_operand op1
  | pp_operands (SOME op1::op2::ops) = pp_operand op1 ^ ", " ^ pp_operands (op2::ops)

fun pp_instr (Instr (a,si,m,op1,op2,op3)) =
  LargeInt.toString a ^ ": " ^ m ^ " " ^ pp_operands [op1,op2,op3] ^ "  (" ^ LargeInt.toString si ^ ")"

val intFromHexString = StringCvt.scanString (LargeInt.scan StringCvt.HEX) o Substring.string

fun intFromHexString_forced s =
    case intFromHexString s of
         SOME i => i
       | NONE => raise Fail ("Could not convert string '" ^ Substring.string s ^ "' to int.")

fun is_whitespace c = (c = #" " orelse c = #"\t"  orelse c = #"\n")

fun trim str =
  let val (_,x) = Substring.splitl is_whitespace str
      val (y,_) = Substring.splitr is_whitespace x in
    y
  end;

(* PARSING *)

val registers = [
  "rip",
  "rax", "eax", "ax", "ah", "al",
  "rbx", "ebx", "bx", "bh", "bl",
  "rcx", "ecx", "cx", "ch", "cl",
  "rdx", "edx", "dx", "dh", "dl",
  "rbp", "ebp", "bp", "bpl",
  "rsp", "esp", "sp", "spl",
  "rdi", "edi", "di", "dil",
  "rsi", "esi", "si", "sil",
  "r15", "r15d", "r15w", "r15b",
  "r14", "r14d", "r14w", "r14b",
  "r13", "r13d", "r13w", "r13b",
  "r12", "r12d", "r12w", "r12b",
  "r11", "r11d", "r11w", "r11b",
  "r10", "r10d", "r10w", "r10b",
  "r9", "r9d", "r9w", "r9b",
  "r8", "r8d", "r8w", "r8b",

  "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8",
  "xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
]

fun is_register str = List.find (fn (str') => String.compare (Substring.string str,str') = EQUAL) registers <> NONE

fun overwrite_str "" s = s
| overwrite_str s "" = s
| overwrite_str _  s = s

fun overwrite_str_option NONE s = s
| overwrite_str_option s NONE = s
| overwrite_str_option _ s     = s

fun max x y = if x >= y then x else y

fun overwrite_Mem (Mem (si,seg,off,base,ind,sc)) (Mem (si',seg',off',base',ind',sc')) =
  Mem (max si si',overwrite_str_option seg seg',max off off',overwrite_str base base',overwrite_str ind ind',max sc sc')

fun parse_operand_address_between_brackets_inner str =
  if is_register str then
    Mem (0,NONE,0,Substring.string str,"",0) (* base *)
  else
    let val tokens = map trim (Substring.tokens (fn c => c = #"*") str) in
      if length tokens = 1 then
        case intFromHexString str of
          SOME i => Mem (0,NONE,i,"","",0) (* offset *)
          | NONE => raise Fail ("Don't know how to parse operand part:" ^ Substring.string str)
      else if length tokens = 2 then
        if is_register (nth tokens 0) then
          Mem (0,NONE,0,"",Substring.string (nth tokens 0),intFromHexString_forced (nth tokens 1)) (* index * scale *)
        else if is_register (nth tokens 1) then
          Mem (0,NONE,0,"",Substring.string (nth tokens 1),intFromHexString_forced (nth tokens 0)) (* scale * index *)
        else
          raise Fail ("Don't know how to parse operand part:" ^ Substring.string str)
      else
        raise Fail ("Don't know how to parse operand part:" ^ Substring.string str)
    end

fun parse_operand_address_between_brackets_sum si segment_reg str =
  let val tokens = map trim (Substring.tokens (fn c => c = #"+") str) in
    fold (overwrite_Mem o parse_operand_address_between_brackets_inner)
         tokens
         (Mem (si,segment_reg ,0,"","",0))
  end;


fun parse_operand_address_between_brackets_sub si segment_reg str =
  let val (lhs,num) = Substring.splitl (fn c => c <> #"-") str;
      val (Mem (x0,x1,_,x3,x4,x5)) = parse_operand_address_between_brackets_sum si segment_reg lhs in
    Mem (x0,x1,intFromHexString_forced num,x3,x4,x5)
   end

fun parse_operand_address_between_brackets si segment_reg str =
  let val (_,num) = Substring.splitl (fn c => c <> #"-") str in
    if Substring.isEmpty num then
      parse_operand_address_between_brackets_sum si segment_reg str
    else
      parse_operand_address_between_brackets_sub si segment_reg str
      end

fun skip_brackets str =
  let val (x,y) = Substring.splitAt (trim str,1)
      val (z,_) = Substring.splitl (fn c => c <> #"]") y in
    if Substring.compare (x,Substring.full "[") = EQUAL then
      z
    else
      raise Fail ("Expecting non-empty bracketed string preceded with colon or an immediate in hex-format, but got: " ^ Substring.string str)
  end;

fun parse_operand_address_bracketed si segment_reg str =
  case intFromHexString str of
      SOME imm => Mem (si,segment_reg,imm,"", "",0)
    | NONE => parse_operand_address_between_brackets si segment_reg (skip_brackets str)

fun tail str =
  case Substring.getc str of
      NONE => raise Fail ("Expecting non-empty string, but got: " ^ Substring.string str)
    | SOME (_,s) => s;

fun parse_operand_address si str =
  case Substring.splitl (fn c => c <> #":") str of
      (before_colon, after_colon) =>
          if Substring.isEmpty after_colon then
            parse_operand_address_bracketed si NONE before_colon
          else
            parse_operand_address_bracketed si (SOME (Substring.string (trim before_colon))) (tail after_colon);

fun parse_operand str' =
  let val str = trim str' in
    if Substring.isPrefix "BYTE PTR" str then
      parse_operand_address 1 (snd (Substring.splitAt (str,8)))
    else if Substring.isPrefix "WORD PTR" str then
      parse_operand_address 2 (snd (Substring.splitAt (str,8)))
    else if Substring.isPrefix "DWORD PTR" str then
      parse_operand_address 4 (snd (Substring.splitAt (str,9)))
    else if Substring.isPrefix "QWORD PTR" str then
      parse_operand_address 8 (snd (Substring.splitAt (str,9)))
    else if Substring.isPrefix "XMMWORD PTR" str then
      parse_operand_address 16 (snd (Substring.splitAt (str,11)))
    else if Substring.isPrefix "[" str then (* happens in case of a LEA instruction *)
      parse_operand_address 0 str
    else if List.find (fn (str') => String.compare (Substring.string str,str') = EQUAL) registers <> NONE then
      Reg (Substring.string str)
    else
      case intFromHexString str of
          NONE => raise Fail ("Cannot read hex number in string: " ^ (Substring.string str))
        | SOME imm => Imm imm
  end;

fun parse_operands str =
  let val tokens = map trim (Substring.tokens (fn c => c = #",") (trim str))
      val ops = map parse_operand tokens in
    case ops of
        [] => (NONE,NONE,NONE)
      | [op1] => (SOME op1,NONE,NONE)
      | [op1,op2] => (SOME op1,SOME op2,NONE)
      | [op1,op2,op3] => (SOME op1,SOME op2,SOME op3)
      | _ => raise Fail ("Unexpected number of operands in : " ^ Substring.string str)
  end;

fun remove_comment str =
  let val (str0,str1) = Substring.splitl (fn c => c <> #"#" andalso c <> #"<") str
  in
    if Substring.isEmpty str1 then str0 else Substring.trimr 1 str0
  end

fun parse_external_func a si str =
  let val (m,func)  = Substring.splitl (fn c => c <> #" ") str
      val func_name =  Substring.string (trim func)
  in
    Instr (a, si, Substring.string m, SOME (Reg func_name), NONE, NONE)
  end

fun parse_normal_instr a si str =
  let val (_,rem1)      = Substring.splitl (fn c => c =  #":" orelse c = #" ") str
      val (m,rem2)      = Substring.splitl (fn c => c <> #" ") rem1
      val (op1,op2,op3) = parse_operands rem2 in
    Instr (a, si, Substring.string m, op1,op2,op3)
  end;

fun parse_instr si str =
  let val str'          = remove_comment (Substring.full str)
      val (addr,rem0)   = Substring.splitl (fn c => c <> #":") str'
      val a             = intFromHexString_forced (Substring.full ("0x" ^ Substring.string (trim addr)))
  in
    if Substring.isPrefix "EXTERNAL_FUNCTION" (trim (tail (rem0))) then
      parse_external_func a si (trim (tail (rem0)))
    else
      parse_normal_instr a si rem0
  end;

fun read_instr_addr str =
  let val instr = parse_instr 0 str
      val (Instr (a,_,_,_,_,_)) = instr in
    a
  end

(* EMBEDDING INTO HOL *)

val mk_nat = HOLogic.mk_number @{typ nat}
val mk_string = HOLogic.mk_string
fun mk_word_typ_from_num s = Syntax.read_typ @{context} ("num ⇒ " ^ Int.toString s ^ " word")
fun mk_word_typ s = Syntax.read_typ @{context} (Int.toString s ^ " word")

fun mk_word i b =
  if i=0 then
    Const ("Groups.zero_class.zero", mk_word_typ b)
  else if i=1 then
    Const ("Groups.one_class.one", mk_word_typ b)
  else if i < 0 then
    Syntax.read_term @{context} ("uminus :: " ^ Int.toString b ^ " word ⇒ " ^ Int.toString b ^ " word")
      $ (Const ("Num.numeral_class.numeral", mk_word_typ_from_num b) $ HOLogic.mk_numeral (0 - i))
  else
    Const ("Num.numeral_class.numeral", mk_word_typ_from_num b) $ HOLogic.mk_numeral i

fun mk_operand (Mem (8,segment,offset,base,index,scale)) =
  @{term "qword_ptr"} $ HOLogic.mk_prod (mk_word offset 64,
                         HOLogic.mk_prod (mk_string base,
                          HOLogic.mk_prod (mk_string index, mk_nat scale)))
 | mk_operand (Mem (4,segment,offset,base,index,scale)) =
  @{term "dword_ptr"} $ HOLogic.mk_prod (mk_word offset 64,
                         HOLogic.mk_prod (mk_string base,
                          HOLogic.mk_prod (mk_string index, mk_nat scale)))
 | mk_operand (Mem (2,segment,offset,base,index,scale)) =
  @{term "word_ptr"} $ HOLogic.mk_prod (mk_word offset 64,
                         HOLogic.mk_prod (mk_string base,
                          HOLogic.mk_prod (mk_string index, mk_nat scale)))
 | mk_operand (Mem (1,segment,offset,base,index,scale)) =
  @{term "byte_ptr"} $ HOLogic.mk_prod (mk_word offset 64,
                         HOLogic.mk_prod (mk_string base,
                          HOLogic.mk_prod (mk_string index, mk_nat scale)))
 | mk_operand (Mem (si,segment,offset,base,index,scale)) =
  @{term Mem} $ mk_nat si $ mk_word offset 64 $ mk_string base $ mk_string index $ mk_nat scale
 | mk_operand (Reg reg) =
  @{term Reg} $ mk_string reg
 | mk_operand (Imm imm) =
  @{term Imm} $ mk_word imm 256

fun mk_operand_option NONE       = @{term "None :: Operand option"}
  | mk_operand_option (SOME op1) = @{term "Some :: Operand  Operand option"} $ mk_operand op1

fun mk_instr (Instr (_,_,"EXTERNAL_FUNCTION",SOME (Reg f),NONE,NONE)) lthy =
  let val def = Syntax.read_term (Local_Theory.target_of lthy) ("EXTERNAL_FUNCTION_" ^ f) in
    if fastype_of def = (@{typ state} --> @{typ state}) then
      @{term ExternalFunc} $ def
    else
      raise Fail ("Unknown external function: " ^ f ^ "; expecting a function named EXTERNAL_FUNCTION_" ^ f ^ " in locale unknowns of type state ⇒ state")
  end
| mk_instr (Instr (a,si,m,op1,op2,op3)) _ =
  @{term Instr} $ mk_string m $ mk_operand_option op1 $ mk_operand_option op2 $ mk_operand_option op3 $ mk_word (a+si) 64

(*
  Make a definition in HOL with name "name" and as body "value".
  Value can be any HOL term, e.g.,:
      HOLogic.mk_number @{typ nat} 42
  Note that HOL terms can be produced using antiquotations, e.g.,
      @{term "42::nat"}
  does the same as the above code.
*)
fun mk_definition name value lthy = let
  val binding = Binding.name name
  val (_, lthy) = Local_Theory.define ((binding, NoSyn), ((Thm.def_binding binding, []), value)) lthy
  val _ = tracing ("Added definition: " ^ (Local_Theory.full_name lthy binding))
  in
    lthy
  end


fun main localename assembly lthy = let
  (* Build a locale name *)
  val _ = not (Long_Name.is_qualified localename) orelse raise Fail ("Given localename looks like qualified Isabelle ID: " ^ localename)
  val _ = localename <> "" orelse raise Fail ("Given localename is illegal")

  (* The locale fixes a variable called "fetch" of type "64 word ⟹ I" *)
  val fixes_fetch = Element.Fixes [( Binding.name "fetch" , SOME (@{typ "64 word => I"}), NoSyn)]

  (* the locale contains a list of assumptions, one for each instruction. They are given the [simp] attribute. *)
  val simp_attrib = Attrib.internal (fn (_) => Simplifier.simp_add)
  fun mk_assume thm_name term =  ((Binding.name thm_name, [simp_attrib]), [term]);
  val mk_fetch = Free ("fetch", @{typ "64 word => I"})
  fun mk_fetch_equality_assume si str =
    let val instr = parse_instr si str
        val (Instr (a,_,_,_,_,_)) = instr
        val asm_name = "fetch_" ^ LargeInt.toString a
        val eq_term = HOLogic.mk_eq (mk_fetch $ mk_word a 64, mk_instr instr lthy)
    in
      mk_assume asm_name (HOLogic.Trueprop $ eq_term, [])
    end

  fun mk_fetch_equality_assumes [] = []
    | mk_fetch_equality_assumes [str] = [mk_fetch_equality_assume 1 str]
    | mk_fetch_equality_assumes (str0::str1::strs) = (mk_fetch_equality_assume (read_instr_addr str1 - read_instr_addr str0) str0) :: mk_fetch_equality_assumes (str1::strs)


  val assembly = String.tokens (fn c => c = #"\n") assembly |>
    List.filter (Substring.full #> remove_comment #> Substring.explode #> List.all Char.isSpace #> not)
  val loc_bindings = Binding.name localename
  val loc_elems = [fixes_fetch,Element.Assumes (mk_fetch_equality_assumes assembly)]
  val thy = Local_Theory.exit_global lthy
  val loc_expr : (string, term) Expression.expr = [(Locale.intern thy "unknowns",(("",false),(Expression.Named [], [])))]
  val (_,lthy) = Expression.add_locale loc_bindings loc_bindings [] (loc_expr,[]) loc_elems thy
  val _ = tracing ("Added locale: " ^ localename ^ " with a fetch function for " ^ Int.toString (length assembly) ^ " instructions. To get started, execute:\n\ncontext " ^ localename ^ "\nbegin\n   find_theorems fetch\nend\n")

  in
   lthy
  end




(*
  Add the command "x86_64_parser" to the Isabelle syntax.
  Its argument is parsed by Parse.text, which simply returns
  the text. The parsed text is given to the "main" function.
*)

val _ =
    Outer_Syntax.local_theory
      command_keywordx86_64_parser
      "Generate a locale from a list of assembly instructions."
      (Parse.text -- Parse.text >> (fn (localename, assembly) => main localename assembly))

Theory Example_WC

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Example: word count program from GNU"

theory Example_WC
  imports SymbolicExecution X86_Parse
begin


text ‹
  The wordcount (wc) program, specifically, the functions getword and counter.
  We compiled the source code found here:\\ 
  
      \url{https://www.gnu.org/software/cflow/manual/html_node/Source-of-wc-command.html}\\

  The source code is also found in the directory \texttt{./examples/wc}.

  The assembly below has been obtained by running in \texttt{./examples/wc}:
  \begin{verbatim}
    gcc wc.c -o wc
    objdump -M intel -d --no-show-raw-insn wc
  \end{verbatim}


  This example:
  \begin{itemize}
  \item contains a lot of memory accesses and demonstrates how a memory model is generated through assumptions;
  \item contains external function calls and demonstrates how to deal with that.
  \end{itemize}


  First, we define definitions named ``EXTERNAL\_FUNCTION\_*'' for each external function. The definitions
  are added to the simplifier.

  We model a C file (of C-type ``FILE'') as a pointer to a part of memory that contains the contents.
  We assume the contents are 0-terminated.

  Function \texttt{feof} takes as input (via \texttt{rdi}) a FILE*. It reads one byte from **rdi, i.e., the next byte of the file.
  It returns true iff the byte equals 0.\\

  Function \texttt{fopen} writes into memory both 1.) the contents of a file (the string "Hello"), and 2.) a pointer to the
  beginning of that file. It returns a pointer to that pointer.\\

  Function \texttt{getc} reads the next byte of the FILE (same is feof) and increments the pointer.\\
 
  Function \texttt{isword} simply returns true, and functions report and fclose simply do nothing.\\
›


context unknowns
begin

definition EXTERNAL_FUNCTION_feof :: "state  state"
  where "EXTERNAL_FUNCTION_feof σ  
          let ptr = ucast (operand_read σ (QWORD PTR [''rdi'']1));
              val = mem_read σ ptr 1 in
            (semantics_RET  o 
             semantics_MOV (Reg ''eax'') (Imm (fromBool (val = 0))))
              σ"

declare EXTERNAL_FUNCTION_feof_def [simp]

definition EXTERNAL_FUNCTION__IO_getc :: "state  state"
  where "EXTERNAL_FUNCTION__IO_getc σ 
          let ptr = ucast (operand_read σ (QWORD PTR [''rdi'']1));
              val = mem_read σ ptr 1 in
            (semantics_RET  o 
              semantics_MOV (Reg ''rax'') (Imm (if val = 0 then -1 else val)) o
              semantics_INC (QWORD PTR [''rdi'']1))
              σ"

declare EXTERNAL_FUNCTION__IO_getc_def [simp]

definition "EXTERNAL_FUNCTION_fopen σ = 
      semantics_RET (σ with [''rax'' :=r 100,
                             100,8 :=m 108,
                             108,6 :=m 0x006E6C6C6548])"

declare EXTERNAL_FUNCTION_fopen_def [simp]

definition EXTERNAL_FUNCTION_isword :: "state  state"
  where "EXTERNAL_FUNCTION_isword = operand_write (Reg ''rax'') 1 o semantics_RET"

declare EXTERNAL_FUNCTION_isword_def [simp]

definition EXTERNAL_FUNCTION_fclose :: "state  state"
  where "EXTERNAL_FUNCTION_fclose = semantics_RET"

declare EXTERNAL_FUNCTION_fclose_def [simp]

definition EXTERNAL_FUNCTION_report :: "state  state"
  where "EXTERNAL_FUNCTION_report = semantics_RET"

declare EXTERNAL_FUNCTION_report_def [simp]

end


text ‹
  Below, one can see that, e.g. 810 denotes an external call (see address 0xc1b).
  For each external call, we replace the actual .got.plt section with a special
  instruction @{term ExternalFunc} followed by a name. These special instructions
  are interpreted as executing the related definition above.
›

context unknowns
begin
  x86_64_parser wc_objdump ‹
    7d0: EXTERNAL_FUNCTION fclose
    810: EXTERNAL_FUNCTION feof
    820: EXTERNAL_FUNCTION _IO_getc
    830: EXTERNAL_FUNCTION fopen
    bd5: EXTERNAL_FUNCTION isword
    b93: EXTERNAL_FUNCTION report

    c01: push   rbp
    c02: mov    rbp,rsp
    c05: sub    rsp,0x20
    c09: mov    QWORD PTR [rbp-0x18],rdi
    c0d: mov    DWORD PTR [rbp-0x4],0x0
    c14: mov    rax,QWORD PTR [rbp-0x18]
    c18: mov    rdi,rax
    c1b: call   810 <feof@plt>
    c20: test   eax,eax
    c22: je     c7d <getword+0x7c>
    c24: mov    eax,0x0
    c29: jmp    cf1 <getword+0xf0>
    c2e: mov    eax,DWORD PTR [rbp-0x8]
    c31: movzx  eax,al
    c34: mov    edi,eax
    c36: call   bd5 <isword>
    c3b: test   eax,eax
    c3d: je     c53 <getword+0x52>
    c3f: mov    rax,QWORD PTR [rip+0x201402]        # 202048 <wcount>
    c46: add    rax,0x1
    c4a: mov    QWORD PTR [rip+0x2013f7],rax        # 202048 <wcount>
    c51: jmp    c92 <getword+0x91>
    c53: mov    rax,QWORD PTR [rip+0x2013f6]        # 202050 <ccount>
    c5a: add    rax,0x1
    c5e: mov    QWORD PTR [rip+0x2013eb],rax        # 202050 <ccount>
    c65: cmp    DWORD PTR [rbp-0x8],0xa
    c69: jne    c7d <getword+0x7c>
    c6b: mov    rax,QWORD PTR [rip+0x2013e6]        # 202058 <lcount>
    c72: add    rax,0x1
    c76: mov    QWORD PTR [rip+0x2013db],rax        # 202058 <lcount>
    c7d: mov    rax,QWORD PTR [rbp-0x18]
    c81: mov    rdi,rax
    c84: call   820 <_IO_getc@plt>
    c89: mov    DWORD PTR [rbp-0x8],eax
    c8c: cmp    DWORD PTR [rbp-0x8],0xffffffff
    c90: jne    c2e <getword+0x2d>
    c92: jmp    cde <getword+0xdd>
    c94: mov    rax,QWORD PTR [rip+0x2013b5]        # 202050 <ccount>
    c9b: add    rax,0x1
    c9f: mov    QWORD PTR [rip+0x2013aa],rax        # 202050 <ccount>
    ca6: cmp    DWORD PTR [rbp-0x8],0xa
    caa: jne    cbe <getword+0xbd>
    cac: mov    rax,QWORD PTR [rip+0x2013a5]        # 202058 <lcount>
    cb3: add    rax,0x1
    cb7: mov    QWORD PTR [rip+0x20139a],rax        # 202058 <lcount>
    cbe: mov    eax,DWORD PTR [rbp-0x8]
    cc1: movzx  eax,al
    cc4: mov    edi,eax
    cc6: call   bd5 <isword>
    ccb: test   eax,eax
    ccd: je     ce6 <getword+0xe5>
    ccf: mov    rax,QWORD PTR [rbp-0x18]
    cd3: mov    rdi,rax
    cd6: call   820 <_IO_getc@plt>
    cdb: mov    DWORD PTR [rbp-0x8],eax
    cde: cmp    DWORD PTR [rbp-0x8],0xffffffff
    ce2: jne    c94 <getword+0x93>
    ce4: jmp    ce7 <getword+0xe6>
    ce6: nop
    ce7: cmp    DWORD PTR [rbp-0x8],0xffffffff
    ceb: setne  al
    cee: movzx  eax,al
    cf1: leave  
    cf2: ret


    cf3: push   rbp
    cf4: mov    rbp,rsp
    cf7: sub    rsp,0x20
    cfb: mov    QWORD PTR [rbp-0x18],rdi
    cff: mov    rax,QWORD PTR [rbp-0x18]
    d03: lea    rsi,[rip+0x1ff]        # f09 <_IO_stdin_used+0x19>
    d0a: mov    rdi,rax
    d0d: call   830 <fopen@plt>
    d12: mov    QWORD PTR [rbp-0x8],rax
    d16: cmp    QWORD PTR [rbp-0x8],0x0
    d1b: jne    d35 <counter+0x42>
    d1d: mov    rax,QWORD PTR [rbp-0x18]
    d21: mov    rsi,rax
    d24: lea    rdi,[rip+0x1e0]        # f0b <_IO_stdin_used+0x1b>
    d2b: mov    eax,0x0
    d30: call   ac6 <perrf>
    d35: mov    QWORD PTR [rip+0x201318],0x0        # 202058 <lcount>
    d40: mov    rax,QWORD PTR [rip+0x201311]        # 202058 <lcount>
    d47: mov    QWORD PTR [rip+0x2012fa],rax        # 202048 <wcount>
    d4e: mov    rax,QWORD PTR [rip+0x2012f3]        # 202048 <wcount>
    d55: mov    QWORD PTR [rip+0x2012f4],rax        # 202050 <ccount>
    d5c: nop
    d5d: mov    rax,QWORD PTR [rbp-0x8]
    d61: mov    rdi,rax
    d64: call   c01 <getword>
    d69: test   eax,eax
    d6b: jne    d5d <counter+0x6a>
    d6d: mov    rax,QWORD PTR [rbp-0x8]
    d71: mov    rdi,rax
    d74: call   7d0 <fclose@plt>
    d79: mov    rcx,QWORD PTR [rip+0x2012d8]        # 202058 <lcount>
    d80: mov    rdx,QWORD PTR [rip+0x2012c1]        # 202048 <wcount>
    d87: mov    rsi,QWORD PTR [rip+0x2012c2]        # 202050 <ccount>
    d8e: mov    rax,QWORD PTR [rbp-0x18]
    d92: mov    rdi,rax
    d95: call   b93 <report>
    d9a: mov    rdx,QWORD PTR [rip+0x20128f]        # 202030 <total_ccount>
    da1: mov    rax,QWORD PTR [rip+0x2012a8]        # 202050 <ccount>
    da8: add    rax,rdx
    dab: mov    QWORD PTR [rip+0x20127e],rax        # 202030 <total_ccount>
    db2: mov    rdx,QWORD PTR [rip+0x20127f]        # 202038 <total_wcount>
    db9: mov    rax,QWORD PTR [rip+0x201288]        # 202048 <wcount>
    dc0: add    rax,rdx
    dc3: mov    QWORD PTR [rip+0x20126e],rax        # 202038 <total_wcount>
    dca: mov    rdx,QWORD PTR [rip+0x20126f]        # 202040 <total_lcount>
    dd1: mov    rax,QWORD PTR [rip+0x201280]        # 202058 <lcount>
    dd8: add    rax,rdx
    ddb: mov    QWORD PTR [rip+0x20125e],rax        # 202040 <total_lcount>
    de2: nop
    de3: leave  
    de4: ret
  ›
end

context wc_objdump
begin
find_theorems fetch


text ‹Note: this theorems takes roughly 15 minutes to prove.›
schematic_goal counter:
  assumes "σI = σ with [setRip 0xcf3]" 
  shows "assumptions ?A  run 0xde4 fetch σI ?σ'"
  apply (subst assms)
  (* you can simply run 

  apply (se_step)+

  However, we have broken it down so that some intermediate states are visible.
  Specifically, one can see the result of running getc, which fetches the next character.
  *)

  apply (repeat_n 8 se_step)
  ― ‹rip = 0x830 (2096), calling fopen›
  apply se_step

  apply (repeat_n 12 se_step)
  ― ‹rip = 0xc01 (3073), calling getword›
  apply se_step


  (* GETWORD BEGIN *)
  apply (repeat_n 13 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 32 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 18 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 18 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 18 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 18 se_step)
  ― ‹rip = 0xc84 (2080), calling getc›
  apply se_step

  apply (repeat_n 9 se_step)
  (* GETWORD END*)

  apply (repeat_n 5 se_step)
  ― ‹rip = 0x7d0 (2000), calling fclose›
  apply se_step

  apply (repeat_n 6 se_step)
  ― ‹rip = 0xb93 (2963), calling report›
  apply se_step

  apply (repeat_n 15 se_step)
  apply (subst eq_def,simp)
  done

thm counter


text ‹
The file opened by "fopen" contains the zero-terminated string "Hello": 0x006E6C6C6548.
After each call of getc, register RAX contains the read characters' ASCII code.

After termination, we can see the contents of the following global variables, set by function getword:
\begin{verbatim}
  Word count:      wcount = 1      (0x202048 = 2105416) 
  Character count: ccount = 5      (0x202050 = 2105424) 
  Line count:      lcount = lcount (0x202058 = 2105432) 
\end{verbatim}

  The totals accumulate to:
\begin{verbatim}
  Word count:      total_wcount = total_wcount + 5 (0x202038 = 2105400) 
  Character count: total_ccount = total_ccount + 5 (0x202030 = 2105392) 
  Line count:      total_lcount = total_lcount     (0x202040 = 2105408) 
\end{verbatim}
›



end
end