# Theory Stochastic_Matrix

section ‹Stochastic Matrices›

text ‹We define a type for stochastic vectors and right-stochastic matrices,
i.e., non-negative real vectors and matrices where the sum of each column is 1.
For this type we define a matrix-vector multplication, i.e., we show that
$A * v$ is a stochastic vector, if $A$ is a right-stochastic matrix and $v$
a stochastic vector.
›
theory Stochastic_Matrix
imports Perron_Frobenius.Perron_Frobenius_Aux (* for non_neg_mat *)
begin

definition non_neg_vec :: "'a :: linordered_idom ^ 'n ⇒ bool" where
"non_neg_vec A ≡ (∀ i. A $i ≥ 0)" definition stoch_vec :: "'a :: comm_ring_1 ^ 'n ⇒ bool" where "stoch_vec v = (sum (λ i. v$ i) UNIV = 1)"

definition right_stoch_mat :: "'a :: comm_ring_1 ^ 'n ^ 'm ⇒ bool" where
"right_stoch_mat a = (∀ j. stoch_vec (column j a))"

typedef 'i st_mat = "{ a :: real ^ 'i ^ 'i. non_neg_mat a ∧ right_stoch_mat a}"
morphisms st_mat Abs_st_mat
by (rule exI[of _ "χ i j. if i = undefined then 1 else 0"],
auto simp: non_neg_mat_def elements_mat_h_def right_stoch_mat_def stoch_vec_def column_def)

setup_lifting type_definition_st_mat

typedef 'i st_vec = "{ v :: real ^ 'i. non_neg_vec v ∧ stoch_vec v}"
morphisms st_vec Abs_st_vec
by (rule exI[of _ "χ i. if i = undefined then 1 else 0"],
auto simp: non_neg_vec_def stoch_vec_def)

setup_lifting type_definition_st_vec

lift_definition transition_vec_of_st_mat :: "'i :: finite st_mat ⇒ 'i ⇒ 'i st_vec"
is "λ a i. column i a"
by (auto simp: right_stoch_mat_def non_neg_mat_def stoch_vec_def
elements_mat_h_def non_neg_vec_def column_def)

lemma non_neg_vec_st_vec: "non_neg_vec (st_vec v)"
by (transfer, auto)

lemma non_neg_mat_mult_non_neg_vec: "non_neg_mat a ⟹ non_neg_vec v ⟹
non_neg_vec (a *v v)"
unfolding non_neg_mat_def non_neg_vec_def  elements_mat_h_def
by (auto simp: matrix_vector_mult_def intro!: sum_nonneg)

lemma right_stoch_mat_mult_stoch_vec: assumes "right_stoch_mat a" and "stoch_vec v"
shows "stoch_vec (a *v v)"
proof -
note * = assms[unfolded right_stoch_mat_def column_def stoch_vec_def, simplified]
have "stoch_vec (a *v v) = ((∑i∈UNIV. ∑j∈UNIV. a $i$ j * v $j) = 1)" (is "_ = (?sum = 1)") unfolding stoch_vec_def matrix_vector_mult_def by auto also have "?sum = (∑j∈UNIV. ∑i∈UNIV. a$ i $j * v$ j)"
by (rule sum.swap)
also have "… = (∑j∈UNIV. v $j)" by (rule sum.cong[OF refl], insert *, auto simp: sum_distrib_right[symmetric]) also have "… = 1" using * by auto finally show ?thesis by simp qed lift_definition st_mat_times_st_vec :: "'i :: finite st_mat ⇒ 'i st_vec ⇒ 'i st_vec" (infixl "*st" 70) is "(*v)" using right_stoch_mat_mult_stoch_vec non_neg_mat_mult_non_neg_vec by auto lift_definition to_st_vec :: "real ^ 'i ⇒ 'i st_vec" is "λ x. if stoch_vec x ∧ non_neg_vec x then x else (χ i. if i = undefined then 1 else 0)" by (auto simp: non_neg_vec_def stoch_vec_def) lemma right_stoch_mat_st_mat: "right_stoch_mat (st_mat A)" by transfer auto lemma non_neg_mat_st_mat: "non_neg_mat (st_mat A)" by (transfer, auto simp: non_neg_mat_def elements_mat_h_def) lemma st_mat_mult_st_vec: "st_mat A *v st_vec X = st_vec (A *st X)" by (transfer, auto) lemma st_vec_nonneg[simp]: "st_vec x$ i ≥ 0"
using non_neg_vec_st_vec[of x] by (auto simp: non_neg_vec_def)

lemma st_mat_nonneg[simp]: "st_mat x $i$h j ≥ 0"
using non_neg_mat_st_mat[of x] by (auto simp: non_neg_mat_def elements_mat_h_def)

end

# Theory Stochastic_Vector_PMF

section ‹Stochastic Vectors and Probability Mass Functions›

text ‹We prove that over a finite type, stochastic vectors and probability
mass functions are essentially the same thing: one can convert between both
representations.›

theory Stochastic_Vector_PMF
imports Stochastic_Matrix
"HOL-Probability.Probability_Mass_Function"
begin

lemma sigma_algebra_UNIV_finite[simp]: "sigma_algebra (UNIV :: 'a :: finite set) UNIV"
proof (unfold_locales, goal_cases)
case (4 a b)
show ?case by (intro exI[of _ "{a-b}"], auto)
qed auto

definition measure_of_st_vec' :: "'a st_vec ⇒ 'a :: finite set ⇒ ennreal" where
"measure_of_st_vec' x I = sum (λ i. st_vec x $i) I" lemma positive_measure_of_st_vec'[simp]: "positive A (measure_of_st_vec' x)" unfolding measure_of_st_vec'_def positive_def by auto lemma measure_space_measure_of_st_vec': "measure_space UNIV UNIV (measure_of_st_vec' x)" unfolding measure_space_def proof (simp, simp add: countably_additive_def measure_of_st_vec'_def disjoint_family_on_def, clarify, goal_cases) case (1 A) let ?x = "st_vec x" define N where "N = {i. A i ≠ {}}" let ?A = "⋃(A  N)" have "finite B ⟹ B ⊆ ?A ⟹ ∃ K. finite K ∧ K ⊆ N ∧ B ⊆ ⋃(A  K)" for B proof (induct rule: finite_induct) case (insert b B) from insert(3-4) obtain K where K: "finite K" "K ⊆ N" "B ⊆ ⋃(A  K)" by auto from insert(4) obtain a where a: "a ∈ N" "b ∈ A a" by auto show ?case by (intro exI[of _ "insert a K"], insert a K, auto) qed auto from this[OF _ subset_refl] obtain K where *: "finite K" "K ⊆ N" "⋃(A  K) = ?A" by auto { assume "K ⊂ N" then obtain n where **: "n ∈ N" "n ∉ K" by auto from this[unfolded N_def] obtain a where a: "a ∈ A n" by auto with ** * obtain k where ***: "k ∈ K" "a ∈ A k" by force from ** *** have "n ≠ k" by auto from 1[rule_format, OF this] have "A n ∩ A k = {}" by auto with *** a have False by auto } with * have fin: "finite N" by auto have id: "⋃(A  UNIV) = ?A" unfolding N_def by auto show "(∑i. ennreal (sum (($h) ?x) (A i))) =
ennreal (sum (($h) ?x) (⋃(A  UNIV)))" unfolding id apply (subst suminf_finite[OF fin], (auto simp: N_def)) apply (subst sum_ennreal, (insert non_neg_vec_st_vec[of x], auto simp: non_neg_vec_def intro!: sum_nonneg)) apply (rule arg_cong[of _ _ ennreal]) apply (subst sum.UNION_disjoint[OF fin], insert 1, auto) done qed context begin setup_lifting type_definition_measure lift_definition measure_of_st_vec :: "'a st_vec ⇒ 'a :: finite measure" is "λ x. (UNIV, UNIV, measure_of_st_vec' x)" by (auto simp: measure_space_measure_of_st_vec') lemma sets_measure_of_st_vec[simp]: "sets (measure_of_st_vec x) = UNIV" unfolding sets_def by (transfer, auto) lemma space_measure_of_st_vec[simp]: "space (measure_of_st_vec x) = UNIV" unfolding space_def by (transfer, auto) lemma emeasure_measure_of_st_vec[simp]: "emeasure (measure_of_st_vec x) I = sum (λ i. st_vec x$ i) I"
unfolding emeasure_def by (transfer', auto simp: measure_of_st_vec'_def)

lemma prob_space_measure_of_st_vec: "prob_space (measure_of_st_vec x)"
by (unfold_locales, intro exI[of _ UNIV], auto, transfer, auto simp: stoch_vec_def)
end

lift_definition st_vec_of_pmf :: "'i :: finite pmf ⇒ 'i st_vec" is
"λ pmF. vec_lambda (pmf pmF)"
proof (intro conjI, goal_cases)
case (2 pmF)
show "stoch_vec (vec_lambda (pmf pmF))"
unfolding stoch_vec_def
apply auto
apply (unfold measure_pmf_UNIV[of pmF, symmetric])
by (simp add: measure_pmf_conv_infsetsum)
qed (auto simp: non_neg_vec_def stoch_vec_def)

context pmf_as_measure
begin
lift_definition pmf_of_st_vec :: "'a :: finite st_vec ⇒ 'a pmf" is measure_of_st_vec
proof (goal_cases)
case (1 x)
show ?case
by (auto simp: prob_space_measure_of_st_vec measure_def)
(rule AE_I[where N = "{i. st_vec x $i = 0}"], auto) qed lemma st_vec_st_vec_of_pmf[simp]: "st_vec (st_vec_of_pmf x)$ i = pmf x i"
by (simp add: st_vec_of_pmf.rep_eq)

lemma pmf_pmf_of_st_vec[simp]: "pmf (pmf_of_st_vec x) i = st_vec x $i" by (transfer, auto simp: measure_def) lemma st_vec_of_pmf_pmf_of_st_vec[simp]: "st_vec_of_pmf (pmf_of_st_vec x) = x" proof - have "st_vec (st_vec_of_pmf (pmf_of_st_vec x)) = st_vec x" unfolding vec_eq_iff by auto thus ?thesis using st_vec_inject by blast qed lemma pmf_of_st_vec_inj: "(pmf_of_st_vec x = pmf_of_st_vec y) = (x = y)" by (metis st_vec_of_pmf_pmf_of_st_vec) end end # Theory Stochastic_Matrix_Markov_Models section ‹Stochastic Matrices and Markov Models› text ‹We interpret stochastic matrices as Markov chain with discrete time and finite state and prove that the bind-operation on probability mass functions is precisely matrix-vector multiplication. As a consequence, the notion of stationary distribution is equivalent to being an eigenvector with eigenvalue 1.› theory Stochastic_Matrix_Markov_Models imports Markov_Models.Classifying_Markov_Chain_States Stochastic_Vector_PMF begin definition transition_of_st_mat :: "'i st_mat ⇒ 'i :: finite ⇒ 'i pmf" where "transition_of_st_mat a i = pmf_as_measure.pmf_of_st_vec (transition_vec_of_st_mat a i)" lemma st_vec_transition_vec_of_st_mat[simp]: "st_vec (transition_vec_of_st_mat A a)$ i = st_mat A $i$ a"
by (transfer, auto simp: column_def)

locale transition_matrix = pmf_as_measure +
fixes A :: "'i :: finite st_mat"
begin
sublocale MC_syntax "transition_of_st_mat A" .

lemma measure_pmf_of_st_vec[simp]: "measure_pmf (pmf_of_st_vec x) = measure_of_st_vec x"
by (rule pmf_as_measure.pmf_of_st_vec.rep_eq)

lemma pmf_transition_of_st_mat[simp]: "pmf (transition_of_st_mat A a) i = st_mat A $i$ a"
unfolding transition_of_st_mat_def
by (transfer, auto simp: measure_def)

lemma bind_is_matrix_vector_mult: "(bind_pmf x (transition_of_st_mat A)) =
pmf_as_measure.pmf_of_st_vec (A *st st_vec_of_pmf x)"
proof (rule pmf_eqI, goal_cases)
case (1 i)
define X where "X = st_vec_of_pmf x"
have "pmf (bind_pmf x (transition_of_st_mat A)) i =
(∑a∈UNIV. pmf x a *⇩R pmf (transition_of_st_mat A a) i)"
unfolding pmf_bind by (subst integral_measure_pmf[of UNIV], auto)
also have "… = (∑a∈UNIV. st_mat A $i$ a * st_vec X $a)" by (rule sum.cong[OF refl], auto simp: X_def) also have "… = (st_mat A *v st_vec X)$ i"
unfolding matrix_vector_mult_def by auto
from arg_cong[OF id, of "λ x. x $i"] carr i have "?c (v$ i) = a * ?c (w $i)" by auto with i(2) have "a ∈ Reals" by (metis Reals_cnj_iff complex_cnj_complex_of_real complex_cnj_mult mult_cancel_right mult_eq_0_iff of_real_hom.hom_zero of_real_hom.injectivity) then obtain b where a: "a = ?c b" by (rule Reals_cases) from id[unfolded a] have "map_vec ?c v = map_vec ?c (b ⋅⇩v w)" by auto hence "v = b ⋅⇩v w" by (rule of_real_hom.vec_hom_inj) thus ?thesis by auto qed text ‹Finally, the statement converted to HMA-world.› lemma unique_eigen_vector_real: assumes ord: "order lam (charpoly A :: real poly) = 1" and ev: "eigen_vector A v lam" "eigen_vector A w lam" shows "∃ a. v = a *s w" using assms proof (transfer, goal_cases) case (1 lam A v w) show ?case by (rule unique_eigenvector_real[OF 1(1-2,4,6)]) qed end  # Theory Stochastic_Matrix_Perron_Frobenius section ‹Stochastic Matrices and the Perron--Frobenius Theorem› text ‹Since a stationary distribution corresponds to a non-negative real eigenvector of the stochastic matrix, we can apply the Perron--Frobenius theorem. In this way we easily derive that every stochastic matrix has a stationary distribution, and moreover that this distribution is unique, if the matrix is irreducible, i.e., if the graph of the matrix is strongly connected.› theory Stochastic_Matrix_Perron_Frobenius imports Perron_Frobenius.Perron_Frobenius_Irreducible Stochastic_Matrix_Markov_Models Eigenspace begin hide_const (open) Coset.order lemma pf_nonneg_mat_st_mat: "pf_nonneg_mat (st_mat A)" by (unfold_locales, auto simp: non_neg_mat_st_mat) lemma stoch_non_neg_vec_norm1: assumes "stoch_vec (v :: real ^ 'n)" "non_neg_vec v" shows "norm1 v = 1" unfolding assms(1)[unfolded stoch_vec_def, symmetric] norm1_def by (rule sum.cong, insert assms(2)[unfolded non_neg_vec_def], auto) lemma stationary_distribution_exists: "∃ v. A *st v = v" proof - let ?A = "st_mat A" let ?c = "complex_of_real" let ?B = "χ i j. ?c (?A$ i $j)" have "real_non_neg_mat ?B" using non_neg_mat_st_mat[of A] unfolding real_non_neg_mat_def elements_mat_h_def non_neg_mat_def by auto from Perron_Frobenius.perron_frobenius_both[OF this] obtain v a where ev: "eigen_vector ?B v (?c a)" and nn: "real_non_neg_vec v" and a: "a = HMA_Connect.spectral_radius ?B" by auto from spectral_radius_ev[of ?B, folded a] have a0: "a ≥ 0" by auto define w where "w = (χ i. Re (v$ i))"
from nn have vw: "v = (χ i. ?c (w $i))" unfolding real_non_neg_vec_def w_def by (auto simp: vec_elements_h_def) from ev[unfolded eigen_vector_def] have v0: "v ≠ 0" and ev: "?B *v v = ?c a *s v" by auto from v0 have w0: "w ≠ 0" unfolding vw by (auto simp: Finite_Cartesian_Product.vec_eq_iff) { fix i from ev have "Re ((?B *v v)$ i) = Re ((?c a *s v) $i)" by simp also have "Re ((?c a *s v)$ i) = (a *s w) $i" unfolding vw by simp also have "Re ((?B *v v)$ i) = (?A *v w) $i" unfolding vw by (simp add: matrix_vector_mult_def) also note calculation } hence ev: "?A *v w = a *s w" by (auto simp: Finite_Cartesian_Product.vec_eq_iff) from nn have nn: "non_neg_vec w" unfolding vw by (auto simp: real_non_neg_vec_def non_neg_vec_def vec_elements_h_def) (* we now mainly have to prove that a = 1 *) let ?n = "norm1 w" from w0 have n0: "?n ≠ 0" by auto hence n_pos: "?n > 0" using norm1_ge_0[of w] by linarith define u where "u = inverse ?n *s w" have nn: "non_neg_vec u" using nn n_pos unfolding u_def non_neg_vec_def by auto have nu: "norm1 u = 1" unfolding u_def scalar_mult_eq_scaleR norm1_scaleR using n_pos by (auto simp: field_simps) have 1: "stoch_vec u" unfolding stoch_vec_def nu[symmetric] norm1_def by (rule sum.cong, insert nn[unfolded non_neg_vec_def], auto) from arg_cong[OF ev, of "λ x. inverse ?n *s x"] have ev: "?A *v u = a *s u" unfolding u_def by (auto simp: ac_simps vector_smult_distrib matrix_vect_scaleR) from right_stoch_mat_mult_stoch_vec[OF right_stoch_mat_st_mat[of A] 1, unfolded ev] have st: "stoch_vec (a *s u)" . from non_neg_mat_mult_non_neg_vec[OF non_neg_mat_st_mat[of A] nn, unfolded ev] have nn': "non_neg_vec (a *s u)" . from stoch_non_neg_vec_norm1[OF st nn', unfolded scalar_mult_eq_scaleR norm1_scaleR nu] a0 have "a = 1" by auto with ev st have ev: "?A *v u = u" and st: "stoch_vec u" by auto show ?thesis using ev st nn by (intro exI[of _ "to_st_vec u"], transfer, auto) qed lemma stationary_distribution_unique: assumes "fixed_mat.irreducible (st_mat A)" shows "∃! v. A *st v = v" proof - from stationary_distribution_exists obtain v where ev: "A *st v = v" by auto show ?thesis proof (intro ex1I, rule ev) fix w assume "A *st w = w" thus "w = v" using ev assms proof (transfer, goal_cases) case (1 A w v) interpret perron_frobenius A by (unfold_locales, insert 1, auto) from 1 have *: "eigen_vector A v 1" "le_vec 0 v" "eigen_vector A w 1" by (auto simp: eigen_vector_def stoch_vec_def non_neg_vec_def) from nonnegative_eigenvector_has_ev_sr[OF *(1-2)] have sr1: "sr = 1" by auto from multiplicity_sr_1[unfolded sr1] have "order 1 (charpoly A) = 1" . from unique_eigen_vector_real[OF this *(1,3)] obtain a where vw: "v = a *s w" by auto from 1(2,4)[unfolded stoch_vec_def] have "sum (($h) v) UNIV = sum (($h) w) UNIV" by auto also have "sum (($h) v) UNIV = a * sum ((\$h) w) UNIV" unfolding vw
by (auto simp: sum_distrib_left)
finally have "a = 1" using 1(2)[unfolded stoch_vec_def] by auto
with vw show "v = w" by auto
qed
qed
qed

text ‹Let us now convert the stationary distribution results from matrices to Markov chains.›

context transition_matrix
begin

lemma stationary_distribution_exists:
"∃ x. stationary_distribution (pmf_of_st_vec x)"
proof -
from stationary_distribution_exists obtain x where ev: "A *st x = x" by auto
show ?thesis
by (intro exI[of _ x], unfold stationary_distribution_pmf_of_st_vec,
qed

lemma stationary_distribution_unique: assumes "fixed_mat.irreducible (st_mat A)"
shows "∃! N. stationary_distribution N"
proof -
from stationary_distribution_exists obtain x where
st: "stationary_distribution (pmf_of_st_vec x)" by blast
show ?thesis
proof (rule ex1I, rule st)
fix N
assume st': "stationary_distribution N"
from stationary_distribution_implies_pmf_of_st_vec[OF this] obtain y where
N: "N = pmf_of_st_vec y" by auto
from st'[unfolded N] st
have "A *st x = x" "A *st y = y" unfolding stationary_distribution_pmf_of_st_vec by auto
from stationary_distribution_unique[OF assms] this have "x = y" by auto
with N show "N = pmf_of_st_vec x" by auto
qed
qed
end
end