Markov Model

Here we demonstrate a Markov model. We start by showing how to create some data and estimate such a model via the markovchain package. You may want to play with it to get a better feel for how it works, as we will use it for comparison later.

                                          library(tidyverse)                                            library(markovchain)                            A                =                matrix(c(.7, .3, .9, .1),                nrow =                2,                byrow =                TRUE)                            dtmcA                =                new(                              'markovchain',                              transitionMatrix =                A,                              states =                c('a',                'b'),                              name =                'MarkovChain A'                            )                            dtmcA                      
          MarkovChain A   A  2 - dimensional discrete Markov Chain defined by the following states:   a, b   The transition matrix  (by rows)  is defined as follows:      a   b a 0.7 0.3 b 0.9 0.1        

                                          transitionProbability(dtmcA,                'b',                'b')                      
          [1] 0.1        
                          initialState                =                c(0,                1)              steps                =                4                            finalState                =                initialState                *                dtmcA^steps                # using power operator                            finalState                      
                      a      b [1,] 0.7488 0.2512        
                      a    b [1,] 0.75 0.25        
                          observed_states                =                sample(c('a',                'b'),                50,                c(.7, .3),                replace =                TRUE)                                            createSequenceMatrix(observed_states)                      
                      a  b a 24 11 b 11  3        
                                          markovchainFit(observed_states)                      
          $estimate MLE Fit   A  2 - dimensional discrete Markov Chain defined by the following states:   a, b   The transition matrix  (by rows)  is defined as follows:            a         b a 0.6857143 0.3142857 b 0.7857143 0.2142857   $standardError           a          b a 0.1399708 0.09476071 b 0.2369018 0.12371791  $confidenceLevel [1] 0.95  $lowerEndpointMatrix           a         b a 0.4113764 0.1285581 b 0.3213953 0.0000000  $upperEndpointMatrix           a         b a 0.9600522 0.5000133 b 1.0000000 0.4567684  $logLikelihood [1] -29.06116        

Data Setup

Data Functions

A recursive function to take a matrix power.

                                  mat_power                    <-                    function(M, N) {                                      if                    (N                    ==                    1)                    return(M)                                                                          M                    %*%                    mat_power(M, N                    -                    1)                  }                              

A function to create a sequence.

                                  create_sequence                    <-                    function(states, len, tmat) {                                      # states: number of states                                                        # len: length of sequence                                                        # tmat: the transition matrix                                                        states_numeric                    =                    length(unique(states))                                      out                    =                    numeric(len)                                      out[1]                    =                    sample(states_numeric,                    1,                    prob =                    colMeans(tmat))                    # initial state                                                                                            for                    (i                    in                    2                    :len){                                      out[i]                    =                    sample(states_numeric,                    1,                    prob =                    tmat[out[i                    -                    1], ])                                      }                                                                          states[out]                  }                              
                                                      # example                                    test_matrix                    =                    matrix(rep(2,                    4),                    nrow =                    2)                  test_matrix                              
                              [,1] [,2] [1,]    2    2 [2,]    2    2            
                                                      mat_power(test_matrix,                    2)                              
                              [,1] [,2] [1,]    8    8 [2,]    8    8            
                                                      # transition matrix                                    A                    =                    matrix(c(.7, .3, .4, .6),                    nrow =                    2,                    byrow =                    TRUE)                                                        mat_power(A,                    10)                              
                              [,1]      [,2] [1,] 0.5714311 0.4285689 [2,] 0.5714252 0.4285748            

Two states Demo

Note that a notably long sequence is needed to get close to recovering the true transition matrix.

                                  A                    =                    matrix(c(.7, .3, .9, .1),                    nrow =                    2,                    byrow =                    TRUE)                  observed_states                    =                    create_sequence(c('a',                    'b'),                    500,                    tmat =                    A)                                                        createSequenceMatrix(observed_states)                              
                              a   b a 288 100 b 101  10            
                                                      prop.table(createSequenceMatrix(observed_states),                    1)                              
                              a          b a 0.7422680 0.25773196 b 0.9099099 0.09009009            
                                  fit                    =                    markovchainFit(observed_states)                  fit                              
              $estimate MLE Fit   A  2 - dimensional discrete Markov Chain defined by the following states:   a, b   The transition matrix  (by rows)  is defined as follows:            a          b a 0.7422680 0.25773196 b 0.9099099 0.09009009   $standardError            a          b a 0.04373856 0.02577320 b 0.09053942 0.02848899  $confidenceLevel [1] 0.95  $lowerEndpointMatrix           a          b a 0.6565420 0.20721741 b 0.7324559 0.03425269  $upperEndpointMatrix           a         b a 0.8279941 0.3082465 b 1.0000000 0.1459275  $logLikelihood [1] -255.0253            
                                                      # log likelihood                                                        sum(createSequenceMatrix(observed_states)                    *                    log(fit$estimate@transitionMatrix))                              
              [1] -255.0253            

Three states demo

                                  A                    =                    matrix(                                      c(.70, .20, .10,                                      .20, .40, .40,                                      .05, .05, .90),                                                        nrow  =                    3,                                                        byrow =                    TRUE                                    )                                    observed_states                    =                    create_sequence(c('a',                    'b',                    'c'),                    500,                    tmat =                    A)                                      createSequenceMatrix(observed_states)                              
                              a  b   c a 92 20  11 b 11 24  22 c 20 13 286            
                                                      prop.table(createSequenceMatrix(observed_states),                    1)                              
                              a          b          c a 0.74796748 0.16260163 0.08943089 b 0.19298246 0.42105263 0.38596491 c 0.06269592 0.04075235 0.89655172            
                                                      markovchainFit(observed_states)                              
              $estimate MLE Fit   A  3 - dimensional discrete Markov Chain defined by the following states:   a, b, c   The transition matrix  (by rows)  is defined as follows:             a          b          c a 0.74796748 0.16260163 0.08943089 b 0.19298246 0.42105263 0.38596491 c 0.06269592 0.04075235 0.89655172   $standardError            a          b          c a 0.07798100 0.03635883 0.02696443 b 0.05818640 0.08594701 0.08228800 c 0.01401923 0.01130267 0.05301421  $confidenceLevel [1] 0.95  $lowerEndpointMatrix            a          b          c a 0.59512750 0.09133962 0.03658157 b 0.07893918 0.25259956 0.22468337 c 0.03521872 0.01859952 0.79264575  $upperEndpointMatrix            a          b         c a 0.90080746 0.23386364 0.1422802 b 0.30702573 0.58950571 0.5472465 c 0.09017313 0.06290518 1.0000000  $logLikelihood [1] -277.6268            

Function

Now we create a function to calculate the (negative) log likelihood.

                              markov_ll                  <-                  function(par, x) {                                  # par should be the c(A) of transition probabilities A                                                  nstates                  =                  length(unique(x))                                                                  # create transition matrix                                                  par                  =                  matrix(par,                  ncol =                  nstates)                                  par                  =                  t(apply(par,                  1,                  function(x) x                  /                  sum(x)))                                                                  # create seq matrix                                                  seq_mat                  =                  table(x[-                  length(x)], x[-                  1])                                                                  # calculate log likelihood                                                  ll                  =                  sum(seq_mat                  *                  log(par))                                                                  -ll                }                          
                              A                  =                  matrix(                                  c(.70, .20, .10,                                  .40, .20, .40,                                  .10, .15, .75),                                  nrow  =                  3,                                  byrow =                  TRUE                                )                                observed_states                  =                  create_sequence(c('a',                  'b',                  'c'),                  1000,                  tmat =                  A)                          

Estimation

Note that initial state values will be transformed to rowsum to one, so the specific initial values don't matter (i.e. they don't have to be probabilities). With the basic optim approach, sometimes log(0) will occur and produce a warning. Can be ignored, or use LFBGS as demonstrated at the end.

                              initpar                  =                  rep(1,                  9)                                fit                  =                  optim(                                  par =                  initpar,                                  fn  =                  markov_ll,                                  x   =                  observed_states,                                  method  =                  'BFGS',                                  control =                  list(reltol =                  1e-12)                )                                                  # get estimates on prob scale                                est_mat                  =                  matrix(fit$par,                  ncol =                  3)                est_mat                  =                  t(apply(est_mat,                  1,                  function(x)  x                  /                  sum(x)))                          

Comparison

Compare with markovchain package.

                              fit_compare                  =                  markovchainFit(observed_states)                                                  # compare log likelihood                                                  c(-fit$value, fit_compare$logLikelihood)                          
            [1] -815.6466 -815.6466          
                                                # compare estimated transition matrix                                                  list(                                  `                  Estimated via optim                  `                  =                  est_mat,                                  `                  markovchain Package                  `                  =                  fit_compare$estimate@transitionMatrix,                                  `                  Analytical Solution                  `                  =                  prop.table(                                  table(observed_states[-                  length(observed_states)], observed_states[-                  1])                                  ,                  1)                )                  %>%                                                  purrr::                  map(round,                  3)                          
            $`Estimated via optim`       [,1]  [,2]  [,3] [1,] 0.674 0.242 0.084 [2,] 0.462 0.126 0.412 [3,] 0.106 0.151 0.743  $`markovchain Package`       a     b     c a 0.674 0.242 0.084 b 0.462 0.126 0.412 c 0.106 0.151 0.743  $`Analytical Solution`             a     b     c   a 0.674 0.242 0.084   b 0.462 0.126 0.412   c 0.106 0.151 0.743          

Visualize.

                                                plot(                                  new(                                  'markovchain',                                  transitionMatrix =                  est_mat,                                  states =                  c('a',                  'b',                  'c'),                                  name =                  'Estimated Markov Chain'                                                  )                )                          

If you don't want warnings due to zeros use constraints (?constrOptim).

                              fit                  =                  optim(                                  par =                  initpar,                                  fn  =                  markov_ll,                                  x   =                  observed_states,                                  method  =                  'L-BFGS',                                  lower   =                  rep(1e-20,                  length(initpar)),                                  control =                  list(pgtol =                  1e-12)                )