inclass_5

devtools::install_github("LukeCe/spflow",force = TRUE) 
Using github PAT from envvar GITHUB_PAT
Downloading GitHub repo LukeCe/spflow@HEAD

── R CMD build ─────────────────────────────────────────────────────────────────
* checking for file 'C:\Users\idrin\AppData\Local\Temp\Rtmp48lcIE\remotes573c72cf387f\LukeCe-spflow-df91367/DESCRIPTION' ... OK
* preparing 'spflow':
* checking DESCRIPTION meta-information ... OK
* installing the package to process help pages
Loading required namespace: spflow
* saving partial Rd database
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
* building 'spflow_0.1.0.9010.tar.gz'
pacman::p_load(tmap, sf, spdep, sp, Matrix, spflow, reshape2, knitr, tidyverse)
mpsz <- st_read(dsn = "data/geospatial",
                layer = "MPSZ-2019") %>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source 
  `C:\kekekay\ISSS624\In-class_Ex5\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
Note

abc

intersect to see how many busstops in each planning subzone

filter 0 values - mpsz-busstop

prepare spatial weights

use k-means to find 3 nearby neighbors mpsz_nb - summary of neighbour list

take note -preparing the spatial weights - exclude those messy maps

#| eval:false - for those time consuming computation, hide it

Getting started

mpsz_nb <- read_rds("data/rds/mpsz_nb.rds") 
mpsz_flow <- read_rds("data/rds/mpsz_flow.rds") 
mpsz_var <- read_rds("data/rds/mpsz_var.rds")

origin-sz should be grid , must be char

the 0 values can be retained coz it jusr measn there is no flow between 2 bus stops

mpsz_net <- spflow_network(
  id_net = "sg",
  node_neighborhood = 
    nb2mat(mpsz_nb$by_contiguity),
  node_data = mpsz_var,
  node_key_column = "SZ_CODE")

mpsz_net
Spatial network nodes with id: sg
--------------------------------------------------
Number of nodes: 313
Average number of links per node: 6.077
Density of the neighborhood matrix: 1.94% (non-zero connections)

Data on nodes:
                SZ_NAME SZ_CODE BUSSTOP_COUNT AGE7_12 AGE13_24 AGE25_64
1      INSTITUTION HILL  RVSZ05             2     330      360     2260
2        ROBERTSON QUAY  SRSZ01            10     320      350     2200
3          FORT CANNING  MUSZ02             6       0       10       30
4      MARINA EAST (MP)  MPSZ05             2       0        0        0
5               SENTOSA  SISZ01             1     200      260     1440
6        CITY TERMINALS  BMSZ17            10       0        0        0
---                 ---     ---           ---     ---      ---      ---
308            NEE SOON  YSSZ07            12      90      140      590
309       UPPER THOMSON  BSSZ01            47    1590     3660    15980
310          SHANGRI-LA  AMSZ05            12     810     1920     9650
311          TOWNSVILLE  AMSZ04             9     980     2000    11320
312           MARYMOUNT  BSSZ02            25    1610     4060    16860
313 TUAS VIEW EXTENSION  TSSZ06            11       0        0        0
    SCHOOL_COUNT BUSINESS_COUNT RETAILS_COUNT FINSERV_COUNT ENTERTN_COUNT
1              1              6            26             3             0
2              0              4           207            18             6
3              0              7            17             0             3
4              0              0             0             0             0
5              0              1            84            29             2
6              0             11            14             4             0
---          ---            ---           ---           ---           ---
308            0              0             7             0             0
309            3             21           305            30             0
310            3              0            53             9             0
311            1              0            83            11             0
312            3             19           135             8             0
313            0             53             3             1             0
    FB_COUNT LR_COUNT COORD_X COORD_Y
1          4        3  103.84    1.29
2         38       11  103.84    1.29
3          4        7  103.85    1.29
4          0        0  103.88    1.29
5         38       20  103.83    1.25
6         15        0  103.85    1.26
---      ---      ---     ---     ---
308        0        0  103.81     1.4
309        5       11  103.83    1.36
310        0        0  103.84    1.37
311        1        1  103.85    1.36
312        3       11  103.84    1.35
313        0        0  103.61    1.26

spflow_network : neighbour list ,which matrix to use(Kmeans or contiguity)

id_net: re-assign ID - new

so that we can have a table includes all the areas and their neighbours

adding in flow w the net

take note of the sequence of orig_key_col and des_key_col

mpsz_net_pairs <- spflow_network_pair(
  id_orig_net = "sg",
  id_dest_net = "sg",
  pair_data = mpsz_flow,
  orig_key_column = "ORIGIN_SZ",
  dest_key_column = "DESTIN_SZ")

mpsz_net_pairs
Spatial network pair with id: sg_sg
--------------------------------------------------
Origin network id: sg (with 313 nodes)
Destination network id: sg (with 313 nodes)
Number of pairs: 97969
Completeness of pairs: 100.00% (97969/97969)

Data on node-pairs:
      DESTIN_SZ ORIGIN_SZ DISTANCE TRIPS
1        RVSZ05    RVSZ05        0    67
314      SRSZ01    RVSZ05   305.74   251
627      MUSZ02    RVSZ05   951.83     0
940      MPSZ05    RVSZ05  5254.07     0
1253     SISZ01    RVSZ05     4975     0
1566     BMSZ17    RVSZ05  3176.16     0
---         ---       ---      ---   ---
96404    YSSZ07    TSSZ06 26972.97     0
96717    BSSZ01    TSSZ06 25582.48     0
97030    AMSZ05    TSSZ06 26714.79     0
97343    AMSZ04    TSSZ06 27572.74     0
97656    BSSZ02    TSSZ06  26681.7     0
97969    TSSZ06    TSSZ06        0   270

combine mpsz_net and net_pairs to get mpsz_multi_net

mpsz_multi_net <- spflow_network_multi(mpsz_net, mpsz_net_pairs)

mpsz_multi_net
Collection of spatial network nodes and pairs
--------------------------------------------------
Contains 1 spatial network nodes  
    With id :  sg
Contains 1 spatial network pairs  
    With id :  sg_sg

Availability of origin-destination pair information:

 ID_ORIG_NET ID_DEST_NET ID_NET_PAIR COMPLETENESS     C_PAIRS  C_ORIG  C_DEST
          sg          sg       sg_sg      100.00% 97969/97969 313/313 313/313

since we only have 313 geo areas after we removed all the nobusstop value, so total number of flow should be 313^2 - check this if not will have error msg

before regression model, always rmb to check multicollinearity

pair_cor() - create correlation matrix (instead 1 by 1)

cor_image() -

for those highly correlated vars, either exclude or combine them tgt if they have similiar

cor_formula <- log(1 + TRIPS) ~     # dependant var - usually is the flow 
  BUSSTOP_COUNT +
  AGE7_12 +
  AGE13_24 +
  AGE25_64 +
  SCHOOL_COUNT +
  BUSINESS_COUNT +
  RETAILS_COUNT +
  FINSERV_COUNT +
  P_(log(DISTANCE + 1))

cor_mat <- pair_cor(
  mpsz_multi_net,
  spflow_formula = cor_formula,
  add_lags_x = FALSE)  # just to test the input var, no need the lag(from continguity)

colnames(cor_mat) <- paste0(
  substr(
    colnames(cor_mat),1,3),"...")

  cor_image(cor_mat)

base_model <- spflow(
  spflow_formula = log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT + 
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1)),
  spflow_networks = mpsz_multi_net)
  
base_model
--------------------------------------------------
Spatial interaction model estimated by: MLE  
Spatial correlation structure: SDM (model_9)
Dependent variable: log(1 + TRIPS)

--------------------------------------------------
Coefficients:
                          est     sd   t.stat  p.val
rho_d                   0.680  0.004  192.554  0.000
rho_o                   0.678  0.004  187.733  0.000
rho_w                  -0.396  0.006  -65.592  0.000
(Intercept)             0.410  0.065    6.267  0.000
(Intra)                 1.312  0.081   16.262  0.000
D_SCHOOL_COUNT          0.017  0.002    7.885  0.000
D_SCHOOL_COUNT.lag1     0.002  0.004    0.552  0.581
D_BUSINESS_COUNT        0.000  0.000    3.015  0.003
D_BUSINESS_COUNT.lag1   0.000  0.000   -0.249  0.804
D_RETAILS_COUNT         0.000  0.000   -0.306  0.759
D_RETAILS_COUNT.lag1    0.000  0.000    0.152  0.880
D_FINSERV_COUNT         0.002  0.000    6.787  0.000
D_FINSERV_COUNT.lag1   -0.002  0.001   -3.767  0.000
O_BUSSTOP_COUNT         0.002  0.000    6.807  0.000
O_BUSSTOP_COUNT.lag1   -0.001  0.000   -2.364  0.018
O_AGE25_64              0.000  0.000    7.336  0.000
O_AGE25_64.lag1         0.000  0.000   -2.797  0.005
P_log(DISTANCE + 1)    -0.050  0.007   -6.794  0.000

--------------------------------------------------
R2_corr: 0.6942947  
Observations: 97969  
Model coherence: Validated
old_par = par(mfrow = c(1,3),
              mar = c(2,2,2,2))

spflow_moran_plots(base_model)

par(old_par)

corr_residual <- pair_cor(base_model)
colnames(corr_residual) <- substr(colnames(corr_residual),1,3)
cor_image(corr_residual)

spflow_formula <- log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT + 
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1))

model_control <- spflow_control(
  estimation_method = "mle",
  model = "model_1")
  
mle_model1 <- spflow(
  spflow_formula,
  spflow_networks = mpsz_multi_net,
  estimation_control = model_control)
spflow_formula <- log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT + 
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1))

model_control <- spflow_control(
  estimation_method = "mle",
  model = "model_8")
  
mle_model8 <- spflow(
  spflow_formula,
  spflow_networks = mpsz_multi_net,
  estimation_control = model_control)

mle_model8
--------------------------------------------------
Spatial interaction model estimated by: MLE  
Spatial correlation structure: SDM (model_8)
Dependent variable: log(1 + TRIPS)

--------------------------------------------------
Coefficients:
                          est     sd    t.stat  p.val
rho_d                   0.689  0.003   196.832  0.000
rho_o                   0.687  0.004   192.214  0.000
rho_w                  -0.473  0.003  -142.469  0.000
(Intercept)             1.086  0.049    22.275  0.000
(Intra)                 0.840  0.075    11.255  0.000
D_SCHOOL_COUNT          0.019  0.002     8.896  0.000
D_SCHOOL_COUNT.lag1     0.019  0.004     5.130  0.000
D_BUSINESS_COUNT        0.000  0.000     3.328  0.001
D_BUSINESS_COUNT.lag1   0.000  0.000     1.664  0.096
D_RETAILS_COUNT         0.000  0.000    -0.414  0.679
D_RETAILS_COUNT.lag1    0.000  0.000    -0.171  0.864
D_FINSERV_COUNT         0.002  0.000     6.150  0.000
D_FINSERV_COUNT.lag1   -0.003  0.001    -4.601  0.000
O_BUSSTOP_COUNT         0.003  0.000     7.676  0.000
O_BUSSTOP_COUNT.lag1    0.000  0.000     0.552  0.581
O_AGE25_64              0.000  0.000     6.870  0.000
O_AGE25_64.lag1         0.000  0.000    -0.462  0.644
P_log(DISTANCE + 1)    -0.125  0.005   -22.865  0.000

--------------------------------------------------
R2_corr: 0.6965974  
Observations: 97969  
Model coherence: Validated

in this model libray, total have 3 models? so can manually change it from 1 2 8?

check documentation again