library(readxl)
library(tools)
library(knitr)
library(captioner)
library(foreign)
library(ggplot2)
library(reshape2)
library(ggrepel)
library(stringr)
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
options(stringsAsFactors = FALSE)

projectdir <- "/projects/yakima_equity"
source(file.path (projectdir, "tools/setup.R"))

# connect to PostgreSQL database
source("http://gis.washington.edu/phurvitz/R/functions.R")
if(!exists("password")){
    password <- scan("/home/phurvitz/.pgpasswd", what = 'character')
}
us_census <- connect(dbname = "us_census")
sqldf.pg.options(dbname = 'us_census')

# where to store images
imagedir <- "/projects/yakima_equity/www/images"

ggsavewidth <- 5
ggsaveheight <- 4
# lists of map files for inclusion as graphics
mymaps <-sort(list.files("/projects/yakima_equity/www/maps", pattern = ".png", full.names = TRUE))
tractmaps <- grep("tract", mymaps, value = TRUE)
parkmaps <- grep("park", mymaps, value = TRUE)
bemaps <- grep("propval|yearbuilt", mymaps, value = TRUE)
othermaps <- grep("tract|park|propval|yearbuilt", mymaps, value = TRUE, invert = TRUE)
#>>>>>>>>>>>>>>>>>>>>>>> need to rerun wrapper_census if any of the census variables are changed

# table captions
table_nums <- captioner::captioner(prefix = "Table")
t.ref <- function(x) {
    stringr::str_extract(table_nums(x), "[^:]*")
}

fig_nums <- captioner::captioner(prefix = "Figure")
f.ref <- function(x) {
    stringr::str_extract(fig_nums(x), "[^:]*")
}

# table exists function
dbTableExists <- function(conn = dbconn, schema, tablename){
cmd <- sprintf("select count(*) = 1 as texists from information_schema.tables where table_schema = '%s' and table_name = '%s';", schema, tablename)
dbGetQuery(conn = dbconn, cmd)$texists
}

# database table schema
tschema <- function(conn, tname){
    dbGetQuery(conn = conn, statement = sprintf("select column_name, data_type from information_schema.columns where table_name = '%s';", tname))
}

# years to use in analysis
years <- c(1980, 1990, 2000, 2010, 2015)

# wrapper to prepare census data, individual functions below.
wrapper_census <- function(years=years){
    go_nhgis_years()
    sf3_2000()
    acs(2010)
    acs(2015)
    sapply(years, awf)
}

# NHGIS data, from https://www.nhgis.org/
# included 1970, 1980, 1990 data (though 1970 had no data for Yakima County)
nhgis_data <- function(){
    x <- read.csv("/projects/census/nhgis/nhgis0004_csv/nhgis0004_ts_nominal_tract.csv", as.is = TRUE, stringsAsFactors = FALSE)
    dbWriteTable(us_census, c("nhgis","nhgis0004_ts_nominal_tract"), x, row.names=F)
    colnames(x) <- tolower(colnames(x))
    x <- read.csv("/projects/census/nhgis/nhgis0005_csv/nhgis0005_ts_nominal_tract.csv", as.is = TRUE, stringsAsFactors = FALSE)
    dbWriteTable(us_census, c("nhgis","nhgis0005_ts_nominal_tract"), x, row.names=F)
    colnames(x) <- tolower(colnames(x))
    x <- read.csv("/projects/census/nhgis/nhgis0007_csv/nhgis0007_ts_nominal_tract.csv", as.is = TRUE, stringsAsFactors = FALSE)
    colnames(x) <- tolower(colnames(x))
    dbWriteTable(us_census, c("nhgis","nhgis0007_ts_nominal_tract"), x, row.names=F, overwrite = TRUE)
}

# SRID for Esri Albers
# INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) values ( 9102003, 'esri', 102003, '+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ', 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic",GEOGCS["GCS_North_American_1983",DATUM["North_American_Datum_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["False_Easting",0],PARAMETER["False_Northing",0],PARAMETER["longitude_of_center",-96],PARAMETER["Standard_Parallel_1",29.5],PARAMETER["Standard_Parallel_2",45.5],PARAMETER["latitude_of_center",37.5],UNIT["Meter",1],AUTHORITY["EPSG","102003"]]');

# NHGIS shapefiles
# shp2pgsql -s 102003:2286 -I -g the_geom_2286 -N skip US_tract_1970_conflated.shp nhgis.us_tract_1970_conflated | psql us_census
# shp2pgsql -s 102003:2286 -I -g the_geom_2286 -N skip US_tract_1980_conflated.shp nhgis.us_tract_1980_conflated | psql us_census
# shp2pgsql -s 102003:2286 -I -g the_geom_2286 -N skip US_tract_1990_conflated.shp nhgis.us_tract_1990_conflated | psql us_census


# NHGIS data processing for a year
nhgis_year <- function(year){
    message("nhgis ", year)
    
    # an SQL statement with placeholders for substituting year strings
    # creates a materialized view named yakima_equity.yNNNN where NNNN is the year string
    sql <- "drop materialized view if exists yakima_equity.yxxxYEARxxx cascade; create materialized view yakima_equity.yxxxYEARxxx as 
    with x1 as 
    (select 
    gjoinxxxYEARxxx
    , countyfp
    , tract
    , namexxxYEARxxx as name
    --av0 = total pop
    , av0aaxxxYEARxxx as total_pop
    --av1 = persons by sex
    , av1aaxxxYEARxxx as sex_male
    , av1abxxxYEARxxx as sex_female
    --b57 = persons by age
    --<18
    , b57aaxxxYEARxxx + b57abxxxYEARxxx + b57acxxxYEARxxx + b57adxxxYEARxxx as age_lt_18
    -->=65
    , av0aaxxxYEARxxx - (b57apxxxYEARxxx + b57aqxxxYEARxxx + b57arxxxYEARxxx) as age_lt_65
    , b57apxxxYEARxxx + b57aqxxxYEARxxx + b57arxxxYEARxxx as age_ge_65
    --b18 = persons by race
    , b18aaxxxYEARxxx as race_white
    , av0aaxxxYEARxxx - b18aaxxxYEARxxx as race_nonwhite
    --a35 = persons of hispanic or latino origin
    , a35aaxxxYEARxxx as eth_hispanic
    , av0aaxxxYEARxxx - a35aaxxxYEARxxx as eth_non_hispanic
    --ar5 = total households
    , ar5aaxxxYEARxxx as n_households
    --a68 = total families
    , a68aaxxxYEARxxx as n_families
    --at5 = persons by nativity
    , at5aaxxxYEARxxx as native
    , av0aaxxxYEARxxx - a35aaxxxYEARxxx as non_native
    --b69 = persons >= 25y by edu attainment
    , b69aaxxxYEARxxx as edu_lt_9th
    , b69abxxxYEARxxx as edu_9th_somecoll
    , b69acxxxYEARxxx as edu_collgrad
    --a88 = families by income in previous year
    , a88aaxxxYEARxxx as income_lt_10k
    , a88abxxxYEARxxx as income_10_15k
    , a88acxxxYEARxxx as income_15_25k
    , a88adxxxYEARxxx as income_25_50k
    , a88aexxxYEARxxx as income_ge_50k
    --b37 occupied housing units by tenure
    , b37aaxxxYEARxxx as housingunits_owner
    , b37abxxxYEARxxx as housingunits_renter
    from nhgis.nhgis0004_ts_nominal_tract where statefp = '53' and countyfp = '077')
    
    , x2 as (select
    gjoinxxxYEARxxx
    --marriage, 'now married'
    , bl1abxxxYEARxxx + bl1ahxxxYEARxxx as married
    --marriage: never married, widowed, divorced
    , bl1aaxxxYEARxxx + bl1aexxxYEARxxx + bl1afxxxYEARxxx + bl1agxxxYEARxxx + bl1akxxxYEARxxx + bl1alxxxYEARxxx as unmarried
    --poverty
    , ax6aaxxxYEARxxx as pov_status
    , cl6aaxxxYEARxxx as pov_below_100pct
    , c19abxxxYEARxxx as pov_below_125pct
    from nhgis.nhgis0005_ts_nominal_tract where statefp = '53' and countyfp = '077')

    , x3 as (select nhgiscode, gjoinxxxYEARxxx, b79aaxxxYEARxxx as median_hh_income, ab2aaxxxYEARxxx as median_family_income from nhgis.nhgis0007_ts_nominal_tract where countyfp = '077')
    
    --joined
    , f as (select * from x1 join x2 using(gjoinxxxYEARxxx) join x3 using(gjoinxxxYEARxxx))
    --yakima only
    , f2 as (select * from f where countyfp = '077')
    
    --join GIS
    , g as (select st_transform(the_geom_2286, 4326)::geometry(Multipolygon, 4326) as the_geom_4326, the_geom_2286, gisjoin as gjoinxxxYEARxxx from nhgis.us_tract_xxxYEARxxx_conflated where gisjoin in (select gisjoin from f2))

    --final
    , fx as (select * from f2 join g using (gjoinxxxYEARxxx))

    select * from fx;"
    
    # substitute the year in the SQL string
    sql <- gsub("xxxYEARxxx", year, sql)
    # assign the SQL to view or run stand-alone
    assign("sql", sql, envir = .GlobalEnv)
    
    # run the query
    O <- dbGetQuery(conn = us_census, statement = sql)
}

# a wrapper to run for each NHGIS year
go_nhgis_years <- function(){
    for (i in c(1980, 1990)){
        nhgis_year(i)
    }
}

# 2000 data
# shp2pgsql -s 4326 -I -g the_geom_4326 -N skip tr53_d00.shp sf3_2000_wa.tiger_tract | psql us_census

# process the y2000 SF3 data
sf3_2000 <- function(){
    message("sf3 2000")
    sql <- "drop materialized view if exists yakima_equity.y2000 cascade; create materialized view yakima_equity.y2000 as 
    with
    a as (select logrecno
        , p001001 as total_pop
        , p008002 as sex_male
        , p008041 as sex_female 
        from sf3_2000_wa.sf30001)
    , b as (select logrecno
        , p019002 as age_lt_18
        , p019002 + p019024 as age_lt_65
        , p019046 as age_ge_65 
        from sf3_2000_wa.sf30002)
    , c as (select logrecno
        , p006002 as race_white
        , p006001 - p006002 as race_nonwhite
        , p007002 as eth_non_hispanic
        , p007010 as eth_hispanic
        , p010001 as n_households
        , p010005 as n_families
        from sf3_2000_wa.sf30001)
    , d as (select logrecno
        , p021002 as native
        , p021001 - p021002 as non_native
        from sf3_2000_wa.sf30002)
    , e as (select logrecno
        , p037003 + p037004 + p037005 + p036006 
            + p037020 + p037021 + p037022 + p037023 as edu_lt_9th
        , p037007 + p037008 + p037009 + p037010 + p037011 + p037012 + p037013 + p037014  
            + p037024 + p037025 + p037026 + p037027 + p037028 + p037029 + p037030 + p037031 as edu_9th_somecoll
        , p037015 + p037016 + p037017 + p037018 
            + p037032 + p037033 + p037034 + p037035 as edu_collgrad
        from sf3_2000_wa.sf30003)
    , f as (select logrecno
        , p052002 as income_lt_10k
        , p052003 as income_10_15k
        , p052004 + p052005 as income_15_25k
        , p052006 + p052007 + p052008 + p052009 + p052010 as income_25_50k
        , p052011 + p052012 + p052013 + p052014 + p052015 + p052016 + p052017 as income_ge_50k
        , p053001 as median_hh_income
        from sf3_2000_wa.sf30006)
    , f001 as (select logrecno
        , p077001 as median_family_income
        from sf3_2000_wa.sf30007)
    , g as (select logrecno
        , h007002 as housingunits_owner
        , h007003 as housingunits_renter
        from sf3_2000_wa.sf30056)
    , h as (select logrecno
        , p018004 + p018013 as married
        , p018003 + p018009 + p018010 + p018012 + p018018 + p018019 as unmarried
        from sf3_2000_wa.sf30002)
    , i as (select logrecno
        , p087001 as pov_status
        , p089002 as pov_below_100pct
        , p088002 + p088003 + p088004 + p088005 as pov_below_125pct
        , p088002 + p088003 + p088004 + p088005 + p088006 as pov_below_150pct
        from sf3_2000_wa.sf30007)
    , geo as (select logrecno, state||county||tract as tract from sf3_2000_wa.sf3geo where county = '077' and sumlev = '140')
    , t as (select the_geom_4326, st_transform(the_geom_4326, 2286)::geometry(MultiPolygon, 2286) as the_geom_2286, statefp00 || countyfp00 || tractce00 as tract from sf3_2000_wa.tiger_tract where countyfp00 = '077')
    , fx as (select * from a join b using (logrecno) join c using (logrecno) join d using (logrecno) join e using (logrecno) join f using (logrecno) join f001 using(logrecno) join g using (logrecno) join h using (logrecno) join i using (logrecno) right join geo using(logrecno) join t using(tract))
    
    select * from fx;"
    
    # run the query
    O <- dbGetQuery(conn = us_census, statement = sql)
}

# 2010 data
# shp2pgsql -s 4326 -I -g the_geom_4326 -N skip tl_2010_53_tract10.shp acs2010_5yr.tiger_tract | psql us_census

# 2015 data
# shp2pgsql -s 4326 -I -g the_geom_4326 -N skip tl_2015_53_tract.shp acs2015_5yr.tiger_tract | psql us_census

# I also did this so that 2015 tiger data would have geoid10 columns:
# alter table acs2015_5yr.tiger_tract add column geoid10 text;
# update acs2015_5yr.tiger_tract set geoid10 = geoid;

# function to run on ACS data, 2010, 2015
acs <- function(year){
    message("acs ", year)
    sql <- "
    --do this so as not to require typing in the schema for each 'from' statement
    set search_path = acsxxxYEARxxx_5yr,public;
    drop materialized view if exists yakima_equity.yxxxYEARxxx cascade;
    create materialized view yakima_equity.yxxxYEARxxx as
    --CTE
    with 
    --get tract data (sumlev=140); geoid from here, used in other CTE queries
    geoheader as (select geoid, substr(geoid, position('S' in geoid) + 1, length(geoid)) as geoid10, state, county, tract from geoheader where state = '53' and county = '077' and sumlevel = 140)
    --variables from various tables
    , a as (select geoid
        , b01001001 as total_pop
        , b01001002 as sex_male
        , b01001026 as sex_female

        , b01001003 + b01001004 + b01001005 + b01001006 +  b01001027 + b01001028 + b01001029 + b01001030 as age_lt_18

        , b01001003 + b01001004 + b01001005 + b01001006 + b01001007 + b01001008 + b01001009 + b01001010 + b01001011 + b01001012 + b01001013 + b01001014 + b01001015 + b01001016 + b01001017 + b01001018 + b01001019 + b01001027 + b01001028 + b01001029 + b01001030 + b01001031 + b01001032 + b01001033 + b01001034 + b01001035 + b01001036 + b01001037 + b01001038 + b01001039 + b01001040 + b01001041 + b01001042 + b01001043 as age_lt_65
        , b01001020 + b01001021 + b01001022 + b01001023 + b01001024 + b01001025 + b01001044 + b01001045 + b01001046 + b01001047 + b01001048 + b01001049 as age_ge_65
        from b01001 where geoid in (select geoid from geoheader))
    , b as (select geoid, b19013001 as median_hh_income
        from b19013 where geoid in (select geoid from geoheader))
    , b001 as (select geoid, b19113001 as median_family_income
        from b19113 where geoid in (select geoid from geoheader))
    , c as (select geoid, b01001a001 as race_white
        from b01001a where geoid in (select geoid from geoheader)) 
    , d as (select geoid, b01001i001 as eth_hispanic 
        from b01001i where geoid in (select geoid from geoheader))
    , e as (select geoid, b08201001 as n_households 
        from b08201 where geoid in (select geoid from geoheader))
    , f as (select geoid, b11001002 as n_families 
        from b11001 where geoid in (select geoid from geoheader))
    , g as (select geoid, b05002002 as native, b05002001 - b05002002 as non_native
        from b05002 where geoid in (select geoid from geoheader))
    , h as (select geoid, b06009002 as edu_lt_9th, 
            b06009003 + b06009004 as edu_9th_somecoll, 
            b06009005 + b06009006 as edu_collgrad
        from b06009 where geoid in (select geoid from geoheader))
    , i as (select geoid, b25003002 as housingunits_owner, b25003003 as housingunits_renter
        from b25003 where geoid in (select geoid from geoheader))
    , j as (select geoid, b07008003 as married, 
        b07008002 + b07008004 + b07008005 + b07008006 as unmarried
        from b07008 where geoid in (select geoid from geoheader))
    , k as (select geoid, b06012001 as pov_status, b06012002 as pov_below_100pct, b06012002 + b06012003 as pov_below_150pct
        from b06012 where geoid in (select geoid from geoheader))
    --tract GIS data
    , tract as (select geoid10, the_geom_4326, st_transform(the_geom_4326, 2286)::geometry(MultiPolygon, 2286) as the_geom_2286 from tiger_tract where geoid10 in (select geoid10 from geoheader))
    --preliminary join of non-calculated fields
    , p1 as (select * from a join b using(geoid)join b001 using(geoid) join c using(geoid) join d using(geoid) join e using(geoid) join f using(geoid) join g using(geoid) join h using(geoid) join i using(geoid) join j using(geoid) join k using(geoid)
    )
    --calculated fields
    , x1 as (select geoid, total_pop - race_white as race_nonwhite, total_pop - eth_hispanic as eth_non_hispanic from p1 order by geoid)
    --join non-calculated, calculated, tract data
    , fx as (select * from p1 join x1 using (geoid) join geoheader using(geoid) join tract using(geoid10))
    --final
    select * from fx;
    set search_path = \"$user\", public;"
    
    # substitute the year
    sql <- gsub("xxxYEARxxx", year, sql)
    assign("sql", sql, envir = .GlobalEnv)
    # run the function
    O <- dbGetQuery(conn = us_census, statement = sql)
}

# area weighting function.
# overlays city limits from different years and calculates proportions of counts by proportion of the census tract within the city limits
awf <- function(year){
        
    # help to build the list of vars:
    # SELECT  a.attname
    # FROM pg_attribute a
    #   JOIN pg_class t on a.attrelid = t.oid
    #   JOIN pg_namespace s on t.relnamespace = s.oid
    # WHERE a.attnum > 0 
    #   AND NOT a.attisdropped
    #   AND t.relname = 'y1980' --<< replace with the name of the MV 
    #   AND s.nspname = 'yakima_equity' --<< change to the schema your MV is in 
    # ORDER BY a.attnum;

    
    sql <- "
    --makes a materialized view
    drop materialized view if exists yakima_equity.tract_xxxYEARxxx;
    create materialized view yakima_equity.tract_xxxYEARxxx as 
    with 
    -- census data
    yx as (select *, st_area(the_geom_2286) as area_ct_orig_ft from yakima_equity.yxxxYEARxxx)
    --city limits as of the year
    , citylim as (select st_union(the_geom_2286) as the_geom_2286 from yakima_equity.annexations where extract(year from annex_date) <= xxxYEARxxx)
    --dividing line
    , ew as (select * from yakima_equity.ew_16th)
    --intersect
    , int1 as (select y.*, st_intersection(y.the_geom_2286, c.the_geom_2286) as geomi from yx as y, citylim as c where st_intersects(y.the_geom_2286, c.the_geom_2286))
    , int2 as (select *, st_intersection(int1.geomi, ew.the_geom_2286) as geom_int from int1, ew where st_intersects(int1.geomi, ew.the_geom_2286))
    --area proportion
    , fx as (select st_area(geom_int) / area_ct_orig_ft as area_prop, * from int2)
    --proportions of variables based on area-weighting
    , fx1 as (select row_number() over() as gid, tract, geom_int as the_geom_2286 
        , area_prop
        , zone_16th
        , total_pop as total_pop_orig, total_pop * area_prop as total_pop
        , sex_male as sex_male_orig, sex_male * area_prop as sex_male
        , sex_female as sex_female_orig, sex_female * area_prop as sex_female
        , age_lt_18 as age_lt_18_orig, age_lt_18 * area_prop as age_lt_18
        , age_lt_65 as age_lt_65_orig, age_lt_65 * area_prop as age_lt_65
        , age_ge_65 as age_ge_65_orig, age_ge_65 * area_prop as age_ge_65
        , race_white as race_white_orig, race_white * area_prop as race_white
        , race_nonwhite as race_nonwhite_orig, race_nonwhite * area_prop as race_nonwhite
        , eth_hispanic as eth_hispanic_orig, eth_hispanic * area_prop as eth_hispanic
        , eth_non_hispanic as eth_non_hispanic_orig, eth_non_hispanic * area_prop as eth_non_hispanic
        , n_households as n_households_orig, n_households * area_prop as n_households
        , n_families as n_families_orig, n_families * area_prop as n_families
        , native as native_orig, native * area_prop as native
        , non_native as non_native_orig, non_native * area_prop as non_native
        , edu_lt_9th as edu_lt_9th_orig, edu_lt_9th * area_prop as edu_lt_9th
        , edu_9th_somecoll as edu_9th_somecoll_orig, edu_9th_somecoll * area_prop as edu_9th_somecoll
        , edu_collgrad as edu_collgrad_orig, edu_collgrad * area_prop as edu_collgrad
        , housingunits_owner as housingunits_owner_orig, housingunits_owner * area_prop as housingunits_owner
        , housingunits_renter as housingunits_renter_orig, housingunits_renter * area_prop as housingunits_renter
        , married as married_orig, married * area_prop as married
        , unmarried as unmarried_orig, unmarried * area_prop as unmarried
        , pov_status as pov_status_orig, pov_status * area_prop as pov_status
        , pov_below_100pct as pov_below_100pct_orig, pov_below_100pct * area_prop as pov_below_100pct
        , pov_below_xxxPCTxxxpct as pov_below_xxxPCTxxxpct_orig, pov_below_xxxPCTxxxpct * area_prop as pov_below_xxxPCTxxxpct
        , median_hh_income
        , median_family_income
        from fx)
    --remove slivers
    , fx2 as (select * from fx1 where total_pop > 10)
    select * from fx2;
    create index idx_tract_xxxYEARxxx on yakima_equity.tract_xxxYEARxxx using gist(the_geom_2286)"
    
    # a switch for the year and the additional below %X of poverty, deprecated because 125 and 150% aren't really comparable across years
    pct <- switch(as.character(year),
                  "1980"="125",
                  "1990"="125",
                  "2000"="125",
                  "2010"="150",
                  "2015"="150")
    
    # substitute the year and %
    sql <- gsub("xxxYEARxxx", year, gsub("xxxPCTxxx", pct, sql))
    # in case we need the SQL string for testing/troubleshooting
    assign("sql", sql, envir = .GlobalEnv)
    # run the query
    O <- dbGetQuery(conn = us_census, statement = sql)
}


# summarize city-level, either as a whole or with zone (e/w based on 16th Ave dividing line)
ycsum <- function(year, zone = FALSE){
    sql <- "
    --tract-clipped data
    with a as (select row_number() over() as ttract, * from yakima_equity.tract_xxxYEARxxx)
    --parcels
    , p as (select mkt_land + mkt_impvt as propvalue, year_blt::integer xxxZONE1xxx from yakima_equity.parcels as p, a where st_intersects(p.the_geom_2286, a.the_geom_2286) and year_blt <> '' and mkt_land > 0 and mkt_impvt > 0)
    -- parcel summary
    , ps as (select avg(propvalue) as propvalue, avg(year_blt) as year_built xxxZONE1xxx from p xxxZONE2xxx)
    --variables, summed across either entire city or by dividing line
    , b as (select
        xxxYEARxxx as year
        xxxZONE1xxx
        --sums of count data
        , sum(total_pop) as total_pop
        , sum(sex_male) as sex_male
        , sum(sex_female) as sex_female
        , sum(age_lt_18) as age_lt_18
        , sum(age_lt_65) as age_lt_65
        , sum(age_ge_65) as age_ge_65
        , sum(race_white) as race_white
        , sum(race_nonwhite) as race_nonwhite
        , sum(eth_hispanic) as eth_hispanic
        , sum(eth_non_hispanic) as eth_non_hispanic
        , sum(n_households) as n_households
        , sum(n_families) as n_families
        , sum(native) as native
        , sum(non_native) as non_native
        , sum(edu_lt_9th) as edu_lt_9th
        , sum(edu_9th_somecoll) as edu_9th_somecoll
        , sum(edu_collgrad) as edu_collgrad
        , sum(housingunits_owner) as housingunits_owner
        , sum(housingunits_renter) as housingunits_renter
        , sum(married) as married
        , sum(unmarried) as unmarried
        , sum(pov_status) as pov_status
        , sum(pov_below_100pct) as pov_below_100pct
        --weighted means of income
        , sum(median_hh_income * total_pop) / sum(total_pop) as median_hh_income
        , sum(median_family_income * total_pop) / sum(total_pop) as median_family_income
    from a xxxZONE2xxx)
    --calculated fields
    , c as (select *, 
        age_lt_18 / total_pop * 100 as pct_age_lt_18,
        age_ge_65 / total_pop * 100 as pct_age_ge_65,
        race_nonwhite / total_pop * 100 as pct_nonwhite, 
        eth_hispanic / total_pop * 100 as pct_hispanic, 
        edu_collgrad / total_pop * 100 as pct_edu_coll_grad, 
        pov_below_100pct / pov_status * 100 as pct_pov_below_100pct, 
        married / (married + unmarried) * 100 as pct_married, 
        housingunits_renter / (housingunits_owner + housingunits_renter) * 100 as pct_housingunits_renter 
        from b)
    select * from c xxxZONE3xxx;"

    pct <- switch(as.character(year),
                  "1980"="125",
                  "1990"="125",
                  "2000"="125",
                  "2010"="150",
                  "2015"="150")

    # substitute the year and %poverty (the latter being deprecated)
    sql <- gsub("xxxYEARxxx", year, gsub("xxxPCTxxx", pct, sql))
    
    # handle whether the summary will be by zone (E/W of 16th) or not
    if(zone) {
        sql <- gsub("xxxZONE1xxx", ", zone_16th", gsub("xxxZONE2xxx", "group by zone_16th order by zone_16th", gsub("xxxZONE3xxx", " join ps using(zone_16th)", sql)))
    } else {
        sql <- gsub("xxxZONE1xxx", "", gsub("xxxZONE2xxx", "", gsub("xxxZONE3xxx", ", ps", sql)))
    }
    # sql string for troubleshooting
    assign("sql", sql, envir = .GlobalEnv)
    # run the query, returns a data frame
    dbGetQuery(conn = us_census, statement = sql)
}

# city summary across all years
ycsum.all <- function(zone = TRUE){
    # are we doing zonal summaries?
    if(zone){
        # apply the ycsum function for city summary across all years, include zone
        do.call(rbind, lapply(years, ycsum, zone))
    } else {
        # don't include zone
        do.call(rbind, lapply(years, ycsum))
    }
}

# bus ridership
if(!dbGetQuery(conn = dbconn, "select count(*) = 1 as ext from information_schema.columns where table_schema = 'yakima_equity' and table_name = 'ridershipstatistics' and column_name = 'alightings';")$ext){
    O <- dbGetQuery(conn = dbconn, statement = "alter table yakima_equity.ridershipstatistics add column alightings integer;
        update yakima_equity.ridershipstatistics set alightings =  cashadultcount + 
            cashyouthcount + 
            cashreducedcount + 
            ticketadultcount + 
            ticketyouthcount + 
            ticketreducedcount + 
            passadultcount + 
            passyouthcount + 
            passreducedcount + 
            childcount + 
            transfercount;")
}
# scatter plots
corplotgg <- function(dat, xvar, yvar, mytitle, xlab, ylab, smooth_method = "lm"){
    # title length to vary title font size
    title_font_size <- 12
    if(nchar(mytitle) > 60){
        title_font_size <- 11
    } 
    xvarcol <- grep(xvar, colnames(dat))
    yvarcol <- grep(yvar, colnames(dat))
    g <- ggplot(data = dat, aes_string(x = xvar, y = yvar)) + 
    geom_point() +
    #geom_smooth(method = smooth_method, se = FALSE, size = 1.5) +
    labs(x=xlab, y=ylab, title=mytitle) +
    theme(plot.title = element_text(size=title_font_size))
    g
}

corplotgg_fill <- function(dat, xvar, yvar, fillvar, mytitle, xlab, ylab, smooth_method = "lm", legend_title){
    title_font_size <- 12
    if(nchar(mytitle) > 60){
        title_font_size <- 11
    }     
    # e/w count, factor, legend count
    dat$zone_16th <- factor(dat$zone_16th)
    ewcount <- sprintf("n = %s", table(dat$zone_16th))
    
    xvarcol <- grep(xvar, colnames(dat))
    yvarcol <- grep(yvar, colnames(dat))
    g <- ggplot(data = dat, aes_string(x = xvar, y = yvar, color=fillvar)) + 
    geom_point() +
    #geom_smooth(method = smooth_method, se = FALSE, size = 1.5) +
    labs(x=xlab, y=ylab, title=mytitle, fill=legend_title, colour = legend_title) +
    theme(plot.title = element_text(size=title_font_size)) +
    scale_color_discrete(labels = paste(levels(dat$zone_16th), ewcount, sep = ","))

    g
}

# a version with text labels
corplotgg_fill <- function(dat, xvar, yvar, fillvar, mytitle, xlab, ylab, smooth_method = "lm", legend_title){
    title_font_size <- 12
    if(nchar(mytitle) > 60){
        title_font_size <- 11
    }     
    # e/w count, factor, legend count
    dat$zone_16th <- factor(dat$zone_16th)
    ewcount <- sprintf("n = %s", table(dat$zone_16th))
    
    xvarcol <- grep(xvar, colnames(dat))
    yvarcol <- grep(yvar, colnames(dat))
    g <- ggplot(data = dat, aes_string(x = xvar, y = yvar, color=fillvar)) + 
        geom_point() +
        #geom_smooth(method = smooth_method, se = FALSE, size = 1.5) +
        labs(x=xlab, y=ylab, title=mytitle, fill=legend_title, colour = legend_title) +
        theme(plot.title = element_text(size=title_font_size)) +
        scale_color_discrete(labels = paste(levels(dat$zone_16th), ewcount, sep = ",")) +
        geom_text_repel(aes(label=as.numeric(str_sub(dat$tract, start = -4))), show.legend = FALSE, size = 3)

    g
}
# points and tracts overlay function, make scatter plot
ptfcn <- function(pttablename, where = "", normvar, mytitle, legend_title){
    ptsql <- "
    with
    --substitute input table and where clause
    p as (select the_geom_2286 from yakima_equity.xxxPTDATAxxx xxxWHERExxx)
    --census tracts
    , ct as (select st_area(the_geom_2286) as area_ct, * from yakima_equity.tract_2015)
    --intersect points and tracts
    , px as (select tract, zone_16th from ct, p where st_intersects(p.the_geom_2286, ct.the_geom_2286))
    --jsum by tract
    , pj as (select 
        --normalization
        count(*) / xxxNORMxxx as xxxNORMALIASxxx
        , tract
        , ct.zone_16th
        from px left join ct using(tract)
        group by tract, xxxNORMxxx
        , ct.zone_16th
        )
    --join to tract data
     , f as (select pj.*
        , median_hh_income
        , median_family_income
        , age_lt_18 / total_pop::numeric * 100 as pct_age_lt_18
        , age_ge_65 / total_pop::numeric * 100 as pct_age_ge_65
        , eth_hispanic / total_pop::numeric * 100 as pct_hispanic
        , race_nonwhite / total_pop::numeric * 100 as pct_nonwhite
        , case when (edu_collgrad + edu_9th_somecoll + edu_lt_9th) = 0 then 0 else edu_collgrad::numeric / (edu_collgrad + edu_9th_somecoll + edu_lt_9th) * 100 end as pct_collgrad
        , case when pov_status = 0 then 0 else pov_below_100pct / pov_status::numeric * 100 end as pct_below_poverty
        , case when married + unmarried = 0 then 0 else married / (married + unmarried)::numeric * 100 end as pct_married
        , case when housingunits_renter + housingunits_owner = 0 then 0 else housingunits_renter / (housingunits_renter + housingunits_owner)::numeric * 100 end as pct_renter from pj join ct using(tract, zone_16th))
    select * from f;
    "
    
    yvar <- switch(normvar,
                   capita = "count_per_capita",
                   area = "count_per_mi2")
    
    ylab <- switch(normvar,
                   capita = "count per capita",
                   area = "count per square mile") 
    
    normfield <- switch(normvar,
                   capita = "total_pop",
                   area = "(area_ct / 5280^2)") 

    normalias <- switch(normvar,
                    capita = "count_per_capita",
                    area = "count_per_mi2")

    mySql <- gsub("xxxPTDATAxxx", pttablename, gsub("xxxNORMxxx", normfield, gsub("xxxNORMALIASxxx", normalias, gsub("xxxWHERExxx", where, ptsql))))
    
    dat <- dbGetQuery(conn = dbconn, statement = mySql)

    # plot
    xvars <- list(
        c('pct_age_lt_18', 'percent < 18 years'),
        c('pct_age_ge_65', 'percent >= 65 years'),
        c('median_hh_income', 'median household income'),
        c('median_family_income', 'median family income'),
        c('pct_hispanic', 'percent Hispanic'),
        c('pct_nonwhite', 'percent non-White'),
        c('pct_collgrad', 'percent college graduate'),
        c('pct_below_poverty', 'percent below poverty'),
        c('pct_married', 'percent married'),
        c('pct_renter', 'percent of housing renter-occupied'))
    
    for(i in xvars){
        #message(i)
        thetitle <- sprintf("%s x %s", mytitle, i[2])
        xlab <- gsub("income", "income ($)", gsub("pct_", "percent", i[2]))
        #ylab <- "count of calls per capita"
        g <- corplotgg_fill(dat = dat, xvar = i[1], yvar = yvar, fillvar = "zone_16th", mytitle = thetitle, xlab = xlab, ylab = ylab, legend_title = legend_title)
        print(g)
        imgfile <- tolower(gsub("_+", "_", file.path(imagedir, sprintf("%s_%s.png", gsub(" |[[:punct:]]", "_", mytitle), gsub(" |[[:punct:]]", "_", i[2])))))
        message(imgfile)
        ggsave(filename = imgfile, plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")
    }
    return(dat)
}

1 Introduction

This document provides details on GIS data sources for the Yakima Equity Study. A number of different summaries and analyses were performed for various reasons. Basic descriptive data are provided for each data source. Some rudimentary overlay analyses were run to generate scatter plots between measures from the GIS data sets and administrative boundaries (i.e., Census tracts and the 16th Avenue dividing line).

Download this entire web site, including images and code: website.zip

2 Methods

2.1 Source data

2.1.1 City of Yakima GIS data

GIS source data for Yakima were provided by Tom Sellsted of the City of Yakima (YC) in a geodatabase. Data sets are listed in Table 1.

# source data 
t0cap <- table_nums(name="tab0", caption = "Data sets in geodatabase provided by City of Yakima")
src_str <- grep("^[[:digit:]]", system("ogrinfo /projects/yakima_equity/data/gis/YakimaEquityStudy.gdb", intern = TRUE), value = TRUE)
srcdat <- strsplit(src_str, ": ")
srcdat <- do.call(rbind, lapply(srcdat, function(x) x[-1]))
srcdat <- data.frame(do.call(rbind, strsplit(srcdat, "\\(|\\)")))
srcdat[,1] <- gsub(" $", "", srcdat[,1])
colnames(srcdat) <- c("data_source", "data_type")
srcdat <- srcdat[order(srcdat$data_source),]
kable(srcdat, format="html", caption = t0cap, row.names = FALSE)
Table 1: Data sets in geodatabase provided by City of Yakima
data_source data_type
ADARamps Point
ADARamps__ATTACH None
AnimalControl2015 Point
Annexations Multi Polygon
CDBGHomeRepairs Point
CensusBlocks2010 Multi Polygon
CodeCompliance2015 Point
Collisions Point
CouncilDistricts2015 Multi Polygon
ParcelAnalysis None
ParcelPoints_SpatialJoin Point
Parcels Multi Polygon
Parks Multi Polygon
PropertiesWithSepticSystems Multi Polygon
PropertiesWithWells Multi Polygon
SidewalksByCouncilDistrict Multi Line String
StreetLights Point
TransitRoutesByDistrict Multi Line String
YakBack2014to2016 Point
YKFDcalls Point
YKPDcalls Point

2.1.2 US Census data

Census data came from three distinct sources:

  1. 1970, 1980, 1990 time series data: NHGIS
  2. 2000 Decennial data, US Census: Summary File 3, 2000
  3. 2010, 2015 American Community Survey data: censusreporter.org

It should be noted that the 1970 census data contained no values for Yakima County, so analyses for 1970 were not possible.

The census data included these fields:

  1. total population
  2. persons by sex
  3. persons by age
  4. persons by race (white, nonwhite)
  5. persons of Hispanic or Latino origin
  6. total households
  7. total families
  8. persons by nativity
  9. persons 25 years and over by educational attainment
  10. household income in previous year
  11. family income in previous year
  12. occupied housing units by tenure
  13. marital status
  14. poverty (below poverty level)

2.2 Data conversion

Data obtained from YC GIS staff were converted to a PostGIS database using the command ogr2ogr -progress -skipfailures -f "PostgreSQL" PG:"host=localhost dbname=yakima_equity password=********" YakimaEquityStudy.gdb -t_srs EPSG:2286 -lco OVERWRITE=YES -lco SCHEMA=gis

skipfailures was applied because of a multicurve feature in one of the feature classes, which is not allowed in PostGIS. EPSG:2286 represents WA State Plane South HARN, which is the coordinate system/projection for the YC data.

2.3 Scatter plots

Overlay analyses were performed (e.g., point-in-polygon) to generate summaries by administrative unit. Administrative units were represented by sociodemographic variables, and bivariate scatter plots were generated for each pair of variables of interest.

2.4 GIS Analyses

2.4.1 Historical demographics

In the first set of analyses, historical boundaries (“annexations”) of YC were overlain with contemporaneous census data to provide estimates of the demographic conditions of YC as a whole, and also stratified by the 16th Avenue dividing line. In the GIS overlay process, census tracts that are straddled by the city limits are “clipped.” The ratio of clipped to original area gives a value that can be multiplied by the original census enumeration to produce an estimate of the enumeration within the clipped area (assuming a uniform distribution across the census tract). For example if a census tract had 4000 persons, and 75% of the tract was within the city limits, the estimate of the number of persons in the portion of that tract within the city limits would be 3000 (\(4000 \times 0.75 = 3000\)). For enumerated variables (i.e., counts of persons), the sum of these area-weighted estimates was generated.

Parcel-level data were used for historical analysis of property value and year built. Because each parcel is recorded with its year of construction and assessed value, it was possible to select parcels that were in existence at each census year. It should be noted that this analysis is not truly historical, in the sense that we did not have access to data representing parcels that were redeveloped between the original year built and the census year used for the analysis.

2.4.2 Historical infrastructure allocation

In order to perform longitudinal analysis of infrastructure data stored in the GIS, it is necessary to have GIS data sets that include variables that represent when a feature of infrastructure was created or installed. Most of the GIS data sets do not include these variables, with a few notable exceptions, that can be used for historical analysis: Parks (definitely), Police and Fire Department calls (possibly–depending on how far back the data go), Animal Control, and Code Compliance (same caveats as for PD and FD calls).

For these analyses, infrastructure data were selected to match the year of the census data, such that the GIS data selection represented those infrastructure features that existed at the time of the census. The infrastructure data were then overlain on the census data to generate tables in support of statistical analysis comparing potential infrastructure accessibility and demographic patterns.

2.4.2.1 Parks

Historical analysis of parks was done using two separate methods. For both methods, two runs were made, (1) incuding all parks, and (2) excluding any parks that had received any private funding:

  1. CHESTERLEY PARK
  2. FRANKLIN PARK & POOL
  3. HARMAN CENTER AT GAILLEON PARK
  4. KIWANIS PARK & GATEWAY SPORTS COMPLEX
  5. LARSON PARK
  6. MILLER PARK
  7. NORTH 44TH AVE PARK
  8. RANDALL PARK
  9. ROSALMA GARDEN CLUB PARK
  10. SOUTHEAST COMMUNITY PARK
2.4.2.1.1 Overlay of parks with census tracts

For both data sets, years were matched (e.g., for the 1980 census, only those parks that existed in 1980 were selected). A GIS intersection was performed to tabulate the total area of parks within each census tract. Demographic characteristics of the tract and the area of parks within the tract were graphed as scatter plots.

2.4.2.1.2 Buffer of park polygons by 1/4 mile

Buffers of 1/4 mile, as a proxy for locations within reasonable walking distance, were generated for the parks polygons; these buffers were then overlain on the census tracts to obtain estimated demographic counts within and outside the buffers. The relative proportion of persons in each demographic category were tabulated using the same year-to-year matching.

2.4.2.1.3 Stratification by 16th Avenue

Total area of parks per capita was tabulated for each year with stratification by 16th Avenue.

2.4.3 Current infrastructure/services allocation

Most of the GIS data sets are not encoded for historical analysis. To be used for historical analysis, each feature in each layer would require an attribute value representing the year in which the infrastructure feature was installed or created (this is not to be confused with the date in which a feature was added to a GIS layer). This is not a requirement for urban GIS, which typically is updated with new data to create a layer representing current conditions. Having historical data in the YC GIS would have required a decision to be made at the system’s initiation to store these dates.

2.4.3.1 Public safety calls for service

Yakima Fire and Police Department calls for service were tallied at the census tract level, normalized by the census count of persons per tract (i.e., resulting in the number of calls per person). Response times were not provided and were not available for any of the public safety calls for service.

2.4.3.2 Street lights

Street lights were tallied at the census tract level, normalized by the tract area (i.e., resulting in the number of street lights per unit area).

2.4.3.3 Code compliance requests

Code compliance requests were treated in the same manner as public safety calls (i.e., calls per capita).

2.4.3.4 Transit

Transit data were analyzed similarly, using density of counts of ridership, benches, and shelters per capita.

3 Results

Results are presented for each selected GIS data layer and demographic variable.

3.1 Census tract population

To aid in interpretation of the demographic graphs, the following table and set of maps enumerate census tracts with population (in the table) and tract IDs (in the table and on the maps). The tract IDs can be used to cross-reference the graphs and the maps. It should be noted that some tract IDs changed over time, such as tract 900 being split to 901 and 902 after the year 2000, and some tracts had no overlap with the city limits in earlier years (e.g., 2802).

censuspop <- dbGetQuery(conn = us_census, statement = "with 
y1980 as (select right(tract::text, 4)::int as tract, round(sum(total_pop)::numeric,0) as pop_1980 from yakima_equity.tract_1980 group by tract)
, y1990 as (select right(tract::text, 4)::int as tract, round(sum(total_pop)::numeric,0) as pop_1990 from yakima_equity.tract_1990 group by tract)
, y2000 as (select right(tract::text, 4)::int as tract, round(sum(total_pop)::numeric,0) as pop_2000 from yakima_equity.tract_2000 group by tract)
, y2010 as (select right(tract::text, 4)::int as tract, round(sum(total_pop)::numeric,0) as pop_2010 from yakima_equity.tract_2010 group by tract)
, y2015 as (select right(tract::text, 4)::int as tract, round(sum(total_pop)::numeric,0) as pop_2015 from yakima_equity.tract_2015 group by tract)
, f as (select * from y1980 full join y1990 using (tract) full join y2000 using (tract) full join y2010 using (tract) full join y2015 using (tract))
select tract::numeric, pop_1980, pop_1990, pop_2000, pop_2010, pop_2015 from f order by tract;")

census_descr_cap <- table_nums(name="census_descr_cap", caption = "Estimated census tract population over time")
kable(censuspop, format = "html", caption = census_descr_cap)
Table 2: Estimated census tract population over time
tract pop_1980 pop_1990 pop_2000 pop_2010 pop_2015
100 2113 2426 2778 3356 3094
200 3221 3665 5374 5787 5996
300 232 493 3905 4172 4216
400 1635 1719 2758 5011 5216
500 3327 3730 5011 5811 5141
600 3829 4566 6485 6866 7743
700 5995 6447 6684 6477 7520
800 5086 4822 4614 4398 4441
900 1325 1830 2596 NA NA
901 NA NA NA 7504 7464
902 NA NA NA 3341 4110
1000 5459 5689 5725 5541 5516
1100 2244 2749 4065 4460 4691
1200 5932 6509 9048 NA NA
1201 NA NA NA 3712 3859
1202 NA NA NA 6415 6344
1300 187 178 197 219 194
1400 630 591 660 674 726
1500 2841 6583 8380 NA NA
1501 NA NA NA 6132 7569
1502 NA NA NA 2616 2944
1600 42 154 215 NA NA
1602 NA NA NA 764 791
1702 NA NA NA 64 62
2802 NA NA NA 1784 1903
3100 NA NA 123 NA NA
3400 NA NA NA 182 212

3.1.1 Census tract maps

The set of Census tract maps display the census tracts with census tract identifier and “e” or “w” based on the 16th Avenue dividing line. These maps, along with the tables of Census tract demographic aggregates should be helpful in interpreting the scatter plots, which include text labels showing the tract identifiers. It should be noted that 16th Avenue divides some tracts; for those tracts that span 16th Avenue, there will be two data points on the map, each with an area-weighted estimate of the proportion of the both X and Y variables.

include_graphics(tractmaps)

3.2 Historical demograpics

3.2.1 City of Yakima as a whole

The graphs show in bar graphs the same set of demographic variables over time that were shown in the maps. However, the graphs show aggregate values for the entire city in this section and . Bars represent numerical quantities (i.e., percent of population, USD, year) on the y-axis, and years are indexed on the x-axis.

# Creates materialized views of median property value and mean year built per tract bsaed on parcel level data
sql <- "drop materialized view if exists yakima_equity.tract_propval_xxxYEARxxx cascade;
create materialized view yakima_equity.tract_propval_xxxYEARxxx as
with
--tracts
t as (select * from yakima_equity.tract_xxxYEARxxx)
--parcels
, p as (select tract, mkt_land + mkt_impvt as propvalue, year_blt::integer from yakima_equity.parcels as p, t where st_intersects(st_centroid(p.the_geom_2286), t.the_geom_2286) and year_blt::integer <= xxxYEARxxx and mkt_land > 0 and mkt_impvt > 0)
--median value and mean year built
, m as (select tract, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY propvalue)::integer AS median_propvalue, avg(year_blt)::integer as mean_year_built from p group by tract)
--join back to tract
, j as (select row_number() over() as gid, tract, St_Multi(the_geom_2286)::geometry(MultiPolygon, 2286) as geom, median_propvalue, mean_year_built from t join m using(tract))
select * from j;"

for(i in years){
    sqli <- gsub("xxxYEARxxx", i, sql)
    O <- dbGetQuery(conn = dbconn, statement = sqli)
}

citysum <- ycsum.all(zone = FALSE)
# column names to titles
citysum <- sqldf("select year, pct_age_lt_18, pct_age_ge_65, median_hh_income, median_family_income, pct_nonwhite, pct_hispanic, pct_edu_coll_grad, pct_pov_below_100pct, pct_married, pct_housingunits_renter, propvalue, year_built from citysum")
titles <- c("year", "percent age < 18", "percent age >= 65", "median household income", "median family income", "percent nonwhite", "percent Hispanic", "percent college graduate", "percent below poverty", "percent married", "percent of housing units renter occupied", "property value", "mean year built")
for(i in 2:(ncol(citysum) - 2)){
    colname <- colnames(citysum)[i]
    title <- titles[i]    
    ylab <- switch(substr(colname, 1, 3),
        med="$",
        pct="percent",
        pro="$",
        yea="year")
    
    g <- ggplot(data = citysum, aes(factor(year), citysum[,i])) + geom_bar(stat = "identity") + labs(x="year", y=ylab, title=title) +
    theme(plot.title = element_text(size=12))
        
    if(colname=='year_built'){
        g <- g + coord_cartesian(ylim = c(1940, 1970))
    }
    if(colname=='propvalue'){
        g <- g + coord_cartesian(ylim = c(150000, 250000))
    }
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s.png", colname)), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")
}

3.2.2 Sixteenth Avenue stratification

This section includes the same set of graphs as above, but stratified by 16th Avenue. Bars in one color represent the quantities on the east side, and the other color represents quantities on the west side.

ewsum <- ycsum.all(zone = TRUE)
ewsum <- sqldf("select zone_16th, year, pct_age_lt_18, pct_age_ge_65, median_hh_income, median_family_income, pct_nonwhite, pct_hispanic, pct_edu_coll_grad, pct_pov_below_100pct, pct_married, pct_housingunits_renter, propvalue, year_built from ewsum")
titles <- c("zone", "year", "percent < 18 years", "percent >= 65 years", "median household income", "median family income", "percent nonwhite", "percent Hispanic", "percent college graduate", "percent below poverty", "percent married", "percent of housing units renter occupied", "property value", "year built")
for(i in 3:(ncol(ewsum)-2)){
    colname <- colnames(ewsum)[i]
    title <- titles[i]
    ylab <- switch(substr(colname, 1, 3),
    med="$",
    pct="percent",
    pro="$",
    yea="year")
    
    g <- ggplot(data = ewsum, aes(factor(year), ewsum[,i], fill=zone_16th)) + geom_bar(stat = "identity", position = "dodge") + labs(x="year", y=ylab, title=title, fill="16th Ave divide") + 
    theme(plot.title = element_text(size=12))
        
    if(colname=='year_built'){
        g <- g + coord_cartesian(ylim = c(1940, 1970))
    }
    if(colname=='propvalue'){
        g <- g + coord_cartesian(ylim = c(150000, 250000))
    }
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s_16th.png", colname)), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

}

The set of demographic characteristic maps show changes in the values of demographic values over time.

include_graphics(othermaps)

3.3 Historical infrastructure/services allocation

ntracts80 <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_1980)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 1980)
    select count(a.*) from a, b where st_intersects(a.the_geom_2286, b.geom);")$count

ntractswithpark1980_all <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_1980)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 1980)
    , prp as (select * from yakima_equity.park_private)
    , p0 as (select * from yakima_equity.parksvalid where yearcreated <= 1980 and yearcreated is not null)
    , p as (select name, the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
    , ai as (select a.* from a, b where st_intersects(a.the_geom_2286, b.geom))
    , f as (select * from ai, p where st_intersects(ai.the_geom_2286, p.the_geom_2286))
    select count(distinct tract) from f;")$count

ntractswithpark1980 <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_1980)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 1980)
    , prp as (select * from yakima_equity.park_private)
    , p0 as (select * from yakima_equity.parksvalid where yearcreated <= 1980 and yearcreated is not null and name not in (select name from prp))
    , p as (select name, the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
    , ai as (select a.* from a, b where st_intersects(a.the_geom_2286, b.geom))
    , f as (select * from ai, p where st_intersects(ai.the_geom_2286, p.the_geom_2286))
    select count(distinct tract) from f;")$count

ntracts2015 <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_2015)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 2015)
    select count(a.*) from a, b where st_intersects(a.the_geom_2286, b.geom);")$count

ntractswithpark2015_all <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_2015)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 2015)
    , prp as (select * from yakima_equity.park_private)
    , p0 as (select * from yakima_equity.parksvalid where yearcreated <= 1980 and yearcreated is not null)
    , p as (select name, the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
    , ai as (select a.* from a, b where st_intersects(a.the_geom_2286, b.geom))
    , f as (select * from ai, p where st_intersects(ai.the_geom_2286, p.the_geom_2286))
    select count(distinct tract) from f;")$count

ntractswithpark2015 <- dbGetQuery(conn = dbconn, statement = "with a as (select * from yakima_equity.tract_2015)
    , b as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= 2015)
    , prp as (select * from yakima_equity.park_private)
    , p0 as (select * from yakima_equity.parksvalid where yearcreated <= 1980 and yearcreated is not null and name not in (select name from prp))
    , p as (select name, the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
    , ai as (select a.* from a, b where st_intersects(a.the_geom_2286, b.geom))
    , f as (select * from ai, p where st_intersects(ai.the_geom_2286, p.the_geom_2286))
    select count(distinct tract) from f;")$count

A set of graphs is shown below; each graph displays the data point along with the tract numeric identifier.

These graphs should be interpreted with caution due to implicit assumptions/limitations:

  1. The data shown were analyzed using census tracts. Use of census tracts for area-weighted summary assumes that the census population estimates are correct, and that populations are uniformly distributed and uniformly heterogeneous across the tract. It also assumes that all persons residing within the tract have equal access to the parks within the tract, and that persons residing in one tract have access to only those parks within that tract.
  2. The data are quite sparse; there were only 17 census tracts in 1980 and 24 tracts in 2015; only 10 of those in 1980 (and 12 in 2015) had parks overlapping the tract boundary. Because of the small sample sizes, no formal statistical tests were possible. Also, because of the small sample sizes, “outlier” points could have a strong leverage effect on any observed trends, so regression trend lines were not added to the graphs.

3.3.1 Parks

Parks are shown in the map:

include_graphics(parkmaps)

3.3.1.1 Area of parks in Yakima stratified by 16th Avenue

The graph shows the area of park per capita of tract across years and stratified by the 16th Avenue dividing line. There is more area in parks per capita west of 16th Avenue than east, a trend that continued over time; however, the relative difference lessened over time.

sql <- "
with
--parks
p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx)
, p as (select row_number() over() as pid, the_geom_2286 from p0)
--annexation as single poly
, anx as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= xxxYEARxxx)
--census tracts
, ct as (select tract, zone_16th, st_area(the_geom_2286) as area_ct, total_pop, the_geom_2286 from yakima_equity.tract_xxxYEARxxx)
--sum population e/w
, popew_sum as (select zone_16th, sum(total_pop) as pop from ct group by zone_16th)
--parks east west
, pew as (select zone_16th, pid, st_area(st_intersection(p.the_geom_2286, ct.the_geom_2286)) as area_park from p, ct where st_intersects(p.the_geom_2286, ct.the_geom_2286))
--sum park area e/w
, pew_sum as (select zone_16th, sum(area_park) as area_park from pew group by zone_16th)
select 'xxxYEARxxx'::text as year, *, area_park / pop as area_park_per_capita from pew_sum join popew_sum using(zone_16th)
;"

parkew <- NULL
for(y in c(1980, 1990, 2000, 2010, 2015)){
    sqly <- gsub("xxxYEARxxx", y, sql)
    parkew <- rbind(parkew, dbGetQuery(conn = dbconn, statement = sqly))
}

# plot
g <- ggplot(data = parkew, aes(x = year, y = area_park_per_capita, fill = zone_16th)) + 
    geom_bar(position = "dodge", stat = "identity") + 
    labs(y = "area (ft)", title = "Area of parks per capita stratified by 16th Avenue", fill = "16th Ave divide") +
    theme(plot.title = element_text(size=12))
    
print(g)

ggsave(filename = file.path(imagedir, "parkarea_16th.png"), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

3.3.1.2 Park per capita area by census tract demographics

These graphs present demographic variables of interest on the X-axis and per-capita area of parks on the Y-axis, time-matched by year. Tracts were stratified by the 16th Avenue dividing line. The number of tracts per east/west group is shown in the legend.

It should be noted that for there is one census tract with a large per-capita area across each year; therefore a second set of graphs was prepared excluding the point with the greatest value in each year.

The observed stratification in the X-axis reflects general sociodemographic differences between eastern and western Yakima. The other trend seems to be that the amount of park area per capita was greater for western Yakima in 1980, but as the City grew over subsequent years, the area of park per capita became much more uniform across the 16th Avenue dividing line.

3.3.1.2.1 All tracts
3.3.1.2.1.1 Parks with no private funding
sqlbase <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx and name not in (select name from prp))
, p as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--census tracts
, ct as (select tract
    , zone_16th
    , the_geom_2286
    , total_pop
    , age_lt_18 / total_pop::numeric * 100 as pct_lt_18
    , age_ge_65 / total_pop::numeric * 100 as pct_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic / total_pop::numeric * 100 as pct_hispanic
    , race_nonwhite / total_pop::numeric * 100 as pct_nonwhite
    , case when (edu_collgrad + edu_9th_somecoll + edu_lt_9th) = 0 then 0 else edu_collgrad::numeric / (edu_collgrad + edu_9th_somecoll + edu_lt_9th) * 100 end as pct_collgrad
    , case when pov_status = 0 then 0 else pov_below_100pct / pov_status::numeric * 100 end as pct_below_poverty
    , case when married + unmarried = 0 then 0 else married / (married + unmarried)::numeric * 100 end as pct_married
    , case when housingunits_renter + housingunits_owner = 0 then 0 else housingunits_renter / (housingunits_renter + housingunits_owner)::numeric * 100 end as pct_renter
    from yakima_equity.tract_xxxYEARxxx)
--intersection
, i as (select tract, zone_16th, park_area_orig, st_intersection(p.the_geom_2286, ct.the_geom_2286) as geom_clip from p, ct where st_intersects(p.the_geom_2286, ct.the_geom_2286))
--area 
, a as (select tract, sum(st_area(geom_clip)) as area_ft from i group by tract order by tract)
--join
, j as (select tract, coalesce(area_ft, 0) as area_ft
    , zone_16th
    , total_pop
    , coalesce(area_ft / total_pop, 0) as sqft_per_person
    , pct_lt_18
    , pct_ge_65
    , median_hh_income
    , median_family_income
    , pct_hispanic
    , pct_nonwhite
    , pct_nonwhite
    , pct_collgrad
    , pct_below_poverty
    , pct_married
    , pct_renter
from a full join ct using(tract) order by tract)
select * from j where area_ft > 0;"

#for(y in c(2010)){
for(y in c(1980, 1990, 2000, 2010, 2015)){
#message(y)
    sqly <- gsub("xxxYEARxxx", y, sqlbase)
    
    assign("sqly", sqly, envir = .GlobalEnv)
    
    x <- dbGetQuery(conn = dbconn, statement = sqly)
    
    xvars <- list(
        c('pct_lt_18', 'percent < 18 years'),
        c('pct_ge_65', 'percent >= 65 years'),
        c('median_hh_income', 'median household income'),
        c('median_family_income', 'median family income'),
        c('pct_hispanic', 'percent Hispanic'),
        c('pct_nonwhite', 'percent non-White'),
        c('pct_collgrad', 'percent college graduate'),
        c('pct_below_poverty', 'percent below poverty'),
        c('pct_married', 'percent married'),
        c('pct_renter', 'percent of housing renter-occupied'))

    
    for(i in xvars){
    #message(i)
    mytitle <- sprintf("%s: park (no private) area per capita x %s", y, i[2])
    xlab <- gsub("income", "income ($)", gsub("pct_", "percent", i[2]))
    ylab <- "area (ft)"
    g <- corplotgg_fill(dat = x, xvar = i[1], yvar = 'sqft_per_person', fillvar = "zone_16th", mytitle = mytitle, xlab = xlab, ylab = ylab, legend_title = "16th Ave divide")
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s_park_noprivate_%s.png", y, i[2])), width = ggsavewidth, height = ggsaveheight)
    }
}

3.3.1.2.1.2 All parks
sqlbase <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx)
, p as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--census tracts
, ct as (select tract
    , zone_16th
    , the_geom_2286
    , total_pop
    , age_lt_18 / total_pop::numeric * 100 as pct_lt_18
    , age_ge_65 / total_pop::numeric * 100 as pct_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic / total_pop::numeric * 100 as pct_hispanic
    , race_nonwhite / total_pop::numeric * 100 as pct_nonwhite
    , case when (edu_collgrad + edu_9th_somecoll + edu_lt_9th) = 0 then 0 else edu_collgrad::numeric / (edu_collgrad + edu_9th_somecoll + edu_lt_9th) * 100 end as pct_collgrad
    , case when pov_status = 0 then 0 else pov_below_100pct / pov_status::numeric * 100 end as pct_below_poverty
    , case when married + unmarried = 0 then 0 else married / (married + unmarried)::numeric * 100 end as pct_married
    , case when housingunits_renter + housingunits_owner = 0 then 0 else housingunits_renter / (housingunits_renter + housingunits_owner)::numeric * 100 end as pct_renter
    from yakima_equity.tract_xxxYEARxxx)
--intersection
, i as (select tract, zone_16th, park_area_orig, st_intersection(p.the_geom_2286, ct.the_geom_2286) as geom_clip from p, ct where st_intersects(p.the_geom_2286, ct.the_geom_2286))
--area 
, a as (select tract, sum(st_area(geom_clip)) as area_ft from i group by tract order by tract)
--join
, j as (select tract, coalesce(area_ft, 0) as area_ft
    , zone_16th
    , total_pop
    , coalesce(area_ft / total_pop, 0) as sqft_per_person
    , pct_lt_18
    , pct_ge_65
    , median_hh_income
    , median_family_income
    , pct_hispanic
    , pct_nonwhite
    , pct_nonwhite
    , pct_collgrad
    , pct_below_poverty
    , pct_married
    , pct_renter
from a full join ct using(tract) order by tract)
select * from j where area_ft > 0;"

#for(y in c(2010)){
for(y in c(1980, 1990, 2000, 2010, 2015)){
#message(y)
    sqly <- gsub("xxxYEARxxx", y, sqlbase)
    
    assign("sqly", sqly, envir = .GlobalEnv)
    
    x <- dbGetQuery(conn = dbconn, statement = sqly)
    
    xvars <- list(
        c('pct_lt_18', 'percent < 18 years'),
        c('pct_ge_65', 'percent >= 65 years'),
        c('median_hh_income', 'median household income'),
        c('median_family_income', 'median family income'),
        c('pct_hispanic', 'percent Hispanic'),
        c('pct_nonwhite', 'percent non-White'),
        c('pct_collgrad', 'percent college graduate'),
        c('pct_below_poverty', 'percent below poverty'),
        c('pct_married', 'percent married'),
        c('pct_renter', 'percent of housing renter-occupied'))

    
    for(i in xvars){
    #message(i)
    mytitle <- sprintf("%s: park area (all) per capita x %s", y, i[2])
    xlab <- gsub("income", "income ($)", gsub("pct_", "percent", i[2]))
    ylab <- "area (ft)"
    g <- corplotgg_fill(dat = x, xvar = i[1], yvar = 'sqft_per_person', fillvar = "zone_16th", mytitle = mytitle, xlab = xlab, ylab = ylab, legend_title = "16th Ave divide")
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s_park_all_%s.png", y, i[2])), width = ggsavewidth, height = ggsaveheight)
    }
}

3.3.1.2.2 Tracts with “outlier” points dropped
#for(y in c(2010)){
for(y in c(1980, 1990, 2000, 2010, 2015)){
#message(y)
    sqly <- gsub("xxxYEARxxx", y, sqlbase)
    
    assign("sqly", sqly, envir = .GlobalEnv)
    
    x <- dbGetQuery(conn = dbconn, statement = sqly)
    
    # drop outlier
    x <- x[x$sqft_per_person < max(x$sqft_per_person),]
    
    xvars <- list(
        c('pct_lt_18', 'percent < 18 years'),
        c('pct_ge_65', 'percent >= 65 years'),
        c('median_hh_income', 'median household income'),
        c('median_family_income', 'median family income'),
        c('pct_hispanic', 'percent Hispanic'),
        c('pct_nonwhite', 'percent non-White'),
        c('pct_collgrad', 'percent college graduate'),
        c('pct_below_poverty', 'percent below poverty'),
        c('pct_married', 'percent married'),
        c('pct_renter', 'percent of housing renter-occupied'))

    
    for(i in xvars){
    #message(i)
    mytitle <- sprintf("%s: park area per capita x %s", y, i[2])
    xlab <- gsub("income", "income ($)", gsub("pct_", "percent", i[2]))
    ylab <- "area (ft)"
    g <- corplotgg_fill(dat = x, xvar = i[1], yvar = 'sqft_per_person', fillvar = "zone_16th", mytitle = mytitle, xlab = xlab, ylab = ylab, legend_title = "16th Ave divide")
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s_park_dropoutlier_%s.png", y, i[2])), width = ggsavewidth, height = ggsaveheight)
    }
}

3.3.1.3 Buffer of park polygons by 1/4 mile

The graphs show the estimate of the relative percent of persons in specified demographic groups residing within 1/4 mile of any park for each of the years 1980, 1990, 2000, 2010, and 2015. While this may serve as a rough measure of “accessibility,” it does not consider park count, size, quality, or amenities.

Parks with no private funding

sql <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx and name not in (select name from prp))
, p1 as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--park buffer
, pb as (select true as buffer, st_union(st_buffer(the_geom_2286, 5280/4)) as p_geom from p1)
--annexation
, anx as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= xxxYEARxxx)
--non-buffer
, pnb as (select false as buffer, st_difference(anx.geom, pb.p_geom) as p_geom from pb, anx where st_intersects(anx.geom, pb.p_geom))
--combined
, p as (select * from pb union all select * from pnb)
--census tracts
, ct as (select st_area(the_geom_2286) as area_orig, * from yakima_equity.tract_xxxYEARxxx)
, cta as (select
    tract
    , total_pop
    , age_lt_18
    , age_lt_65
    , age_ge_65
    , median_hh_income
    , median_family_income    
    , eth_hispanic
    , race_nonwhite
    , edu_collgrad
    , edu_9th_somecoll
    , edu_lt_9th
    , pov_status
    , pov_below_100pct
    , married
    , unmarried
    , housingunits_renter
    , housingunits_owner 
    from ct)
--clipped by anx
, ctanx as (select area_orig
    , st_intersection(ct.the_geom_2286, anx.geom) as ctanx_geom
    , tract
    from ct, anx where st_intersects(ct.the_geom_2286, anx.geom))
--intersection of ctanx and parkbuf
, i as (select 
    st_area(st_intersection(ctanx.ctanx_geom, p.p_geom)) as area_clip
    , *
    from p, ctanx where st_intersects(p.p_geom, ctanx.ctanx_geom))
--ratio
, ir as (select tract, buffer, area_clip / area_orig as area_prop from i)
--proportional counts insid vs outside buffer
, props as (select tract, buffer
    , total_pop * area_prop as total_pop
    , age_lt_18 * area_prop as age_lt_18
    , age_lt_65 * area_prop as age_lt_65
    , age_ge_65 * area_prop as age_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic * area_prop as eth_hispanic
    , race_nonwhite * area_prop as race_nonwhite
    , edu_collgrad * area_prop as edu_collgrad
    , edu_9th_somecoll * area_prop as edu_9th_somecoll
    , edu_lt_9th * area_prop as edu_lt_9th
    , pov_status * area_prop as pov_status
    , pov_below_100pct * area_prop as pov_below_100pct
    , married * area_prop as married
    , unmarried * area_prop as unmarried
    , housingunits_renter * area_prop as housingunits_renter
    , housingunits_owner  * area_prop as housingunits_owner 
     from ir left join cta using(tract))
 --sum insie vs outside buffer
 , ssum as (select
    buffer
    , sum(total_pop) as total_pop
    , sum(eth_hispanic) as eth_hispanic
    , sum(race_nonwhite) as race_nonwhite
    , sum(age_lt_18) as age_lt_18
    , sum(age_lt_65) as age_lt_65
    , sum(age_ge_65) as age_ge_65
    , sum(edu_collgrad) as edu_collgrad
    , sum(edu_9th_somecoll) as edu_9th_somecoll
    , sum(edu_lt_9th) as edu_lt_9th
    , sum(pov_status) as pov_status
    , sum(pov_below_100pct) as pov_below_100pct
    , sum(married) as married
    , sum(unmarried) as unmarried
    , sum(housingunits_renter) as housingunits_renter
    , sum(housingunits_owner ) as housingunits_owner 
    from props
    group by buffer)
--proportion with access
, paxs as (select xxxYEARxxx::integer as year
    , buffer
    , 100 * total_pop / sum(total_pop) over() as total_pop
    , 100 * eth_hispanic / sum(eth_hispanic) over() as eth_hispanic
    , 100 * race_nonwhite / sum(race_nonwhite) over() as race_nonwhite
    , 100 * age_lt_18 / sum(age_lt_18) over() as age_lt_18
    , 100 * age_lt_65 / sum(age_lt_65) over() as age_lt_65
    , 100 * age_ge_65 / sum(age_ge_65) over() as age_ge_65
    , 100 * edu_collgrad / sum(edu_collgrad) over() as edu_collgrad
    , 100 * edu_9th_somecoll / sum(edu_9th_somecoll) over() as edu_9th_somecoll
    , 100 * edu_lt_9th / sum(edu_lt_9th) over() as edu_lt_9th
    , 100 * pov_status / sum(pov_status) over() as pov_status
    , 100 * pov_below_100pct / sum(pov_below_100pct) over() as pov_below_100pct
    , 100 * married / sum(married) over() as married
    , 100 * unmarried / sum(unmarried) over() as unmarried
    , 100 * housingunits_renter / sum(housingunits_renter) over() as housingunits_renter
    , 100 * housingunits_owner  / sum(housingunits_owner ) over() as housingunits_owner 
    from ssum)
select * from paxs"

ydat <- NULL
for(y in c(1980, 1990, 2000, 2010, 2015)){
    sqly <- gsub("xxxYEARxxx", y, sql)
    ydat <- rbind(ydat, dbGetQuery(conn = dbconn, statement = sqly))
}

# drop some columns, rename some.
ydat$pov_status <- ydat$unmarried <- ydat$total_pop <- ydat$age_lt_65 <- ydat$edu_9th_somecoll <- ydat$edu_9th_somecoll <- NULL
names(ydat) <- c("year", "buffer", "total population", "Hispanic", "nonwhite", "age < 18", "age >= 65", "college graduate", "below poverty", "married", "renter", "owner")

# melt 
ydatl <- ydatl0 <- melt(ydat, id.vars=c("year","buffer"), value.name = "percent")
# ydatl0$year <- factor(ydatl0$year)
# g <- ggplot(data = ydatl0, aes(x = year, y = percent, fill = buffer)) + 
#     geom_bar(position = "dodge", stat = "identity") + 
#     facet_wrap(~variable, ncol = 2) + 
#     scale_fill_discrete(name="in 1/4 mile buffer") 
# print(g)
# ggsave(filename = file.path(imagedir, "parks_demographics_16th.png"), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

ydatl <- ydatl[ydatl$buffer,]
g <- ggplot(data = ydatl, aes(x = year, y = percent)) + 
    geom_line() + 
    facet_wrap(~variable, ncol = 2) +
    labs(title = "percent of persons residing within 1/4 mile of any park (non private)") +
    theme(plot.title = element_text(size=10))

print(g)

ggsave(filename = file.path(imagedir, "parks_noprivate_demographics.png"), plot = g, width = 4, height = 6, units = "in")

All parks

sql <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx)
, p1 as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--park buffer
, pb as (select true as buffer, st_union(st_buffer(the_geom_2286, 5280/4)) as p_geom from p1)
--annexation
, anx as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= xxxYEARxxx)
--non-buffer
, pnb as (select false as buffer, st_difference(anx.geom, pb.p_geom) as p_geom from pb, anx where st_intersects(anx.geom, pb.p_geom))
--combined
, p as (select * from pb union all select * from pnb)
--census tracts
, ct as (select st_area(the_geom_2286) as area_orig, * from yakima_equity.tract_xxxYEARxxx)
, cta as (select
    tract
    , total_pop
    , age_lt_18
    , age_lt_65
    , age_ge_65
    , median_hh_income
    , median_family_income    
    , eth_hispanic
    , race_nonwhite
    , edu_collgrad
    , edu_9th_somecoll
    , edu_lt_9th
    , pov_status
    , pov_below_100pct
    , married
    , unmarried
    , housingunits_renter
    , housingunits_owner 
    from ct)
--clipped by anx
, ctanx as (select area_orig
    , st_intersection(ct.the_geom_2286, anx.geom) as ctanx_geom
    , tract
    from ct, anx where st_intersects(ct.the_geom_2286, anx.geom))
--intersection of ctanx and parkbuf
, i as (select 
    st_area(st_intersection(ctanx.ctanx_geom, p.p_geom)) as area_clip
    , *
    from p, ctanx where st_intersects(p.p_geom, ctanx.ctanx_geom))
--ratio
, ir as (select tract, buffer, area_clip / area_orig as area_prop from i)
--proportional counts insid vs outside buffer
, props as (select tract, buffer
    , total_pop * area_prop as total_pop
    , age_lt_18 * area_prop as age_lt_18
    , age_lt_65 * area_prop as age_lt_65
    , age_ge_65 * area_prop as age_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic * area_prop as eth_hispanic
    , race_nonwhite * area_prop as race_nonwhite
    , edu_collgrad * area_prop as edu_collgrad
    , edu_9th_somecoll * area_prop as edu_9th_somecoll
    , edu_lt_9th * area_prop as edu_lt_9th
    , pov_status * area_prop as pov_status
    , pov_below_100pct * area_prop as pov_below_100pct
    , married * area_prop as married
    , unmarried * area_prop as unmarried
    , housingunits_renter * area_prop as housingunits_renter
    , housingunits_owner  * area_prop as housingunits_owner 
     from ir left join cta using(tract))
 --sum insie vs outside buffer
 , ssum as (select
    buffer
    , sum(total_pop) as total_pop
    , sum(eth_hispanic) as eth_hispanic
    , sum(race_nonwhite) as race_nonwhite
    , sum(age_lt_18) as age_lt_18
    , sum(age_lt_65) as age_lt_65
    , sum(age_ge_65) as age_ge_65
    , sum(edu_collgrad) as edu_collgrad
    , sum(edu_9th_somecoll) as edu_9th_somecoll
    , sum(edu_lt_9th) as edu_lt_9th
    , sum(pov_status) as pov_status
    , sum(pov_below_100pct) as pov_below_100pct
    , sum(married) as married
    , sum(unmarried) as unmarried
    , sum(housingunits_renter) as housingunits_renter
    , sum(housingunits_owner ) as housingunits_owner 
    from props
    group by buffer)
--proportion with access
, paxs as (select xxxYEARxxx::integer as year
    , buffer
    , 100 * total_pop / sum(total_pop) over() as total_pop
    , 100 * eth_hispanic / sum(eth_hispanic) over() as eth_hispanic
    , 100 * race_nonwhite / sum(race_nonwhite) over() as race_nonwhite
    , 100 * age_lt_18 / sum(age_lt_18) over() as age_lt_18
    , 100 * age_lt_65 / sum(age_lt_65) over() as age_lt_65
    , 100 * age_ge_65 / sum(age_ge_65) over() as age_ge_65
    , 100 * edu_collgrad / sum(edu_collgrad) over() as edu_collgrad
    , 100 * edu_9th_somecoll / sum(edu_9th_somecoll) over() as edu_9th_somecoll
    , 100 * edu_lt_9th / sum(edu_lt_9th) over() as edu_lt_9th
    , 100 * pov_status / sum(pov_status) over() as pov_status
    , 100 * pov_below_100pct / sum(pov_below_100pct) over() as pov_below_100pct
    , 100 * married / sum(married) over() as married
    , 100 * unmarried / sum(unmarried) over() as unmarried
    , 100 * housingunits_renter / sum(housingunits_renter) over() as housingunits_renter
    , 100 * housingunits_owner  / sum(housingunits_owner ) over() as housingunits_owner 
    from ssum)
select * from paxs"

ydat <- NULL
for(y in c(1980, 1990, 2000, 2010, 2015)){
    sqly <- gsub("xxxYEARxxx", y, sql)
    ydat <- rbind(ydat, dbGetQuery(conn = dbconn, statement = sqly))
}

# drop some columns, rename some.
ydat$pov_status <- ydat$unmarried <- ydat$total_pop <- ydat$age_lt_65 <- ydat$edu_9th_somecoll <- ydat$edu_9th_somecoll <- NULL
names(ydat) <- c("year", "buffer", "total population", "Hispanic", "nonwhite", "age < 18", "age >= 65", "college graduate", "below poverty", "married", "renter", "owner")

# melt 
ydatl <- ydatl0 <- melt(ydat, id.vars=c("year","buffer"), value.name = "percent")
# ydatl0$year <- factor(ydatl0$year)
# g <- ggplot(data = ydatl0, aes(x = year, y = percent, fill = buffer)) + 
#     geom_bar(position = "dodge", stat = "identity") + 
#     facet_wrap(~variable, ncol = 2) + 
#     scale_fill_discrete(name="in 1/4 mile buffer") 
# print(g)
# ggsave(filename = file.path(imagedir, "parks_demographics_16th.png"), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

ydatl <- ydatl[ydatl$buffer,]
g <- ggplot(data = ydatl, aes(x = year, y = percent)) + 
    geom_line() + 
    facet_wrap(~variable, ncol = 2) +
    labs(title = "percent of persons residing within 1/4 mile of any park (all)") +
    theme(plot.title = element_text(size=10))

print(g)

ggsave(filename = file.path(imagedir, "parks_all_demographics.png"), plot = g, width = 4, height = 6, units = "in")

Furthermore, when stratified by 16th Avenue, there were more persons residing within 1/4 mile of park on the east side than on the west side.

Parks with no private funding

sql <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx and name not in (select name from prp))
, p1 as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--annexation
, anx as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= xxxYEARxxx)
--park buffer
, pb0 as (select true as buffer, st_union(st_buffer(the_geom_2286, 5280/4)) as geom from p1)
----clip by annexation
, pb as (select buffer, (st_multi(st_intersection(pb0.geom, anx.geom)))::geometry(multipolygon, 2286) as geom from pb0, anx where st_intersects(pb0.geom, anx.geom))
--e/w
, ew as (select zone_16th, the_geom_2286 as geom from yakima_equity.ew_16th)
--non-buffer
, pnb as (select false as buffer, (st_multi(st_difference(anx.geom, pb.geom)))::geometry(multipolygon, 2286) as geom from pb, anx where st_intersects(anx.geom, pb.geom))
--combined
, pb_pnb as (select * from pb union all select * from pnb)
--overlay e/w    --note use of polygonalintersection function
, pball as (select buffer, zone_16th, polygonalintersection(ew.geom, pb_pnb.geom, 2286) as geom from ew, pb_pnb)
--census tracts
, ct as (select the_geom_2286 as geom, tract::text from yakima_equity.tract_xxxYEARxxx)
--attributes only
, cta as (select
    tract::text
    , st_area(the_geom_2286) as area_orig
    , total_pop
    , age_lt_18
    , age_lt_65
    , age_ge_65
    , median_hh_income
    , median_family_income    
    , eth_hispanic
    , race_nonwhite
    , edu_collgrad
    , edu_9th_somecoll
    , edu_lt_9th
    , pov_status
    , pov_below_100pct
    , married
    , unmarried
    , housingunits_renter
    , housingunits_owner
    from yakima_equity.tract_xxxYEARxxx)
--intersection of ct and pball
, i0 as (select 
    polygonalintersection(ct.geom, pball.geom, 2286) as geom
    , st_area(st_intersection(ct.geom, pball.geom)) as area_int
    , buffer
    , zone_16th
    , tract
    from pball, ct where st_intersects(pball.geom, ct.geom))
, i as (select 
    area_int
    , buffer
    , zone_16th
    , tract::text
    --ratio
    , area_int / area_orig as area_prop
    from i0 join cta using (tract) where area_int > 0)
--proportional counts inside vs outside buffer
, props as (select tract, buffer, zone_16th
    , total_pop * area_prop as total_pop
    , age_lt_18 * area_prop as age_lt_18
    , age_lt_65 * area_prop as age_lt_65
    , age_ge_65 * area_prop as age_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic * area_prop as eth_hispanic
    , race_nonwhite * area_prop as race_nonwhite
    , edu_collgrad * area_prop as edu_collgrad
    , edu_9th_somecoll * area_prop as edu_9th_somecoll
    , edu_lt_9th * area_prop as edu_lt_9th
    , pov_status * area_prop as pov_status
    , pov_below_100pct * area_prop as pov_below_100pct
    , married * area_prop as married
    , unmarried * area_prop as unmarried
    , housingunits_renter * area_prop as housingunits_renter
    , housingunits_owner  * area_prop as housingunits_owner 
     from i join cta using(tract))

-- select * from props order by tract, buffer
 --sum insie vs outside buffer
 , ssum as (select
    buffer
    , zone_16th
    , sum(total_pop) as total_pop
    , sum(eth_hispanic) as eth_hispanic
    , sum(race_nonwhite) as race_nonwhite
    , sum(age_lt_18) as age_lt_18
    , sum(age_lt_65) as age_lt_65
    , sum(age_ge_65) as age_ge_65
    , sum(edu_collgrad) as edu_collgrad
    , sum(edu_9th_somecoll) as edu_9th_somecoll
    , sum(edu_lt_9th) as edu_lt_9th
    , sum(pov_status) as pov_status
    , sum(pov_below_100pct) as pov_below_100pct
    , sum(married) as married
    , sum(unmarried) as unmarried
    , sum(housingunits_renter) as housingunits_renter
    , sum(housingunits_owner ) as housingunits_owner 
    from props
    group by buffer, zone_16th)
--proportion with access
, paxs as (select xxxYEARxxx::integer as year
    , buffer
    , zone_16th
    , 100 * total_pop / sum(total_pop) over() as total_pop
    , 100 * eth_hispanic / sum(eth_hispanic) over() as eth_hispanic
    , 100 * race_nonwhite / sum(race_nonwhite) over() as race_nonwhite
    , 100 * age_lt_18 / sum(age_lt_18) over() as age_lt_18
    , 100 * age_lt_65 / sum(age_lt_65) over() as age_lt_65
    , 100 * age_ge_65 / sum(age_ge_65) over() as age_ge_65
    , 100 * edu_collgrad / sum(edu_collgrad) over() as edu_collgrad
    , 100 * edu_9th_somecoll / sum(edu_9th_somecoll) over() as edu_9th_somecoll
    , 100 * edu_lt_9th / sum(edu_lt_9th) over() as edu_lt_9th
    , 100 * pov_status / sum(pov_status) over() as pov_status
    , 100 * pov_below_100pct / sum(pov_below_100pct) over() as pov_below_100pct
    , 100 * married / sum(married) over() as married
    , 100 * unmarried / sum(unmarried) over() as unmarried
    , 100 * housingunits_renter / sum(housingunits_renter) over() as housingunits_renter
    , 100 * housingunits_owner  / sum(housingunits_owner ) over() as housingunits_owner 
    from ssum)
select * from paxs;"

ydat <- NULL
for(y in c(1980, 1990, 2000, 2010, 2015)){
    sqly <- gsub("xxxYEARxxx", y, sql)
    ydat <- rbind(ydat, dbGetQuery(conn = dbconn, statement = sqly))
}

# drop some columns, rename some.
ydat$pov_status <- ydat$unmarried <- ydat$total_pop <- ydat$age_lt_65 <- ydat$edu_9th_somecoll <- ydat$edu_9th_somecoll <- NULL
names(ydat) <- c("year", "buffer", "zone_16th", "total population", "Hispanic", "nonwhite", "age < 18", "age >= 65", "college graduate", "below poverty", "married", "renter", "owner")

# melt 
ydatl <- ydatl0 <- melt(ydat, id.vars=c("year","buffer","zone_16th"), value.name = "percent")
# ydatl0$year <- factor(ydatl0$year)
# g <- ggplot(data = ydatl0, aes(x = year, y = percent, fill = buffer)) + 
#     geom_bar(position = "dodge", stat = "identity") + 
#     facet_wrap(~variable, ncol = 2) + 
#     scale_fill_discrete(name="in 1/4 mile buffer") 
# print(g)
# ggsave(filename = file.path(imagedir, "parks_demographics_16th.png"), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

ydatl <- ydatl[ydatl$buffer,]
l <- levels(ydatl$variable)
lx <- gsub(" ", "\n", l)
ydatl$vname <- factor(gsub(" ", "\n", ydatl$variable), levels = lx)
#ydatl$year <- factor(ydatl$year)
g <- ggplot(data = ydatl, aes(x = year, y = percent)) + 
    geom_line() + 
    #facet_wrap(variable ~ zone_16th, ncol = 2) +
    facet_grid(vname ~ zone_16th) + 
    labs(title = "percent of persons residing within 1/4 mile of any park (non private)") +
    theme(plot.title = element_text(size=10))
print(g)

ggsave(filename = file.path(imagedir, "parks_noprivate_demographics_16th.png"), plot = g, width = 4, height = 8, units = "in")

All parks

sql <- "
with
--parks
prp as (select * from yakima_equity.park_private)
, p0 as (select * from yakima_equity.parksvalid where yearcreated <= xxxYEARxxx)
, p1 as (select the_geom_2286, st_area(the_geom_2286) as park_area_orig from p0)
--annexation
, anx as (select st_union(the_geom_2286) as geom from yakima_equity.annexations where extract('year' from annex_date) <= xxxYEARxxx)
--park buffer
, pb0 as (select true as buffer, st_union(st_buffer(the_geom_2286, 5280/4)) as geom from p1)
----clip by annexation
, pb as (select buffer, (st_multi(st_intersection(pb0.geom, anx.geom)))::geometry(multipolygon, 2286) as geom from pb0, anx where st_intersects(pb0.geom, anx.geom))
--e/w
, ew as (select zone_16th, the_geom_2286 as geom from yakima_equity.ew_16th)
--non-buffer
, pnb as (select false as buffer, (st_multi(st_difference(anx.geom, pb.geom)))::geometry(multipolygon, 2286) as geom from pb, anx where st_intersects(anx.geom, pb.geom))
--combined
, pb_pnb as (select * from pb union all select * from pnb)
--overlay e/w    --note use of polygonalintersection function
, pball as (select buffer, zone_16th, polygonalintersection(ew.geom, pb_pnb.geom, 2286) as geom from ew, pb_pnb)
--census tracts
, ct as (select the_geom_2286 as geom, tract::text from yakima_equity.tract_xxxYEARxxx)
--attributes only
, cta as (select
    tract::text
    , st_area(the_geom_2286) as area_orig
    , total_pop
    , age_lt_18
    , age_lt_65
    , age_ge_65
    , median_hh_income
    , median_family_income    
    , eth_hispanic
    , race_nonwhite
    , edu_collgrad
    , edu_9th_somecoll
    , edu_lt_9th
    , pov_status
    , pov_below_100pct
    , married
    , unmarried
    , housingunits_renter
    , housingunits_owner
    from yakima_equity.tract_xxxYEARxxx)
--intersection of ct and pball
, i0 as (select 
    polygonalintersection(ct.geom, pball.geom, 2286) as geom
    , st_area(st_intersection(ct.geom, pball.geom)) as area_int
    , buffer
    , zone_16th
    , tract
    from pball, ct where st_intersects(pball.geom, ct.geom))
, i as (select 
    area_int
    , buffer
    , zone_16th
    , tract::text
    --ratio
    , area_int / area_orig as area_prop
    from i0 join cta using (tract) where area_int > 0)
--proportional counts inside vs outside buffer
, props as (select tract, buffer, zone_16th
    , total_pop * area_prop as total_pop
    , age_lt_18 * area_prop as age_lt_18
    , age_lt_65 * area_prop as age_lt_65
    , age_ge_65 * area_prop as age_ge_65
    , median_hh_income
    , median_family_income
    , eth_hispanic * area_prop as eth_hispanic
    , race_nonwhite * area_prop as race_nonwhite
    , edu_collgrad * area_prop as edu_collgrad
    , edu_9th_somecoll * area_prop as edu_9th_somecoll
    , edu_lt_9th * area_prop as edu_lt_9th
    , pov_status * area_prop as pov_status
    , pov_below_100pct * area_prop as pov_below_100pct
    , married * area_prop as married
    , unmarried * area_prop as unmarried
    , housingunits_renter * area_prop as housingunits_renter
    , housingunits_owner  * area_prop as housingunits_owner 
     from i join cta using(tract))

-- select * from props order by tract, buffer
 --sum insie vs outside buffer
 , ssum as (select
    buffer
    , zone_16th
    , sum(total_pop) as total_pop
    , sum(eth_hispanic) as eth_hispanic
    , sum(race_nonwhite) as race_nonwhite
    , sum(age_lt_18) as age_lt_18
    , sum(age_lt_65) as age_lt_65
    , sum(age_ge_65) as age_ge_65
    , sum(edu_collgrad) as edu_collgrad
    , sum(edu_9th_somecoll) as edu_9th_somecoll
    , sum(edu_lt_9th) as edu_lt_9th
    , sum(pov_status) as pov_status
    , sum(pov_below_100pct) as pov_below_100pct
    , sum(married) as married
    , sum(unmarried) as unmarried
    , sum(housingunits_renter) as housingunits_renter
    , sum(housingunits_owner ) as housingunits_owner 
    from props
    group by buffer, zone_16th)
--proportion with access
, paxs as (select xxxYEARxxx::integer as year
    , buffer
    , zone_16th
    , 100 * total_pop / sum(total_pop) over() as total_pop
    , 100 * eth_hispanic / sum(eth_hispanic) over() as eth_hispanic
    , 100 * race_nonwhite / sum(race_nonwhite) over() as race_nonwhite
    , 100 * age_lt_18 / sum(age_lt_18) over() as age_lt_18
    , 100 * age_lt_65 / sum(age_lt_65) over() as age_lt_65
    , 100 * age_ge_65 / sum(age_ge_65) over() as age_ge_65
    , 100 * edu_collgrad / sum(edu_collgrad) over() as edu_collgrad
    , 100 * edu_9th_somecoll / sum(edu_9th_somecoll) over() as edu_9th_somecoll
    , 100 * edu_lt_9th / sum(edu_lt_9th) over() as edu_lt_9th
    , 100 * pov_status / sum(pov_status) over() as pov_status
    , 100 * pov_below_100pct / sum(pov_below_100pct) over() as pov_below_100pct
    , 100 * married / sum(married) over() as married
    , 100 * unmarried / sum(unmarried) over() as unmarried
    , 100 * housingunits_renter / sum(housingunits_renter) over() as housingunits_renter
    , 100 * housingunits_owner  / sum(housingunits_owner ) over() as housingunits_owner 
    from ssum)
select * from paxs;"

ydat <- NULL
for(y in c(1980, 1990, 2000, 2010, 2015)){
    sqly <- gsub("xxxYEARxxx", y, sql)
    ydat <- rbind(ydat, dbGetQuery(conn = dbconn, statement = sqly))
}

# drop some columns, rename some.
ydat$pov_status <- ydat$unmarried <- ydat$total_pop <- ydat$age_lt_65 <- ydat$edu_9th_somecoll <- ydat$edu_9th_somecoll <- NULL
names(ydat) <- c("year", "buffer", "zone_16th", "total population", "Hispanic", "nonwhite", "age < 18", "age >= 65", "college graduate", "below poverty", "married", "renter", "owner")

# melt 
ydatl <- ydatl0 <- melt(ydat, id.vars=c("year","buffer","zone_16th"), value.name = "percent")
# ydatl0$year <- factor(ydatl0$year)
# g <- ggplot(data = ydatl0, aes(x = year, y = percent, fill = buffer)) + 
#     geom_bar(position = "dodge", stat = "identity") + 
#     facet_wrap(~variable, ncol = 2) + 
#     scale_fill_discrete(name="in 1/4 mile buffer") 
# print(g)
# ggsave(filename = file.path(imagedir, "parks_demographics_16th.png"), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

ydatl <- ydatl[ydatl$buffer,]
l <- levels(ydatl$variable)
lx <- gsub(" ", "\n", l)
ydatl$vname <- factor(gsub(" ", "\n", ydatl$variable), levels = lx)
#ydatl$year <- factor(ydatl$year)
g <- ggplot(data = ydatl, aes(x = year, y = percent)) + 
    geom_line() + 
    #facet_wrap(variable ~ zone_16th, ncol = 2) +
    facet_grid(vname ~ zone_16th) + 
    labs(title = "percent of persons residing within 1/4 mile of any park (all)") +
    theme(plot.title = element_text(size=10))
print(g)

ggsave(filename = file.path(imagedir, "parks_all_demographics_16th.png"), plot = g, width = 4, height = 8, units = "in")

3.4 Built environment

for(i in (ncol(citysum) - 1):ncol(citysum)){
    colname <- colnames(citysum)[i]
    title <- titles[i]    
    ylab <- switch(substr(colname, 1, 3),
        med="$",
        pct="percent",
        pro="$",
        yea="year")
    
    g <- ggplot(data = citysum, aes(factor(year), citysum[,i])) + geom_bar(stat = "identity") + labs(x="year", y=ylab, title=title) +
    theme(plot.title = element_text(size=12))
        
    if(colname=='year_built'){
        g <- g + coord_cartesian(ylim = c(1940, 1970))
    }
    if(colname=='propvalue'){
        g <- g + coord_cartesian(ylim = c(150000, 250000))
    }
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s.png", colname)), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")
}

for(i in (ncol(ewsum)-1):ncol(ewsum)){
    colname <- colnames(ewsum)[i]
    title <- titles[i]
    ylab <- switch(substr(colname, 1, 3),
    med="$",
    pct="percent",
    pro="$",
    yea="year")
    
    g <- ggplot(data = ewsum, aes(factor(year), ewsum[,i], fill=zone_16th)) + geom_bar(stat = "identity", position = "dodge") + labs(x="year", y=ylab, title=title, fill="16th Ave divide") + 
    theme(plot.title = element_text(size=12))
        
    if(colname=='year_built'){
        g <- g + coord_cartesian(ylim = c(1940, 1970))
    }
    if(colname=='propvalue'){
        g <- g + coord_cartesian(ylim = c(150000, 250000))
    }
    print(g)
    ggsave(filename = file.path(imagedir, sprintf("%s_16th.png", colname)), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")

}

The set of built environment maps show changes in median property value and mean year built over time. The property value maps have the median value printed on each tract.

include_graphics(bemaps)

3.5 Current infrastructure allocation

The data in this section represent the most recent GIS data from YC and US Census data from 2015.

3.5.1 Public safety calls for service

It should be noted that no information was provided on response times ,so no analysis was possible. Also, given that there were 140 call types, no analysis was performed on any specific call type.

3.5.1.1 Police Department

sql <- "
    with
    --substitute input table and where clause
    p as (select the_geom_2286, callsforservice_opened as opened, callsforservice_closed as closed, extract(epoch from callsforservice_closed - callsforservice_opened)/60 as response_cycle_minutes from yakima_equity.ykpdcfs)
    --census tracts
    , ct as (select st_area(the_geom_2286) as area_ct, * from yakima_equity.tract_2015)
    --population east vs west
    , tpop as (select zone_16th, sum(total_pop) as total_pop from ct group by zone_16th)
    --intersect points and tracts
    , px as (select opened, closed, response_cycle_minutes, zone_16th from ct, p where st_intersects(p.the_geom_2286, ct.the_geom_2286))
    , px2 as (select * from px join tpop using(zone_16th))
    --select * from f;
    select * from px2"
pdcalls_resp <- dbGetQuery(conn = dbconn, statement = sql)
minpddate <- as.Date(min(pdcalls_resp$opened))
maxpddate <- as.Date(max(pdcalls_resp$closed))

ew_sum_pdcalls <- sqldf("with a as (select zone_16th, total_pop, round(avg(response_cycle_minutes)::numeric, 1) as avg_response_cycle_minutes, round(stddev(response_cycle_minutes)::numeric, 1) as sd_response_cycle_minutes, count(*) from pdcalls_resp group by zone_16th, total_pop order by zone_16th) select *, count / total_pop as calls_per_capita from a;")
pdew <- split(ew_sum_pdcalls, ew_sum_pdcalls$zone_16th)

Count of Police Department calls (n = 534566 from 2010-12-08 to 2017-09-11 were stratified by 16th Avenue as well as overlain on Census tracts. There were 364,166 calls (7.6 per capita) from the area east of 16th Avenue, and 170,400 calls (4.1 per capita) from the west side. There were no apparent patterns with respect to counts of calls per capita and Census demographic characteristics, either as a whole or with stratification by 16th Avenue. There are several census tracts on the east side with quite higher per capita calls, a situation that potentially warrants further investigation.

# count per capita
pdcalls <- ptfcn(pttablename = "ykpdcfs", normvar = "capita", mytitle = "YKPD calls", legend_title = "16th Ave divide")

3.5.1.2 Fire Department

sql <- "
    with
    --substitute input table and where clause
    p as (select the_geom_2286, callsforservice_opened as opened, callsforservice_closed as closed, extract(epoch from callsforservice_closed - callsforservice_opened)/60 as response_cycle_minutes from yakima_equity.ykfdcfs)
    --census tracts
    , ct as (select st_area(the_geom_2286) as area_ct, * from yakima_equity.tract_2015)
    --population east vs west
    , tpop as (select zone_16th, sum(total_pop) as total_pop from ct group by zone_16th)
    --intersect points and tracts
    , px as (select opened, closed, response_cycle_minutes, zone_16th from ct, p where st_intersects(p.the_geom_2286, ct.the_geom_2286))
    , px2 as (select * from px join tpop using(zone_16th))
    --select * from f;
    select * from px2"
fdcalls_resp <- dbGetQuery(conn = dbconn, statement = sql)
minfddate <- as.Date(min(fdcalls_resp$opened))
maxfddate <- as.Date(max(fdcalls_resp$closed))

ew_sum_fdcalls <- sqldf("with a as (select zone_16th, total_pop, round(avg(response_cycle_minutes)::numeric, 1) as avg_response_cycle_minutes, round(stddev(response_cycle_minutes)::numeric, 1) as sd_response_cycle_minutes, count(*) from fdcalls_resp group by zone_16th, total_pop order by zone_16th) select *, count / total_pop as calls_per_capita from a;")
fdew <- split(ew_sum_fdcalls, ew_sum_fdcalls$zone_16th)

Fire Department calls were nearly evenly split, with 50,223 calls (1 per capita) from the area east of 16th Avenue, and 49,332 calls (1.2 per capita) from the west side. Response cycle times were nearly 10 minutes shorter on the east side, with a mean of 37.8 minutes (SD=42.5) versus 48.4 minutes (SD = 58.8) on the west side. There were no apparent patterns with respect to counts of calls per capita and Census demographic characteristics, either as a whole or with stratification by 16th Avenue. However, there is a single tract with > 8 calls per capita, which is twice as high as the next highest tracts; this is a situation that probably warrants further investigation.

fdcalls <- ptfcn(pttablename = "ykfdcfs", normvar = "capita", mytitle = "YKFD calls", legend_title = "16th Ave divide")

3.5.2 Street lights

Street lights

strtlights <- ptfcn(pttablename = "streetlights", normvar = "area", mytitle = "Street lights", legend_title = "16th Ave divide")

3.5.3 Code compliance requests

Counts of code compliance requests per capita

codecompliance <- ptfcn(pttablename = "codecompliance2015", normvar = "capita", mytitle = "Code compliance requests", legend_title = "16th Ave divide")

3.5.4 Transit

3.5.4.1 Ridership

Bus ridership (summed alightings) per capita

riderfcn <- function(pttablename, where = "", normvar, mytitle){
    rdsql <- "
    with
    --substitute input table and where clause
    p as (select * from yakima_equity.ridershipstatistics)
    --census tracts
    , ct as (select st_area(the_geom_2286) as area_ct, * from yakima_equity.tract_2015)
    --intersect points and tracts
    , px as (select tract, zone_16th, alightings from ct, p where st_intersects(p.the_geom_2286, ct.the_geom_2286))
    --jsum by tract
    , pj as (select 
        --normalization
        sum(alightings) / xxxNORMxxx as xxxNORMALIASxxx
        , tract
        , ct.zone_16th
        from px left join ct using(tract)
        group by tract, xxxNORMxxx
        , ct.zone_16th
        ) 
    --join to tract data
     , f as (select pj.*
        , median_hh_income
        , median_family_income
        , age_lt_18 / total_pop::numeric * 100 as pct_age_lt_18
        , age_ge_65 / total_pop::numeric * 100 as pct_age_ge_65
        , eth_hispanic / total_pop::numeric * 100 as pct_hispanic
        , race_nonwhite / total_pop::numeric * 100 as pct_nonwhite
        , case when (edu_collgrad + edu_9th_somecoll + edu_lt_9th) = 0 then 0 else edu_collgrad::numeric / (edu_collgrad + edu_9th_somecoll + edu_lt_9th) * 100 end as pct_collgrad
        , case when pov_status = 0 then 0 else pov_below_100pct / pov_status::numeric * 100 end as pct_below_poverty
        , case when married + unmarried = 0 then 0 else married / (married + unmarried)::numeric * 100 end as pct_married
        , case when housingunits_renter + housingunits_owner = 0 then 0 else housingunits_renter / (housingunits_renter + housingunits_owner)::numeric * 100 end as pct_renter from pj join ct using(tract, zone_16th))
    select * from f;
    "

    
    yvar <- switch(normvar,
                   capita = "count_per_capita",
                   area = "count_per_mi2")
    
    ylab <- switch(normvar,
                   capita = "count per capita",
                   area = "count per square mile") 
    
    normfield <- switch(normvar,
                   capita = "total_pop",
                   area = "(area_ct / 5280^2)") 

    normalias <- switch(normvar,
                    capita = "count_per_capita",
                    area = "count_per_mi2")
    
    mySql <- gsub("xxxPTDATAxxx", pttablename, gsub("xxxNORMxxx", normfield, gsub("xxxNORMALIASxxx", normalias, gsub("xxxWHERExxx", where, rdsql))))
    
    assign("mySQL", mySql, envir = .GlobalEnv)
    
    dat <- dbGetQuery(conn = dbconn, statement = mySql)        
    
    # plot
    xvars <- list(
        c('pct_age_lt_18', 'percent < 18 years'),
        c('pct_age_ge_65', 'percent >= 65 years'),
        c('median_hh_income', 'median household income'),
        c('median_family_income', 'median family income'),
        c('pct_hispanic', 'percent Hispanic'),
        c('pct_nonwhite', 'percent non-White'),
        c('pct_collgrad', 'percent college graduate'),
        c('pct_below_poverty', 'percent below poverty'),
        c('pct_married', 'percent married'),
        c('pct_renter', 'percent of housing renter-occupied'))
    
    for(i in xvars){
        #message(i)
        thetitle <- sprintf("%s x %s", mytitle, i[2])
        xlab <- gsub("income", "income ($)", gsub("pct_", "percent", i[2]))
        g <- corplotgg_fill(dat = dat, xvar = i[1], yvar = yvar, fillvar = "zone_16th", mytitle = thetitle, xlab = xlab, ylab = ylab, legend_title = "16th Ave divide")
        print(g)
        imgfile <- tolower(gsub("x_", "", gsub("_+", "_", gsub(" |[[:punct:]]", "_", thetitle))))
        ggsave(filename = file.path(imagedir, sprintf("%s.png", imgfile)), plot = g, width = ggsavewidth, height = ggsaveheight, units = "in")
    }
}

riderfcn(pttablename = "ridershipstatistics", normvar = "capita", mytitle = "Bus ridership")

Bus ridership normalized by area:

riderfcn(pttablename = "ridershipstatistics", normvar = "area", mytitle = "Bus ridership")

3.5.4.2 Benches

Bus benches per capita

busbenches <- ptfcn(pttablename = "busstops", where = "where bench = 'Y'", normvar = "capita", mytitle = "Bus stop benches", legend_title = "16th Ave divide")

Bus benches normalized by area

busbench_norm <- ptfcn(pttablename = "busstops", where = "where bench = 'Y'", normvar = "area", mytitle = "Bus stop benches", legend_title = "16th Ave divide")

3.5.4.3 Shelters

Bus shelters per capita

busshelter <- ptfcn(pttablename = "busstops", where = "where shelter = 'Y'", normvar = "capita", mytitle = "Bus stop shelters", legend_title = "16th Ave divide")

Bus shelters normalized by area

busshelter_norm <- ptfcn(pttablename = "busstops", where = "where shelter = 'Y'", normvar = "area", mytitle = "Bus stop shelters", legend_title = "16th Ave divide")

# zip the graphs
system("cd /projects/yakima_equity/www/images; zip 00_all_images.zip *png *pdf")

# pdf the graphs
system("cd /projects/yakima_equity/www/images; convert *.png -quality 100 00_all_images.pdf")

# zip the maps
system("cd /projects/yakima_equity/www/maps; zip 00_all_maps.zip *.png")

# PDF the maps
system("cd /projects/yakima_equity/www/maps; convert *.png -quality 100 00_all_maps.pdf")

4 Downloads

Downloadable maps are available as a single PDF, or as a zip file.

Graphs are available as a single PDF or as a zip file.