Evaluating Federal Programs

CPP 528 Final Project Spring 2020

Descriptive Analysis of Community Change

For the Descriptive Analysis of Community Change, this part of the project uses:

Census data to calculate change in the MHV (Median Home Values) variable between 1990 and 2000.
Measure gentrification that occurs between 2000-2010.
Create a new dorling cartogram to visualize the data for Phoenix Metro Area.
Prepare descriptive statistics and chloropleth maps describing the MHV change variable, gentrification, and neighborhood health metrics.

Set-Up

Library

# load Library

library(dplyr)  # data wrangling
library(stargazer)  # table formatting
library(scales)  # centers data
library(here)  # creates relative file paths
library(geojsonio)  # read geoJSON map files from GitHub
library(sp)  # spatial data class sp for shapefiles
library(cartogram)  # spatial maps w/ tract size bias reduction
library(tmap)  # thematic maps
library(maptools)  # spatial object manipulation 
library(sf)  # 'simple features' flavor of shapefiles

# set stargazer type to text for previewing in RMD docs but convert to type HTML
# when knitting (next code chunk)

s.type <- "html"

Helper Functions

# Load helper functions for graphing purposes

source(here("functions/lab-05_helper_functions_cs.R"))

Data

Load Data sets for 1990 and 2000 and give the number of tracts present.

# load data
d1 <- readRDS(here("data/processed/LTDB-1990.rds"))
d2 <- readRDS(here("data/processed/LTDB-2000.rds"))
md <- readRDS(here("data/processed/LTDB-META-DATA.rds"))


d1 <- select(d1, -year)
d2 <- select(d2, -year)

# merge three tables by tract-id
d <- merge(d1, d2, by = "tractid")
d <- merge(d, md, by = "tractid")

nrow(d)

[1] 72693

Keep only urban data for the analysis

# filter rural districts
table(d$urban.x)

rural urban 
12971 59722

d <- filter(d, urban.x == "urban")

Find common variables present in 1990 and 2000 Census Data

# find variables that are in both files
compare_dfs <- function(df1, df2) {
    # use regular expressions to remove numeric suffixes
    var.names.1 <- names(df1)
    var.names.1 <- gsub("[.][xy]$", "", var.names.1)
    var.names.1 <- gsub("[0-9]{2}$", "", var.names.1)

    var.names.2 <- names(df2)
    var.names.2 <- gsub("[.][xy]$", "", var.names.2)
    var.names.2 <- gsub("[0-9]{2}$", "", var.names.2)

    shared <- intersect(var.names.1, var.names.2) %>% sort()
    print("SHARED VARIABLES:")
    print(shared)

    not.shared <- c(setdiff(var.names.1, var.names.2), setdiff(var.names.2, var.names.1)) %>%
        sort()

    print("NOT SHARED:")
    print(not.shared)

    d.vars1 <- data.frame(type = "shared", variables = shared, stringsAsFactors = F)
    d.vars2 <- data.frame(type = "not shared", variables = not.shared, stringsAsFactors = F)
    dd <- rbind(d.vars1, d.vars2)

    return(dd)
}

vars <- compare_dfs(df1 = d1, df2 = d2)

[1] "SHARED VARIABLES:"
 [1] "a15asn" "a15blk" "a15hsp" "a15ntv" "a15wht" "a18und" "a60asn" "a60blk"
 [9] "a60hsp" "a60ntv" "a60up"  "a60wht" "a75up"  "ag15up" "ag25up" "ag5up" 
[17] "ageasn" "ageblk" "agehsp" "agentv" "agewht" "asian"  "china"  "clf"   
[25] "cni16u" "col"    "cuban"  "dapov"  "dbpov"  "dflabf" "dfmpov" "dhpov" 
[33] "dis"    "dmulti" "dnapov" "dpov"   "dwpov"  "empclf" "family" "fb"    
[41] "fhh"    "filip"  "flabf"  "geanc"  "gefb"   "h10yrs" "h30old" "haw"   
[49] "hh"     "hha"    "hhb"    "hhh"    "hhw"    "hinc"   "hinca"  "hincb" 
[57] "hinch"  "hincw"  "hisp"   "hs"     "hu"     "incpc"  "india"  "iranc" 
[65] "irfb"   "itanc"  "itfb"   "japan"  "korea"  "lep"    "manuf"  "mar"   
[73] "mex"    "mhmval" "mrent" 
 [ reached getOption("max.print") -- omitted 32 entries ]
[1] "NOT SHARED:"
[1] "ag16cv"  "ag18cv"  "hu00sp"  "hu90sp"  "ohu00sp" "ohu90sp" "pop90.1"

# list common variable in a table form
head(vars)

    type variables
1 shared    a15asn
2 shared    a15blk
3 shared    a15hsp
4 shared    a15ntv
5 shared    a15wht
6 shared    a18und

d.full <- d  # keep a copy so don't have to reload

d <- d.full  # store original in case you need to reset anything

Create a subset of data for analysis

# create data set for analysis

d <- select(d, tractid, mhmval90, mhmval90, hinc90, hu90, own90, rent90, empclf90,
    clf90, unemp90, prof90, dpov90, npov90, ag25up90, hs90, col90, pop90.x, nhwht90,
    nhblk90, hisp90, asian90, cbsa.x, cbsaname.x)
d <- d %>% mutate(p.white = 100 * nhwht90/pop90.x, p.black = 100 * nhblk90/pop90.x,
    p.hisp = 100 * hisp90/pop90.x, p.asian = 100 * asian90/pop90.x, p.hs = 100 *
        (hs90 + col90)/ag25up90, p.col = 100 * col90/ag25up90, p.prof = 100 * prof90/empclf90,
    p.unemp = 100 * unemp90/clf90, pov.rate = 100 * npov90/dpov90)
colnames(d)

 [1] "tractid"    "mhmval90"   "hinc90"     "hu90"       "own90"     
 [6] "rent90"     "empclf90"   "clf90"      "unemp90"    "prof90"    
[11] "dpov90"     "npov90"     "ag25up90"   "hs90"       "col90"     
[16] "pop90.x"    "nhwht90"    "nhblk90"    "hisp90"     "asian90"   
[21] "cbsa.x"     "cbsaname.x" "p.white"    "p.black"    "p.hisp"    
[26] "p.asian"    "p.hs"       "p.col"      "p.prof"     "p.unemp"   
[31] "pov.rate"

Regression analysis for variables based on 1990 census data

# regression analysis of the variable

stargazer(d, type = s.type, digits = 0, summary.stat = c("min", "p25", "median",
    "mean", "p75", "max"))


Statistic	Min	Pctl(25)	Median	Mean	Pctl(75)	Max

mhmval90	0	58,800	86,500	112,399	141,800	500,001
hinc90	4,999	24,355	32,033	34,160	41,520	150,001
hu90	0	916	1,337	1,382	1,804	11,003
own90	0	422	744	788	1,093	8,180
rent90	0	168	358	476	663	8,653
empclf90	0	1,015	1,555	1,625	2,157	11,816
clf90	0	1,101	1,664	1,732	2,291	12,497
unemp90	0	49	88	107	143	1,165
prof90	0	192	359	451	616	6,290
dpov90	0	2,219	3,250	3,332	4,365	23,619
npov90	0	112	248	406	526	5,890
ag25up90	0	1,444	2,121	2,187	2,865	18,461
hs90	0	626	1,064	1,141	1,559	12,383
col90	0	162	335	483	652	8,575
pop90.x	0	2,283	3,325	3,419	4,464	35,721
nhwht90	0	1,262	2,424	2,513	3,575	21,561
nhblk90	0	18	76	426	331	12,121
hisp90	0	27	75	347	280	13,873
asian90	0	9	31	111	94	7,899
p.white	0	64	87	74	95	100
p.black	0	1	3	12	10	100
p.hisp	0	1	3	10	9	100
p.asian	0	0	1	3	3	94
p.hs	0	69	74	74	80	100
p.col	0	10	18	22	30	100
p.prof	0	17	25	27	34	100
p.unemp	0	4	5	7	8	64
pov.rate	0	4	8	12	16	100

Exploration of Median Home value

Intial condition in 1990 after adjusting for inflation.

# intial condition in 1990 adjust 1900 home values for inflation 2.3 % was the
# average inflation rate in 1990, adjusting it to 10 year inflation factor
# (1.023)^10 = 1.255325
d <- d.full
mhv.90 <- d$mhmval90 * 1.255325
mhv.00 <- d$mhmval00

mhv.change <- mhv.00 - mhv.90

df <- data.frame(MedianHomeValue1990 = mhv.90, MedianHomeValue2000 = mhv.00, Change.90.to.00 = mhv.change)

stargazer(df, type = s.type, digits = 0, summary.stat = c("min", "p25", "median",
    "mean", "p75", "max"))


Statistic	Min	Pctl(25)	Median	Mean	Pctl(75)	Max

MedianHomeValue1990	0	73,813	108,586	141,098	178,005	627,664
MedianHomeValue2000	0	81,600	119,900	144,738	173,894	1,000,001
Change.90.to.00	-627,664	-15,590	5,292	3,640	24,312	1,000,001

Histogram representing MHV (absolute value) in 1990

# histogram of MHV

hist(mhv.change/1000, breaks = 500, xlim = c(-100, 500), yaxt = "n", xaxt = "n",
    xlab = "Thousand of US Dollars (adjusted to 2000)", cex.lab = 1.5, ylab = "",
    main = "Change in Median Home Value 1990 to 2000", col = "gray20", border = "white")

axis(side = 1, at = seq(from = -100, to = 500, by = 100), labels = paste0("$", seq(from = -100,
    to = 500, by = 100), "k"))

mean.x <- mean(mhv.change/1000, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 200, y = 1500, labels = paste0("Mean = ", dollar(round(1000 * mean.x, 0))),
    col = "darkorange", cex = 1.8, pos = 3)

median.x <- median(mhv.change/1000, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 200, y = 2000, labels = paste0("Median = ", dollar(round(1000 * median.x,
    0))), col = "dodgerblue", cex = 1.8, pos = 3)

Compare 1990 and 2000 MHV (absolute values)

# compare 1990 and 2000
layout.matrix <- matrix( c( 1,3,
                            2,3 ),
                nrow=2, ncol=2, byrow=T )

layout( mat = layout.matrix,
        heights = c(2,2), # Heights of the two rows
        widths =  c(3,4)) # Widths of the two columns

# layout.show(3)

par( mar=c(4,0,0,2) )

hist( mhv.00/1000, breaks=50,
      xlim=c(-200,800), yaxt="n", xaxt="n",
      xlab="", cex.lab=1,
      ylab="", main="",
      col="darkslateblue", border="white" )

axis( side=1, at=seq( from=0, to=1000, by=100 ),
      labels=paste0( "$", seq( from=0, to=1000, by=100 ), "k" ) )

abline( v=seq(0,1000,100), lty=2, col="gray80" )

text( 550, 4000, labels="Median Home \nValue in 1990",
      col="darkslateblue", cex=1.8 )



hist( mhv.00/1000, breaks=50,
      xlim=c(-200,800), yaxt="n", xaxt="n",
      xlab="", cex.lab=1,
      ylab="", main="",
      col="darkslateblue", border="white" )

abline( v=seq(0,1000, 100 ), lty=2, col="gray80" )

text( 550, 3500, labels="Median Home \nValue in 2000",
      col="darkslateblue", cex=1.8 )

axis( side=1, at=seq( from=0, to=1000, by=100 ),
      labels=paste0( "$", seq( from=0, to=1000, by=100 ), "k" ) )


# data reduction - filter 1,000 observations

df <- data.frame( v90=mhv.90/1000, v00=mhv.00/1000 )
df <- sample_n( df, 1000 )

par( mar=c(4,5,3,2) )

jplot( df$v90, df$v00,
       lab1="MHV in 1990", lab2="MHV in 2000",
       xlim=c(0,1000), ylim=c(0,1000),
       axes=F )

abline( a=0, b=1, lty=2, col="gray" )
axis( side=1, at=seq( from=0, to=1000, by=200 ),
      labels=paste0( "$", seq( from=0, to=1000, by=200 ), "k" ) )
axis( side=2, at=seq( from=0, to=1000, by=200 ),
      labels=paste0( "$", seq( from=0, to=1000, by=200 ), "k" ) )

Calculate summary table for percentage change in MHV 1990 and 2000

# % change in MHV 1990 and 2000
mhv.90[mhv.90 < 1000] <- NA
pct.change <- mhv.change/mhv.90
summary(pct.change)

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
 -1.00000  -0.11611   0.05614   0.10823   0.26418 214.79478       141

Number of tracts to experience more than 500% change in MHV

# how many cases had increases above 500%
sum(pct.change > 5, na.rm = T)

[1] 30

Showing 6 of 30 tracts to experience more than 500% change in MHV

# preview tracts with large increases in home values to see if increases make
# sense

d %>% filter(pct.change > 5) %>% head()

     tractid pop90.x nhwht90 nhblk90 ntv90 asian90 hisp90 haw90 india90 china90
     filip90 japan90 korea90 viet90 mex90 pr90 cuban90 hu90 vac90 ohu90
     a18und90 a60up90 a75up90 agewht90 a15wht90 a60wht90 ageblk90 a15blk90
     a60blk90 agehsp90 a15hsp90 a60hsp90 agentv90 a15ntv90 a60ntv90 ageasn90
     a15asn90 a60asn90 ag15up90 mar90 wds90 mhmval90 mrent90 own90 rent90
     family90 fhh90 pop90.y pop90.1 ruanc90 itanc90 geanc90 iranc90 scanc90
     rufb90 itfb90 gefb90 irfb90 scfb90 fb90 nat90 n10imm90 ag5up90 olang90
     lep90 ag25up90 hs90 col90 clf90 unemp90 dflabf90 flabf90 empclf90 prof90
     manuf90 semp90 ag16cv90 vet90 cni16u90 dis90 dpov90 npov90 n65pov90
     dfmpov90 nfmpov90 dwpov90 nwpov90 dbpov90 nbpov90 dnapov90 nnapov90
     dhpov90 nhpov90 dapov90 napov90 incpc90 hu90sp h30old90 ohu90sp h10yrs90
     dmulti90 multi90 hinc90 hincw90 hincb90 hinch90 hinca90 hh90 hhw90 hhb90
     hhh90 hha90 pop00.x nhwht00 nhblk00 ntv00 asian00 hisp00 haw00 india00
     china00 filip00 japan00 korea00 viet00 mex00 pr00 cuban00 hu00 vac00 ohu00
     a18und00 a60up00 a75up00 agewht00 a15wht00 a60wht00 ageblk00 a15blk00
     a60blk00 agehsp00 a15hsp00 a60hsp00 agentv00 a15ntv00 a60ntv00 ageasn00
     a15asn00 a60asn00 family00 fhh00 own00 rent00 pop00.y ruanc00 itanc00
     geanc00 iranc00 scanc00 rufb00 itfb00 gefb00 irfb00 scfb00 fb00 nat00
     n10imm00 ag5up00 olang00 lep00 ag25up00 hs00 col00 ag15up00 mar00 wds00
     clf00 unemp00 dflabf00 flabf00 empclf00 prof00 manuf00 semp00 ag18cv00
     vet00 cni16u00 dis00 dpov00 npov00 n65pov00 dfmpov00 nfmpov00 dwpov00
     nwpov00 dbpov00 nbpov00 dnapov00 nnapov00 dhpov00 nhpov00 dapov00 napov00
     incpc00 hu00sp h30old00 ohu00sp h10yrs00 dmulti00 multi00 hinc00 hincw00
     hincb00 hinch00 hinca00 mhmval00 mrent00 hh00 hhw00 hhb00 hhh00 hha00
     fipscounty state county tract placefp10 cbsa10 metdiv10 ccflag10 globd00
     globg00 globd90 globg90 globd80 globg80 msa.x msaname.x cbsa.x cbsaname.x
     urban.x msa.y msaname.y cbsa.y cbsaname.y urban.y
 [ reached 'max' / getOption("max.print") -- omitted 6 rows ]

Plot to show percentage change in MHV between 1990-2000

# Plot the percent change variable:
hg <- hist(pct.change, breaks = 2000, xlim = c(-1, 2), yaxt = "n", xaxt = "n", xlab = "",
    cex.main = 1.5, ylab = "", main = "Growth in Home Value by Census Tract 1990 to 2000",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg$count)

mean.x <- mean(pct.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Group Growth Rate by Metro-area for the time period 1990-2000 (showing highest 25)

# Group Growth Rates By Metro Area
d$mhv.change <- mhv.change
d$pct.change <- pct.change
d$mhv.00 <- mhv.00
d$mhv.90 <- mhv.90

d %>% group_by(cbsaname.x) %>% summarize(ave.change = median(mhv.change, na.rm = T),
    ave.change.d = dollar(round(ave.change, 0)), growth = 100 * median(pct.change,
        na.rm = T)) %>% ungroup() %>% arrange(-growth) %>% select(-ave.change) %>%
    head(25)

# A tibble: 25 x 3
   cbsaname.x                          ave.change.d growth
   <chr>                               <chr>         <dbl>
 1 Corvallis, OR                       $78,276        84.5
 2 Portland-Vancouver-Beaverton, OR-WA $72,661        80.6
 3 Salt Lake City, UT                  $65,864        77.7
 4 Boulder, CO                         $101,351       77.2
 5 Provo-Orem, UT                      $64,922        76.5
 6 Salem, OR                           $52,248        69.6
 7 Eugene-Springfield, OR              $55,029        69.4
 8 Fort Collins-Loveland, CO           $61,268        64.4
 9 Longview, WA                        $44,733        63.0
10 Missoula, MT                        $49,358        62.3
# … with 15 more rows

Analysis

How do changes in home value differ between the 1990-2000 period and 2000-2010?

The changes in home value between 1990-2000 and 2000-2010 are very different. 2000-2010 has witnessed some of the major historic economic events. The tech-bubble and housing-bubble in early and mid part of this decade has shaped the changes in housing market as well. 2005 witnesses United States Housig market correction, even after the correction the changes in home value between 2000-2010 was significantly higher than 1990-2000.

The changes in MHV in absolute terms 1990-2000 was $3,640 in terms of mean value and $5,292 in terms of median value. The same values for 2000-2010 were $60,047 and $36,268. The mean is greater than the median which shows that the data is skewed toward the right.

The percentage change in MHV which offers more meaningful picture of the change shows that between 1990-2000 the mean change in percent of MHV was 10.82% while between 2000-2010 it was 33.80%. The percentage in the median price was 5.61% and 25.40% respectively.

The difference in the nature of changes in home value is further observed while comparing the number of tracts to witness changes greater than 500%. Between 1990-2000 there were only 30 such tracts whereas between 2000-2010 there were 115.

What do the authors suggest would predict fall in central city home values between 1990 and 2000?

According to Baum-Snow & Hartley, "Accounting for central neighborhood change, 1980-2010", 2016 the census variable which can predict the fall in central city home values between 1990-2000 are:

Percent of population with High school diploma or less
Percent of umemployed population
Percent of white population.

A decline in residents without college degree because of the decline in job opportunities for lesser skilled workers in urban areas caused such population to move out. According to the article lesser skilled white population moved out so a decrease in white population during the period is measured in population loss. Less population translates into less demand for housing which in turn means fall in housing value. So analysing these variables would predict the fall in home values between 1990 and 2000 in urban areas.

Change in Neighborhood Vitality

Data

Load data sets for 2000 and 2010 and give the number of tracts present

# load data

d3 <- readRDS(here("data/processed/LTDB-2000.rds"))
d4 <- readRDS(here("data/processed/LTDB-2010.rds"))
md <- readRDS(here("data/processed/LTDB-META-DATA.rds"))


d3 <- select(d3, -year)
d4 <- select(d4, -year)

# merge three tables by tract-id
d5 <- merge(d3, d4, by = "tractid")
d5 <- merge(d5, md, by = "tractid")

nrow(d4)

[1] 74022

Keep only urban data

# filter rural districts
table(d$urban.x)

urban 
59722

d5 <- filter(d5, urban.x == "urban")

d.full1 <- d5  # keep a copy so don't have to reload

d5 <- d.full1  # story original in case you need to reset anything

Create a subset of data for analysis and select variable needed to observe neighborhood vitality

# create data set for analysis

d5 <- select(d5, tractid, mhmval00, mhmval12, prof00, prof12, hinc00, hinc12, col00,
    col12, incpc00, incpc12, hs00, hs12, unemp00, unemp12, wds00, wds12, clf00, cbsa.x,
    cbsaname.x, ag25up00, empclf00, empclf12, ag25up12, clf12, ag15up12, ag15up00) %>%
    filter(incpc00 != 0, ag25up00 != 0, ag25up12 != 0, empclf00 != 0, empclf12 !=
        0, clf00 != 0, clf12 != 0, ag15up00 != 0, ag15up12 != 0, prof00 != 0, prof12 !=
        0, col00 != 0, col12 != 0, hs00 != 0, hs12 != 0, unemp00 != 0, unemp12 !=
        0, wds00 != 0, wds12 != 0)
d5 <- d5 %>% mutate(p.hs.00 = 100 * (hs00 + col00)/ag25up00, p.hs.10 = 100 * (hs12 +
    col12)/ag25up12, p.col.00 = 100 * col00/ag25up00, p.col.10 = 100 * col12/ag25up12,
    p.prof.00 = 100 * prof00/empclf00, p.prof.10 = 100 * prof12/empclf12, p.unemp.00 = 100 *
        unemp00/clf00, p.unemp.10 = 100 * unemp12/clf12, p.wds.00 = 100 * wds00/ag15up00,
    p.wds.10 = 100 * wds12/ag15up12) %>%
mutate_if(is.numeric, list(~na_if(., Inf))) %>% mutate_if(is.numeric, list(~na_if(.,
    -Inf))) %>%
na.omit()

Regression analysis of the variables in 2000 and 2010

reg.data.00 <- d5 %>% select(mhmval00, mhmval12, hinc00, hinc12, incpc00, incpc12,
    p.hs.00, p.hs.10, p.col.00, p.col.10, p.prof.00, p.prof.10, p.unemp.00, p.unemp.10,
    p.wds.00, p.wds.10)
stargazer(reg.data.00, type = s.type, digits = 0, summary.stat = c("min", "p25",
    "median", "mean", "p75"))


Statistic	Min	Pctl(25)	Median	Mean	Pctl(75)

mhmval00	0	81,700	119,800	144,714	174,000
mhmval12	9,999	122,500	191,000	246,312	313,325
hinc00	2,797	33,000	43,761	47,665	58,069
hinc12	2,499	39,124	53,918	59,549	73,792
incpc00	1,866	15,788	20,522	22,668	26,604
incpc12	1,269	19,301	26,102	29,122	35,005.2
p.hs.00	26	67	72	72	77
p.hs.10	25	66	71	71	77
p.col.00	0	12	21	26	36
p.col.10	0	15	25	30	41
p.prof.00	1	23	31	34	43
p.prof.10	0	24	34	35	46
p.unemp.00	0	3	5	6	8
p.unemp.10	0	6	9	10	13
p.wds.00	0	14	18	19	22
p.wds.10	0	15	19	19	23

Initial regression in 2000

# median home value in 2000, 2010, change and percentage change 2000 values
# adjusted for inflation
mhmval.00 <- d5$mhmval00 * 1.28855
mhmval.10 <- d5$mhmval12
mhmval.change <- mhmval.10 - mhmval.00
pct.mhmval <- mhmval.change/mhmval.00

# Percapita income in 2000, 2010, change and percentage chnage 2000 values
# adjusted for inflation
incp.00 <- d5$incpc00 * 1.28855
incpc.10 <- d5$incpc12
incpc.change <- incpc.10 - incp.00
pct.incpc.change <- incpc.change/incp.00

# Median House Hold income income in 2000, 2010, change and percentage chnage
# 2000 values adjusted for inflation
hinc.00 <- d5$hinc00 * 1.28855
hinc.10 <- d5$hinc12
hinc.change <- hinc.10 - hinc.00
pct.hinc.change <- hinc.change/hinc.00

# % of professional employees in 2000, 2010, change and percentage chnage
pprof.00 <- d5$p.prof.00
pprof.10 <- d5$p.prof.10
pprof.change <- pprof.10 - pprof.00
pct.pprof.change <- pprof.change/pprof.00

# % of popualtion with High school degree or less in 2000, 2010, change and
# percentage chnage
phs.00 <- d5$p.hs.00
phs.10 <- d5$p.hs.10
phs.change <- phs.10 - phs.00
pct.phs.change <- phs.change/phs.00
# % of population with four years of college or more in 2000, 2010, change and
# percentage chnage
pcol.00 <- d5$p.col.00
pcol.10 <- d5$p.col.10
pcol.change <- pcol.10 - pcol.00
pct.pcol.change <- pcol.change/pcol.00

# % of unemployed population in 2000, 2010, change and percentage chnage
punemp.00 <- d5$p.unemp.00
punemp.10 <- d5$p.unemp.10
punemp.change <- punemp.10 - punemp.00
pct.punemp.change <- punemp.change/punemp.00

# % of widowed, divorced and single population in 2000, 2010, change and
# percentage chnage
pwds.00 <- d5$p.wds.00
pwds.10 <- d5$p.wds.10
pwds.change <- pwds.10 - pwds.00
pct.pwds.change <- pwds.change/pwds.00

df1 <- data.frame(MedianHomeValue2000 = mhmval.00, MedianHomeValue2010 = mhmval.10,
    PercentGrowthMedianHomeValue.00.to.10 = pct.mhmval, HouseHoldIncome2000 = hinc.00,
    HouseHoldIncome2010 = hinc.10, HouseHoldIncomeChange.00.to.10 = hinc.change,
    PerCapitaIncome2000 = incp.00, PerCapitaIncome2010 = incpc.10, PerCaptiaIncomeChange.00.to.10 = incpc.change,
    PercentOfProfessionalEmployyes2000 = pprof.00, PercentOfProfessionalEmployyes2010 = pprof.10,
    ProfessionalChange.00.to.10 = pct.pprof.change, PercentWithHighSchoolDegreeOrLess2000 = phs.00,
    PercentWithHighSchoolDegreeOrLess2010 = phs.00, PercentWithHighSchoolDegreeOrLessChange.00.to.10 = pct.phs.change,
    PercentWithFourYearCollegeDegreeOrMore2000 = pcol.00, PercentWithFourYearCollegeDegreeOrMore2010 = pcol.10,
    PercentWithFourYearCollegeDegreeOrMoreChange00.to.10 = pct.pcol.change, Percentunemployed2000 = punemp.00,
    Percentunemployed2010 = punemp.10, PercentunemployedChange.00to.10 = pct.punemp.change,
    PercentWidowedDivorced2000 = pwds.00, PercentWidowedDivorced2010 = pwds.10, PercentWidowedDivorcedChange.00.to.1o = pct.pwds.change) %>%

mutate_if(is.numeric, list(~na_if(., Inf))) %>% mutate_if(is.numeric, list(~na_if(.,
    -Inf))) %>% na.omit()




df.initial <- data.frame(MedianHomevalue2000 = mhmval.00, HouseHoldIncome2000 = hinc.00,
    PerCapitaIncome2000 = incp.00, PercentOfProfessionalEmployyes2000 = pprof.00,
    PercentWithHighSchoolDegreeOrLess2000 = phs.00, PercentWithFourYearCollegeDegreeOrMore2000 = pcol.00,
    Percentunemployed2000 = punemp.00, PercentWidowedDivorced2000 = pwds.00) %>%
    mutate_if(is.numeric, list(~na_if(., Inf))) %>% mutate_if(is.numeric, list(~na_if(.,
    -Inf))) %>% na.omit()

stargazer(df.initial, type = s.type, digits = 0, summary.stat = c("min", "p25", "median",
    "mean", "p75", "max"))


Statistic	Min	Pctl(25)	Median	Mean	Pctl(75)	Max

MedianHomevalue2000	0	105,275	154,368	186,471	224,208	1,288,551
HouseHoldIncome2000	3,604	42,522	56,389	61,418	74,824	257,711
PerCapitaIncome2000	2,404	20,344	26,443	29,209	34,281	214,960
PercentOfProfessionalEmployyes2000	1	23	31	34	43	98
PercentWithHighSchoolDegreeOrLess2000	26	67	72	72	77	100
PercentWithFourYearCollegeDegreeOrMore2000	0	12	21	26	36	95
Percentunemployed2000	0	3	5	6	8	97
PercentWidowedDivorced2000	0	14	18	19	22	70

Visualizations of changes from 2000 - 2010

Histogram showing percentage growth in per capita income from 2000-2010

hg2 <- hist(pct.incpc.change, breaks = 2000, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in per capita income by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg2$count)

mean.x <- mean(pct.incpc.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.incpc.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing percentage growth in Median Household Income 2000-2010

hg3 <- hist(pct.hinc.change, breaks = 2000, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in median house hold income by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg3$count)

mean.x <- mean(pct.hinc.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.hinc.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing percentage growth in Professionals from 2000-2010

hg4 <- hist(pct.pprof.change, breaks = 20000, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in professional by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg4$count)

mean.x <- mean(pct.pprof.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.pprof.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing percentage growth in populaton with high school or less by Census Tract 2000 to 2010

hg5 <- hist(pct.phs.change, breaks = 500, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in populaton with high school or less by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg5$count)

mean.x <- mean(pct.phs.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.phs.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing growth in population with 4 yr college degree or more by census tract 2000 to 2010

hg6 <- hist(pct.pcol.change, breaks = 500, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in population with 4 yrs college degree or more by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg6$count)

mean.x <- mean(pct.pcol.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.pcol.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing growth in unemployed population by census tract 2000 to 2010

hg7 <- hist(pct.punemp.change, breaks = 2000, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in unemployed  population by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg7$count)

mean.x <- mean(pct.punemp.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.punemp.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Histogram showing growth in widowed, divorced and single population by Census Tract 2000 to 2010

hg8 <- hist(pct.pwds.change, breaks = 2000, xlim = c(-1, 2), yaxt = "n", xaxt = "n",
    xlab = "", cex.main = 1.5, ylab = "", main = "Growth in widowed, divorced and single population by Census Tract 2000 to 2010",
    col = "gray40", border = "white")

axis(side = 1, at = seq(from = -1, to = 2, by = 0.5), labels = paste0(seq(from = -100,
    to = 200, by = 50), "%"))

ymax <- max(hg8$count)

mean.x <- mean(pct.pwds.change, na.rm = T)
abline(v = mean.x, col = "darkorange", lwd = 2, lty = 2)
text(x = 1, y = (0.5 * ymax), labels = paste0("Mean = ", round(100 * mean.x, 0),
    "%"), col = "darkorange", cex = 1.8, pos = 4)

median.x <- median(pct.pwds.change, na.rm = T)
abline(v = median.x, col = "dodgerblue", lwd = 2, lty = 2)
text(x = 1, y = (0.6 * ymax), labels = paste0("Median = ", round(100 * median.x,
    0), "%"), col = "dodgerblue", cex = 1.8, pos = 4)

Statistical summary of neighborhood vitality variable from 2000 -2010

summary.df <- data.frame(mhmval.change, pct.hinc.change, pct.incpc.change, pct.pcol.change,
    pct.pprof.change, pct.phs.change, pct.punemp.change, pct.pwds.change)
summary(summary.df)

 mhmval.change      pct.hinc.change    pct.incpc.change   pct.pcol.change   
 Min.   :-1063342   Min.   :-0.96352   Min.   :-0.95520   Min.   :      -1  
 1st Qu.:    7102   1st Qu.:-0.15841   1st Qu.:-0.12464   1st Qu.:       0  
 Median :   35732   Median :-0.04704   Median :-0.01434   Median :       0  
 Mean   :   59841   Mean   :-0.02816   Mean   : 0.00337   Mean   :     588  
 3rd Qu.:   94150   3rd Qu.: 0.07274   3rd Qu.: 0.10249   3rd Qu.:       0  
 Max.   : 1000001   Max.   : 7.55611   Max.   :33.72376   Max.   :33808892  
 pct.pprof.change   pct.phs.change     pct.punemp.change  pct.pwds.change   
 Min.   :-0.98530   Min.   :-0.59664   Min.   :    -1.0   Min.   :-0.99579  
 1st Qu.:-0.08802   1st Qu.:-0.06611   1st Qu.:     0.2   1st Qu.:-0.12861  
 Median : 0.05213   Median :-0.01275   Median :     0.8   Median : 0.03012  
 Mean   : 0.08979   Mean   :-0.01166   Mean   :    16.6   Mean   : 0.06221  
 3rd Qu.: 0.20923   3rd Qu.: 0.04116   3rd Qu.:     1.7   3rd Qu.: 0.21232  
 Max.   :90.40845   Max.   : 1.74184   Max.   :490464.0   Max.   :36.61784

Correlation between changes in neighborhood health metrics and change in home value

Bivariate correlation table of variables

correlation.df <- dplyr::select(df1, "PercentGrowthMedianHomeValue.00.to.10", "HouseHoldIncomeChange.00.to.10",
    "PerCaptiaIncomeChange.00.to.10", "ProfessionalChange.00.to.10", "PercentWithHighSchoolDegreeOrLessChange.00.to.10",
    "PercentWithFourYearCollegeDegreeOrMoreChange00.to.10", "PercentunemployedChange.00to.10",
    "PercentWidowedDivorcedChange.00.to.1o")

pairs(correlation.df, lower.panel = panel.smooth, upper.panel = panel.cor)

Analysis

Do patterns of neighborhood vitality follow the same patterns as home values? Do we see consistent increases over time? What percentage of tracts improved, according to your measures, and what percentage got worse?

The patterns of neighborhood vitality follow the same patterns as home values, most of the variable with exceptions to household income and less educated population see an increase over the period of time.

What is the correlation between change in neighborhood health metrics and change in home value? Consider using the pairs() function from the previous lab to describe these relationships.

The correlation between changes in neighborhood health metrics and changes in home value is shown above in a correlation table.

Measuring Gentrification

Load data

d2 <- d.full
# adjust 1990 home values for inflation average inflation for 1990 is set to
# 2.30% adjusting that for 10 year (1.023)^10 = 1.255325
mhv.90 <- d2$mhmval90 * 1.255325
mhv.00 <- d2$mhmval00

mhv.change <- mhv.00 - mhv.90

# small initial values are skewing percentages an average home value below $10k
# is really low - these must be mostly vacant lots?

mhv.90[mhv.90 < 1000] <- NA
pct.change <- 100 * (mhv.change/mhv.90)
summary(pct.change)

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
 -100.000   -11.611     5.614    10.823    26.418 21479.478       141

d2$mhv.90 <- mhv.90
d2$mhv.00 <- mhv.00
d2$mhv.change <- mhv.change
d2$pct.change <- pct.change

Select variables for operationalizing a definition of gentrification

d2 <- select( d2,

             tractid, cbsa.x, cbsaname.x,            # ids / units of analysis

             mhv.90, mhv.00, mhv.change, pct.change,    # home value 

             hinc90, hu90, own90, rent90,        # ses
             hinc00, hu00, own00, rent00,

             empclf90, clf90, unemp90, prof90,   # employment 
             empclf00, clf00, unemp00, prof00,

             dpov90, npov90,                     # poverty
             dpov00, npov00,

             ag25up90, hs90, col90,              # education 
             ag25up00, hs00, col00,

             pop90.x, nhwht90, nhblk90, hisp90, asian90,   # race
             pop00.y, nhwht00, nhblk00, hisp00, asian00

          ) # end select


d2 <-
  d2 %>%
  mutate(
          # 1990 variables
          p.white.90 = 100 * nhwht90 / pop90.x,
          p.black.90 = 100 * nhblk90 / pop90.x,
          p.hisp.90 = 100 * hisp90 / pop90.x,
          p.asian.90 = 100 * asian90 / pop90.x,
          p.hs.edu.90 = 100 * (hs90+col90) / ag25up90,
          p.col.edu.90 = 100 * col90 / ag25up90,
          p.prof.90 = 100 * prof90 / empclf90,
          p.unemp.90 = 100 * unemp90 / clf90,
          pov.rate.90 = 100 * npov90 / dpov90,

          # 2000 variables
          p.white.00 = 100 * nhwht00 / pop00.y,
          p.black.00 = 100 * nhblk00 / pop00.y,
          p.hisp.00 = 100 * hisp00 / pop00.y,
          p.asian.00 = 100 * asian00 / pop00.y,
          p.hs.edu.00 = 100 * (hs00+col00) / ag25up00,
          p.col.edu.00 = 100 * col00 / ag25up00,
          p.prof.00 = 100 * prof00 / empclf00,
          p.unemp.00 = 100 * unemp00 / clf00,
          pov.rate.00 = 100 * npov00 / dpov00 )

d2 <- d2 %>% group_by(cbsaname.x) %>% mutate(metro.mhv.pct.90 = ntile(mhv.90, 100),
    metro.mhv.pct.00 = ntile(mhv.00, 100), metro.median.pay.90 = median(hinc90, na.rm = T),
    metro.median.pay.00 = median(hinc00, na.rm = T), metro.race.rank.90 = ntile((100 -
        p.white.90), 100)) %>% ungroup() %>% mutate(metro.mhv.pct.change = metro.mhv.pct.00 -
    metro.mhv.pct.90, pay.change = metro.median.pay.00 - metro.median.pay.90, race.change = p.white.00 -
    p.white.90, mhv.change = mhv.00 - mhv.90)

d3 <- d2 %>% select(c("tractid", "cbsa.x", "cbsaname.x", "mhv.90", "mhv.00", "mhv.change",
    "pct.change", "p.white.90", "p.black.90", "p.hisp.90", "p.asian.90", "p.hs.edu.90",
    "p.col.edu.90", "p.prof.90", "p.unemp.90", "pov.rate.90", "p.white.00", "p.black.00",
    "p.hisp.00", "p.asian.00", "p.hs.edu.00", "p.col.edu.00", "p.prof.00", "p.unemp.00",
    "pov.rate.00", "metro.mhv.pct.90", "metro.mhv.pct.00", "metro.median.pay.90",
    "metro.median.pay.00", "metro.mhv.pct.change", "pay.change", "race.change", "metro.race.rank.90"))

# head( d3 ) %>% pander()
d3 <- data.frame(d3)
stargazer(d3, type = s.type, digits = 0, summary.stat = c("min", "p25", "median",
    "mean", "p75", "max"))


Statistic	Min	Pctl(25)	Median	Mean	Pctl(75)	Max

mhv.90	1,252	74,064	108,711	141,431	178,256	627,664
mhv.00	0	81,600	119,900	144,738	173,894	1,000,001
mhv.change	-627,664	-15,707	5,251	3,171	24,219	965,480
pct.change	-100	-12	6	11	26	21,479
p.white.90	0	64	87	74	95	100
p.black.90	0	1	3	12	10	100
p.hisp.90	0	1	3	10	9	100
p.asian.90	0	0	1	3	3	94
p.hs.edu.90	0	69	74	74	80	100
p.col.edu.90	0	10	18	22	30	100
p.prof.90	0	17	25	27	34	100
p.unemp.90	0	4	5	7	8	64
pov.rate.90	0	4	8	12	16	100
p.white.00	0	47	78	Inf	91	Inf
p.black.00	0	1	4	Inf	14	Inf
p.hisp.00	0	2	4	Inf	15	Inf
p.asian.00	0	1	2	Inf	5	Inf
p.hs.edu.00	0	67	72	72	77	100
p.col.edu.00	0	12	21	26	36	100
p.prof.00	0	23	31	34	43	100
p.unemp.00	0	3	5	6	8	100
pov.rate.00	0	4	9	12	17	100
metro.mhv.pct.90	1	25	50	50	75	100
metro.mhv.pct.00	1	25	50	50	75	100
metro.median.pay.90	14,871	28,906	32,457	32,924	35,833	52,374
metro.median.pay.00	23,012	39,457	43,139	45,054	49,522	73,701
metro.mhv.pct.change	-99	-6	0	0	7	99
pay.change	4,930	9,775	11,441	12,130	14,001	26,211
race.change	-100	-12	-5	Inf	-2	Inf
metro.race.rank.90	1	25	50	50	75	100

Operationalising Gentrification

Number of tracts gentrified

# income percent white home values absolute home value relative to metro
# education stats employment stats


# home value in lower than average home in a metro in 2000
poor.1990 <- d3$metro.mhv.pct.90 < 50

# above average diversity for metro
diverse.1990 <- d3$metro.race.rank.90 > 50

# home values increased more than overall city gains change in percentile rank
# within the metro
mhv.pct.increase <- d3$metro.mhv.pct.change > 0

# faster than average growth 25% growth in value is median for the country
home.val.rise <- d3$pct.change > 25

# proportion of whites increases by more than 3 percent measured by increase in
# white
loss.diversity <- d3$race.change > 3

# Increase in proportion of high school degree or less population by 3 %
hs.pct.increase <- d3$p.hs.edu.90 > 3

# Increase in proportion of college graduate population by 3 %
col.pct.increase <- d3$p.col.edu.90 > 3

# Increase in proportion of unemployed population by 3 %
unemp.pct.increase <- d3$p.unemp.90 > 3


g.flag <- poor.1990 & diverse.1990 & mhv.pct.increase & home.val.rise & loss.diversity &
    hs.pct.increase & col.pct.increase & unemp.pct.increase


num.candidates <- sum(poor.1990 & diverse.1990, na.rm = T)
num.gentrified <- sum(g.flag, na.rm = T)

num.gentrified

[1] 414

Tracts eligible for gentrification

num.candidates

[1] 19885

num.gentrified/num.candidates

[1] 0.02081971

# according to this 2.08% of urban tracts experience gentrification between 2000
# and 2010.

Analysis

How many census tracts are candidates (start out at a low income level with high diversity)? And of those how many have transitioned into advanced stages of gentrification?

There are 19,885 census tracts which are candidates for gentrification. Out of that 414 tracts have transitioned into advanced stages of gentrification.

Provide an explanation and justification of the way you measure gentrification in the data.

Poor and diverse tracts are identified because historically gentrifictaion start in poor and diverse neighborhoods. Increase in Home value is one of the major indicator of gentrifiaction and it is included in the this model to identify faster than average growth in home values. A neighborhood gentrification is marked by influx of educated and white population, which in turn means there will be loss of diversity, increase in college graduate, decrease in low educated and unemployed population. After considering all these variable to measure gentrification we can conclude that 2.08% of qualified tracts observed gentrification between 2000-2010.

Visualizing Gentrification

Load spatial Data

github.url <- "https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/phx_dorling.geojson"
phx <- geojson_read(x = github.url, what = "sp")
plot(phx)

Create sub-set data frame to merge census and spatial data

# create small dataframe for the merge
df <- data.frame(tractid = d5$tractid, mhmval.00, mhmval.10, mhmval.change, pct.mhmval)



# create GEOID that matches GIS format

# create a geoID for merging by tract
df$GEOID <- substr(df$tractid, 6, 18)  # extract codes
df$GEOID <- gsub("-", "", df$GEOID)  # remove hyphens
class(df$GEOID)

[1] "character"

head(df$GEOID)

[1] "01001020100" "01001020200" "01001020300" "01001020400" "01001020500"
[6] "01001020600"

head(phx@data)  # sp class from GIS file, so data frame is located @data

      GEOID2       GEOID STATEFP COUNTYFP TRACTCE             AFFGEOID   NAME
1 4013010101 04013010101      04      013  010101 1400000US04013010101 101.01
  LSAD    ALAND AWATER  POP  MHHI  ID  pop.w pnhwht12 pnhblk12 phisp12 pntv12
1   CT 54284173  27466 4915 87167 437 0.4915    95.17        0    3.64      0
  pfb12 polang12 phs12 pcol12 punemp12 pflabf12 pprof12 pmanuf12 pvet12 psemp12
1  8.22     6.68 19.82  51.53     7.56    39.43   48.52     7.44  14.89   22.66
    hinc12  incpc12 ppov12 pown12 pvac12 pmulti12  mrent12 mhmval12 p30old12
1 39.28424 23.73314   7.37  92.19   32.5     0.81 55.12244 48.42995    13.37
  p10yrs12 p18und12 p60up12 p75up12 pmar12 pwds12 pfhh12
1    74.75    14.18   41.41   17.43  82.28  10.39   0.26
 [ reached 'max' / getOption("max.print") -- omitted 5 rows ]

# merge census data with dorling map

nrow(phx)  # check dimensions

[1] 913

phx <- merge(phx, df, by.x = "GEOID", by.y = "GEOID")

# make sure they have not changed or you are missing rows in your data frame or
# merging with the wrong ID
nrow(phx)

[1] 913

Dorling Cartograms

According to this Dorling Cartogram Map, high value tracts are located on the central, northeastern, and southern sides of Phoenix, Arizona.

phx <- spTransform(phx, CRS("+init=epsg:3395"))

bb <- st_bbox(c(xmin = -12519146, xmax = -12421368, ymax = 3965924, ymin = 3899074),
    crs = st_crs("+init=epsg:3395"))

tm_shape(phx, bbox = bb) + tm_polygons(col = "mhmval.00", n = 10, style = "quantile",
    palette = "Spectral") + tm_layout("Dorling Cartogram", title.position = c("right",
    "top"))

Acccording to this Dorling Cartogram Map, the largest gain from 2000-2010 occurred in the tracts that were below average in 2000.

tm_shape(phx, bbox = bb) + tm_polygons(col = "mhmval.change", n = 10, style = "quantile",
    palette = "Spectral", midpoint = NA) + tm_layout("Dorling Cartogram", title.position = c("right",
    "top"))

This Dorling Cartogram Map shows the percentage change in median Home values from 2000 to 2010 for Phoenix, AZ Metro Area.

tm_shape(phx, bbox = bb) + tm_polygons(col = "pct.mhmval", n = 10, style = "jenks",
    palette = "Spectral", midpoint = 0, showNA = TRUE) + tm_layout("Dorling Cartogram",
    title.position = c("right", "top"))

References

Baum-Snow, N., & Hartley, D. (2016). Accounting for central neighborhood change, 1980-2010. Retrieved April 29, 2020, from https://github.com/DS4PS/cpp-528-spr-2020/raw/master/articles/home-value-change/economic-model-central-city-neighborhoods-white-flight-white-return.pdf