packages <- c("tidyverse","stargazer","AER","asbio","tigerstats","readxl","foreign","wooldridge","moderndive","gridExtra","haven","car") ## This is how you define an object (here, a character vector of package names)
install.packages(packages, repos = 'http://cran.us.r-project.org') # Install all of the packages at once (this only needs to be run once)
lapply(packages, library, character.only = TRUE) # Load all of the packages
Below are some ways to load data files in different formats. To load Excel and Stata data files, you will need the packages "readxl" and "haven", respectively.
## Read an Excel file (requires "readxl")
bwsmoking_excel <- read_xlsx("S:\\Baruch\\ECO 4000\\Spring2022\\Datasets\\Birthweight and Smoking\\birthweight_smoking.xlsx")
## Read a csv file
bwsmoking_csv <- read_csv("S:\\Baruch\\ECO 4000\\Spring2022\\Datasets\\Birthweight and Smoking\\bwsmoking.csv")
## Read a STATA file (will require package named "haven")
bwsmoking_stata <- read_dta("S:\\Baruch\\ECO 4000\\Spring2022\\Datasets\\Birthweight and Smoking\\birthweight_smoking.dta")
## Read an RDS file
bwsmoking_R <- readRDS("S:\\Baruch\\ECO 4000\\Spring2022\\Datasets\\Birthweight and Smoking\\bwsmoking.rds")
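Whichever format you use, it is worth confirming that the import worked. A quick check on any of the objects above (shown here for the csv version):
head(bwsmoking_csv) # print the first few rows
glimpse(bwsmoking_csv) # column names, types, and a preview (glimpse is loaded with the tidyverse)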
These examples are from the practice problem set provided to the students.
A company claims that its soup machines deliver exactly 10.0 ounces of soup—no more, no less. A researcher samples 100 bowls of soup and finds that:
\(\overline{X}\)= 10.28 ounces
s = 1.20 ounces
Test the company’s claim at the 5% and 1% significance levels.
We denote null hypothesis as \(H_{0}\) and alternative hypothesis as \(H_{1}\).
\[H_{0} :\mu = 10\] \[H_{1} :\mu \neq 10\]
We define the z-statistic in terms of the sample mean, the sample size, and the population standard deviation \(\sigma\):
\[z = \frac{\overline{X} - \mu_{0}}{\sigma/\sqrt{n}}\]
We reject the null hypothesis if \(|z| \geq |z_{\frac{\alpha}{2}}|\), where \(|z|\) is the absolute value of the z-statistic we calculated and \(|z_{\frac{\alpha}{2}}|\) is the critical value we obtain from the table. Notice that we use \(|z_{\frac{\alpha}{2}}|\) here because we are conducting a two-sided test.
Equivalently, we reject the null hypothesis if the p-value \(\leq \alpha\), where \(\alpha\) is the significance level (we will often use 0.01, 0.05, and 0.10 for \(\alpha\) for our purposes).
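Both parts of this rule are easy to compute directly in R. A quick sketch using the numbers from the soup problem:
qnorm(1 - c(0.10, 0.05, 0.01)/2) # two-sided critical values for alpha = 0.10, 0.05, 0.01: 1.645, 1.960, 2.576
2 * pnorm(-abs((10.28 - 10)/(1.2/sqrt(100)))) # two-sided p-value, about 0.0196: reject at the 5% level but not at the 1% level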
# A function to calculate the z-statistic and compare it with the critical value of z (the one we obtain from the table)
z_test <- function(mu, x_bar, s, n, alpha, alternative){
  se <- s/sqrt(n)        # standard error of the sample mean
  z <- (x_bar - mu)/se   # z-statistic
  if(alternative == "two-sided"){
    z_alpha <- qnorm(1 - alpha/2)   # two-sided critical value
  } else if(alternative == "less"){
    z_alpha <- -qnorm(1 - alpha)    # lower-tail critical value
  } else if(alternative == "greater"){
    z_alpha <- qnorm(1 - alpha)     # upper-tail critical value
  } else {
    stop("alternative must be 'two-sided', 'less', or 'greater'")
  }
  z_list <- list("|z_calculated|" = abs(z), "|z_critical|" = abs(z_alpha))
  return(z_list)
}
set.seed(21)
Q_9 <- tibble(X = rnorm(mean = 10.28, sd = 0.12, n = 100)) # Our simulated data (note: sd = 0.12 here is the standard error s/sqrt(n) = 1.2/10, not the sample s of 1.2)
z_05 <- z_test(10, 10.28, s = 1.2, 100, 0.05, alternative = "two-sided") ## a two-sided test at the 5% significance level
z_05
## $`|z_calculated|`
## [1] 2.333333
##
## $`|z_critical|`
## [1] 1.959964
##
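We can also check the by-hand calculation against R's built-in one-sample t-test on the simulated data; the call below is reconstructed to match the output that follows:
t.test(Q_9$X, mu = 10) # note: Q_9 was simulated with sd = 0.12 (= 1.2/sqrt(100)), so its t-statistic is much larger than our z of 2.33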
## One Sample t-test
##
## data: Q_9$X
## t = 23.422, df = 99, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 10
## 95 percent confidence interval:
## 10.26424 10.31316
## sample estimates:
## mean of x
## 10.2887
As we notice here, the absolute value of z-calculated = 2.33 is greater than the absolute value of z-critical = 1.96. Therefore, we reject the null hypothesis that \(\mu = 10\).
z_01 <- z_test(10, 10.28, s = 1.2, 100, 0.01, alternative = "two-sided") ## a two-sided test at the 1% significance level
z_01
## $`|z_calculated|`
## [1] 2.333333
##
## $`|z_critical|`
## [1] 2.575829
As we notice above, the absolute value of z-calculated = 2.33 is less than the absolute value of z-critical = 2.58. Therefore, we fail to reject the null hypothesis that \(\mu = 10\).
A manufacturer produces drill bits with an intended life of at least 580 hours and a standard deviation of 30 hours. A quality control scientist draws a sample of 100 bits and finds \(\overline{X} = 577\). Test at \(\alpha = .05\) to see if the machinery needs adjusting.
\[H_{0} :\mu \geq 580\] \[H_{1} :\mu < 580\]
set.seed(121)
Q_6 <- tibble(X = rnorm(mean = 577, sd = 30, n = 100)) # simulated sample from N(577, 30^2) (the population does not have to be normal, though; recall the Central Limit Theorem!)
z_one_sided <- z_test(mu = 580, x_bar = 577, s = 30, n = 100, alpha = 0.05, alternative = "less")
z_one_sided
## $`|z_calculated|`
## [1] 1
##
## $`|z_critical|`
## [1] 1.644854
As we can notice above, the absolute value of z-calculated = 1 is less than the absolute value of z-critical = 1.64. Therefore, we fail to reject the null hypothesis that \(\mu \geq 580\).
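For reference, the built-in test can also be run on the simulated sample (a sketch; output omitted, and the numbers will differ slightly from the hand calculation since Q_6 is a random draw):
t.test(Q_6$X, mu = 580, alternative = "less")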
A drug company that manufactures a diet drug claims that those using the drug for 30 days will lose at least 15 pounds. You sample 30 people who have used the drug and find that the average weight loss was 12 pounds, with a standard deviation of 5 pounds. (Hint: when the sample is small, i.e. \(n \leq 30\), use a t-test.) Test the claim at the .05 significance level.
\[H_{0} :\mu \geq 15\] \[H_{1} :\mu < 15\]
# The same idea as z_test, but with critical values from the t distribution with n - 1 degrees of freedom
t_test <- function(mu, x_bar, s, n, alpha, alternative){
  se <- s/sqrt(n)        # standard error of the sample mean
  t <- (x_bar - mu)/se   # t-statistic
  if(alternative == "two-sided"){
    t_alpha <- qt(1 - alpha/2, df = n - 1)  # two-sided critical value
  } else if(alternative == "less"){
    t_alpha <- -qt(1 - alpha, df = n - 1)   # lower-tail critical value
  } else if(alternative == "greater"){
    t_alpha <- qt(1 - alpha, df = n - 1)    # upper-tail critical value
  } else {
    stop("alternative must be 'two-sided', 'less', or 'greater'")
  }
  t_list <- list("|t_calculated|" = abs(t), "|t_critical|" = abs(t_alpha))
  return(t_list)
}
t_one_sided <- t_test(15, 12, s = 5, n = 30, 0.05, alternative = "less")
t_one_sided
## $`|t_calculated|`
## [1] 3.286335
##
## $`|t_critical|`
## [1] 1.699127
z_test(15, 12, s = 5, n = 30, 0.05, alternative = "less") # the same numbers through the z-test, for comparison (call reconstructed to match the output below)
## $`|z_calculated|`
## [1] 3.286335
##
## $`|z_critical|`
## [1] 1.644854
2 * pt(-3.286335, df = 29) # a two-sided p-value for the calculated t-statistic (call reconstructed; matches the printed value)
## [1] 0.002658998
As we notice above, the absolute value of t-calculated = 3.29 is greater than the absolute value of t-critical = 1.70. Therefore, we reject the null hypothesis that \(\mu \geq 15\).
The p-value here is about 0.003, which is less than 0.05. Therefore, we again reject the null hypothesis that \(\mu \geq 15\).
## POPULATION PARAMETERS
B0 = 2    # true intercept
B1 = 0.5  # true slope
n = 10000 # number of simulation replications
coeffs <- tibble(b_0 = rep(0,n), b_1 = rep(0,n)) # container for the estimated coefficients
for (i in 1:n) {
  dat1 <- tibble(X = 1:50, u = rnorm(50), Y = B0 + B1*X + u) # draw a fresh sample of 50 from the true model
  reg <- lm(Y ~ X, data = dat1)  # fit OLS on this sample
  model <- summary(reg)
  coeffs$b_0[i] = model$coefficients[1,1] # store the intercept estimate
  coeffs$b_1[i] = model$coefficients[2,1] # store the slope estimate
}
### With increased error
coeffs_2 <- tibble(b_02 = rep(0,n), b_12 = rep(0,n))
for (i in 1:n) {
  dat2 <- tibble(X = 1:50, u = rnorm(50), Y = B0 + B1*X + 2*u) # same model, but the error standard deviation is doubled
  reg2 <- lm(Y ~ X, data = dat2)
  model2 <- summary(reg2)
  coeffs_2$b_02[i] = model2$coefficients[1,1]
  coeffs_2$b_12[i] = model2$coefficients[2,1]
}
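Before plotting, a quick numerical check of the two simulated sampling distributions: both sets of slope estimates should be centered near the true B1 = 0.5, and doubling the error standard deviation should roughly double their spread.
c(mean(coeffs$b_1), sd(coeffs$b_1)) # centered near 0.5
c(mean(coeffs_2$b_12), sd(coeffs_2$b_12)) # also near 0.5, with roughly twice the standard deviation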
##### Plot the data
ggplot(data = coeffs, aes(x = b_1))+
geom_density(color = "blue")+ # color must be set in the geom; it is ignored as an argument to ggplot()
geom_density(data = coeffs_2, aes(x = b_12), color = "green")+
xlab("beta_hat")
For this exercise we will need to install the R package "wooldridge". This package contains all of the data sets used in Wooldridge's Introductory Econometrics textbook, and we will use several of them below.
E1. For the population of chief executive officers, let Y be annual salary (salary) in thousands of dollars. Thus, Y = 856.3 indicates an annual salary of $856,300, and Y = 1,452.6 indicates a salary of $1,452,600. Let X be the average return on equity (roe) for the CEO’s firm for the previous three years. (Return on equity is defined in terms of net income as a percentage of common equity.) For example, if roe = 10, then average return on equity is 10%.
We postulate that our model is
\[salary = \beta_{0} + \beta_{1} roe + u\]
The slope parameter \(\beta_{1}\) measures the change in annual salary when the return on equity increases by one percentage point. Since a higher roe is good for the company, we expect \(\beta_{1}\) to be positive.
load("S:\\Baruch\\ECO 4000\\Spring2022\\Datasets\\ceosal1.RData") # loads an object named "data"
ceo1 <- data # alternatively, ceo1 <- wooldridge::ceosal1 gives the same data set
## Summary Statistics
stargazer(ceo1, type = "html",digits = 2, title = "Descriptive Statistics")
Statistic | N | Mean | St. Dev. | Min | Max
--- | --- | --- | --- | --- | ---
salary | 209 | 1,281.12 | 1,372.35 | 223 | 14,822
pcsalary | 209 | 13.28 | 32.63 | -61 | 212
sales | 209 | 6,923.79 | 10,633.27 | 175.20 | 97,649.90
roe | 209 | 17.18 | 8.52 | 0.50 | 56.30
pcroe | 209 | 10.80 | 97.22 | -98.90 | 977.00
ros | 209 | 61.80 | 68.18 | -58 | 418
indus | 209 | 0.32 | 0.47 | 0 | 1
finance | 209 | 0.22 | 0.42 | 0 | 1
consprod | 209 | 0.29 | 0.45 | 0 | 1
utility | 209 | 0.17 | 0.38 | 0 | 1
lsalary | 209 | 6.95 | 0.57 | 5.41 | 9.60
lsales | 209 | 8.29 | 1.01 | 5.17 | 11.49
Always start by looking at the summary statistics of your data; it helps you get a feel for the data before the analysis begins. The data set CEOSAL1 contains information on 209 CEOs for the year 1990; these data were obtained from Business Week (5/6/91). In this sample, the average annual salary is $1,281,120, with the smallest and largest being $223,000 and $14,822,000, respectively. The average return on equity for the years 1988, 1989, and 1990 is 17.18%, with the smallest and largest values being 0.5% and 56.3%, respectively.
## plot the data
p1 <- ggplot(data = ceo1, aes(x = roe, y = salary)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Return on Equity")+
ylab("Salary")
p2 <- ggplot(data = ceo1, aes(x = roe, y = lsalary)) + ## Just for your reference
geom_point()+
geom_smooth(method = "lm")+
xlab("Return on Equity")+
ylab("Log Salary")
gridExtra::grid.arrange(p1,p2)
We have the results after fitting the model. Our fitted model looks like this:
\[\widehat{salary} = 963.191 + 18.501 roe\]
model_1 <- lm(data = ceo1, salary ~ roe) # level - level
model_2 <- lm(data = ceo1, lsalary ~ roe) # log-level
stargazer(model_1,model_2, type = "html",dep.var.labels = c("Salary","Log Salary"), title = "CEO Salary and Return on Equity", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
 | Salary | Log Salary
--- | --- | ---
 | (1) | (2)
roe | 18.501* | 0.014***
 | (11.123) | (0.005)
Constant | 963.191*** | 6.712***
 | (213.240) | (0.087)
N | 209 | 209
R2 | 0.013 | 0.043
Adjusted R2 | 0.008 | 0.039
Residual Std. Error (df = 207) | 1,366.555 | 0.555
F Statistic (df = 1; 207) | 2.767* | 9.408***
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | 
In the fitted equation, the intercept and slope estimates have been rounded to three decimal places; we use “salary hat” to indicate that this is an estimated equation.
How do we interpret the equation?
First, if the return on equity is zero, roe = 0, then the predicted salary is the intercept, 963.191, which equals $963,191 because salary is measured in thousands. Next, we can write the predicted change in salary as a function of the change in roe: \(\widehat{\Delta salary} = 18.501 (\Delta roe)\). This means that if the return on equity increases by one percentage point, \((\Delta roe) = 1\) , then salary is predicted to change by about 18.5, or $18,500.
Because it is a linear equation, this is the estimated change regardless of the initial salary.
Question: What is the predicted salary of a CEO if the return on equity is 30 percent?
We can use our fitted model, \(\widehat{salary} = 963.191 + 18.501\, roe\), and plug in roe = 30: \(\widehat{salary} = 963.191 + 18.501 \times 30 = 1{,}518.221\), i.e. about $1,518,221, which is over $1.5 million. This is the predicted value that our model gives us. Keep in mind, as we discussed in class, that the zero conditional mean assumption is unlikely to hold here: there are other variables that could potentially affect the salary of a CEO.
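The same prediction can be obtained directly from the fitted object model_1 (estimated above); the result is in thousands of dollars:
predict(model_1, newdata = data.frame(roe = 30)) # ~1518.221, i.e. about $1.52 million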
ceo1 <- ceo1%>%
mutate(lroe = log(roe))
model_3 <- lm(data = ceo1, salary ~ lroe) # level-log
model_4 <- lm(data = ceo1, lsalary ~ roe) # log-level
model_5 <- lm(data = ceo1, lsalary ~ lroe) # log-log
stargazer(model_3,model_4, model_5,type = "html",dep.var.labels = c("Salary","Log Salary","Log Salary"), title = "CEO Salary and Return on Equity", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
Salary | Log Salary | ||
(1) | (2) | (3) | |
lroe | 255.311 | 0.170** | |
(173.884) | (0.071) | ||
roe | 0.014*** | ||
(0.005) | |||
Constant | 586.596 | 6.712*** | 6.489*** |
(482.396) | (0.087) | (0.197) | |
N | 209 | 209 | 209 |
R2 | 0.010 | 0.043 | 0.027 |
Adjusted R2 | 0.006 | 0.039 | 0.022 |
Residual Std. Error (df = 207) | 1,368.548 | 0.555 | 0.560 |
F Statistic (df = 1; 207) | 2.156 | 9.408*** | 5.689** |
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 |
Analysis of Wage and Education Data (1976 CPS)
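The summary statistics below come from the wage1 data set in the wooldridge package; a minimal chunk to load the data and produce the table (the same stargazer call as for the CEO data):
data("wage1", package = "wooldridge") # 526 workers from the 1976 Current Population Survey
stargazer(wage1, type = "html", digits = 2, title = "Descriptive Statistics")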
Statistic | N | Mean | St. Dev. | Min | Max
--- | --- | --- | --- | --- | ---
wage | 526 | 5.90 | 3.69 | 0.53 | 24.98
educ | 526 | 12.56 | 2.77 | 0 | 18
exper | 526 | 17.02 | 13.57 | 1 | 51
tenure | 526 | 5.10 | 7.22 | 0 | 44
nonwhite | 526 | 0.10 | 0.30 | 0 | 1
female | 526 | 0.48 | 0.50 | 0 | 1
married | 526 | 0.61 | 0.49 | 0 | 1
numdep | 526 | 1.04 | 1.26 | 0 | 6
smsa | 526 | 0.72 | 0.45 | 0 | 1
northcen | 526 | 0.25 | 0.43 | 0 | 1
south | 526 | 0.36 | 0.48 | 0 | 1
west | 526 | 0.17 | 0.38 | 0 | 1
construc | 526 | 0.05 | 0.21 | 0 | 1
ndurman | 526 | 0.11 | 0.32 | 0 | 1
trcommpu | 526 | 0.04 | 0.20 | 0 | 1
trade | 526 | 0.29 | 0.45 | 0 | 1
services | 526 | 0.10 | 0.30 | 0 | 1
profserv | 526 | 0.26 | 0.44 | 0 | 1
profocc | 526 | 0.37 | 0.48 | 0 | 1
clerocc | 526 | 0.17 | 0.37 | 0 | 1
servocc | 526 | 0.14 | 0.35 | 0 | 1
lwage | 526 | 1.62 | 0.53 | -0.63 | 3.22
expersq | 526 | 473.44 | 616.04 | 1 | 2,601
tenursq | 526 | 78.15 | 199.43 | 0 | 1,936
### Wage-Education
w1 <- ggplot(data = wage1, aes(x = educ, y = wage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Years of Education")+
ylab("Hourly Wage")+
labs(title = "Wages vs Education, 1976")
w2 <- ggplot(data = wage1, aes(x = educ, y = lwage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Years of Education")+
ylab("Hourly Wage(in log terms)")+
labs(title = "Wages vs Education, 1976")
gridExtra::grid.arrange(w1,w2)
## Regression Analysis
wmodel_1 <- lm(data = wage1, wage ~ educ)
wmodel_2 <- lm(data = wage1, lwage ~ educ)
wage1 <- wage1%>%
filter(educ != 0)%>% # drop workers with zero years of education, since log(0) is undefined (N falls from 526 to 524)
mutate(leduc = log(educ))
wmodel_3 <- lm(data = wage1, wage ~ leduc)
wmodel_4 <- lm(data = wage1, lwage ~ leduc)
stargazer(wmodel_1, wmodel_2,wmodel_3,wmodel_4,type = "html",dep.var.labels = c("wage","log wage","wage","log wage"), title = "Wage and Education", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
 | wage | log wage | wage | log wage
--- | --- | --- | --- | ---
 | (1) | (2) | (3) | (4)
educ | 0.541*** | 0.083*** | | 
 | (0.053) | (0.008) | | 
leduc | | | 5.330*** | 0.825***
 | | | (0.608) | (0.086)
Constant | -0.905 | 0.584*** | -7.460*** | -0.445**
 | (0.685) | (0.097) | (1.532) | (0.218)
N | 526 | 526 | 524 | 524
R2 | 0.165 | 0.186 | 0.128 | 0.149
Adjusted R2 | 0.163 | 0.184 | 0.127 | 0.147
Residual Std. Error | 3.378 (df = 524) | 0.480 (df = 524) | 3.455 (df = 522) | 0.491 (df = 522)
F Statistic | 103.363*** (df = 1; 524) | 119.582*** (df = 1; 524) | 76.849*** (df = 1; 522) | 91.119*** (df = 1; 522)
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | | | 
In regression 1 (level-level), a one-year increase in education is associated with a $0.541 increase in hourly wage.
In regression 2 (log-level), a one-year increase in education is associated with a \(100 \times \hat{\beta_{1}}\) = 100 × 0.083 = 8.3% increase in hourly wage.
In regression 3 (level-log), a 1% increase in education is associated with a \(\frac{1}{100} \times \hat{\beta_{1}} = \frac{5.33}{100}\) = $0.0533 increase in hourly wage.
In regression 4 (log-log), a 1% increase in education is associated with a \(\hat{\beta_{1}}\)% = 0.83% increase in hourly wage. (This gives us an elasticity! Recall from your economics class.)
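The percentage interpretations in regressions 2 and 4 are approximations that work well for small coefficients; the exact percent change implied by a log-level coefficient is \(100(e^{\hat{\beta_{1}}} - 1)\):
100 * (exp(0.083) - 1) # ~8.65%: the exact percent change in wage from one more year of education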
We cannot compare \(R^{2}\) values when the dependent variables differ. For instance, in the table above we cannot compare regressions 1 and 2, since the dependent variables are wage and log(wage), respectively. In these situations we rely on economic theory to choose a specification; for example, a model like regression 2 is standard practice in labor economics.
m1 <- ggplot(data = wage1, aes(x = educ, y = lwage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Years of Education")+
ylab("Hourly Wage")+
labs(title = "Wages vs Education, 1976")
m2 <- ggplot(data = wage1, aes(x = exper, y = lwage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Years of Experience")+
ylab("Hourly Wage)")+
labs(title = "Wages vs Experience, 1976")
m3 <- ggplot(data = wage1, aes(x = tenure, y = lwage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Years with Employer")+
ylab("Hourly Wage)")+
labs(title = "Wages vs Tenure, 1976")
m4 <- ggplot(data = wage1, aes(x = married, y = lwage)) +
geom_point()+
geom_smooth(method = "lm")+
xlab("Marital Status")+
ylab("Hourly Wage)")+
labs(title = "Wages vs Marital Status, 1976")
gridExtra::grid.arrange(m1,m2,m3,m4)
## Regression Analysis
multiple_1 <- lm(data = wage1, wage ~ educ)
multiple_2 <- lm(data = wage1, lwage ~ educ)
multiple_3 <- lm(data = wage1, lwage ~ educ + exper)
multiple_4 <- lm(data = wage1, lwage ~ educ + exper + tenure)
multiple_5 <- lm(data = wage1, lwage ~ educ + exper + tenure + married)
stargazer(multiple_1, multiple_2,multiple_3,multiple_4,multiple_5,type = "html",dep.var.labels = c("wage","log wage"), title = "Wage and Education", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
 | wage | log wage | log wage | log wage | log wage
--- | --- | --- | --- | --- | ---
 | (1) | (2) | (3) | (4) | (5)
educ | 0.572*** | 0.087*** | 0.103*** | 0.097*** | 0.092***
 | (0.055) | (0.008) | (0.008) | (0.008) | (0.008)
exper | | | 0.010*** | 0.004** | 0.002
 | | | (0.002) | (0.002) | (0.002)
tenure | | | | 0.022*** | 0.021***
 | | | | (0.003) | (0.003)
married | | | | | 0.167***
 | | | | | (0.042)
Constant | -1.302* | 0.525*** | 0.150 | 0.217** | 0.218**
 | (0.714) | (0.101) | (0.112) | (0.107) | (0.106)
N | 524 | 524 | 524 | 524 | 524
R2 | 0.169 | 0.191 | 0.256 | 0.322 | 0.342
Adjusted R2 | 0.168 | 0.189 | 0.253 | 0.318 | 0.337
Residual Std. Error | 3.372 (df = 522) | 0.479 (df = 522) | 0.460 (df = 521) | 0.439 (df = 520) | 0.433 (df = 519)
F Statistic | 106.506*** (df = 1; 522) | 123.032*** (df = 1; 522) | 89.545*** (df = 2; 521) | 82.375*** (df = 3; 520) | 67.558*** (df = 4; 519)
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | | | | 

In column (4), holding experience and tenure fixed, one more year of education is associated with about a 9.7% higher hourly wage.
## Regression Analysis
mod_1 <- lm(data = wage1, lwage ~ female)
mod_2 <- lm(data = wage1, lwage ~ female + educ)
mod_3 <- lm(data = wage1, lwage ~ female + educ + exper)
mod_4 <- lm(data = wage1, lwage ~ female + educ + exper + tenure)
mod_5 <- lm(data = wage1, lwage ~ female + educ + exper + tenure + married)
mod_6 <- lm(data = wage1, lwage ~ female + educ + exper + tenure + female*educ)
mod_7 <- lm(data = wage1, lwage ~ female + educ + exper + tenure + female*educ + female*married)
stargazer(mod_1, mod_2,mod_3,mod_4,mod_5,mod_6,mod_7,type = "html",dep.var.labels = c("log wage"), title = "Wage and Gender", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
Dependent variable: log wage
 | (1) | (2) | (3) | (4) | (5) | (6) | (7)
--- | --- | --- | --- | --- | --- | --- | ---
female | -0.396*** | -0.365*** | -0.348*** | -0.306*** | -0.290*** | -0.407** | -0.252
 | (0.043) | (0.039) | (0.037) | (0.037) | (0.037) | (0.186) | (0.184)
educ | | 0.082*** | 0.097*** | 0.093*** | 0.089*** | 0.090*** | 0.085***
 | | (0.007) | (0.007) | (0.007) | (0.007) | (0.009) | (0.009)
exper | | | 0.010*** | 0.005*** | 0.003* | 0.005*** | 0.003**
 | | | (0.001) | (0.002) | (0.002) | (0.002) | (0.002)
tenure | | | | 0.017*** | 0.017*** | 0.017*** | 0.015***
 | | | | (0.003) | (0.003) | (0.003) | (0.003)
married | | | | | 0.125*** | | 0.291***
 | | | | | (0.040) | | (0.055)
female:educ | | | | | | 0.008 | 0.012
 | | | | | | (0.014) | (0.014)
female:married | | | | | | | -0.317***
 | | | | | | | (0.074)
Constant | 1.814*** | 0.759*** | 0.407*** | 0.428*** | 0.418*** | 0.464*** | 0.369***
 | (0.030) | (0.097) | (0.107) | (0.104) | (0.103) | (0.122) | (0.121)
N | 524 | 524 | 524 | 524 | 524 | 524 | 524
R2 | 0.138 | 0.308 | 0.362 | 0.401 | 0.412 | 0.401 | 0.433
Adjusted R2 | 0.137 | 0.305 | 0.358 | 0.396 | 0.406 | 0.395 | 0.425
Residual Std. Error | 0.494 (df = 522) | 0.443 (df = 521) | 0.426 (df = 520) | 0.413 (df = 519) | 0.410 (df = 518) | 0.414 (df = 518) | 0.403 (df = 516)
F Statistic | 83.868*** (df = 1; 522) | 115.919*** (df = 2; 521) | 98.193*** (df = 3; 520) | 86.774*** (df = 4; 519) | 72.579*** (df = 5; 518) | 69.388*** (df = 5; 518) | 56.191*** (df = 7; 516)
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | | | | | | 

In column (7), the marriage premium differs sharply by gender: married men earn about 29% more than comparable single men, while for women the combined effect is 0.291 − 0.317 ≈ −0.03, essentially zero.
wage_new <- wage1%>%
mutate(male = if_else(female == 1, 0, 1)) # male is the mirror image of female: male + female = 1 for every observation
mod_m <- lm(data = wage_new, lwage ~ male)
mod_fm <- lm(data = wage_new, lwage ~ female)
mod_red <- lm(data = wage_new, lwage ~ female + male) # perfectly collinear with the intercept: the dummy variable trap
stargazer(mod_m,mod_fm,mod_red,type = "html",dep.var.labels = c("log wage"), title = "Wage and Gender", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
Dependent variable: log wage
 | (1) | (2) | (3)
--- | --- | --- | ---
male | 0.396*** | | 
 | (0.043) | | 
female | | -0.396*** | -0.396***
 | | (0.043) | (0.043)
Constant | 1.418*** | 1.814*** | 1.814***
 | (0.031) | (0.030) | (0.030)
N | 524 | 524 | 524
R2 | 0.138 | 0.138 | 0.138
Adjusted R2 | 0.137 | 0.137 | 0.137
Residual Std. Error (df = 522) | 0.494 | 0.494 | 0.494
F Statistic (df = 1; 522) | 83.868*** | 83.868*** | 83.868***
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | | 

Columns (1) and (2) are mirror images: the male coefficient is exactly the negative of the female coefficient, and each intercept is the mean log wage of the omitted group. In column (3), R drops male automatically because female + male = 1 for every observation, so the two dummies plus the intercept are perfectly collinear (the dummy variable trap); the result is identical to column (2).
With Multiple Dummies
Let us go back to our old example on the salaries of CEOs. The variables in the data set are:
salary: 1990 salary, thousands $
pcsalary: percent change salary, 89-90
sales: 1990 firm sales, millions $
roe: return on equity, 88-90 avg
pcroe: percent change roe, 88-90
ros: return on firm’s stock, 88-90
indus: =1 if industrial firm
finance: =1 if financial firm
consprod: =1 if consumer product firm
utility: =1 if transport. or utilities
lsalary: natural log of salary
lsales: natural log of sales
model_md <- lm(data = ceosal1, salary ~ roe + indus + finance + consprod) # utility is the omitted base category
model_md1 <- lm(data = ceosal1, salary ~ roe + indus + finance + consprod + utility) # the four dummies are exhaustive (they sum to 1), so R drops utility
stargazer(model_md, model_md1,type = "html",dep.var.labels = "Salary", title = "CEO Salary and Return on Equity", style = "qje",notes.append = FALSE,notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01"))
Dependent variable: Salary
 | (1) | (2)
--- | --- | ---
roe | 2.499 | 2.499
 | (12.397) | (12.397)
indus | 396.514 | 396.514
 | (286.959) | (286.959)
finance | 609.637** | 609.637**
 | (300.829) | (300.829)
consprod | 966.333*** | 966.333***
 | (315.436) | (315.436)
utility | | 
Constant | 699.470*** | 699.470***
 | (264.622) | (264.622)
N | 209 | 209
R2 | 0.062 | 0.062
Adjusted R2 | 0.044 | 0.044
Residual Std. Error (df = 204) | 1,342.054 | 1,342.054
F Statistic (df = 4; 204) | 3.374** | 3.374**
Notes: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 | 

The two columns are identical: because every firm belongs to exactly one of the four industry categories, including utility along with the other three dummies creates perfect collinearity with the intercept, so R drops utility and re-estimates the same regression. Each dummy coefficient measures the salary difference relative to the omitted utility category.