# BLISS multidimensional poverty, part 1: load the survey modules, derive the
# household overcrowding indicator, and prepare the health, education and
# module-10 columns that the later steps combine into a poverty index.

## Load the data ----
base_url <- "http://www.observatoria.eu/boyan/sites/default/files/"

# Read one comma-separated BLISS file from the project's download folder.
read_bliss <- function(file_name) {
  read.csv(paste0(base_url, file_name), sep = ",")
}

housing <- read_bliss("bliss_m3.csv")
dict_housing <- read_bliss("dict_BLISS_m3.csv")
blissm1 <- read_bliss("bliss_m1_1.csv")
dict_blissm1 <- read_bliss("dict_BLISS_m1.csv")
blissm10 <- read_bliss("bliss_m10.csv")
dict_blissm10 <- read_bliss("dict_BLISS_m10.csv")
blissm4 <- read_bliss("bliss_m4.csv")
dict_blissm4 <- read_bliss("dict_BLISS_m4.csv")
wt_cs_ind <- read_bliss("wt_BLISS_ms_cs_ind.csv")
wt_cs_hh <- read_bliss("wt_BLISS_ms_cs_hh.csv")

# The earlier version also read two modules (hh, hhm) from a local
# "C:/Users/..." folder and merged marital status into them. The script itself
# marked those steps as unnecessary, their results were overwritten below, and
# the local paths made the script fail on any other machine, so they are gone.

## Variables used ----
# Module m3 describes the dwelling; m3_q3 is the number of rooms the household
# has. What remains is to compute how many rooms the household needs according
# to the Eurostat definition.
# blissm1$m1_q2      relation to the head (1 = head, 2 = spouse)
# blissm1$m1_q3      sex
# blissm1$m1_q4a_age age
# blissm1$m1_q5      marital status (1 = single, 2 = married,
#                    3 = living together)

## Rename the identifiers ----
# Example of renaming variables with base R. The former plyr/reshape-style
# rename() call was dropped: no package providing it was loaded and the two
# lines below already do the same job.
names(blissm1)[names(blissm1) == "HH_ID"] <- "household"
names(blissm1)[names(blissm1) == "HHM_ID"] <- "person"

## Rooms needed (Eurostat overcrowding rule) ----
# one room for the household;
# one room per couple in the household;
# one room for each single person aged 18 or more;
# one room per pair of single people of the same gender between 12 and 17
#   years of age;
# one room for each single person between 12 and 17 years of age and not
#   included in the previous category;
# one room per pair of children under 12 years of age.

# Placeholder columns kept from the original script; they are not used in the
# calculation below.
blissm1$rooms <- 1
housing$rooms <- 1

# Check that age is numeric and look at its mean.
is.numeric(blissm1$m1_q4a_age)
mean(blissm1$m1_q4a_age, na.rm = TRUE)

# Work on a copy of the person-level data under a new name.
hhm <- blissm1

# Person-level indicators. %in% and the explicit is.na() guard keep every
# indicator free of NA: the formula interface of aggregate() drops any row
# that contains an NA, which would remove the whole person from the count.
age <- hhm$m1_q4a_age
age_known <- !is.na(age)
in_couple <- hhm$m1_q5 %in% c(2, 3)
single_teen <- age_known & age >= 12 & age < 18 & !in_couple

# Children under 12: counted here, halved and rounded up per household later.
hhm$children11 <- as.numeric(age_known & age < 12)
sum(hhm$children11, na.rm = TRUE)

# Adults: "single" gets one room each, "couple" is halved per household.
# Only persons aged 18 or more count as single, as the rule above requires;
# without the age limit every single child received a full room on top of the
# room share from its own age category.
# People "living together" are assumed to share the household; for "married"
# this may be a problem (the spouse may live elsewhere).
hhm$single <- as.numeric(age_known & age >= 18 & !in_couple)
hhm$couple <- as.numeric(in_couple)

# Single 12-17 year olds, split by sex code (m1_q3 == 1 and m1_q3 == 2); each
# group is summed per household and rounded up in pairs.
hhm$children17m <- as.numeric(single_teen & hhm$m1_q3 %in% 1)
hhm$children17f <- as.numeric(single_teen & hhm$m1_q3 %in% 2)

room_indicators <- c(
  "children11", "single", "couple", "children17m", "children17f"
)
hhm1 <- hhm[, c("household", room_indicators)]

# If the indicator columns ever arrive as lists, convert them first with
# as.numeric(unlist(...)); this is not needed here.

# Household totals of each indicator.
hhm2 <- aggregate(. ~ household, data = hhm1, FUN = sum)

names(housing)[names(housing) == "HH_ID"] <- "household"
names(housing)[names(housing) == "HHM_ID"] <- "person"
# Columns shared by the two tables (expected: only the merge key).
intersect(names(housing), names(hhm2))

hhm2$roomsneeded <- 1 +
  ceiling(hhm2$children11 / 2) +
  hhm2$single +
  ceiling(hhm2$couple / 2) +
  ceiling(hhm2$children17m / 2) +
  ceiling(hhm2$children17f / 2)

# all.x = TRUE keeps every household. The former `all.hhm2 = T` is not a
# merge() argument; it was silently ignored, which made this an inner join.
dwelling_cols <- c(
  "household", "m3_q3", "m3_q5_1", "m3_q5_3", "m3_q5_6", "m3_q5_7"
)
hhm2 <- merge(
  hhm2, housing[, dwelling_cols],
  by = "household", all.x = TRUE, sort = TRUE
)
names(hhm2)[names(hhm2) == "m3_q3"] <- "roomsavailable"
names(hhm2)[names(hhm2) == "m3_q5_3"] <- "electricity"
names(hhm2)[names(hhm2) == "m3_q5_1"] <- "water"
names(hhm2)[names(hhm2) == "m3_q5_6"] <- "toilet"
names(hhm2)[names(hhm2) == "m3_q5_7"] <- "bathroom"

# Overcrowded = fewer rooms available than needed.
hhm2$overcrowd <- ifelse((hhm2$roomsavailable - hhm2$roomsneeded) < 0, 1, 0)

# Attach the household-level results to every person (left join, see above).
household_cols <- c(
  "household", "overcrowd", "water", "electricity", "toilet", "bathroom"
)
blissm1 <- merge(
  blissm1, hhm2[, household_cols],
  by = "household", all.x = TRUE, sort = TRUE
)

## Health ----
# Rename the health-related variables.
names(blissm1)[names(blissm1) == "m1_q7"] <- "insurance"
names(blissm1)[names(blissm1) == "m1_q9a"] <- "health"
names(blissm1)[names(blissm1) == "m1_q9c"] <- "disability"

## Education ----
# primary: m1_q14a below 4 for persons older than 18.
# dropout: m1_q11a equal to 7 for persons aged 8 to 15.
# NOTE(review): the meaning of these codes comes from the questionnaire
# dictionaries (dict_blissm1) - confirm there.
blissm1$primary <- ifelse(
  blissm1$m1_q14a < 4 & blissm1$m1_q4a_age > 18, 1, 0
)
blissm1$dropout <- ifelse(
  blissm1$m1_q11a == 7 & blissm1$m1_q4a_age > 7 & blissm1$m1_q4a_age < 16,
  1, 0
)

## Module 10 ----
names(blissm10)[names(blissm10) == "HH_ID"] <- "household"
# NOTE(review): the original literal for this column appears to end in a
# Cyrillic "a" (U+0430) instead of a Latin one, in which case the rename
# silently did nothing. Both spellings are accepted - confirm the real header.
names(blissm10)[
  names(blissm10) %in% c("m10_q9a", "m10_q9\u0430")
] <- "holiday"
names(blissm10)[names(blissm10) == "m10_q10"] <- "eat"
names(blissm10)[names(blissm10) == "m10_q19"] <- "income"
names(blissm10)[names(blissm10) == "poverty_rate"] <- "mon_poverty"
# BLISS multidimensional poverty, part 2: attach the module-10 answers, build
# the household asset-deprivation flag from module 4, and recode the yes/no
# indicators so that 1 always means "deprived".

## Packages ----
# Install only what is missing instead of reinstalling on every run, then load
# with library() so that a missing package stops the script immediately
# (require() merely returns FALSE with a warning).
for (pkg in c("car", "survey", "MatrixModels")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
}
library(car)

## Module 10: holiday, food, income, language ----
names(blissm10)[names(blissm10) == "m10_q20"] <- "language"

# all.x = TRUE keeps every person. The former `all.blissm1 = T` is not a
# merge() argument; it was silently ignored, which made this an inner join.
module10_cols <- c(
  "household", "holiday", "eat", "income", "income_modif", "mon_poverty",
  "language"
)
blissm1 <- merge(
  blissm1, blissm10[, module10_cols],
  by = "household", all.x = TRUE, sort = TRUE
)

## Module 4: durable goods ----
names(blissm4)[names(blissm4) == "HH_ID"] <- "household"
names(blissm4)[names(blissm4) == "m4_q1"] <- "television"
names(blissm4)[names(blissm4) == "m4_q10"] <- "washing"
names(blissm4)[names(blissm4) == "m4_q16"] <- "car"
names(blissm4)[names(blissm4) == "m4_q13"] <- "telephone"
names(blissm4)[names(blissm4) == "m4_q14"] <- "mobile"

# Turn each item column into a "lacks the item" flag: 0 becomes 1 and any
# value from 1 to 1000 becomes 0.
for (item in c("television", "washing", "car", "telephone", "mobile")) {
  blissm4[[item]] <- car::recode(blissm4[[item]], "0=1; 1:1000=0")
}

# Number of lacked items in each group; missing answers count as not lacked.
lacked_durables <- rowSums(
  blissm4[, c("television", "washing", "car")], na.rm = TRUE
)
lacked_phones <- rowSums(blissm4[, c("telephone", "mobile")], na.rm = TRUE)

# Deprived when at least two of television/washing machine/car are lacking, or
# when fewer than two are lacking but a telephone or a mobile is missing.
blissm4$deprived <- ifelse(
  (lacked_durables < 2 & lacked_phones > 0) | lacked_durables > 1, 1, 0
)

blissm1 <- merge(
  blissm1, blissm4[, c("household", "deprived")],
  by = "household", all.x = TRUE, sort = TRUE
)

## Recode the indicators so that 1 = deprived ----
# To be recoded: insurance (2), disability (1), health (4 or 5),
# electricity (2), water (2), toilet (2), bathroom (2), holiday (2), eat (2);
# the value in brackets is the answer code that marks deprivation.
yes_no_items <- c(
  "insurance", "electricity", "water", "toilet", "bathroom", "holiday", "eat"
)
for (item in yes_no_items) {
  blissm1[[item]] <- car::recode(blissm1[[item]], "2=1; 1=0")
}
# disability already uses 1 for the deprived state; only code 2 is reset.
blissm1$disability <- car::recode(blissm1$disability, "2=0")

blissm1$poorhealth <- ifelse(blissm1$health == 4 | blissm1$health == 5, 1, 0)
# BLISS multidimensional poverty, part 3: count deprivations per person, flag
# the multidimensionally poor, attach the individual weights, and draw the
# weighted plots.

## Poverty index ----
# Lacking either a toilet or a bathroom counts as one deprivation.
blissm1$toil_bath <- ifelse(blissm1$toilet == 1 | blissm1$bathroom == 1, 1, 0)

# Number of deprivations per person; missing indicators count as zero.
deprivation_cols <- c(
  "insurance", "disability", "poorhealth", "primary", "dropout", "holiday",
  "eat", "deprived", "toil_bath", "electricity", "water", "overcrowd"
)
blissm1$mdpov <- rowSums(blissm1[, deprivation_cols], na.rm = TRUE)

# Multidimensionally poor = more than three deprivations.
blissm1$mdpoor <- ifelse(blissm1$mdpov > 3, 1, 0)
mean(blissm1$mdpoor, na.rm = TRUE)

## Individual weights ----
names(wt_cs_ind)[names(wt_cs_ind) == "hh_id"] <- "household"
names(wt_cs_ind)[names(wt_cs_ind) == "hhm_id"] <- "person"
names(wt_cs_ind)[
  names(wt_cs_ind) == "wt_BLISS_ms_cs_ind_final"
] <- "weight"

# all.x = TRUE keeps every person. The former `all.blissm1 = T` is not a
# merge() argument; it was silently ignored, which made this an inner join.
blissm1 <- merge(
  blissm1, wt_cs_ind[, c("household", "person", "weight")],
  by = c("household", "person"), all.x = TRUE, sort = TRUE
)

## Weighted histograms and boxplots ----
# library() stops at once if the package is missing; require() would only warn.
library(survey)

# The design needs a weight on every row, so persons without one are left out
# of the design only (they stay in blissm1).
weighted_rows <- blissm1[!is.na(blissm1$weight), ]
# svyboxplot() expects its grouping variable to be a factor.
weighted_rows$mdpoor_group <- factor(weighted_rows$mdpoor, levels = c(0, 1))

design <- svydesign(
  id = ~1, strata = NULL, weights = ~weight, data = weighted_rows
)
svyboxplot(income ~ mdpoor_group, design)
svyhist(~income, design)