# King County house prices: exploratory linear regressions.
#
# Downloads the kc-house-data CSV, derives the distance of every listing to a
# reference point in Seattle plus the price per square foot, and fits a series
# of linear models on those variables.

library(tidyverse)
library(geosphere)

# Data ----

# The file is comma-separated with "." as the decimal mark, so read.csv() is
# the matching reader. read.csv2() defaults to dec = ",", which makes decimal
# columns (lat, long, bathrooms, floors, ...) come back as character.
data <- read.csv(
  "https://raw.githubusercontent.com/basilhan/datasets/master/kc-house-data.csv",
  header = TRUE
)
glimpse(data)

# Price vs. living area ----

plot(data$sqft_living, data$price)
reg <- lm(price ~ sqft_living, data = data)
abline(reg, col = "red")
summary(reg)

# Derived variables ----

# Reference point as c(longitude, latitude), the order distHaversine() expects.
# NOTE(review): the original note listed Seattle as 47.6062, -122.3321, but the
# code has always used latitude 47.5813902; that value is kept here. Confirm
# which one is intended.
seattle_lon_lat <- c(-122.3321, 47.5813902)

# Compute and store the distance to Seattle. distHaversine() accepts a
# two-column matrix of points, so no row-wise grouping is needed and `data`
# stays an ordinary (ungrouped) data frame for the following steps.
data <- data %>%
  mutate(
    lat = as.numeric(lat),
    long = as.numeric(long),
    distanceSeattle = distHaversine(cbind(long, lat), seattle_lon_lat)
  )

reg <- lm(
  price ~ sqft_living + sqft_lot + condition + grade + sqft_above +
    distanceSeattle,
  data = data
)
summary(reg)

# Price per square foot of living area.
data <- data %>%
  mutate(sqft_price = as.numeric(price) / as.numeric(sqft_living))

reg2 <- lm(price ~ sqft_living + waterfront, data = data)
summary(reg2)

# Price per square foot models ----

# Distance to Seattle only.
reg3 <- lm(sqft_price ~ distanceSeattle, data = data)
summary(reg3)

# Distance to Seattle and waterfront.
reg3 <- lm(sqft_price ~ waterfront + distanceSeattle, data = data)
summary(reg3)

# Distance to Seattle, waterfront and view.
reg3 <- lm(sqft_price ~ waterfront + view + distanceSeattle, data = data)
summary(reg3)

# Full model ----

# Predict the listing price from all remaining variables in the data set
# (including the distance), leaving out date, id, the coordinates, zipcode and
# the price per square foot.
reg4 <- lm(
  price ~ .,
  data = select(data, -date, -id, -long, -lat, -zipcode, -sqft_price)
)
summary(reg4)