# Takes in the user's housing preferences (ideal neighborhood, minimum square feet,
# minimum bedrooms, minimum bathrooms, and minimum year built), filters the dataset
# accordingly, and fits a linear regression model as a predictor of housing price
# vs. square feet. Actual and predicted prices are visualized together in a single
# scatterplot, and the model's R-squared score is reported as an accuracy metric.
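# Expected columns in housing_price_dataset.csv, based on how they are used below:
# SquareFeet, Bedrooms, Bathrooms, YearBuilt, Price, and Neighborhood (with the
# values Urban, Suburb, or Rural).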
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


def main():
    # wrangling the data: one-hot encode Neighborhood into indicator columns
    df = pd.read_csv('housing_price_dataset.csv')
    df = pd.get_dummies(df, columns=['Neighborhood'])

    # collecting user preferences
    print("Hello User! Thank you for using our Housing Price Predictor!")
    print()
    print("Enter your house preferences below: ")
    neighborhood = input("Ideal Neighborhood (Urban, Suburb, Rural)? ")
    while neighborhood not in ['Urban', 'Suburb', 'Rural']:
        print("Invalid neighborhood. Please enter your ideal neighborhood again:")
        neighborhood = input("Ideal Neighborhood (Urban, Suburb, Rural)? ")
    min_sqft = int(input("Minimum Square Feet? "))
    min_beds = int(input("Minimum Bedrooms? "))
    min_baths = int(input("Minimum Bathrooms? "))
    min_year = int(input("Minimum Year Built? "))
    # filtering data based on user preferences
    dff = df[df['SquareFeet'] >= min_sqft]
    dff = dff[dff['Bedrooms'] >= min_beds]
    dff = dff[dff['Bathrooms'] >= min_baths]
    dff = dff[dff['YearBuilt'] >= min_year]
    # keep only rows in the chosen neighborhood (one-hot column from get_dummies)
    dff = dff[dff['Neighborhood_' + neighborhood] == 1]
    if dff.empty:
        print("No houses match your criteria.")
        return
    # building ML predictions
    features = ['SquareFeet', 'Bedrooms', 'Bathrooms', 'YearBuilt',
                'Neighborhood_Rural', 'Neighborhood_Suburb', 'Neighborhood_Urban']
    X = dff[features].copy()
    y = dff['Price']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # attach predictions to a copy of the test features and sort by square feet for plotting
    X_test = X_test.copy()
    X_test["y_pred"] = y_pred
    X_test = X_test.sort_values("SquareFeet")
    # attach actual prices to the feature frame and sort for the scatterplot
    X["y"] = y
    X = X.sort_values("SquareFeet")
    # visualize predictions
    fig, ax = plt.subplots(1, 1, figsize = (12, 7))
    ax.scatter(X['SquareFeet'], X['y'], color = "darkblue", alpha = 0.4, label = "Actual Values", marker = '.')
    # smooth the predicted prices with a rolling mean; min_periods=1 keeps the line
    # visible even when fewer than 30 test rows match the filters
    ax.plot(X_test['SquareFeet'], X_test["y_pred"].rolling(window = 30, min_periods = 1).mean(), color = "darkorange", label = "Predicted Values", linewidth = 3)
    fig.suptitle("Actual & Predicted Prices by Square Feet Based On Housing Criteria")
    ax.set(xlabel = "Square Feet", ylabel = "Price ($)")
    ax.legend(loc = "upper right")
    fig.tight_layout()
    fig.savefig("housing.png")
    # output regression accuracy score (R-squared on the held-out test set)
    print()
    print("R-Squared (Model Accuracy):", round(r2_score(y_test, y_pred), 2))


if __name__ == '__main__':
    main()
