In the heatmap:
The x-axis represents the ‘% INACTIVE’ feature.
The y-axis represents the ‘% OBESE’ feature.
The color intensity at each point in the heatmap represents the predicted values for ‘% DIABETIC’ for the corresponding combination of ‘% OBESE’ and ‘% INACTIVE’.
The picture above shows that the higher a county's % OBESE and % INACTIVE values are, the higher its predicted % DIABETIC value is likely to be.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns # Import Seaborn
# Linear-regression analysis of '% DIABETIC' against '% OBESE' and
# '% INACTIVE', followed by a heatmap of the test-set predictions.
#
# NOTE(review): `df` is not defined in this section; it is assumed to be a
# pandas DataFrame created earlier that contains the columns '% OBESE',
# '% INACTIVE' and '% DIABETIC' -- confirm against the loading code.
#
# All string literals use plain ASCII quotes. The typographic quotes that a
# word processor / web page substitutes are not valid string delimiters in
# Python and make the whole script fail with a SyntaxError.

# Feature engineering: add an interaction term and squared terms so the
# linear model can capture curvature in the two predictors.
# These assignments add three columns to `df` in place.
df['Interaction'] = df['% OBESE'] * df['% INACTIVE']
df['% OBESE_squared'] = df['% OBESE'] ** 2
df['% INACTIVE_squared'] = df['% INACTIVE'] ** 2

# Independent variables (original + engineered features) and the target.
X = df[['% OBESE', '% INACTIVE', 'Interaction', '% OBESE_squared', '% INACTIVE_squared']]
y = df['% DIABETIC']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing for the numerical features: standardise every column.
numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
])

# Full model: preprocessing followed by ordinary least squares. Because the
# scaler is a pipeline step, it is fitted on the training data only when
# `fit` is called below and merely applied to the test data in `predict`.
linear_model = Pipeline(steps=[
    ('preprocessor', numerical_transformer),
    ('model', LinearRegression()),
])

# Fit on the training data, then predict on the held-out test set.
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)

# Report the Mean Squared Error (MSE) on the test set.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Report the R-squared value on the test set.
r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2:.2f}")

# Combine the two raw predictors of the test rows with their predictions
# for visualisation.
results_df = pd.DataFrame({
    '% OBESE': X_test['% OBESE'],
    '% INACTIVE': X_test['% INACTIVE'],
    'Prediction': y_pred,
})

# Pivot into a grid: rows are '% OBESE' values, columns are '% INACTIVE'
# values. pivot_table averages the predictions of rows that share the same
# (row, column) pair; combinations that never occur are left as NaN.
heatmap_data = results_df.pivot_table(index='% OBESE', columns='% INACTIVE', values='Prediction')

plt.figure(figsize=(10, 8))
sns.heatmap(heatmap_data, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Heatmap of Predictions (Linear Regression)')
plt.xlabel('% INACTIVE')
plt.ylabel('% OBESE')
plt.show()