import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Read the CSV file. Each column is treated as one neighborhood and each cell
# in that column as one hex color code.
file_path = 'data.csv'  # Update with the actual file path
data = pd.read_csv(file_path)

# Reshape from wide (one column per neighborhood) to long form
# (one row per color) in a single vectorized step. This replaces growing a
# DataFrame with pd.concat inside a loop, which copies the accumulated frame on
# every iteration and starts from an empty frame (deprecated concat behavior).
df = data.melt(var_name='Neighborhood', value_name='Hex Code')

# Columns of unequal length are padded with NaN by read_csv; drop that padding.
# The index is intentionally not reset, so labels keep their pre-drop positions.
df = df.dropna(subset=['Hex Code'])
df  # notebook display of the resulting frame
Neighborhood | Hex Code | |
---|---|---|
0 | Nob Hill | #C7C1B1 |
1 | Nob Hill | #C2C3CE |
2 | Nob Hill | #B06A4D |
3 | Nob Hill | #BDBAB2 |
4 | Nob Hill | #AFBFCB |
... | ... | ... |
595 | Mission | #CBCAC5 |
596 | Mission | #9F9B90 |
597 | Mission | #D6A875 |
598 | Mission | #EAEBE6 |
599 | Mission | #5A5D6E |
402 rows × 2 columns
from matplotlib.colors import to_rgb

# Collect the distinct hex codes of every neighborhood.
grouped_data = df.groupby('Neighborhood')['Hex Code'].unique()

# Number of swatches per grid row; adjust to taste.
num_columns = 20

# Draw one figure per neighborhood: a grid of unit squares, each filled with
# one hex color and labelled with its code.
for neighborhood, hex_codes in grouped_data.items():
    # Ceiling division: rows needed to fit every swatch.
    num_rows = (len(hex_codes) - 1) // num_columns + 1
    plt.figure(figsize=(num_columns, num_rows))
    plt.title(f'{neighborhood}')
    for i, hex_code in enumerate(hex_codes):
        row, col = divmod(i, num_columns)
        # Rows grow downwards, hence the negative y coordinates.
        plt.fill_between([col, col + 1], -row, -row - 1, color=hex_code)
        # Pick a label color that contrasts with the swatch: a fixed white
        # label is unreadable on light colors. Uses Rec. 601 luma weights.
        red, green, blue = to_rgb(hex_code)
        luma = 0.299 * red + 0.587 * green + 0.114 * blue
        label_color = 'black' if luma > 0.5 else 'white'
        plt.text(col + 0.5, -row - 0.5, hex_code, ha='center', va='center',
                 fontsize=8, color=label_color)
    plt.axis('off')
    plt.show()
from sklearn.cluster import KMeans
# Convert Hex Code to RGB
def hex_to_rgb(hex_code):
    """Convert a hex color string to an ``(r, g, b)`` tuple of ints in 0-255.

    Accepts an optional leading ``#`` and surrounding whitespace. Supported
    digit counts are 6 (``RRGGBB``), 8 (``RRGGBBAA``, alpha ignored) and the
    3-digit shorthand (``RGB``, each digit doubled).

    Raises:
        ValueError: if the digit count is unsupported or a digit is not
            hexadecimal.
    """
    digits = hex_code.strip().lstrip('#')
    if len(digits) == 3:
        # Shorthand form: "abc" means "aabbcc".
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) not in (6, 8):
        raise ValueError(f"Invalid hex color: {hex_code!r}")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
# Add an 'RGB' column holding the (r, g, b) tuple for every hex code.
df['RGB'] = df['Hex Code'].map(hex_to_rgb)
# Apply KMeans clustering to a set of colors
def kmeans_clustering(colors, n_clusters=12, n_init=10, random_state=0):
    """Cluster colors with KMeans and return the cluster centers.

    Args:
        colors: array-like of shape (n_samples, n_features), e.g. one RGB
            triple per row.
        n_clusters: requested number of clusters. It is capped at the number
            of samples, because KMeans rejects more clusters than samples.
        n_init: number of KMeans initializations. Set explicitly so the result
            does not depend on scikit-learn's changing default (and to silence
            the associated FutureWarning).
        random_state: seed for centroid initialization, so repeated runs give
            the same palette. Pass ``None`` for non-deterministic behavior.

    Returns:
        The fitted ``cluster_centers_`` array, one row per cluster.

    Raises:
        ValueError: if ``colors`` is empty.
    """
    n_samples = len(colors)
    if n_samples == 0:
        raise ValueError("kmeans_clustering requires at least one color")
    kmeans = KMeans(
        n_clusters=min(n_clusters, n_samples),
        n_init=n_init,
        random_state=random_state,
    )
    kmeans.fit(colors)
    return kmeans.cluster_centers_
# Cluster the colors of each neighborhood separately and collect every cluster
# center as one record: {'Neighborhood': ..., 'Clustered Color': center}.
clustered_colors_list = []
for neighborhood, group in df.groupby('Neighborhood'):
    # Stack the per-row RGB tuples into a 2-D array for KMeans.
    colors_array = np.array(group['RGB'].tolist())
    clustered_colors_list.extend(
        {'Neighborhood': neighborhood, 'Clustered Color': center}
        for center in kmeans_clustering(colors_array)
    )

# Build the DataFrame once from the collected records. Passing the column
# names keeps the frame well-formed even if no records were produced.
clustered_colors_df = pd.DataFrame(
    clustered_colors_list, columns=['Neighborhood', 'Clustered Color']
)
print(clustered_colors_df)
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) /Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) /Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)
Neighborhood Clustered Color 0 Castro [202.125, 158.75, 138.625] 1 Castro [79.66666666666667, 106.33333333333334, 134.83... 2 Castro [227.16666666666666, 223.0, 206.33333333333331] 3 Castro [163.0, 166.75, 169.75] 4 Castro [227.8, 200.0, 161.6] 5 Castro [126.25, 136.25, 141.0] 6 Castro [47.0, 66.5, 94.0] 7 Castro [154.8, 176.6, 202.2] 8 Castro [128.0, 157.83333333333334, 182.66666666666666] 9 Castro [189.28571428571428, 186.71428571428572, 178.0] 10 Castro [198.75, 212.0, 223.75] 11 Castro [250.0, 211.0, 106.0] 12 Hayes Valley [175.5, 165.16666666666666, 168.5] 13 Hayes Valley [94.75, 120.25, 165.5] 14 Hayes Valley [202.55555555555554, 207.44444444444446, 203.2... 15 Hayes Valley [131.2, 147.2, 167.2] 16 Hayes Valley [24.0, 76.0, 154.0] 17 Hayes Valley [233.33333333333331, 189.33333333333334, 134.6... 18 Hayes Valley [100.0, 96.0, 96.0] 19 Hayes Valley [130.0, 132.5, 123.0] 20 Hayes Valley [158.0, 176.6, 201.4] 21 Hayes Valley [152.5, 149.5, 140.83333333333334] 22 Hayes Valley [221.33333333333331, 229.33333333333331, 231.0] 23 Hayes Valley [195.28571428571428, 187.85714285714286, 164.8... 24 Mission [224.07692307692307, 226.84615384615387, 227.0... 25 Mission [179.25, 150.625, 123.375] 26 Mission [73.0, 86.00000000000001, 98.42857142857143] 27 Mission [211.64285714285714, 193.07142857142858, 165.7... 28 Mission [142.33333333333334, 76.00000000000001, 66.166... 29 Mission [53.0, 62.0, 253.0] 30 Mission [178.9090909090909, 187.04545454545456, 189.59... 31 Mission [129.45454545454544, 123.36363636363637, 124.4... 32 Mission [211.05882352941174, 209.64705882352942, 200.2... 
33 Mission [184.75, 110.5, 83.25] 34 Mission [47.5, 44.5, 45.0] 35 Mission [156.86666666666667, 163.6, 163.06666666666666] 36 Nob Hill [201.0, 187.83333333333334, 162.0] 37 Nob Hill [174.66666666666666, 113.0, 91.0] 38 Nob Hill [116.2, 130.4, 142.4] 39 Nob Hill [224.33333333333331, 225.66666666666666, 225.0] 40 Nob Hill [90.0, 53.0, 32.0] 41 Nob Hill [189.8, 201.4, 209.6] 42 Nob Hill [199.75, 168.0, 122.5] 43 Nob Hill [220.66666666666666, 202.16666666666666, 180.3... 44 Nob Hill [176.1818181818182, 179.8181818181818, 176.454... 45 Nob Hill [244.8, 233.4, 204.0] 46 Nob Hill [144.0, 80.5, 61.5] 47 Nob Hill [155.0, 148.33333333333334, 136.33333333333334] 48 Noe Valley [225.90909090909088, 194.63636363636363, 144.5... 49 Noe Valley [132.8, 87.0, 80.99999999999999] 50 Noe Valley [170.0952380952381, 173.95238095238096, 166.52... 51 Noe Valley [214.75, 219.66666666666666, 222.66666666666666] 52 Noe Valley [93.99999999999999, 104.39999999999999, 116.79... 53 Noe Valley [191.77272727272725, 195.72727272727272, 196.0... 54 Noe Valley [144.63636363636363, 148.63636363636363, 134.2... 55 Noe Valley [135.83333333333334, 163.33333333333334, 190.3... 56 Noe Valley [123.87499999999999, 142.375, 157.625] 57 Noe Valley [250.0, 251.0, 248.0] 58 Noe Valley [46.5, 59.5, 73.0] 59 Noe Valley [226.25, 216.5, 190.25]
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) /Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)
from pathlib import Path

# Number of swatches per grid row; adjust to taste.
num_columns = 4

# Make sure the target directory exists so savefig does not fail on a fresh
# checkout.
output_dir = Path("../images")
output_dir.mkdir(parents=True, exist_ok=True)

# One image per neighborhood, numbered from 1 in groupby order.
palettes = clustered_colors_df.groupby('Neighborhood')['Clustered Color']
for n, (neighborhood, rgb_tuples) in enumerate(palettes, start=1):
    # Ceiling division: rows needed to fit every swatch.
    num_rows = (len(rgb_tuples) - 1) // num_columns + 1
    fig, ax = plt.subplots(figsize=(num_columns, num_rows))
    for i, rgb_tuple in enumerate(rgb_tuples):
        row, col = divmod(i, num_columns)
        # Matplotlib expects RGB components in [0, 1].
        color = np.array(rgb_tuple) / 255.0
        # Rows grow downwards, hence the negative y coordinates.
        ax.fill_between([col, col + 1], -row, -row - 1, color=color)
    ax.axis('equal')
    ax.axis('off')
    ax.set_xlim(0, num_columns)
    ax.set_ylim(-num_rows, 0)
    print(f'{neighborhood}:')
    fig.savefig(output_dir / f"hover-image-{n}.png", bbox_inches='tight', pad_inches=0)
    plt.show()
Castro:
Hayes Valley:
Mission:
Nob Hill:
Noe Valley:
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
# (Re)build the RGB column so this cell can run on its own.
df["RGB"] = list(map(hex_to_rgb, df["Hex Code"]))

# One 3-D scatter plot per neighborhood: each point sits at its (R, G, B)
# coordinates and is drawn in its own color.
sns.set_style("white")
for neighborhood, group in df.groupby("Neighborhood"):
    # Rows are colors, columns are the R, G and B channels.
    rgb_values = np.array(group["RGB"].tolist())

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(
        rgb_values[:, 0],
        rgb_values[:, 1],
        rgb_values[:, 2],
        c=rgb_values / 255,  # Matplotlib expects components in [0, 1]
        label=neighborhood,
        s=100,
        alpha=0.7,
    )

    ax.set_xlabel("Red")
    ax.set_ylabel("Green")
    ax.set_zlabel("Blue")
    ax.set_title(f"Colors in {neighborhood}")
    ax.legend()
    ax.grid(True)
    plt.show()
def RGB_to_HEX(rgb):
    """Format an RGB triple as a lowercase ``#rrggbb`` hex string.

    Args:
        rgb: indexable sequence whose first three items are the red, green and
            blue components (ints or floats, nominally 0-255).

    Returns:
        The color as ``'#rrggbb'``.

    Each component is rounded to the nearest integer (rather than truncated,
    which biases float cluster centers downwards) and clamped to 0-255 so the
    result is always a well-formed six-digit code.
    """
    channels = [
        min(255, max(0, int(round(float(value)))))
        for value in rgb[:3]
    ]
    # Format as hexadecimal
    return '#{:02x}{:02x}{:02x}'.format(*channels)
from matplotlib.colors import to_rgb

# Layout of the palette grid. grid_rows * grid_cols must equal the number of
# cluster centers returned below, otherwise the reshape fails.
grid_rows, grid_cols = 3, 4

# Cluster the colors of all neighborhoods together (no grouping) and express
# each cluster center as a hex code.
colors_array = np.array(df['RGB'].tolist())
clustered_colors = kmeans_clustering(colors_array)
clustered_colors_list = [RGB_to_HEX(color) for color in clustered_colors]

# Output clustered colors as a list of hex codes
print(clustered_colors_list)

# Reshape the clustered colors list into a grid_rows x grid_cols grid
clustered_colors_grid = np.array(clustered_colors_list).reshape(grid_rows, grid_cols)

# Plotting the grid
fig, ax = plt.subplots(figsize=(8, 6))
# Black background with the same shape as the grid (rows x cols).
ax.imshow([[(0, 0, 0)] * grid_cols] * grid_rows)
for (i, j), color in np.ndenumerate(clustered_colors_grid):
    # Unit square centered on cell (row i, column j).
    ax.add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=color))
    # Pick a label color that contrasts with the swatch: a fixed white label
    # is unreadable on light colors. Uses Rec. 601 luma weights.
    red, green, blue = to_rgb(color)
    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    label_color = 'black' if luma > 0.5 else 'white'
    ax.text(j, i, color, ha='center', va='center', color=label_color, fontsize=10)
ax.set_xlim(-0.5, grid_cols - 0.5)
ax.set_ylim(grid_rows - 0.5, -0.5)  # inverted so row 0 is at the top
ax.set_xticks([])
ax.set_yticks([])
plt.show()
['#c6a07e', '#e3e4df', '#626771', '#868a88', '#b7bbbb', '#e1c69b', '#4668a6', '#975649', '#cfcfc8', '#3a4048', '#a8a8a1', '#8aa1b7']
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)