import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the CSV file. Each column is treated as one neighborhood and each
# cell in that column as one hex colour code.
file_path = 'data.csv'  # Update with the actual file path
data = pd.read_csv(file_path)

# Reshape from wide (one column per neighborhood) to long form with one
# (Neighborhood, Hex Code) row per cell. A single melt keeps the
# column-by-column row order and 0..N-1 index that concatenating one frame
# per column would give, without re-copying the accumulated frame on every
# iteration or concatenating onto an empty frame.
df = data.melt(var_name='Neighborhood', value_name='Hex Code')

# Drop the cells that hold no hex code (NaN after reading the CSV).
df = df.dropna(subset=['Hex Code'])
df


# Distinct hex codes per neighborhood, keyed by neighborhood name
grouped_data = df.groupby('Neighborhood')['Hex Code'].unique()

# Width of the swatch grid, measured in squares
num_columns = 20  # You can adjust this based on your preference

# One figure per neighborhood: a grid of unit squares, each filled with one
# hex colour and labelled with its code. Rows grow downwards (negative y).
for neighborhood, hex_codes in grouped_data.items():
    # Ceiling division: number of rows needed to fit every swatch
    num_rows = (len(hex_codes) - 1) // num_columns + 1

    plt.figure(figsize=(num_columns, num_rows))
    plt.title(f'{neighborhood}')

    for i, hex_code in enumerate(hex_codes):
        row, col = divmod(i, num_columns)
        top = -row
        # Fill the unit square whose top-left corner is (col, top)
        plt.fill_between([col, col + 1], top, top - 1, color=hex_code)
        # Label the square at its centre with the hex code
        plt.text(col + 0.5, top - 0.5, hex_code,
                 ha='center', va='center', fontsize=8, color='white')

    plt.axis('off')
    plt.show()


from sklearn.cluster import KMeans

# Convert Hex Code to RGB
def hex_to_rgb(hex_code):
    """Convert a hex colour string to an ``(r, g, b)`` tuple of ints (0-255).

    Accepted forms, with or without a leading ``#`` and with surrounding
    whitespace ignored:

    * ``RRGGBB`` (and ``RRGGBBAA``; the alpha pair is ignored)
    * ``RGB`` shorthand (and ``RGBA``), where each digit is doubled,
      e.g. ``#fa0`` is read as ``#ffaa00``

    Raises:
        ValueError: if the code has any other length or contains a
            character that is not a hexadecimal digit.
    """
    digits = hex_code.strip().lstrip('#')

    # Expand shorthand notation so that only the long form is parsed below.
    if len(digits) in (3, 4):
        digits = ''.join(ch * 2 for ch in digits)

    # Validate explicitly: int(..., 16) alone would accept signs/whitespace
    # and a wrong length would silently slice into garbage channels.
    is_hex = all(ch in '0123456789abcdefABCDEF' for ch in digits)
    if len(digits) not in (6, 8) or not is_hex:
        raise ValueError(f'invalid hex colour code: {hex_code!r}')

    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

# Attach an (r, g, b) tuple to every row, parsed from its hex code
df['RGB'] = df['Hex Code'].map(hex_to_rgb)

# Apply KMeans clustering to each neighborhood
def kmeans_clustering(colors, n_clusters=12, random_state=None):
    """Return the KMeans cluster centres of a set of colours.

    Args:
        colors: array-like passed to ``KMeans.fit``; callers in this file
            pass an ``(n_colors, 3)`` array of RGB values.
        n_clusters: number of representative colours requested. It is
            reduced to ``len(colors)`` when fewer samples are available,
            because KMeans cannot fit more clusters than samples.
        random_state: seed forwarded to ``KMeans``; pass an int to get the
            same centres on every run. ``None`` keeps the unseeded behavior.

    Returns:
        ``kmeans.cluster_centers_``: one row per cluster centre. The rows
        are floats, so they generally need rounding before being used as
        8-bit colour channels.
    """
    # Never ask for more clusters than samples; keep at least 1 so that an
    # empty input is still reported by KMeans.fit itself.
    n_clusters = max(1, min(n_clusters, len(colors)))
    # n_init is set explicitly to the historical default of 10 so results do
    # not shift (and no FutureWarning is emitted) when the default changes.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    kmeans.fit(colors)
    return kmeans.cluster_centers_

# Cluster the colours of each neighborhood separately and collect one record
# per cluster centre: {'Neighborhood': name, 'Clustered Color': centre}.
clustered_colors_list = []

for neighborhood, group in df.groupby('Neighborhood'):
    # Stack the per-row RGB tuples into a 2-D array for KMeans
    colors_array = np.array(group['RGB'].tolist())
    clustered_colors = kmeans_clustering(colors_array)
    clustered_colors_list.extend(
        {'Neighborhood': neighborhood, 'Clustered Color': color}
        for color in clustered_colors
    )

# Build the frame once from the collected records. The columns are named
# explicitly so they exist even when there are no records, which keeps a
# later groupby('Neighborhood') from failing on an empty result.
clustered_colors_df = pd.DataFrame(
    clustered_colors_list, columns=['Neighborhood', 'Clustered Color']
)

print(clustered_colors_df)

/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)

    Neighborhood                                    Clustered Color
0         Castro                         [202.125, 158.75, 138.625]
1         Castro  [79.66666666666667, 106.33333333333334, 134.83...
2         Castro    [227.16666666666666, 223.0, 206.33333333333331]
3         Castro                            [163.0, 166.75, 169.75]
4         Castro                              [227.8, 200.0, 161.6]
5         Castro                            [126.25, 136.25, 141.0]
6         Castro                                 [47.0, 66.5, 94.0]
7         Castro                              [154.8, 176.6, 202.2]
8         Castro    [128.0, 157.83333333333334, 182.66666666666666]
9         Castro    [189.28571428571428, 186.71428571428572, 178.0]
10        Castro                            [198.75, 212.0, 223.75]
11        Castro                              [250.0, 211.0, 106.0]
12  Hayes Valley                 [175.5, 165.16666666666666, 168.5]
13  Hayes Valley                             [94.75, 120.25, 165.5]
14  Hayes Valley  [202.55555555555554, 207.44444444444446, 203.2...
15  Hayes Valley                              [131.2, 147.2, 167.2]
16  Hayes Valley                                [24.0, 76.0, 154.0]
17  Hayes Valley  [233.33333333333331, 189.33333333333334, 134.6...
18  Hayes Valley                                [100.0, 96.0, 96.0]
19  Hayes Valley                              [130.0, 132.5, 123.0]
20  Hayes Valley                              [158.0, 176.6, 201.4]
21  Hayes Valley                 [152.5, 149.5, 140.83333333333334]
22  Hayes Valley    [221.33333333333331, 229.33333333333331, 231.0]
23  Hayes Valley  [195.28571428571428, 187.85714285714286, 164.8...
24       Mission  [224.07692307692307, 226.84615384615387, 227.0...
25       Mission                         [179.25, 150.625, 123.375]
26       Mission       [73.0, 86.00000000000001, 98.42857142857143]
27       Mission  [211.64285714285714, 193.07142857142858, 165.7...
28       Mission  [142.33333333333334, 76.00000000000001, 66.166...
29       Mission                                [53.0, 62.0, 253.0]
30       Mission  [178.9090909090909, 187.04545454545456, 189.59...
31       Mission  [129.45454545454544, 123.36363636363637, 124.4...
32       Mission  [211.05882352941174, 209.64705882352942, 200.2...
33       Mission                             [184.75, 110.5, 83.25]
34       Mission                                 [47.5, 44.5, 45.0]
35       Mission    [156.86666666666667, 163.6, 163.06666666666666]
36      Nob Hill                 [201.0, 187.83333333333334, 162.0]
37      Nob Hill                  [174.66666666666666, 113.0, 91.0]
38      Nob Hill                              [116.2, 130.4, 142.4]
39      Nob Hill    [224.33333333333331, 225.66666666666666, 225.0]
40      Nob Hill                                 [90.0, 53.0, 32.0]
41      Nob Hill                              [189.8, 201.4, 209.6]
42      Nob Hill                             [199.75, 168.0, 122.5]
43      Nob Hill  [220.66666666666666, 202.16666666666666, 180.3...
44      Nob Hill  [176.1818181818182, 179.8181818181818, 176.454...
45      Nob Hill                              [244.8, 233.4, 204.0]
46      Nob Hill                                [144.0, 80.5, 61.5]
47      Nob Hill    [155.0, 148.33333333333334, 136.33333333333334]
48    Noe Valley  [225.90909090909088, 194.63636363636363, 144.5...
49    Noe Valley                   [132.8, 87.0, 80.99999999999999]
50    Noe Valley  [170.0952380952381, 173.95238095238096, 166.52...
51    Noe Valley   [214.75, 219.66666666666666, 222.66666666666666]
52    Noe Valley  [93.99999999999999, 104.39999999999999, 116.79...
53    Noe Valley  [191.77272727272725, 195.72727272727272, 196.0...
54    Noe Valley  [144.63636363636363, 148.63636363636363, 134.2...
55    Noe Valley  [135.83333333333334, 163.33333333333334, 190.3...
56    Noe Valley             [123.87499999999999, 142.375, 157.625]
57    Noe Valley                              [250.0, 251.0, 248.0]
58    Noe Valley                                 [46.5, 59.5, 73.0]
59    Noe Valley                            [226.25, 216.5, 190.25]

/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)


# Width of the palette grid, measured in squares
num_columns = 4  # You can adjust this based on your preference

# Draw each neighborhood's clustered colours as a grid of unit squares and
# save it as ../images/hover-image-<n>.png, where n counts the neighborhoods
# from 1 in groupby order.
palettes = clustered_colors_df.groupby('Neighborhood')['Clustered Color']

for n, (neighborhood, rgb_tuples) in enumerate(palettes, start=1):
    # Ceiling division: number of rows needed to fit every colour
    num_rows = (len(rgb_tuples) - 1) // num_columns + 1

    fig, ax = plt.subplots(figsize=(num_columns, num_rows))

    for i, rgb_tuple in enumerate(rgb_tuples):
        row, col = divmod(i, num_columns)
        # Scale the 0-255 channel values to the 0-1 range matplotlib expects
        color = np.array(rgb_tuple) / 255.0
        # Rows grow downwards, hence the negative y coordinates
        ax.fill_between([col, col + 1], -row, -row - 1, color=color)

    ax.axis('equal')
    ax.axis('off')
    ax.set_xlim(0, num_columns)
    ax.set_ylim(-num_rows, 0)

    print(f'{neighborhood}:')
    # Save through the figure handle so the right figure is written even if
    # another figure has become the current one.
    fig.savefig(f"../images/hover-image-{n}.png", bbox_inches='tight', pad_inches=0)
    plt.show()

Castro:

Hayes Valley:

Mission:

Nob Hill:

Noe Valley:


from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

# Recompute the (r, g, b) tuple of every row from its hex code
df["RGB"] = df["Hex Code"].map(hex_to_rgb)

# One 3-D scatter plot per neighborhood: each colour is a point placed at its
# (red, green, blue) coordinates and painted in that same colour.
sns.set_style("white")
for neighborhood, group in df.groupby("Neighborhood"):
    # Stack the tuples into an (n, 3) array: columns are red, green, blue
    rgb = np.array(group["RGB"].tolist())

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(
        rgb[:, 0],
        rgb[:, 1],
        rgb[:, 2],
        c=rgb / 255,  # matplotlib expects channel values in the 0-1 range
        label=neighborhood,
        s=100,
        alpha=0.7,
    )

    ax.set_title(f"Colors in {neighborhood}")
    ax.set_xlabel("Red")
    ax.set_ylabel("Green")
    ax.set_zlabel("Blue")
    ax.legend()
    ax.grid(True)
    plt.show()


def RGB_to_HEX(rgb):
    """Format an RGB triple as a lowercase ``#rrggbb`` hex string.

    Args:
        rgb: indexable of at least three numbers (red, green, blue). Floats
            such as KMeans cluster centres are accepted.

    Returns:
        The colour as ``'#rrggbb'``. Each channel is rounded to the nearest
        integer (plain truncation would bias every colour darker) and
        clamped to 0-255 so the result always has exactly six hex digits.
    """
    channels = [
        # float() first so numpy scalars round the same way as Python floats
        min(255, max(0, int(round(float(value)))))
        for value in (rgb[0], rgb[1], rgb[2])
    ]
    return '#{:02x}{:02x}{:02x}'.format(*channels)

# Rebind clustered_colors_df to an empty single-column frame.
# NOTE(review): nothing below fills this frame; the palette is kept in
# clustered_colors_list instead.
clustered_colors_df = pd.DataFrame(columns=['Clustered Color'])

# Cluster the colours of all neighborhoods together (no grouping here) and
# express every cluster centre as a hex code.
colors_array = np.array(df['RGB'].tolist())
clustered_colors = kmeans_clustering(colors_array)
clustered_colors_list = [RGB_to_HEX(center) for center in clustered_colors]

# Show the resulting palette as a list of hex codes
print(clustered_colors_list)

# Reshape the clustered colors list into a 3x4 grid (3 rows, 4 columns)
clustered_colors_grid = np.array(clustered_colors_list).reshape(3, 4)
n_rows, n_cols = clustered_colors_grid.shape

# Plot the grid: one unit square per colour, labelled with its hex code
fig, ax = plt.subplots(figsize=(8, 6))
# Black background with the same rows x columns layout as the grid, so every
# cell is backed by it (an all-zero RGB image renders as black).
ax.imshow(np.zeros((n_rows, n_cols, 3)))
for i in range(n_rows):
    for j in range(n_cols):
        color = clustered_colors_grid[i, j]
        ax.text(j, i, color, ha='center', va='center', color='white', fontsize=10)
        # Cell (i, j) is centred on (j, i), matching imshow's pixel centres
        ax.add_patch(plt.Rectangle((j - 0.5, i - 0.5), 1, 1, color=color))
# Limits follow the grid shape; the y axis is inverted so row 0 is on top
ax.set_xlim(-0.5, n_cols - 0.5)
ax.set_ylim(n_rows - 0.5, -0.5)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

['#c6a07e', '#e3e4df', '#626771', '#868a88', '#b7bbbb', '#e1c69b', '#4668a6', '#975649', '#cfcfc8', '#3a4048', '#a8a8a1', '#8aa1b7']

/Users/caoyujia/anaconda3/lib/python3.11/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)

Import all hex codes

Visualize all colors (grouped by neighborhood)

Calculate representative colors + Visualization

Visualize all colors in vector space

Get representative colors of SF as a whole

	Neighborhood	Hex Code
0	Nob Hill	#C7C1B1
1	Nob Hill	#C2C3CE
2	Nob Hill	#B06A4D
3	Nob Hill	#BDBAB2
4	Nob Hill	#AFBFCB
...	...	...
595	Mission	#CBCAC5
596	Mission	#9F9B90
597	Mission	#D6A875
598	Mission	#EAEBE6
599	Mission	#5A5D6E