import React from "react";
import Gallery from "../components/Gallery";
import GoToTop from "../utils/GoToTop";

function MachineLearning() {
  let kmeans_children = [
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_kmeans_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_KNN_kmeans_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
  ];

  let dbscan_children = [
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_dbscan_raw_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_KNN_dbscan_raw_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_dbscan_standardized_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_KNN_dbscan_standardized_KNN_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
  ];

  let stdbscan_children = [
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_stdbscan_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
    <iframe
      src="https://storage.googleapis.com/pyramindintern-bucket-html/ML_KNN_stdbscan_map.html"
      height="100%"
      width="100%"
      title="FDIC Choropleth Map"
    ></iframe>,
  ];

  return (
    <div className="container">
      <h1 className="page-title">Machine Learning Results</h1>

      <p className="general-information">
        On this page, the FEMA data is clustered with one of the below machine
        learning models and then the K-Nearest Neighbor (KNN) algorithm is used
        to map banks to clustered zones. The KNN algorithm classifies each bank
        location into a disaster cluster by finding the closest neighboring
        point and its cluster. The results of whether a specific bank is in a
        certain disaster hot zone could aid emergency planning and new branch
        site selection.
      </p>

      <Gallery
        children={kmeans_children}
        title={"K-Means Model"}
        description={`K-means groups similar data points by taking a fixed number of clusters, K, which refers to the number of clusters or centroids in the data set. It then maps every data point to the nearest cluster while keeping clusters as small as possible. In order to find the best number of clusters, K, a silhouette score calculation was done to find the optimal number of clusters. The silhouette score determined how strongly points mapped to their respective cluster.\n\nAs there were 11 clusters used for the data set, the clusters ended up being regionally based, with areas like the Northeast and Mid-Atlantic clustered together for both datasets. The clusters have a more geometric and defined shape than other modeling methods. This is a result of K-means clustering data points based on the distance from a centroid without any consideration of the density of the points. 
        `}
      />

      <Gallery
        children={dbscan_children}
        title={"DBSCAN Model"}
        description={`DBSCAN, or Density-Based Spatial Clustering of Applications with Noise, differs from K-means as it allows for noise/outliers in data. Therefore cluster shapes and sizes can vary more than the previous clustering strategy and zones of high density can be more accurately identified.\n\nWhen running the model with raw positional coordinates, the DBSCAN model identified clusters in the eastern half of the US from the disasters and the resulting corresponding banks. These clusters tend to be focused around geographical regions such as the Gulf Coast and the Appalachian Mountains. This is largely due to the FIPS regions on the eastern half of the US having a smaller land area and therefore disasters being mapped closer together.\n\nIf the approach of standardizing disaster points is taken, clustering is far better in the western half of the US. This results in clusters around areas such as the Pacific Northwest and Salt Lake regions. However, this does lead to the entire eastern half of the US being partitioned into a single cluster as the standardization made the east coast points far more dense.
        `}
      />

      <Gallery
        children={stdbscan_children}
        title={"Time-Filtered Density Varying DBSCAN"}
        description={`Certain types of disasters are strongly correlated to the time of the year. In this experiment, the disaster data was grouped by each point's season before clustering.\n\nThe clustering used is a variation of the DBSCAN algorithm, derived from the ST-DBSCAN algorithm; clusters are allowed to have varying densities. This gives more flexibility in cluster identification, and the algorithm is able to extract clusters from both the denser east half of the US and the sparser west coast regions.\n\nThe clustering algorithm identifies different disaster clusters during different seasons, with some clusters corresponding to intuitive disaster hot zones during  different times of the year. For example, the Southeast coastal area during the summer due to a high number of Hurricanes, and the regions that experience snow/freezing in the winter seasons.`}
      />
      <GoToTop />
    </div>
  );
}

// Expose the page component as this module's default export.
export default MachineLearning;
