axes ( rect_histy ) # define the axis for the colorbar left, width = width + left + 0.13, 0.01 rect_colorbar = ax_colorbar = plt. axes ( rect_scatter ) ax_histx_zoom = plt. axes ( rect_histy ) # define the axis for the zoomed-in plot left = width + left + 0.2 left_h = left + width + 0.02 rect_scatter = rect_histx = rect_histy = ax_scatter_zoom = plt. suptitle ( title ) # define the axis for the first plot left, width = 0.1, 0.22 bottom, height = 0.1, 0.7 bottom_h = height + 0.15 left_h = left + width + 0.02 rect_scatter = rect_histx = rect_histy = ax_scatter = plt. hot_r ) def create_axes ( title, figsize = ( 16, 6 )): fig = plt. features = features_idx = X = X_full distributions = # scale the output between 0 and 1 for the colorbar y = minmax_scale ( y_full ) # plasma does not exist in matplotlib < 1.5 cmap = getattr ( cm, "plasma_r", cm. # Feature AveOccup has a few but very large outliers. feature_names feature_mapping = # Take only 2 features to make visualization easier # Feature MedInc has a long tail distribution. # Author: Raghav RV # Guillaume Lemaitre # Thomas Unterthiner # License: BSD 3 clause import numpy as np import matplotlib as mpl from matplotlib import pyplot as plt from matplotlib import cm from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import minmax_scale from sklearn.preprocessing import MaxAbsScaler from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import RobustScaler from sklearn.preprocessing import Normalizer from sklearn.preprocessing import QuantileTransformer from sklearn.preprocessing import PowerTransformer from sklearn.datasets import fetch_california_housing dataset = fetch_california_housing () X_full, y_full = dataset. The following code is a bit verbose, feel free to jump directly to the analysis Transformation instead of a per feature transformation. Unlike the previous transformations, normalization refers to a per sample Stabilize variance and minimize skewness. 
Non-linear transformations in which data is mapped to a normal distribution to Other in the way they estimate the parameters used to shift and scale each. Between marginal outliers and inliers are shrunk. Scalers are linear (or more precisely affine) transformers and differ from each This example uses different scalers, transformers, and normalizers to bring the Notable exception are decision tree-based estimators that are robust to In particular, metric-based and gradient-based estimators often assume approximately standardized data (centered features with unit variances). Values close to zero or more importantly that all features vary on comparable Indeed many estimators are designed with the assumption that each feature takes Unscaled data can also slow down or even prevent the convergence of many gradient-based estimators. Importantly, they can degrade the predictive performance of many machine These two characteristics lead to difficulties to visualize the data and, more To download the full example code or to run this example in your browser via Binder Compare the effect of different scalers on data with outliers ¶ Feature 0 (median income in a block) and feature 5 (average house occupancy) of different scales and contain some very large outliers.
0 Comments
Leave a Reply. |