3 Visualize Data
;; Namespace for the visualization chapter. It refers `data` from
;; assignment.generate-data and aliases the dataset, kindly and hanami
;; helpers used by the forms below.
(ns assignment.visualize-data
  (:require
   [aerial.hanami.templates :as ht]
   [assignment.generate-data :refer [data]]
   [scicloj.kindly.v4.kind :as kind]
   [scicloj.ml.dataset :as ds]
   [scicloj.noj.v1.vis.hanami :as hanami]))

;; Preview: shuffle the dataset and show its first seven rows.
(ds/head (ds/shuffle data) 7)
_unnamed [7 3]:
| :x1 | :x2 | :group |
|---|---|---|
| 1.15632892 | -2.34357298 | normal |
| 3.67532450 | 5.10155574 | normal |
| 5.40007373 | 2.62960971 | gamma |
| 9.36665576 | 6.72907653 | gamma |
| 1.59971439 | -2.67785685 | normal |
| 8.55412024 | 4.42748644 | gamma |
| 8.08554275 | 11.39241993 | gamma |
;; Column keys that the repeated histogram specs below iterate over
;; (used as the `:repeat {:column ...}` value).
(def cols-of-interest [:x1 :x2])
3.1 Group: normal
;; Subset of `data` holding only the rows whose :group equals "normal".
(def norm-dat
  (ds/select-rows data (fn [row] (= "normal" (:group row)))))

;; Repeated bar chart: one binned count histogram per column listed in
;; `cols-of-interest`, coloured by :group.
^kind/vega
(let [row-maps      (ds/rows norm-dat :as-maps)
      repeated-cols cols-of-interest
      ;; x channel: the currently repeated column, binned with :steps [1 3].
      x-channel     {:field {:repeat "column"}
                     :bin {:steps [1 3]}
                     :type "quantitative"}]
  {:data   {:values row-maps}
   :repeat {:column repeated-cols}
   :spec   {:mark "bar"
            :encoding {:x x-channel
                       :y {:aggregate "count"}
                       :color {:field :group}}}})

;; Per-column summary of the "normal" subset.
(ds/info norm-dat)
_unnamed: descriptive-stats [3 12]:
| :col-name | :datatype | :n-valid | :n-missing | :min | :mean | :mode | :max | :standard-deviation | :skew | :first | :last |
|---|---|---|---|---|---|---|---|---|---|---|---|
| :x1 | :float64 | 200 | 0 | -3.52032814 | 3.15359995 | | 8.96857501 | 1.98163550 | -0.22145647 | 3.581 | 5.726 |
| :x2 | :float64 | 200 | 0 | -12.27623596 | -0.44328147 | | 13.93989001 | 4.29960213 | 0.13687752 | 1.448 | -3.545 |
| :group | :string | 200 | 0 | | | normal | | | | normal | normal |
3.2 Group: gamma
;; Subset of `data` holding only the rows whose :group equals "gamma".
(def gamma-dat
  (-> data
      (ds/select-rows (fn [row] (= "gamma" (:group row))))))

;; Repeated bar chart: one binned count histogram per column listed in
;; `cols-of-interest`, coloured by :group.
^kind/vega
(let [row-maps      (ds/rows gamma-dat :as-maps)
      repeated-cols cols-of-interest
      ;; x channel: the currently repeated column, binned with :steps [1 3].
      x-channel     {:field {:repeat "column"}
                     :bin {:steps [1 3]}
                     :type "quantitative"}]
  {:data   {:values row-maps}
   :repeat {:column repeated-cols}
   :spec   {:mark "bar"
            :encoding {:x x-channel
                       :y {:aggregate "count"}
                       :color {:field :group}}}})

;; Per-column summary of the "gamma" subset.
(ds/info gamma-dat)
_unnamed: descriptive-stats [3 12]:
| :col-name | :datatype | :n-valid | :n-missing | :min | :mean | :mode | :max | :standard-deviation | :skew | :first | :last |
|---|---|---|---|---|---|---|---|---|---|---|---|
| :x1 | :float64 | 200 | 0 | 2.46503412 | 7.06417638 | | 12.26392965 | 1.85266087 | 0.24642603 | 2.465 | 5.841 |
| :x2 | :float64 | 200 | 0 | 1.95034441 | 7.50807335 | | 19.84870098 | 3.48813161 | 0.84906043 | 8.546 | 19.85 |
| :group | :string | 200 | 0 | | | gamma | | | | gamma | gamma |
3.3 Group: log-normal
;; Subset of `data` holding only the rows whose :group equals "log-normal".
(def log-normal-dat
  (-> data
      (ds/select-rows (fn [row] (= "log-normal" (:group row))))))

;; Repeated bar chart: one binned count histogram per column listed in
;; `cols-of-interest`, coloured by :group.
^kind/vega
(let [row-maps      (ds/rows log-normal-dat :as-maps)
      repeated-cols cols-of-interest
      ;; x channel: the currently repeated column, binned with :steps [1 3].
      x-channel     {:field {:repeat "column"}
                     :bin {:steps [1 3]}
                     :type "quantitative"}]
  {:data   {:values row-maps}
   :repeat {:column repeated-cols}
   :spec   {:mark "bar"
            :encoding {:x x-channel
                       :y {:aggregate "count"}
                       :color {:field :group}}}})

;; Per-column summary of the "log-normal" subset.
(ds/info log-normal-dat)
_unnamed: descriptive-stats [3 12]:
| :col-name | :datatype | :n-valid | :n-missing | :min | :mean | :mode | :max | :standard-deviation | :skew | :first | :last |
|---|---|---|---|---|---|---|---|---|---|---|---|
| :x1 | :float64 | 200 | 0 | -5.96223122 | -0.98826246 | | 4.59510840 | 1.99868266 | 0.16848382 | -3.653 | -3.305 |
| :x2 | :float64 | 200 | 0 | -30.50059718 | -6.18813429 | | -1.58353073 | 3.23981886 | -2.81709917 | -5.231 | -4.002 |
| :group | :string | 200 | 0 | | | log-normal | | | | log-normal | log-normal |
3.4 Full data
;; Point chart of the whole dataset: "x1" on X, "x2" on Y, coloured by "group".
(hanami/plot data
             ht/point-chart
             {:X "x1" :Y "x2" :COLOR "group"})

;; Repeated bar chart over the full dataset: one binned count histogram per
;; column listed in `cols-of-interest`, coloured by :group.
^kind/vega
(let [row-maps      (ds/rows data :as-maps)
      repeated-cols cols-of-interest
      ;; x channel: the currently repeated column, binned with :steps [1 3].
      x-channel     {:field {:repeat "column"}
                     :bin {:steps [1 3]}
                     :type "quantitative"}]
  {:data   {:values row-maps}
   :repeat {:column repeated-cols}
   :spec   {:mark "bar"
            :encoding {:x x-channel
                       :y {:aggregate "count"}
                       :color {:field :group}}}})

;; Scratch helper, not evaluated: largest element of `dist` minus the smallest.
(comment
  (defn dist-range [dist]
    (- (apply max dist)
       (apply min dist))))

;; Scratch experiments, not evaluated: side-by-side hanami histograms per
;; group, and a single-column Vega-Lite histogram of :x1.
(comment ; works live, but won't render
  (hanami/hconcat norm-dat {}
                  [(hanami/histogram norm-dat :x1 {:nbins 20})
                   (hanami/histogram norm-dat :x2 {:nbins 20})])
  (hanami/hconcat gamma-dat {}
                  [(hanami/histogram gamma-dat :x1 {:nbins 20})
                   (hanami/histogram gamma-dat :x2 {:nbins 20})])
  (hanami/hconcat log-normal-dat {}
                  [(hanami/histogram log-normal-dat :x1 {:nbins 20})
                   (hanami/histogram log-normal-dat :x2 {:nbins 20})])
  ^kind/vega
  (let [row-maps (ds/rows data :as-maps)]
    {:data {:values row-maps}
     :mark "bar"
     :encoding {:x {:field :x1 :bin {:steps [2 3]} :type "quantitative"}
                :y {:aggregate "count"}
                :color {:field :group}}}))