3  Visualize Data

(ns assignment.visualize-data
  (:require
    [aerial.hanami.templates :as ht]
    [assignment.generate-data :refer [data]]
    [scicloj.kindly.v4.kind :as kind]
    [scicloj.ml.dataset :as ds]
    [scicloj.noj.v1.vis.hanami :as hanami]))
(ds/head
  ;; shuffle first so the preview is a random sample rather than the leading rows
  (ds/shuffle data)
  7)

_unnamed [7 3]:

:x1 :x2 :group
1.15632892 -2.34357298 normal
3.67532450 5.10155574 normal
5.40007373 2.62960971 gamma
9.36665576 6.72907653 gamma
1.59971439 -2.67785685 normal
8.55412024 4.42748644 gamma
8.08554275 11.39241993 gamma
(def cols-of-interest
  "Column keys used as the repeated `:column` fields of the histogram specs in this namespace."
  [:x1 :x2])

3.1 Group: normal

(def norm-dat
  "Rows of `data` whose `:group` value equals \"normal\"."
  (ds/select-rows data
                  (fn [row]
                    (= "normal" (:group row)))))
^kind/vega
(let [;; rows of the normal group as a sequence of maps, embedded inline in the spec
      rows     (ds/rows norm-dat :as-maps)
      ;; x channel: the currently repeated column, binned
      x-enc    {:field {:repeat "column"}
                :bin   {:steps [1 3]}
                :type  "quantitative"}
      encoding {:x     x-enc
                :y     {:aggregate "count"}
                :color {:field :group}}]
  ;; one bar chart per entry of cols-of-interest, laid out as columns
  {:data   {:values rows}
   :repeat {:column cols-of-interest}
   :spec   {:mark     "bar"
            :encoding encoding}})
(-> norm-dat
    ds/info)

_unnamed: descriptive-stats [3 12]:

:col-name :datatype :n-valid :n-missing :min :mean :mode :max :standard-deviation :skew :first :last
:x1 :float64 200 0 -3.52032814 3.15359995 8.96857501 1.98163550 -0.22145647 3.581 5.726
:x2 :float64 200 0 -12.27623596 -0.44328147 13.93989001 4.29960213 0.13687752 1.448 -3.545
:group :string 200 0 normal normal normal

3.2 Group: gamma

(def gamma-dat
  "Rows of `data` whose `:group` value equals \"gamma\"."
  (-> data
      (ds/select-rows (fn [row]
                        (= "gamma" (:group row))))))
^kind/vega
(let [;; rows of the gamma group as a sequence of maps, embedded inline in the spec
      rows     (ds/rows gamma-dat :as-maps)
      ;; x channel: the currently repeated column, binned
      x-enc    {:field {:repeat "column"}
                :bin   {:steps [1 3]}
                :type  "quantitative"}
      encoding {:x     x-enc
                :y     {:aggregate "count"}
                :color {:field :group}}]
  ;; one bar chart per entry of cols-of-interest, laid out as columns
  {:data   {:values rows}
   :repeat {:column cols-of-interest}
   :spec   {:mark     "bar"
            :encoding encoding}})
(-> gamma-dat
    ds/info)

_unnamed: descriptive-stats [3 12]:

:col-name :datatype :n-valid :n-missing :min :mean :mode :max :standard-deviation :skew :first :last
:x1 :float64 200 0 2.46503412 7.06417638 12.26392965 1.85266087 0.24642603 2.465 5.841
:x2 :float64 200 0 1.95034441 7.50807335 19.84870098 3.48813161 0.84906043 8.546 19.85
:group :string 200 0 gamma gamma gamma

3.3 Group: log-normal

(def log-normal-dat
  "Rows of `data` whose `:group` value equals \"log-normal\"."
  (-> data
      (ds/select-rows (fn [row]
                        (= "log-normal" (:group row))))))
^kind/vega
(let [;; rows of the log-normal group as a sequence of maps, embedded inline in the spec
      rows     (ds/rows log-normal-dat :as-maps)
      ;; x channel: the currently repeated column, binned
      x-enc    {:field {:repeat "column"}
                :bin   {:steps [1 3]}
                :type  "quantitative"}
      encoding {:x     x-enc
                :y     {:aggregate "count"}
                :color {:field :group}}]
  ;; one bar chart per entry of cols-of-interest, laid out as columns
  {:data   {:values rows}
   :repeat {:column cols-of-interest}
   :spec   {:mark     "bar"
            :encoding encoding}})
(-> log-normal-dat
    ds/info)

_unnamed: descriptive-stats [3 12]:

:col-name :datatype :n-valid :n-missing :min :mean :mode :max :standard-deviation :skew :first :last
:x1 :float64 200 0 -5.96223122 -0.98826246 4.59510840 1.99868266 0.16848382 -3.653 -3.305
:x2 :float64 200 0 -30.50059718 -6.18813429 -1.58353073 3.23981886 -2.81709917 -5.231 -4.002
:group :string 200 0 log-normal log-normal log-normal

3.4 Full data

(hanami/plot data
             ht/point-chart
             ;; x1 against x2, coloured by the group column
             {:X     "x1"
              :Y     "x2"
              :COLOR "group"})
^kind/vega
(let [;; every row of the full dataset as a sequence of maps, embedded inline in the spec
      rows     (ds/rows data :as-maps)
      ;; x channel: the currently repeated column, binned
      x-enc    {:field {:repeat "column"}
                :bin   {:steps [1 3]}
                :type  "quantitative"}
      encoding {:x     x-enc
                :y     {:aggregate "count"}
                :color {:field :group}}]
  ;; one bar chart per entry of cols-of-interest, laid out as columns
  {:data   {:values rows}
   :repeat {:column cols-of-interest}
   :spec   {:mark     "bar"
            :encoding encoding}})
(comment
  ;; Scratch helper (not evaluated): largest element of `dist` minus its smallest.
  (defn dist-range [dist]
    (let [hi (apply max dist)
          lo (apply min dist)]
      (- hi lo))))
(comment
  ;; Scratch forms, not evaluated on load.
  ;; Original note: these work when evaluated live, but won't render.

  ;; side-by-side 20-bin histograms of :x1 and :x2, one pair per group
  (-> norm-dat
      (hanami/hconcat {}
                      [(hanami/histogram norm-dat :x1 {:nbins 20})
                       (hanami/histogram norm-dat :x2 {:nbins 20})]))

  (-> gamma-dat
      (hanami/hconcat {}
                      [(hanami/histogram gamma-dat :x1 {:nbins 20})
                       (hanami/histogram gamma-dat :x2 {:nbins 20})]))

  (-> log-normal-dat
      (hanami/hconcat {}
                      [(hanami/histogram log-normal-dat :x1 {:nbins 20})
                       (hanami/histogram log-normal-dat :x2 {:nbins 20})]))

  ;; single bar chart of binned :x1 counts over the full dataset, coloured by :group
  ^kind/vega
  (let [rows  (ds/rows data :as-maps)
        x-enc {:field :x1
               :bin   {:steps [2 3]}
               :type  "quantitative"}]
    {:data     {:values rows}
     :mark     "bar"
     :encoding {:x     x-enc
                :y     {:aggregate "count"}
                :color {:field :group}}}))
source: src/assignment/visualize_data.clj