Skip to content

Commit

Permalink
Merge branch 'dev' into devPaso
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Nov 20, 2024
2 parents 92a782a + 9ce30fc commit 2ac77d3
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 149 deletions.
1 change: 1 addition & 0 deletions .JuliaFormatter.toml
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
style = "sciml"
trailing_comma = true
2 changes: 1 addition & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
freebsd_instance:
image_family: freebsd-14-0
image_family: freebsd-14-1
task:
name: FreeBSD
artifacts_cache:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Julia 1.9.0
- name: Set up Julia 1.9
uses: julia-actions/setup-julia@v1
with:
version: "1.9.0"
version: "1.9"
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
<div align="center"><a href="https://github.com/aclai-lab/Sole.jl"><img src="logo.png" alt="" title="This package is part of Sole.jl" width="200"></a></div>

# SoleBase.jl

<!-- - Base layer for [*Sole.jl*](https://github.com/aclai-lab/Sole.jl) -->

[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://aclai-lab.github.io/SoleBase.jl/)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://aclai-lab.github.io/SoleBase.jl/dev)
[![Build Status](https://api.cirrus-ci.com/github/aclai-lab/SoleBase.jl.svg?branch=main)](https://cirrus-ci.com/github/aclai-lab/SoleBase.jl)
[![Coverage](https://codecov.io/gh/aclai-lab/SoleBase.jl/branch/main/graph/badge.svg?token=LT9IYIYNFI)](https://codecov.io/gh/aclai-lab/SoleBase.jl)

<!-- [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle) -->

This package lays the foundations for [*Sole.jl*](https://github.com/aclai-lab/Sole.jl), an open-source framework for *symbolic machine learning*.
This package lays the foundations for [_Sole.jl_](https://github.com/aclai-lab/Sole.jl), an open-source framework for _symbolic machine learning_.

## About

Expand Down
39 changes: 22 additions & 17 deletions src/SoleBase.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

module SoleBase


export AbstractDataset
export humansize
export ninstances
Expand All @@ -16,11 +15,19 @@ export moving_window, movingwindow, movingaverage
Abstract supertype for all datasets.
# Interface
- [`concatdatasets`](@ref)
- [`instances`](@ref)
- [`ninstances`](@ref)
- [`eachinstance`](@ref)
# Utility functions
- [`slicedataset`](@ref)
See also [`ninstances`](@ref).
"""
abstract type AbstractDataset end


_doc_slicedataset = """
slicedataset(
dataset::D,
Expand All @@ -46,11 +53,11 @@ provide the following method:

"""$(_doc_slicedataset)"""
function slicedataset(
dataset::D,
dataset_slice::Union{Colon,Integer,AbstractVector,Tuple};
allow_no_instances = false,
return_view = false,
kwargs...,
dataset::D,
dataset_slice::Union{Colon, Integer, AbstractVector, Tuple};
allow_no_instances = false,
return_view = false,
kwargs...,
) where {D}
if dataset_slice isa Colon
return deepcopy(dataset)
Expand All @@ -60,8 +67,8 @@ function slicedataset(
return error("Cannot slice dataset with slice of type $(eltype(dataset_slice))")
end
if !(allow_no_instances ||
(!(dataset_slice isa Union{AbstractVector{<:Integer},Tuple{<:Integer}}) ||
length(dataset_slice) > 0))
(!(dataset_slice isa Union{AbstractVector{<:Integer}, Tuple{<:Integer}}) ||
length(dataset_slice) > 0))
return error("Cannot apply empty slice to dataset.")
end
return instances(dataset, dataset_slice, Val(return_view); kwargs...)
Expand All @@ -70,21 +77,20 @@ end

# Fallback method for `concatdatasets`: it performs no concatenation and always throws,
# reporting the (joined) type of the datasets it was called with.
#
# The complete message, trailing period included, is assembled *inside* the call to
# `error`. Previously the `* "."` was placed after the closing parenthesis of `error(...)`,
# so it was never evaluated (the call throws first) and the period was lost.
function concatdatasets(datasets::D...) where {D}
    return error("`concatdatasets` method not implemented for type " *
                 string(typejoin(typeof.(datasets)...)) * ".")
end

"""$(_doc_slicedataset)"""
function instances(
        dataset::D,
        inds::AbstractVector,
        return_view::Union{Val{true}, Val{false}};
        kwargs...,
) where {D}
    # Fallback method: it never returns instances and always throws, naming the type of
    # `dataset`. `inds`, `return_view` and `kwargs` are accepted but not inspected here.
    #
    # The whole message, trailing period included, is built before `error` is called.
    # Previously the `* "."` was outside the call's parentheses, so it was never reached
    # and the reported message lacked its final period.
    return error("`instances` method not implemented for type " *
                 string(typeof(dataset)) * ".")
end


# -------------------------------------------------------------
# AbstractDataset - ninstances

Expand Down Expand Up @@ -128,5 +134,4 @@ include("movingwindow.jl")

@deprecate moving_window(args...; kwargs...) movingwindow(args...; kwargs...)


end
52 changes: 26 additions & 26 deletions src/machine-learning-utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,21 @@ Types for supervised machine learning labels (classification and regression).
"""$(doc_supervised_ml)"""
const CLabel = Union{AbstractString, CategoricalValue, Integer} # TODO: it is improper to treat `Integer`s as categorical labels. They should actually be `RLabel`s, and `CategoricalValue`s should be used instead. However, at the moment some algorithms still rely on `Int` labels being interpreted as indices into a vector of class names...
"""$(doc_supervised_ml)"""
const RLabel = AbstractFloat # Real
const RLabel = AbstractFloat # TODO: Real
"""$(doc_supervised_ml)"""
const Label = Union{CLabel,RLabel}
const Label = Union{CLabel, RLabel}

# Raw labels
const _CLabel = Integer # (classification labels are internally represented as integers)
const _Label = Union{_CLabel,RLabel}
const _Label = Union{_CLabel, RLabel}

############################################################################################

# Convert a list of labels to categorical form
Base.@propagate_inbounds @inline function get_categorical_form(Y::AbstractVector)
class_names = unique(Y)

dict = Dict{eltype(Y),Int64}()
dict = Dict{eltype(Y), Int64}()
@simd for i in 1:length(class_names)
@inbounds dict[class_names[i]] = i
end
Expand Down Expand Up @@ -62,16 +62,16 @@ See also
[`Label`](@ref).
"""
function bestguess(
        labels::AbstractVector{<:Label},
        weights::Union{Nothing, AbstractVector} = nothing;
        suppress_parity_warning = false,
)
    # Generic method with no logic of its own: none of the arguments is used and the
    # result is always `nothing`.
    return nothing
end

# Classification: (weighted) majority vote
function bestguess(
labels::AbstractVector{<:CLabel},
weights::Union{Nothing,AbstractVector} = nothing;
suppress_parity_warning = false,
labels::AbstractVector{<:CLabel},
weights::Union{Nothing, AbstractVector} = nothing;
suppress_parity_warning = false,
)
if length(labels) == 0
return nothing
Expand All @@ -82,33 +82,33 @@ function bestguess(
# return StatsBase.mode(labels) ..?
countmap(labels)
else
@assert length(labels) === length(weights) "Cannot compute " *
"best guess with mismatching number of votes " *
"$(length(labels)) and weights $(length(weights))."
@assert length(labels)===length(weights) "Cannot compute "*
"best guess with mismatching number of votes "*
"$(length(labels)) and weights $(length(weights))."
countmap(labels, weights)
end
end

if !suppress_parity_warning && sum(counts[argmax(counts)] .== values(counts)) > 1
@warn "Parity encountered in bestguess! " *
"counts ($(length(labels)) elements): $(counts), " *
"argmax: $(argmax(counts)), " *
"max: $(counts[argmax(counts)]) (sum = $(sum(values(counts))))"
"counts ($(length(labels)) elements): $(counts), " *
"argmax: $(argmax(counts)), " *
"max: $(counts[argmax(counts)]) (sum = $(sum(values(counts))))"
end
argmax(counts)
end

# Regression: (weighted) mean (or other central tendency measure?)
function bestguess(
        labels::AbstractVector{<:RLabel},
        weights::Union{Nothing, AbstractVector} = nothing;
        suppress_parity_warning = false,
)
    # Nothing to aggregate: there is no guess to return.
    length(labels) == 0 && return nothing

    # Unweighted case: plain mean of the labels.
    if isnothing(weights)
        return StatsBase.mean(labels)
    end

    # Weighted case: weighted sum of the labels divided by the total weight.
    # (`suppress_parity_warning` is accepted but not used by this method.)
    weighted_total = sum(labels .* weights)
    return weighted_total / sum(weights)
end

############################################################################################
Expand All @@ -130,22 +130,22 @@ default_weights(Y::AbstractVector) = default_weights(length(Y))
Return a class-rebalancing weight vector, given a label vector `Y`.
"""
function balanced_weights(Y::AbstractVector{L}) where {L <: CLabel}
    counts_by_class = countmap(Y)

    # Every class occurs the same number of times: fall back to the default weights.
    if length(unique(values(counts_by_class))) == 1
        return default_weights(length(Y))
    end

    # Otherwise give each class a weight inversely proportional to its count, so that
    # every class carries the same total weight.
    n_total = sum(values(counts_by_class))
    target_per_class = n_total / length(counts_by_class)

    class_weight = Dict{L, Float64}()
    for (class, n_instances) in counts_by_class
        class_weight[class] = target_per_class / n_instances
    end

    # One weight per instance, normalized so that the weights sum to one.
    unnormalized = [class_weight[y] for y in Y]
    return unnormalized ./ sum(unnormalized)
end

# Slice a weight container `W`, either by a vector of indices or by a single index.
# For `Ones{Int64}` weights `W` itself is never indexed: the vector case delegates to
# `default_weights(length(inds))` and the scalar case returns the literal `1`.
function slice_weights(W::Ones{Int64}, inds::AbstractVector)
    return default_weights(length(inds))
end

function slice_weights(W::Any, inds::AbstractVector)
    # Non-copying slice; `view(W, inds)` is what `@view W[inds]` expands to.
    return view(W, inds)
end

function slice_weights(W::Ones{Int64}, i::Integer)
    return 1
end

function slice_weights(W::Any, i::Integer)
    return W[i]
end
Loading

0 comments on commit 2ac77d3

Please sign in to comment.