Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions src/ARFFFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ end
const CatVal = eltype(CategoricalVector{String,UInt32}(undef, 0))
const CatVec = typeof(CategoricalVector{String,UInt32}(undef, 0))
const CatMissVec = typeof(CategoricalVector{Union{Missing,String},UInt32}(undef, 0))
const CatPool = typeof(CategoricalVector{String,UInt32}(undef, 0).pool)
const CatPool = CategoricalPool{String,UInt32}

"""
ARFFReader
Expand Down Expand Up @@ -1016,11 +1016,10 @@ end
push!(col, str)
elseif kind == :C || kind == :CX
str = Parsing.get_parsed_string(chunk, res)
pool = info
if haskey(pool.invindex, str)
push!(col.refs, get(pool, str))
if str in levels(col)
push!(col, str)
else
error("Invalid nominal $(repr(str)) in column '$(r.colnames[i])' of row $nrows, expecting one of $(join(map(repr, pool.levels), ", ", " or "))")
error("Invalid nominal $(repr(str)) in column '$(r.colnames[i])' of row $nrows, expecting one of $(join(map(repr, levels(col)), ", ", " or ")))")
end
elseif kind == :R || kind == :RX
str = Parsing.get_parsed_string(chunk, res)
Expand All @@ -1044,7 +1043,7 @@ function _zero(::AbstractVector{<:Union{<:AbstractString,Missing}}, r, i, nrows)
""
end
function _zero(::CategoricalVector, r, i, nrows)
r.pools[r.colkindidxs[i]][1]
levels(r.pools[r.colkindidxs[i]])[1]
end
@inline function _readcolumns_pushzero(r, i, col, nrows, avail)
n = length(col)
Expand Down Expand Up @@ -1111,7 +1110,7 @@ write_datum(io::IO, x::Integer) = write_datum(io, convert(BigInt, x))
write_datum(io::IO, x::Real) = write_datum(io, convert(BigFloat, x))
write_datum(io::IO, x::DateTime) = write_datum(io, Dates.format(x, dateformat"YYYY-mm-dd\THH:MM:SS.sss"))
write_datum(io::IO, x::Date) = write_datum(io, DateTime(x))
write_datum(io::IO, x::CategoricalValue{<:AbstractString}) = write_datum(io, x.pool.levels[x.ref])
write_datum(io::IO, x::CategoricalValue{<:AbstractString}) = write_datum(io, String(x))
write_datum(io::IO, ::Missing) = write(io, "?")

@generated function write_data(io::IO, rows, ::Val{N}) where {N}
Expand Down Expand Up @@ -1170,17 +1169,17 @@ function save(io::IO, df;
println(io, "DATE \"yyyy-MM-dd'T'HH:mm:ss.SSS\"")
elseif type <: Union{<:CategoricalValue{<:AbstractString},Missing}
# find the levels of the first non-missing entry
levels = nothing
collevels = nothing
for x in Tables.getcolumn(Tables.columns(df), name)
if x !== missing
levels = x.pool.levels
collevels = levels(x)
break
end
end
if levels === nothing || isempty(levels)
if collevels === nothing || isempty(collevels)
println(io, "{}")
else
for (i, level) in enumerate(levels)
for (i, level) in enumerate(collevels)
print(io, i == 1 ? "{" : ",")
write_datum(io, level)
end
Expand Down
2 changes: 1 addition & 1 deletion test/load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ end
@test typeof(df[k]) == typeof(case.df[k])
@test isequal(df[k], case.df[k])
if case.df[k] isa CategoricalArray
@test df[k].pool.levels == case.df[k].pool.levels
@test levels(df[k]) == levels(case.df[k])
end
end
@test isequal(df, case.df)
Expand Down
Loading