Skip to content

Commit 224fafe

Browse files
fangq and claude committed
[jdata4] add JData Draft 4 features and fix cross-platform prefix detection
New features in jdataencode.m:
- _ArrayShape_ "range" encoding for uniform vectors (formatversion>=4)
- _ArrayShape_ "zero"/"identity" for zero/scaled-identity matrices
- _ArrayFillValue_ support
- _ArrayCoords_/_ArrayUnits_ annotation via jdict attributes
- _TableIndex_/_TableSortOrder_ for table encoding
- _DataSchema_ emission from jdict.schema (formatversion>=4)
- _EnumKey_/_EnumValue_/_EnumOrdered_ for categorical arrays
- _ArrayChunks_ chunked compression (formatversion>=4)
- float32/float64 ArrayType aliases in jdatadecode

Bug fixes:
- jdataencode: use getschema() instead of .schema to bypass jdict subsref
- jdataencode: allow range/fast-path for small arrays when compression is set
- jdataencode: transpose categories() column to row for flat JSON array
- jdatadecode: split chained (:)' subscript to fix MATLAB<R2019b syntax error
- jdatadecode: preserve char enum keys without num2str conversion
- jdatadecode: auto-detect x0x5F_ vs _ field prefix from struct field names
  (fixes cross-platform decode of MATLAB-created structs in Octave)

Tests:
- add jdata4 test section in run_jsonlab_test.m covering all Draft 4 features
- all tests pass in both MATLAB R2024b and Octave

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 7b38ce6 commit 224fafe

3 files changed

Lines changed: 649 additions & 44 deletions

File tree

jdatadecode.m

Lines changed: 197 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,11 @@
66
% (accepts JData objects loaded from either loadjson/loadubjson or
77
% jsondecode for MATLAB R2016b or later)
88
%
9-
% This function implements the JData Specification Draft 3 (Jun. 2020)
9+
% This function implements the JData Specification Draft 3 (Jun. 2020) and
10+
% select keywords from Draft 4 (Apr. 2026): _ArrayShape_ types range/identity/zero/
11+
% circulant/hankel, _ArrayFillValue_, _ArrayCoords_, _ArrayUnits_, _TableIndex_,
12+
% _TableSortOrder_, _DataSchema_, _EnumKey_/_EnumValue_/_EnumOrdered_,
13+
% _ArrayChunks_, and ArrayType aliases float16/32/64.
1014
% see http://github.com/NeuroJSON/jdata for details
1115
%
1216
% authors:Qianqian Fang (q.fang <at> neu.edu)
@@ -85,6 +89,23 @@
8589
else
8690
prefix = jsonopt('Prefix', 'x0x5F', opt);
8791
end
92+
93+
% Auto-detect prefix from struct field names when the user has not explicitly
94+
% specified one. This handles cross-platform structs, e.g. a struct built with
95+
% x0x5F_-prefixed names in MATLAB and then decoded by Octave's jdatadecode
96+
% (or the reverse direction).
97+
if ~isfield(opt, 'Prefix') && ~isfield(opt, 'prefix') && isstruct(data) && numel(data) >= 1
98+
fnames = fieldnames(data(1));
99+
if ~isempty(fnames)
100+
has_x0x5F = any(cellfun(@(f) numel(f) > 6 && strncmp(f, 'x0x5F_', 6) && f(end) == '_', fnames));
101+
has_empty = any(cellfun(@(f) numel(f) > 2 && f(1) == '_' && f(end) == '_', fnames));
102+
if has_x0x5F
103+
prefix = 'x0x5F';
104+
elseif has_empty
105+
prefix = '';
106+
end
107+
end
108+
end
88109
opt.prefix_ = prefix;
89110

90111
% Pre-compute all prefixed field names
@@ -110,6 +131,16 @@
110131
opt.N_ByteStream_ = [prefix '_ByteStream_'];
111132
opt.N_DataInfo_ = [prefix '_DataInfo_'];
112133
opt.N_DataLink_ = [prefix '_DataLink_'];
134+
opt.N_ArrayCoords_ = [prefix '_ArrayCoords_'];
135+
opt.N_ArrayUnits_ = [prefix '_ArrayUnits_'];
136+
opt.N_ArrayFillValue_ = [prefix '_ArrayFillValue_'];
137+
opt.N_ArrayChunks_ = [prefix '_ArrayChunks_'];
138+
opt.N_TableIndex_ = [prefix '_TableIndex_'];
139+
opt.N_TableSortOrder_ = [prefix '_TableSortOrder_'];
140+
opt.N_DataSchema_ = [prefix '_DataSchema_'];
141+
opt.N_EnumKey_ = [prefix '_EnumKey_'];
142+
opt.N_EnumValue_ = [prefix '_EnumValue_'];
143+
opt.N_EnumOrdered_ = [prefix '_EnumOrdered_'];
113144
end
114145

115146
%% process non-structure inputs
@@ -149,6 +180,16 @@
149180
opt.N_ArrayShape_ = 'x_ArrayShape_';
150181
opt.N_ArrayOrder_ = 'x_ArrayOrder_';
151182
opt.N_ArrayLabel_ = 'x_ArrayLabel_';
183+
opt.N_ArrayCoords_ = 'x_ArrayCoords_';
184+
opt.N_ArrayUnits_ = 'x_ArrayUnits_';
185+
opt.N_ArrayFillValue_ = 'x_ArrayFillValue_';
186+
opt.N_ArrayChunks_ = 'x_ArrayChunks_';
187+
opt.N_TableIndex_ = 'x_TableIndex_';
188+
opt.N_TableSortOrder_ = 'x_TableSortOrder_';
189+
opt.N_DataSchema_ = 'x_DataSchema_';
190+
opt.N_EnumKey_ = 'x_EnumKey_';
191+
opt.N_EnumValue_ = 'x_EnumValue_';
192+
opt.N_EnumOrdered_ = 'x_EnumOrdered_';
152193
end
153194

154195
%% recursively process subfields
@@ -176,6 +217,12 @@
176217
N_ArrayShape = opt.N_ArrayShape_;
177218
N_ArrayOrder = opt.N_ArrayOrder_;
178219
N_ArrayLabel = opt.N_ArrayLabel_;
220+
N_ArrayFillValue = opt.N_ArrayFillValue_;
221+
N_ArrayCoords = opt.N_ArrayCoords_;
222+
N_ArrayUnits = opt.N_ArrayUnits_;
223+
N_ArrayChunks = opt.N_ArrayChunks_;
224+
N_TableIndex = opt.N_TableIndex_;
225+
N_TableSortOrder = opt.N_TableSortOrder_;
179226

180227
%% handle array data
181228
if (isfield(data, N_ArrayType) && (isfield(data, N_ArrayData) || (isfield(data, N_ArrayZipData) && ~isstruct(data.(N_ArrayZipData)))))
@@ -201,13 +248,35 @@
201248
else
202249
decompfun = str2func([zipmethod 'decode']);
203250
end
204-
arraytype = data(j).(N_ArrayType);
251+
arraytype = char(data(j).(N_ArrayType));
252+
switch arraytype % normalize float16/32/64 aliases (JData spec)
253+
case 'float16', arraytype = 'half';
254+
case 'float32', arraytype = 'single';
255+
case 'float64', arraytype = 'double';
256+
end
205257
chartype = 0;
206258
if (strcmp(arraytype, 'char') || strcmp(arraytype, 'logical'))
207259
chartype = 1;
208260
arraytype = 'uint8';
209261
end
210-
if (needbase64 && strcmp(zipmethod, 'base64') == 0)
262+
if (isfield(data, N_ArrayChunks) && iscell(data(j).(N_ArrayZipData)))
263+
% Chunked decode: reassemble all chunk payloads into flat buffer
264+
chunks_cell = data(j).(N_ArrayZipData);
265+
nchunks = numel(chunks_cell);
266+
allbytes = uint8([]);
267+
for ci = 1:nchunks
268+
chunkblob = uint8(chunks_cell{ci}(:)');
269+
if (needbase64 && ~strcmp(zipmethod, 'base64'))
270+
chunkblob = base64decode(chunkblob);
271+
end
272+
tmpchunk = decompfun(chunkblob, decodeparam{:});
273+
allbytes = [allbytes, tmpchunk(:)'];
274+
end
275+
ndata = typecast(allbytes, arraytype);
276+
% Update ZipSize so the downstream reshape block is a no-op
277+
data(j).(N_ArrayZipSize) = [1, numel(ndata)];
278+
dims = data(j).(N_ArrayZipSize);
279+
elseif (needbase64 && strcmp(zipmethod, 'base64') == 0)
211280
ndata = reshape(typecast(decompfun(base64decode(data(j).(N_ArrayZipData)), decodeparam{:}), arraytype), dims);
212281
else
213282
ndata = reshape(typecast(decompfun(data(j).(N_ArrayZipData), decodeparam{:}), arraytype), dims);
@@ -228,7 +297,13 @@
228297
if (iscell(data(j).(N_ArrayData)))
229298
data(j).(N_ArrayData) = cell2mat(cellfun(@(x) double(x(:)), data(j).(N_ArrayData), 'uniformoutput', 0)).';
230299
end
231-
ndata = cast(data(j).(N_ArrayData), char(data(j).(N_ArrayType)));
300+
arrtype = char(data(j).(N_ArrayType));
301+
switch arrtype % normalize float16/32/64 aliases (JData spec)
302+
case 'float16', arrtype = 'half';
303+
case 'float32', arrtype = 'single';
304+
case 'float64', arrtype = 'double';
305+
end
306+
ndata = cast(data(j).(N_ArrayData), arrtype);
232307
end
233308
if (isfield(data, N_ArrayZipSize))
234309
if (isstruct(data(j).(N_ArrayZipSize)))
@@ -337,6 +412,12 @@
337412
shapeid = {shapeid};
338413
end
339414
arraydata = double(arraydata).';
415+
arraytypestr = char(data(j).(N_ArrayType));
416+
switch arraytypestr % normalize float16/32/64 aliases (JData spec)
417+
case 'float16', arraytypestr = 'half';
418+
case 'float32', arraytypestr = 'single';
419+
case 'float64', arraytypestr = 'double';
420+
end
340421
if (strcmpi(shapeid{1}, 'diag'))
341422
ndata = spdiags(arraydata(:), 0, arraysize(1), arraysize(2));
342423
elseif (strcmpi(shapeid{1}, 'upper') || strcmpi(shapeid{1}, 'uppersymm'))
@@ -381,9 +462,44 @@
381462
elseif (strcmpi(shapeid{1}, 'toeplitz'))
382463
arraydata = reshape(arraydata, flipud(datasize(:))');
383464
ndata = toeplitz(arraydata(1:arraysize(1), 2), arraydata(1:arraysize(2), 1));
465+
elseif (strcmpi(shapeid{1}, 'identity'))
466+
ndata = cast(double(arraydata(1)) * eye(arraysize(1)), arraytypestr);
467+
elseif (strcmpi(shapeid{1}, 'zero'))
468+
ndata = cast(zeros(arraysize), arraytypestr);
469+
elseif (strcmpi(shapeid{1}, 'circulant'))
470+
c = arraydata(:);
471+
n = length(c);
472+
if (n > 1)
473+
colidx = [1; (n:-1:2)']; % first-column index: [1, n, n-1, ..., 2]
474+
ndata = cast(toeplitz(c(colidx), c.'), arraytypestr);
475+
else
476+
ndata = cast(c(1), arraytypestr);
477+
end
478+
elseif (strcmpi(shapeid{1}, 'hankel'))
479+
m = arraysize(1);
480+
n = arraysize(2);
481+
first_row = arraydata(1:n, 1).';
482+
last_col = arraydata(1:m, 2).';
483+
h = [first_row, last_col(2:end)];
484+
hc = h(1:m);
485+
hr = h(m:m + n - 1);
486+
ndata = cast(hankel(hc, hr), arraytypestr);
487+
elseif (strcmpi(shapeid{1}, 'range'))
488+
if (size(arraydata, 2) == 1)
489+
% 1-D range: _ArrayData_ = [start; end] (2x1 after .')
490+
ndata = cast(reshape(linspace(double(arraydata(1)), double(arraydata(2)), prod(arraysize)), arraysize), arraytypestr);
491+
else
492+
% N-D separable grid: _ArrayData_ is 2 x ndim after .'
493+
ndim = length(arraysize);
494+
axes = cell(1, ndim);
495+
for idim = 1:ndim
496+
axes{idim} = cast(linspace(double(arraydata(1, idim)), double(arraydata(2, idim)), arraysize(idim)), arraytypestr);
497+
end
498+
ndata = axes;
499+
end
384500
end
385501
if (opt.fullarrayshape_ && issparse(ndata))
386-
ndata = cast(full(ndata), data(j).(N_ArrayType));
502+
ndata = cast(full(ndata), arraytypestr);
387503
end
388504
elseif (isfield(data, N_ArraySize))
389505
if (isstruct(data(j).(N_ArraySize)))
@@ -407,6 +523,12 @@
407523
ndata = permute(ndata, ndims(ndata):-1:1);
408524
end
409525
end
526+
if (isfield(data, N_ArrayFillValue) && ~isempty(data(j).(N_ArrayFillValue)) && isnumeric(ndata))
527+
fillval = double(data(j).(N_ArrayFillValue));
528+
if (isfloat(ndata))
529+
ndata(ndata == fillval) = NaN;
530+
end
531+
end
410532
newdata{j} = ndata;
411533
end
412534
if (len == 1)
@@ -416,6 +538,17 @@
416538
newdata = jdict(newdata);
417539
newdata.setattr('dims', data(j).(N_ArrayLabel));
418540
end
541+
if (isfield(data, N_ArrayCoords) || isfield(data, N_ArrayUnits))
542+
if (~isa(newdata, 'jdict'))
543+
newdata = jdict(newdata);
544+
end
545+
if (isfield(data, N_ArrayCoords))
546+
newdata.setattr('coords', data(j).(N_ArrayCoords));
547+
end
548+
if (isfield(data, N_ArrayUnits))
549+
newdata.setattr('units', data(j).(N_ArrayUnits));
550+
end
551+
end
419552
end
420553

421554
%% handle table data
@@ -449,6 +582,18 @@
449582
if (isfield(data(j), N_TableCols) && ~isempty(data(j).(N_TableCols)))
450583
newdata{j}.Properties.VariableNames = data(j).(N_TableCols);
451584
end
585+
if (isfield(data(j), N_TableIndex) && ~isempty(data(j).(N_TableIndex)))
586+
if (isempty(newdata{j}.Properties.UserData))
587+
newdata{j}.Properties.UserData = struct;
588+
end
589+
newdata{j}.Properties.UserData.TableIndex = data(j).(N_TableIndex);
590+
end
591+
if (isfield(data(j), N_TableSortOrder) && ~isempty(data(j).(N_TableSortOrder)))
592+
if (isempty(newdata{j}.Properties.UserData))
593+
newdata{j}.Properties.UserData = struct;
594+
end
595+
newdata{j}.Properties.UserData.TableSortOrder = data(j).(N_TableSortOrder);
596+
end
452597
end
453598
if (len == 1)
454599
newdata = newdata{1};
@@ -474,6 +619,44 @@
474619
end
475620
end
476621

622+
%% handle enum data (_EnumKey_ / _EnumValue_ / _EnumOrdered_)
623+
N_EnumKey = opt.N_EnumKey_;
624+
N_EnumValue = opt.N_EnumValue_;
625+
if (isfield(data, N_EnumKey) && isfield(data, N_EnumValue))
626+
newdata = cell(len, 1);
627+
N_EnumOrdered = opt.N_EnumOrdered_;
628+
for j = 1:len
629+
enumkeys = data(j).(N_EnumKey);
630+
if (~iscell(enumkeys))
631+
enumkeys = num2cell(enumkeys);
632+
end
633+
% ensure all keys are char strings (categorical requires string categories)
634+
for ki = 1:numel(enumkeys)
635+
ek = enumkeys{ki};
636+
if (iscell(ek) && numel(ek) == 1)
637+
ek = ek{1};
638+
end
639+
if (ischar(ek) || isa(ek, 'string'))
640+
enumkeys{ki} = char(ek);
641+
else
642+
enumkeys{ki} = num2str(ek);
643+
end
644+
end
645+
enumvals = double(data(j).(N_EnumValue));
646+
origsize = size(enumvals);
647+
valueset = 1:numel(enumkeys);
648+
isordered = isfield(data, N_EnumOrdered) && data(j).(N_EnumOrdered);
649+
if (isordered)
650+
newdata{j} = reshape(categorical(enumvals(:)', valueset, enumkeys, 'Ordinal', true), origsize);
651+
else
652+
newdata{j} = reshape(categorical(enumvals(:)', valueset, enumkeys), origsize);
653+
end
654+
end
655+
if (len == 1)
656+
newdata = newdata{1};
657+
end
658+
end
659+
477660
%% handle graph data
478661
N_GraphNodes = opt.N_GraphNodes_;
479662
if (isfield(data, N_GraphNodes) && exist('graph', 'file') && exist('digraph', 'file'))
@@ -570,4 +753,13 @@
570753
end
571754
end
572755

756+
%% handle data schema
757+
N_DataSchema = opt.N_DataSchema_;
758+
if (isfield(data, N_DataSchema) && ~isempty(data.(N_DataSchema)))
759+
if (~isa(newdata, 'jdict'))
760+
newdata = jdict(newdata);
761+
end
762+
newdata.setschema(data.(N_DataSchema));
763+
end
764+
573765
end

0 commit comments

Comments
 (0)