Giter Site home page Giter Site logo

Comments (16)

rabernat avatar rabernat commented on June 27, 2024 2

I'm interested in this issue. To try to understand it better, I looked at the diff between the ncdump of Release3/nctiles_grid/GRID.0001.nc and Release4/nctiles_grid/ECCO-GRID_00.nc. Is this the right comparison to be making? The results are below

1c1
< netcdf ECCO-GRID_00 {
---
> netcdf GRID.0001 {
3,10c3,6
< 	k_p1 = 51 ;
< 	j_g = 90 ;
< 	i_g = 90 ;
< 	k = 50 ;
< 	j = 90 ;
< 	k_u = 50 ;
< 	i = 90 ;
< 	k_l = 50 ;
---
> 	itxt = 30 ;
> 	i1 = 50 ;
> 	i2 = 90 ;
> 	i3 = 90 ;
12,53c8,26
< 	int64 k_p1(k_p1) ;
< 		k_p1:long_name = "z-dimension of the w grid" ;
< 		k_p1:swap_dim = "Zp1" ;
< 		k_p1:c_grid_axis_shift = -0.5, 0.5 ;
< 		k_p1:axis = "Z" ;
< 	int64 j_g(j_g) ;
< 		j_g:long_name = "y-dimension of the v grid" ;
< 		j_g:swap_dim = "YG" ;
< 		j_g:c_grid_axis_shift = -0.5 ;
< 		j_g:axis = "Y" ;
< 	int64 i_g(i_g) ;
< 		i_g:long_name = "x-dimension of the u grid" ;
< 		i_g:swap_dim = "XG" ;
< 		i_g:c_grid_axis_shift = -0.5 ;
< 		i_g:axis = "X" ;
< 	int64 k(k) ;
< 		k:long_name = "z-dimension of the t grid" ;
< 		k:swap_dim = "Z" ;
< 		k:axis = "Z" ;
< 	int64 j(j) ;
< 		j:long_name = "y-dimension of the t grid" ;
< 		j:swap_dim = "YC" ;
< 		j:axis = "Y" ;
< 	int64 k_u(k_u) ;
< 		k_u:long_name = "z-dimension of the w grid" ;
< 		k_u:swap_dim = "Zu" ;
< 		k_u:c_grid_axis_shift = 0.5 ;
< 		k_u:axis = "Z" ;
< 	int64 i(i) ;
< 		i:long_name = "x-dimension of the t grid" ;
< 		i:swap_dim = "XC" ;
< 		i:axis = "X" ;
< 	int64 k_l(k_l) ;
< 		k_l:long_name = "z-dimension of the w grid" ;
< 		k_l:swap_dim = "Zl" ;
< 		k_l:c_grid_axis_shift = -0.5 ;
< 		k_l:axis = "Z" ;
< 	int64 tile ;
< 		tile:long_name = "index of llc grid tile" ;
< 	float XC(j, i) ;
< 		XC:coordinate = "YC XC" ;
< 		XC:units = "degrees_east" ;
---
> 	double i1(i1) ;
> 		i1:long_name = "array index 1" ;
> 		i1:units = "1" ;
> 	double i2(i2) ;
> 		i2:long_name = "array index 2" ;
> 		i2:units = "1" ;
> 	double i3(i3) ;
> 		i3:long_name = "array index 3" ;
> 		i3:units = "1" ;
> 	double hFacC(i1, i2, i3) ;
> 		hFacC:long_name = "fractional thickness" ;
> 		hFacC:units = "1" ;
> 	double hFacW(i1, i2, i3) ;
> 		hFacW:long_name = "fractional thickness" ;
> 		hFacW:units = "1" ;
> 	double hFacS(i1, i2, i3) ;
> 		hFacS:long_name = "fractional thickness" ;
> 		hFacS:units = "1" ;
> 	double XC(i2, i3) ;
55,57c28,29
< 	float YC(j, i) ;
< 		YC:coordinate = "YC XC" ;
< 		YC:units = "degrees_north" ;
---
> 		XC:units = "degrees_east" ;
> 	double YC(i2, i3) ;
59,61c31,32
< 	float XG(j_g, i_g) ;
< 		XG:coordinate = "YG XG" ;
< 		XG:units = "degrees_east" ;
---
> 		YC:units = "degrees_north" ;
> 	double XG(i2, i3) ;
63,64c34,35
< 	float YG(j_g, i_g) ;
< 		YG:units = "degrees_north" ;
---
> 		XG:units = "degrees_east" ;
> 	double YG(i2, i3) ;
66,104c37,57
< 		YG:coordinates = "YG XG" ;
< 	float CS(j, i) ;
< 		CS:coordinate = "YC XC" ;
< 		CS:units = " " ;
< 		CS:long_name = "AngleCS" ;
< 	float SN(j, i) ;
< 		SN:coordinate = "YC XC" ;
< 		SN:units = " " ;
< 		SN:long_name = "AngleSN" ;
< 	float Zl(k_l) ;
< 		Zl:units = "m" ;
< 		Zl:positive = "down" ;
< 		Zl:long_name = "vertical coordinate of upper cell interface" ;
< 	float Zu(k_u) ;
< 		Zu:units = "m" ;
< 		Zu:positive = "down" ;
< 		Zu:long_name = "vertical coordinate of lower cell interface" ;
< 	float Z(k) ;
< 		Z:units = "m" ;
< 		Z:positive = "down" ;
< 		Z:long_name = "vertical coordinate of cell center" ;
< 	float Zp1(k_p1) ;
< 		Zp1:units = "m" ;
< 		Zp1:positive = "down" ;
< 		Zp1:long_name = "vertical coordinate of cell interface" ;
< 	float dxC(j, i_g) ;
< 		dxC:coordinate = "YC XG" ;
< 		dxC:units = "m" ;
< 		dxC:long_name = "cell x size" ;
< 	float rAs(j_g, i) ;
< 		rAs:units = "m2" ;
< 		rAs:long_name = "cell area" ;
< 		rAs:coordinates = "YG XC" ;
< 	float rAw(j, i_g) ;
< 		rAw:coordinate = "YG XC" ;
< 		rAw:units = "m2" ;
< 		rAw:long_name = "cell area" ;
< 	float Depth(j, i) ;
< 		Depth:coordinate = "XC YC" ;
---
> 		YG:units = "degrees_north" ;
> 	double RAC(i2, i3) ;
> 		RAC:long_name = "grid cell area" ;
> 		RAC:units = "m^2" ;
> 	double RAZ(i2, i3) ;
> 		RAZ:long_name = "grid cell area" ;
> 		RAZ:units = "m^2" ;
> 	double DXC(i2, i3) ;
> 		DXC:long_name = "grid spacing" ;
> 		DXC:units = "m" ;
> 	double DYC(i2, i3) ;
> 		DYC:long_name = "grid spacing" ;
> 		DYC:units = "m" ;
> 	double DXG(i2, i3) ;
> 		DXG:long_name = "grid spacing" ;
> 		DXG:units = "m" ;
> 	double DYG(i2, i3) ;
> 		DYG:long_name = "grid spacing" ;
> 		DYG:units = "m" ;
> 	double Depth(i2, i3) ;
> 		Depth:long_name = "sea floor depth" ;
106,153c59,76
< 		Depth:long_name = "ocean depth" ;
< 	float rA(j, i) ;
< 		rA:coordinate = "YC XC" ;
< 		rA:units = "m2" ;
< 		rA:long_name = "cell area" ;
< 	float dxG(j_g, i) ;
< 		dxG:coordinate = "YG XC" ;
< 		dxG:units = "m" ;
< 		dxG:long_name = "cell x size" ;
< 	float dyG(j, i_g) ;
< 		dyG:coordinate = "YC XG" ;
< 		dyG:units = "m" ;
< 		dyG:long_name = "cell y size" ;
< 	float rAz(j_g, i_g) ;
< 		rAz:coordinate = "YG XG" ;
< 		rAz:units = "m" ;
< 		rAz:long_name = "cell area" ;
< 	float dyC(j_g, i) ;
< 		dyC:coordinate = "YG XC" ;
< 		dyC:units = "m" ;
< 		dyC:long_name = "cell y size" ;
< 	float PHrefC(k) ;
< 		PHrefC:units = "m2 s-2" ;
< 		PHrefC:long_name = "Reference Hydrostatic Pressure" ;
< 	float drC(k_p1) ;
< 		drC:units = "m" ;
< 		drC:long_name = "cell z size" ;
< 	float PHrefF(k_p1) ;
< 		PHrefF:units = "m2 s-2" ;
< 		PHrefF:long_name = "Reference Hydrostatic Pressure" ;
< 	float drF(k) ;
< 		drF:units = "m" ;
< 		drF:long_name = "cell z size" ;
< 	float hFacS(k, j_g, i) ;
< 		hFacS:long_name = "vertical fraction of open cell" ;
< 	float hFacC(k, j, i) ;
< 		hFacC:long_name = "vertical fraction of open cell" ;
< 	float hFacW(k, j, i_g) ;
< 		hFacW:long_name = "vertical fraction of open cell" ;
< 	byte maskW(k, j, i_g) ;
< 		maskW:long_name = "mask denoting wet point at interface" ;
< 		maskW:dtype = "bool" ;
< 	byte maskC(k, j, i) ;
< 		maskC:long_name = "mask denoting wet point at center" ;
< 		maskC:dtype = "bool" ;
< 	byte maskS(k, j_g, i) ;
< 		maskS:long_name = "mask denoting wet point at interface" ;
< 		maskS:dtype = "bool" ;
---
> 	double AngleCS(i2, i3) ;
> 		AngleCS:long_name = "grid orientation (cosine)" ;
> 		AngleCS:units = "m" ;
> 	double AngleSN(i2, i3) ;
> 		AngleSN:long_name = "grid orientation (sine)" ;
> 		AngleSN:units = "m" ;
> 	double RC(i1) ;
> 		RC:long_name = "vertical coordinate" ;
> 		RC:units = "m" ;
> 	double RF(i1) ;
> 		RF:long_name = "vertical coordinate" ;
> 		RF:units = "m" ;
> 	double DRC(i1) ;
> 		DRC:long_name = "grid spacing" ;
> 		DRC:units = "m" ;
> 	double DRF(i1) ;
> 		DRF:long_name = "grid spacing" ;
> 		DRF:units = "m" ;
156,158c79,107
< 		:date_created = "Mon Dec 30 11:13:26 2019" ;
< 		:title = "ECCOv4 MITgcm grid information" ;
< 		:coordinates = "hFacS rAs hFacW Zp1 XC YC XG YG hFacC dxG Depth rA CS drC Z drF Zl dxC maskW maskS PHrefC rAw SN tile Zu PHrefF dyG rAz maskC dyC" ;
---
> 		:description = "C-grid parameters (see MITgcm documentation for details). -- ECCO v4 ocean state estimate, release 3 -- 1992-2015" ;
> 		:A = ":Format      = native grid (nctiles w. 13 tiles)" ;
> 		:B = ":source      = ECCO consortium (http://ecco-group.org/)" ;
> 		:C = ":institution = JPL/UT/MIT/AER" ;
> 		:D = ":history     = files revision history :" ;
> 		:E = "                   04/20/2017: fill in geometry info for blank tiles. \\n" ;
> 		:F = "                   11/06/2016: third release of ECCO v4 (Ou Wang) \\n" ;
> 		:G = "               estimates revision history (from second release) : \\n" ;
> 		:H = "                   employs bi-harmonic viscosity (enhanced near lands), revised \\n" ;
> 		:I = "                   sea-ice parameters, updated or novel observations (including \\n" ;
> 		:J = "                   GRACE OBP, Aquarius SSS, global mean scalar SSH & OBP \\n" ;
> 		:K = "                   time-series, extended and/or expanded in situ TS profiles), \\n" ;
> 		:L = "                   revised weights including data and control with factors \\n" ;
> 		:M = "                   to account for grid-size variation and sampling frequency, \\n" ;
> 		:N = "                   separate time-mean and time-variable data constraint \\n" ;
> 		:O = "                   and controls, sea-ice costs, and initial U, V, and eta as \\n" ;
> 		:P = "                   additional controls.\\n " ;
> 		:Q = ":references  = Fukumori, I., O. Wang, I. Fenty, G. Forget, P. Heimbach, and R. M. Ponte, 2017: \\n" ;
> 		:R = "                ECCO Version 4 Release 3, http://hdl.handle.net/1721.1/110380, doi:1721.1/110380.\\n" ;
> 		:S = "                Available at ftp://ecco.jpl.nasa.gov/Version4/Release3/doc/v4r3_estimation_synopsis.pdf \\n" ;
> 		:T = "               Forget, G., J.-M. Campin, P. Heimbach, C. N. Hill, R. M. Ponte, \\n" ;
> 		:U = "                and C. Wunsch, 2015: ECCO version 4: an integrated framework for \\n" ;
> 		:V = "                non-linear inverse modeling and global ocean state estimation. \\n" ;
> 		:W = "                Geoscientific Model Development, 8, 3071-3104, doi:10.5194/gmd-8-3071-2015 \\n" ;
> 		:date = "21-Apr-2017" ;
> 		:Conventions = "CF-1.6" ;
> 		:_FillValue = NaN ;
> 		:missing_value = NaN ;
> 		:program = "file created using gcmfaces_IO/write2nctiles.m \\n" ;

It appears that the Release 4 grid files have been reformatted to use the xmitgcm conventions. As @gaelforget notes, the xmitgcm conventions were derived directly from the pkg/mnc conventions in MITgcm. The goal was to achieve as much compatibility as possible between the mnc output netcdf files and the "virtual" netcdf files created by xmitgcm.

The original Release 3 netCDF files used rather different conventions and were certainly quirky in some respects. However, I definitely side with @gaelforget's central point that changes to file conventions between versions /releases should be avoided at all costs, since this can break downstream processing code (in any language). If such changes must be made, the proper way to do it is with extensive documentation, automated testing, plus a deprecation cycle. For example, release 4 could include both old and new file formats, plus a message that the next release (e.g. release 5) will include only the new format. This would give people time to adapt their tools.

From the point of view of xmitgcm, afaik, none of this has anything to do with xmitgcm. Xmitgcm is for reading MDS files, not netCDF files. If one wants to rename variables / dimensions after reading, that's trivial to do with xarray.

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024 2

fewer distinct dimension names

@rabernat , I feel this is a fair point but maybe one that has already been resolved in gcmfaces -- since v1.3.0 (late 2018) to be specific. This is how the case of THETA generated with the up-to-date gcmfaces should differ from v4r2 / r3 :

<       i3 = 90 ;
<       i4 = 90 ;
<       i2 = 50 ;
<       i1 = 12 ;
---
>       t = 12 ;
>       k_c = 50 ;
>       j_c = 30 ;
>       i_c = 30 ;

(the tile sizes are different but that's beside the point here)

A complete set is available in https://www.dropbox.com/sh/7c0rx3hbw80guh9/AADjAX8M-5nPzyYoDOzMKDzxa?dl=0 for example if you'd like to look things over more closely. For UVELMASS e.g. the dimensions are now named as follows:

	k_c = 50 ;
	j_w = 30 ;
	i_w = 30 ;

etc

This coordinate naming convention differs slightly from the one used in v4r4 but seems equivalent to me - no?

One design difference is that I favored more familiar / expressive names for the coordinate variables (lon, lat, dep). Indeed, associating lon, lat, dep with i_c, j_c, k_c vs i_w, j_w, k_c etc is equally distinctive (no need to do it both via coord and var names) and it just seems unnecessary to carry over obscure variable names like YG, XG.

This should work with all diagnostics and maintain compatibility with the standard analysis, which has proven a useful way to compare releases in consistent fashion over time, without requiring any additional work.

I did not try to regenerate the nctiles_grid/ files though. While gcmfaces should work just the same for those, the issue here is that generating more grid files has just led to confusion -- so I am not sure we should go down that road anyway.

Don't hesitate to post an issue on https://github.com/MITgcm/gcmfaces/issues though if something seems lacking or is not working in the current gcmfaces.

from ecco-v4-configurations.

timothyas avatar timothyas commented on June 27, 2024 1

Thanks for the flag @gaelforget, I have missed all of these conversations since I'm not really involved with the netcdf generation. However, since I was involved with some of the python stuff, I'll try to respond where I can.

I don’t immediately understand / recall why there was a need for a new grid folder. My guess is that it may have had to do with overly specific python libraries not being able to handle the standard nctiles_grid or something like that but I am not sure.

I'm actually not sure why it needed to change either, and if you're referring to the name change and adding the single ECCO-GRID.nc file, this is not required by any python package. I'm also not sure why maskC/W/S are not there - they should be! The only thing that changed for python packages was meta information (I'll comment on that below).

why and when does one need to worry about using either the new chunked version (ECCO-GRID_*.nc) or the new ECCO-GRID.nc?

It was my assumption that ECCO-GRID.nc is the same, just with all variables wrapped into one file just as a convenience.

Were any modifications made to the actual grid variables (hfacc etc) that users need to be aware of / should not overlook?

Not that I know of.

why and how did we change various naming conventions for variables & dimensions, and where is the documentation of these changes?

I'm only familiar with the conventions that changed for the meta information/attributes, which was for consistency with python packages. The change was to make it so that a user would see the same attributes whether they read in ECCO output from the netcdf product on the web or from a binary file if they reran it and used xmitgcm. For instance the variable DXC has the attributes:

standard_name : cell_x_size_at_u_location
long_name : cell x size
units : m
coordinate : YC XG

Is this what you're referring to? Is this causing problems with gcmfaces? Sorry, I'm just catching up with all of this!

Either way, I agree that this needs to be smooth for users no matter which release they are looking at and no matter which tool they are using. Something I can do is try to add gcmfaces grid_load.m to the ecco_v4_py testing suite which I'm revamping in my free time ... If this function is the one being used to generate the ecco release, then (1) there are some easy fixes, such as changing the naming convention to how it was before and (2) I'll look into testing netcdf files created by this function with gcmfaces with travis so that we can catch these issues sooner.

Lastly, I agree with @gaelforget's suggestions to updating the READMEs. Additionally, I wonder if the grid files can simply be remade with the old GRID.XXXX.nc format, and with maskC/W/S included. Would this be possible/helpful?

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024 1

Many thanks @timothyas for this super fast and very useful response. Btw, just in case my posts are confusing, I am not suggesting that you are responsible for any of this or for fixing it either. Your 4 cents, your willingness to spend time on this, or in fact contribute to MITgcm etc, are greatly appreciated.

I have not messed with the mnc package

Probably a good move imho since the original dev has long moved on and mnc has been kind of an orphan package ever since

  1. I must be missing the issue as to how adding a little bit of meta information has made the ECCO product incompatible with gcmfaces, and it's on my todo list to get to the bottom of it. ...

Not sure I understand the part related to adding meta data but that's ok. In any event, I do my best to alleviate these sort of issues in gcmfaces when they emerge or even refactor gcmfaces when that seems useful + doable in the little time that I can afford to spend on it these days.

Along those lines, I revised the chunking / tiling order to match mitgcm's (only matters with ntiles>13 for llc), modified dimension names at some point based on @rabernat 's recommendations, and with @owang01 's help we have already accommodated (I think) the v4r4 reformatting choice as far as pkg/diagnostics output is concerned.

At the moment, though, I remain perplexed by why we would need to add support for these reformatted versions of nctiles_grid/ which mostly seem to complicate things for ECCO users. It's not your doing or mine but maybe @ifenty and @owang01 will be able to clarify / resolve some of these issues.

  1. MITgcm/xmitgcm#197 I don't see how this PR is related to any of these issues. ...

What you are doing in MITgcm/xmitgcm#197 sounds like it might help a lot of people. Please dont let this distract you from that.

Also don't worry too much about the older issue I brought up. While I do see relations between what happened then and what's happening now, maybe I should not have mentioned it at all in second thought.

As far as linking to MITgcm/xmitgcm#197 -- I just noticed it earlier as I was looking through https://github.com/orgs/MITgcm/dashboard and to me it seemed related at first glance. Plus others involved over there might not follow threads over here. I don't generally follow any of these threads closely myself but when someone links me in then I try to take a look. Again though, maybe I should have refrained from linking the two issues.

ps. I don't really have a sense of how consistent eccov4py and xmitgcm are with one another. Full disclosure: I don't think I have ever used either one directly outside of re-running someone else's notebook here or there. And, taking a closer look at the various docs which could provide useful guidance for the Julia stuff remains on my to-do list...

pps. apologies if sometimes I read and write these posts too quickly and end up generating more confusion. I just have very little time to spend on something like this and can only try to help the best I can in the time I have

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024 1

Thanks @rabernat for joining the conversation and for your very agreeable and practical (imho) suggestion. The time you took to do this is well appreciated on my end for sure.

Sorry about the confusion wrt xmitgcm -- I should probably be more knowledgeable about it at this point... But the fact that you were interested enough to jump in makes me feel better about linking issues.

B.t.w., I was hoping that renaming dimensions on the fly was not an issue with xarray (or no longer an issue?) but I was unsure, so I am glad that you made that point explicitly. Thanks

from ecco-v4-configurations.

rabernat avatar rabernat commented on June 27, 2024 1

I think the biggest challenge in renaming dimensions lies in the fact that the Release 3 file conventions use fewer distinct dimension names than the Release 4 / mnc / xmitgcm conventions. This is evident at the top of my diff, i.e.

1c1
< netcdf ECCO-GRID_00 {
---
> netcdf GRID.0001 {
3,10c3,6
< 	k_p1 = 51 ;
< 	j_g = 90 ;
< 	i_g = 90 ;
< 	k = 50 ;
< 	j = 90 ;
< 	k_u = 50 ;
< 	i = 90 ;
< 	k_l = 50 ;
---
> 	itxt = 30 ;
> 	i1 = 50 ;
> 	i2 = 90 ;
> 	i3 = 90 ;

The R3 conventions only have i1, i2, and i3: they do not distinguish between cell center, cell face, etc, as the R4 / mnc conventions do. So, for example, with the R4 / mnc conventions, rA and rAz have different dimensions

	float rA(j, i) ;
		rA:coordinate = "YC XC" ;
		rA:units = "m2" ;
		rA:long_name = "cell area" ;
	float rAz(j_g, i_g) ;
		rAz:coordinate = "YG XG" ;
		rAz:units = "m" ;
		rAz:long_name = "cell area" ;

while in R3 they have the same:

	double RAC(i2, i3) ;
		RAC:long_name = "grid cell area" ;
		RAC:units = "m^2" ;
	double RAZ(i2, i3) ;
		RAZ:long_name = "grid cell area" ;
		RAZ:units = "m^2" ;

For processing purposes, we certainly find it useful to label these points with different dimensions. Xarray and many other netCDF processing libraries assume that variables with the same dimensions are aligned and can thus be added / multiplied, etc. without any interpolation.

But this means it's not as simple as renaming i2 --> j. You have to figure out, based on other metadata, whether to pick j or j_g. I personally favor the R4 / mnc conventions because they are more explicit. It's easy to go back to the R3 conventions from R4 (j --> i2 and j_g --> i2), but harder to do the reverse.

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

Arghh… there seems to be more of this type of issues out there:

Below I am reposting my email response to the latter here just in case others are wondering about this and not monitoring the Ecco support mailing list

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

I don’t immediately understand / recall why there was a need for a new grid folder. My guess is that it may have had to do with overly specific python libraries not being able to handle the standard nctiles_grid or something like that but I am not sure.

Could you guys at JPL please clarify a few things for us:

  • why and when does one need to worry about using either the reformatted version of nctiles_grid or the original?
  • why and when does one need to worry about using either the new chunked version (ECCO-GRID_*.nc) or the new ECCO-GRID.nc?
  • Were any modifications made to the actual grid variables (hfacc etc) that users need to be aware of / should not overlook?
  • why and how did we change various naming conventions for variables & dimensions, and where is the documentation of these changes?

Sorry if I overlooked some of the details up to this point but I feel that users should not be left wondering about such things, and that’s why the embedded READMEs are so important in my mind. As I look more closely at the READMEs currently in the release4 folder, I am starting to be more and more concerned that issues like Ivana’s might emerge for either new or returning users more in the future.

One obvious issue is that READMEs across the Release4 folder (at least some of them) seem to incorrectly point users to old versions of gcmfaces found in the MIT opendap/ecco_for_las/ server. The READMEs should instead point to the official gcmfaces repository & the (con)current software version, right?

But more generally, the READMEs currently in the JPL release4/ folder might be creating sources of potential confusion or even frustration for returning or new ECCOv4 users due to the recent reformattings. I have not looked at UT’s servers but assume they're synced with JPL's

My sense is that all of the READMEs (probably from release1 to release4 and beyond) might now need to point users to a webpage with details about the reformatting and its implications for returning or new users.

I might be able to update the ones on MIT's ftp site but will need to know which webpage to point to for details about the reformatting (a version controlled GitHub page might be best so we don’t have to go back to the READMEs later e.g. if you decided to reformat again at some point) and someone else will need to take care of the JPL and UT server sides.

Should we aim to do something like that?

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

linking @owang01 and @timothyas here just in case

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

Thanks @timothyas for the very helpful post & clarifications. A few thoughts in response:

Lastly, I agree with @gaelforget's suggestions to updating the READMEs. Additionally, I wonder if the grid files can simply be remade with the old GRID.XXXX.nc format, and with maskC/W/S included. Would this be possible/helpful?

Thanks for supporting the notion that more work is needed on the READMEs. We'll have to wait and see what @ifenty @owang01 et al want to do about this. Hopefully JPL is willing to put in the extra work that seems needed to help ECCO's users here.

I am also glad for your comment on renaming etc. I just sent another post to ecco-support with more about this, which I will repost below. The addition of maskC/W/S could make sense but is probably less of a priority for me for two reasons: 1) it's easily derived from hFacC; 2) it's not an output from write_grid.F (cause of #1 I think).

Here I link MITgcm/xmitgcm#197 as it seems related to all this.

The change was to make it so that a user would see the same attributes whether they read in ECCO output from the netcdf product on the web or from a binary file if they reran it and used xmitgcm.

That's sort of what I thought I remembered. Too bad -- a small patch in xmitgcm to rename a few variables on the fly would probably have been sufficient but instead the issue has now morphed & grown into making ECCO users' life harder as it seems.

As a side note, this reminded me of the confusion that might have stemmed from the pkg/mnc devs deciding, way back when, to get creative with variable names, adding an extra column or row to arrays, etc for apparently no good reason and somewhat inconsistently.

https://github.com/MITgcm/MITgcm/commits/master/model/src/write_grid.F

I got curious and went back to the first commit in the history of write_grid.F. You might get a kick out of this -- its very creation seems to have come from the very sort of issues we are still struggling with today!!

I got a pretty good laugh out of the May 13 commit message too for example. But aside from how language / rhetoric has generally become softer, I see a lot of parallels with today over there. Maybe none of this would occur today if the cooler voice had prevailed back then too.

@jm-c please feel free to correct me if needed -- you have a deeper understanding of all of this than any of us and probably more foresight too.

ps. At one point or another in the 20+ years commit time line of mitgcm, it's likely that many amongst us (myself for example) went for undue creativity that's made others work harder down the road. No-one should feel guilty here, I just felt that highlighting how deep these issues can go, and how long they can affect us (today and in the future) could be useful. Enough said...

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

Here is my repost from ecco-support:

Why not just (1) make sure that nctiles_grid/ in r3 and r4 is the same as in r2 to avoid disrupting returning users, (2) move the other GRID* stuff to a separate folder to avoid confusing new users, and (3) document in your READMEs the circumstances when each grid folder should be used?

Ultimately it’s obviously up to you guys at JPL to decide on what you want to do to resolve this situation. I am just worried we might be losing more users until JPL remedies the confusion that’s emerged from the reformatting. The added documentation in READMEs is probably needed regardless of anything else.

Hope this helps.

from ecco-v4-configurations.

timothyas avatar timothyas commented on June 27, 2024

Thanks for the history @gaelforget! I have not messed with the mnc package, for instance, but yes it seems some of these standards are challenging. I'm certainly only a padawan when it comes to these issues. However, I just want to add my two cents on 2 issues (so, four cents?) you mentioned.

That's sort of what I thought I remembered. Too bad -- a small patch in xmitgcm to rename a few variables on the fly would probably have been sufficient but instead the issue has now morphed & grown into making ECCO users' life harder as it seems.

  1. I must be missing the issue as to how adding a little bit of meta information has made the ECCO product incompatible with gcmfaces, and it's on my todo list to get to the bottom of it. Variables in xarray can easily be renamed on the fly, and xmitgcm is not a very specific tool, it just transforms MITgcm binaries into xarray with some extra meta information, so I will have to dig into this a little bit more to totally understand the issue. As an extensive user who has benefited massively from both gcmfaces and xmitgcm, I don't see any reason why they would be mutually exclusive. Perhaps it is merely because of the way things are implemented in eccov4py, but I'll get back to you all on that.

  2. MITgcm/xmitgcm#197 I don't see how this PR is related to any of these issues. That PR simply allows users to access the grid variables (MITgcm binary output) for LLC4320 or its cousins on the web portal using xmitgcm e.g. in a jupyter notebook on the fly. Basically users don't have to download the files, they just write a one liner in a jupyter notebook and they can access the files. Fun stuff, but I don't see that it's related here. (The issue we were running into is simply that not all files are available on the web portal).

from ecco-v4-configurations.

rabernat avatar rabernat commented on June 27, 2024

Ok thanks for the update. I had not seen the v1.3.0 format yet. I agree they are functionally equivalent, modulo the actual dimension name.

Another general point: we try to avoid having processing software rely on specific dimension names and instead rely on metadata to figure out what dimensions / variables we want. This is only plausible thanks to CF conventions, which are, imo, very well thought-through and comprehensive. There are some gaps regarding details of how you represent grids, but we are working on that (see e.g. cf-convention/discuss#5).

from ecco-v4-configurations.

gaelforget avatar gaelforget commented on June 27, 2024

Ok thanks for the update. I had not seen the v1.3.0 format yet. I agree they are functionally equivalent, modulo the actual dimension name.

Another general point: we try to avoid having processing software rely on specific dimension names and instead rely on metadata to figure out what dimensions / variables we want. This is only plausible thanks to CF conventions, which are, imo, very well thought-through and comprehensive. There are some gaps regarding details of how you represent grids, but we are working on that (see e.g. cf-convention/discuss#5).

Agreed and good to know about the next gen discussion. So far "CF-1.6" has seemed sufficient to me but I am open to revisiting that aspect in gcmfaces at some point (assuming that's indeed justified). Not entirely sure what post processing code has been used in v4r4 btw.

from ecco-v4-configurations.

ifenty avatar ifenty commented on June 27, 2024

There are a lot of good questions brought up in these posts @gaelforget, and I know I won't be addressing all of them, but here are some responses. (Also, apologies for not replying sooner -- I only noticed this chain now, I blame... Covid?).

New dimensions were added to the netCDF files to 1) clarify where fields are with respect to the Arakawa C grid, 2) allow compatibility with xgcm, and 3) be consistent with xmitgcm. xmitgcm is used as part of the workflow to generate the netCDF files from mds and some xgcm routines are used in ecco-v4-py.

When xmitgcm loads mds files it applies descriptions to each variable based on the contents of "available diagnostics". So many of those variable descriptions are misleading, incomplete, or at worst incorrect for fields distributed in the native grid format (e.g., UVEL) that I decided we needed to thoroughly reexamine and potentially rewrite every one that we include as part of ECCOv4 products. One day maybe issue #248 will be resolved. In the interim we'll use these.

If you are curious, the current set of variable metadata is available in the "ECCO-ACCESS" github repo (here in ECCO-GROUP). Take a look at the files inside ECCO-ACCESS/metadata/ECCOv4r4_metadata_json/, and in particular

ECCO Version 4 Release 4 Product and Variable Metadata v10-20200707 - native_grid_variables.json

and

ECCO Version 4 Release 4 Product and Variable Metadata v10-20200707 - latlon_grid_variables.json

These json files contain the new variable descriptions for variables that will be distributed on the native and latlon grids. @owang01 and I worked on these in 2019. They include edits from a few people who responded to our request for feedback (email to the ecco-telecon mailing list on 11/25). I put all of these on github so that people can help us continue to improve them and to make it easier for us to track changes.

With PO.DAAC poised to distribute our ECCO fields in both lat-lon and native format we have some new constraints such as only providing one file per time level (what they call a 'granule'). One granule per time level means that we cannot separate fields by 'tile' and we cannot group more than one time level together. Interestingly, they do allow more than one field to be included in a single granule. In fact, they insist on it. So of the 80+ fields, there will be about 20 or so groupings.

PODAAC also has much stricter requirements on the use of fill values (no more nans) and high level 'granule' metadata on the lower-level variable metadata (standard_name, units). These changes don't affect the values of the fields themselves (except in the case of the fill_values).

On the topic of backwards compatibility, we can probably solve all of these issues with appropriate 'wrappers' on existing tools (e.g., gcmfaces) that work on the front end to make old files look like new files or vice versa. I believe @owang01 contributed to a wrapper for gcmfaces backward compatibility.

Ryan's @rabernat suggestion for automatic testing is a good one. Like the MITgcm verification experiments, ECCO tool verification experiments could be made to ensure they work on different ECCO versions. Users could provide a 'version/release' argument when loading fields with different tools so that the tools use the correct wrapper.

Another idea is to regenerate earlier v4 releases using the new netCDF conventions. If we did, then we would be able to operate with them using xgcm, xmitgcm, and ecco-v4-py (and presumably gcmfaces with the new wrapper). In addition, if they were regenerated using the new conventions, PO.DAAC could host/archive them alongside the newer v4 releases.

With respect to the concern that users will be confused with endless changes: I don't actually foresee very much changing in the near term regarding the naming of dimensions or coordinates in the netCDF files. The transition from having the spatial dimensions named i1, i2, i3, to i,j,i_g, j_g, k_u, k_l, etc. was big, but that transition is complete. I mentioned that the biggest change is going to be the grouping of two or more fields together in each granule to satisfy PODAAC. Although grouping fields together is unconventional (at least for us), I am not really worried about it because

  1. People will still be able to access individual fields through OPeNDAP
  2. netCDF libraries can read only the fields that are needed, and tools like xarray + Dask only read fields when they are needed
  3. For cloud computing our netCDF granules will probably be converted to another format anyway (e.g., zarr @rabernat).

from ecco-v4-configurations.

ifenty avatar ifenty commented on June 27, 2024

As an update, all of the metadata that we used for the PO.DAAC distribution of V4 R4 is available here: https://github.com/ECCO-GROUP/ECCO-ACCESS/tree/master/metadata/ECCOv4r4_metadata_json

from ecco-v4-configurations.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.