Hi, I'm unable to load the included umap models. PCA models work.
# Write all the super-population dimensionality-reduction models for the
# "kidd" and "Seldin" AIsnp sets, then fit a k-NN classifier on each
# reduced representation.
#
# NOTE(review): the original labels list had four entries
# ([None, None, None, df_labels]) zipped against three algorithms, so zip()
# silently truncated it and "nca" received labels=None even though NCA is
# supervised. The list below is aligned one-to-one with the algorithms:
# only "nca" gets the superpopulation labels.
for aisnps_set, df, df_labels in zip(
    ["kidd", "Seldin"],
    [df_kidd_encoded, df_seldin_encoded],
    [df_kidd["superpopulation"], df_seldin["superpopulation"]],
):
    for algorithm, labels in zip(
        ["pca", "umap", "nca"],
        [None, None, df_labels],  # pca/umap are unsupervised; nca needs labels
    ):
        print(algorithm, aisnps_set, OVERWRITE_MODEL, labels)
        # NOTE(review): "super population" (with a space) here vs
        # "superpopulation" below is presumably what each API expects —
        # TODO confirm against the ezancestry signatures.
        df_reduced = dimensionality_reduction(
            df,
            algorithm=algorithm,
            aisnps_set=aisnps_set,
            overwrite_model=OVERWRITE_MODEL,
            labels=labels,
            population_level="super population",
        )
        knn_model = train(
            df_reduced,
            df_labels,
            algorithm=algorithm,
            aisnps_set=aisnps_set,
            k=9,
            population_level="superpopulation",
            overwrite_model=OVERWRITE_MODEL,
        )
2022-08-22 17:16:03.823 | INFO | ezancestry.dimred:dimensionality_reduction:126 - Successfully loaded a dimensionality reduction model
pca kidd False None
umap kidd False None
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [17], in <cell line: 2>()
7 for algorithm, labels in zip(["pca", "umap", "nca"], [None, None, None, df_labels]):
8 print(algorithm,aisnps_set,OVERWRITE_MODEL,labels)
----> 9 df_reduced = dimensionality_reduction(df, algorithm=algorithm, aisnps_set=aisnps_set, overwrite_model=OVERWRITE_MODEL, labels=labels, population_level="super population")
10 knn_model = train(df_reduced, df_labels, algorithm=algorithm, aisnps_set=aisnps_set, k=9, population_level="superpopulation", overwrite_model=OVERWRITE_MODEL)
File ~/ezancestry/ezancestry/dimred.py:107, in dimensionality_reduction(df, algorithm, aisnps_set, n_components, overwrite_model, labels, population_level, models_directory, random_state)
105 if algorithm in set(["pca", "umap"]):
106 try:
--> 107 reducer = joblib.load(
108 models_directory.joinpath(f"{algorithm}.{aisnps_set}.bin")
109 )
110 except FileNotFoundError:
111 return None
File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/numpy_pickle.py:587, in load(filename, mmap_mode)
581 if isinstance(fobj, str):
582 # if the returned file object is a string, this means we
583 # try to load a pickle file generated with an version of
584 # Joblib so we load it with joblib compatibility function.
585 return load_compatibility(fobj)
--> 587 obj = _unpickle(fobj, filename, mmap_mode)
588 return obj
File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/numpy_pickle.py:506, in _unpickle(fobj, filename, mmap_mode)
504 obj = None
505 try:
--> 506 obj = unpickler.load()
507 if unpickler.compat_mode:
508 warnings.warn("The file '%s' has been generated with a "
509 "joblib version less than 0.10. "
510 "Please regenerate this pickle file."
511 % filename,
512 DeprecationWarning, stacklevel=3)
File ~/opt/anaconda3/lib/python3.9/pickle.py:1212, in _Unpickler.load(self)
1210 raise EOFError
1211 assert isinstance(key, bytes_types)
-> 1212 dispatch[key[0]](self)
1213 except _Stop as stopinst:
1214 return stopinst.value
File ~/opt/anaconda3/lib/python3.9/pickle.py:1589, in _Unpickler.load_reduce(self)
1587 args = stack.pop()
1588 func = stack[-1]
-> 1589 stack[-1] = func(*args)
File ~/opt/anaconda3/lib/python3.9/site-packages/numba/core/serialize.py:97, in _unpickle__CustomPickled(serialized)
92 def _unpickle__CustomPickled(serialized):
93 """standard unpickling for `_CustomPickled`.
94
95 Uses `NumbaPickler` to load.
96 """
---> 97 ctor, states = loads(serialized)
98 return _CustomPickled(ctor, states)
AttributeError: Can't get attribute '_rebuild_function' on <module 'numba.core.serialize' from '/Users/jacksonc08/opt/anaconda3/lib/python3.9/site-packages/numba/core/serialize.py'>
I have tested that it is certainly the UMAP model that is causing the issue.
This gives the same error.
Looking online, it seems to be an issue with the numba package (a dependency of umap-learn, which joblib invokes while unpickling the UMAP model), which no longer includes the `_rebuild_function` function — see here. In other words, the bundled UMAP models were pickled under an older numba version and cannot be deserialized by the newer one installed here.
Do you have any recommendations on how to fix this error? Many thanks.