diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..181d745 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,18 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - family-names: "Ramos-Carreño" + given-names: "Carlos" + orcid: "https://orcid.org/0000-0003-2566-7058" + affiliation: "Universidad Autónoma de Madrid" + email: vnmabus@gmail.com +title: "rdata: Read R datasets from Python" +date-released: 2022-03-24 +url: "https://github.com/vnmabus/rdata" +license: MIT +keywords: + - rdata + - Python + - R + - parser + - conversion \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 56e0267..4e06f8e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include MANIFEST.in -include VERSION +include rdata/VERSION include LICENSE include rdata/py.typed include *.txt \ No newline at end of file diff --git a/VERSION b/VERSION deleted file mode 100644 index 490f510..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.6 \ No newline at end of file diff --git a/rdata/VERSION b/rdata/VERSION new file mode 100644 index 0000000..0e2c939 --- /dev/null +++ b/rdata/VERSION @@ -0,0 +1 @@ +0.7 \ No newline at end of file diff --git a/rdata/__init__.py b/rdata/__init__.py index 90d9fa6..c83f931 100644 --- a/rdata/__init__.py +++ b/rdata/__init__.py @@ -1,3 +1,5 @@ +"""rdata: Read R datasets from Python.""" +import errno as _errno import os as _os import pathlib as _pathlib @@ -13,3 +15,15 @@ def _get_test_data_path() -> _pathlib.Path: Path of the test data. """ + +try: + with open( + _pathlib.Path(_os.path.dirname(__file__)) / 'VERSION', + 'r', + ) as version_file: + __version__ = version_file.read().strip() +except IOError as e: + if e.errno != _errno.ENOENT: + raise + + __version__ = "0.0" diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py index 460e237..8c9c217 100644 --- a/rdata/conversion/_conversion.py +++ b/rdata/conversion/_conversion.py @@ -6,7 +6,6 @@ Any, Callable, ChainMap, - Hashable, List, Mapping, MutableMapping, @@ -355,12 +354,16 @@ def convert_array( dimnames = attrs.get('dimnames') if dimnames: - dimension_names = ["dim_" + str(i) for i, _ in enumerate(dimnames)] - coords: Mapping[Hashable, Any] = { - dimension_names[i]: d - for i, d in enumerate(dimnames) - if d is not None - } + if isinstance(dimnames, Mapping): + dimension_names = list(dimnames.keys()) + coords = dimnames + else: + dimension_names = [f"dim_{i}" for i, _ in enumerate(dimnames)] + coords = { + dimension_names[i]: d + for i, d in enumerate(dimnames) + if d is not None + } value = xarray.DataArray(value, dims=dimension_names, coords=coords) diff --git a/rdata/tests/data/test_full_named_matrix.rda b/rdata/tests/data/test_full_named_matrix.rda new file mode 100644 index 0000000..1b20735 Binary files /dev/null and b/rdata/tests/data/test_full_named_matrix.rda differ diff --git a/rdata/tests/data/test_half_named_matrix.rda b/rdata/tests/data/test_half_named_matrix.rda new file mode 100644 index 0000000..557a765 Binary files /dev/null and b/rdata/tests/data/test_half_named_matrix.rda differ diff --git a/rdata/tests/data/test_named_matrix.rda b/rdata/tests/data/test_named_matrix.rda new file mode 100644 index 0000000..401391e Binary files /dev/null and b/rdata/tests/data/test_named_matrix.rda differ diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py index 1f4f57a..2b99cdf 100644 --- a/rdata/tests/test_rdata.py +++ b/rdata/tests/test_rdata.py @@ -8,8 +8,8 @@ import numpy as np import pandas as pd - import rdata +import xarray TESTDATA_PATH = rdata.TESTDATA_PATH @@ -93,6 +93,74 @@ def test_matrix(self) -> None: ]), }) + def test_named_matrix(self) -> None: + """Test that a named matrix can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_named_matrix.rda", + ) + converted = rdata.conversion.convert(parsed) + reference = xarray.DataArray( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ], + dims=["dim_0", "dim_1"], + coords={ + "dim_0": ["dim0_0", "dim0_1"], + "dim_1": ["dim1_0", "dim1_1", "dim1_2"], + }, + ) + + xarray.testing.assert_identical( + converted["test_named_matrix"], + reference, + ) + + def test_half_named_matrix(self) -> None: + """Test that a named matrix with no name for a dim can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_half_named_matrix.rda", + ) + converted = rdata.conversion.convert(parsed) + reference = xarray.DataArray( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ], + dims=["dim_0", "dim_1"], + coords={ + "dim_0": ["dim0_0", "dim0_1"], + }, + ) + + xarray.testing.assert_identical( + converted["test_half_named_matrix"], + reference, + ) + + def test_full_named_matrix(self) -> None: + """Test that a named matrix with dim names can be parsed.""" + parsed = rdata.parser.parse_file( + TESTDATA_PATH / "test_full_named_matrix.rda", + ) + converted = rdata.conversion.convert(parsed) + reference = xarray.DataArray( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ], + dims=["my_dim_0", "my_dim_1"], + coords={ + "my_dim_0": ["dim0_0", "dim0_1"], + "my_dim_1": ["dim1_0", "dim1_1", "dim1_2"], + }, + ) + + xarray.testing.assert_identical( + converted["test_full_named_matrix"], + reference, + ) + def test_list(self) -> None: """Test that list can be parsed.""" parsed = rdata.parser.parse_file(TESTDATA_PATH / "test_list.rda") diff --git a/setup.py b/setup.py index ff0d6d5..2e8bfe4 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ language or its libraries, and thus it is released under a MIT license. """ import os +import pathlib import sys from setuptools import find_packages, setup @@ -16,44 +17,51 @@ DOCLINES = (__doc__ or '').split("\n") -with open(os.path.join(os.path.dirname(__file__), - 'VERSION'), 'r') as version_file: +with open( + pathlib.Path(os.path.dirname(__file__)) / 'rdata' / 'VERSION', + 'r', +) as version_file: version = version_file.read().strip() -setup(name='rdata', - version=version, - description=DOCLINES[1], - long_description="\n".join(DOCLINES[3:]), - url='https://github.com/vnmabus/rdata', - author='Carlos Ramos Carreño', - author_email='vnmabus@gmail.com', - include_package_data=True, - platforms=['any'], - license='MIT', - packages=find_packages(), - python_requires='>=3.7, <4', - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Natural Language :: English', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Typing :: Typed', - ], - keywords=['rdata', 'r', 'dataset'], - install_requires=['numpy', - 'xarray', - 'pandas'], - setup_requires=pytest_runner, - tests_require=['pytest-cov', - 'numpy>=1.14' # The printing format for numpy changes - ], - test_suite='rdata.tests', - zip_safe=False) +setup( + name='rdata', + version=version, + description=DOCLINES[1], + long_description="\n".join(DOCLINES[3:]), + url='https://github.com/vnmabus/rdata', + author='Carlos Ramos Carreño', + author_email='vnmabus@gmail.com', + include_package_data=True, + platforms=['any'], + license='MIT', + packages=find_packages(), + python_requires='>=3.7, <4', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Scientific/Engineering :: Mathematics', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Typing :: Typed', + ], + keywords=['rdata', 'r', 'dataset'], + install_requires=[ + 'numpy', + 'xarray', + 'pandas', + ], + setup_requires=pytest_runner, + tests_require=[ + 'pytest-cov', + 'numpy>=1.14', # The printing format for numpy changes + ], + test_suite='rdata.tests', + zip_safe=False, +)