From 3a6664d4d39d7c08ed59e7c591f2a186e012f0c8 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 5 Sep 2024 17:13:05 +0300
Subject: [PATCH 001/100] Add reference type to unparser

---
 rdata/unparser/_unparser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index 7361b65..1524317 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -98,8 +98,9 @@ def unparse_r_object(self, obj: RObject) -> None:  # noqa: C901, PLR0912
         # Unparse data
         value = obj.value
         if info.type in {
-           RObjectType.NIL,
-           RObjectType.NILVALUE,
+            RObjectType.NIL,
+            RObjectType.NILVALUE,
+            RObjectType.REF,
         }:
             # These types don't have any data
             assert value is None

From 153f80368fd2097bcdaf8de6a1e2cffb60938ddc Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 5 Sep 2024 17:13:53 +0300
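Subject: [PATCH 002/100] Add draft dataframe conversion

Convert pd.DataFrame, pd.Series and pd.Categorical to R objects.
Only data frames whose index is a RangeIndex starting at 1 with
step 1 are supported; other indexes raise NotImplementedError.

Symbols are now recorded in module-level reference lists so that a
repeated symbol is emitted as a REF object, and the object flag of an
R object is set when its attribute list contains a "class" tag.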
---
 rdata/conversion/to_r.py | 107 +++++++++++++++++++++++++++++++++++----
 1 file changed, 98 insertions(+), 9 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 39df39e..7fc4c86 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -7,6 +7,7 @@
 from typing import TYPE_CHECKING
 
 import numpy as np
+import pandas as pd
 
 from rdata.parser import (
     CharFlags,
@@ -49,6 +50,23 @@ def __call__(self, data: Any, *, encoding: Encoding) -> RObject: # noqa: ANN401
 })
 
 
+def find_is_object(attributes: RObject | None):
+    if attributes is None:
+        return False
+    info = attributes.info
+    if info.type != RObjectType.LIST:
+        return False
+    if not info.tag:
+        return False
+    tag = attributes.tag
+    if tag.info.type == RObjectType.REF:
+        tag = tag.referenced_object
+    if (tag.info.type == RObjectType.SYM
+        and tag.value.value == b"class"):
+        return True
+    return find_is_object(attributes.value[1])
+
+
 def build_r_object(
         r_type: RObjectType,
         *,
@@ -56,6 +74,7 @@ def build_r_object(
         attributes: RObject | None = None,
         tag: RObject | None = None,
         gp: int = 0,
+        reference: int = 0,
 ) -> RObject:
     """
     Build R object.
@@ -66,6 +85,7 @@ def build_r_object(
         attributes: Same as in RObject.
         tag: Same as in RObject.
         gp: Same as in RObjectInfo.
+        reference: Same as in RObjectInfo.
 
     Returns:
         R object.
@@ -75,19 +95,24 @@ def build_r_object(
         RObjectInfo
     """
     assert r_type is not None
+    if reference == 0:
+        assert reference_name_list[reference] is None
+    else:
+        assert r_type == RObjectType.REF
+    is_object = find_is_object(attributes)
     return RObject(
         RObjectInfo(
             r_type,
-            object=False,
+            object=is_object,
             attributes=attributes is not None,
             tag=tag is not None,
             gp=gp,
-            reference=0,
+            reference=reference,
          ),
          value,
          attributes,
          tag,
-         None,
+         reference_obj_list[reference],
      )
 
 
@@ -138,8 +163,13 @@ def build_r_list(
         )
 
 
+# XXX global lists
+reference_name_list = [None]
+reference_obj_list = [None]
+
+
 def build_r_sym(
-        data: str,
+        name: str,
         *,
         encoding: Encoding,
 ) -> RObject:
@@ -147,15 +177,26 @@ def build_r_sym(
     Build R object representing symbol.
 
     Args:
-        data: String.
-        encoding: Encoding to be used for strings within data.
+        name: String.
+        encoding: Encoding to be used for the name.
 
     Returns:
         R object.
     """
-    r_type = RObjectType.SYM
-    r_value = convert_to_r_object(data.encode(encoding), encoding=encoding)
-    return build_r_object(r_type, value=r_value)
+    # Reference to existing symbol if exists
+    if name in reference_name_list:
+        # XXX can any symbol be referenced???
+        reference = reference_name_list.index(name)
+        return build_r_object(RObjectType.REF, reference=reference)
+
+    # Create a new symbol
+    r_value = convert_to_r_object(name.encode(encoding), encoding=encoding)
+    r_object = build_r_object(RObjectType.SYM, value=r_value)
+
+    # Add to reference list
+    reference_name_list.append(name)
+    reference_obj_list.append(r_object)
+    return r_object
 
 
 def build_r_data(
@@ -336,6 +377,54 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
             raise ValueError(msg)
         r_value = data
 
+    elif isinstance(data, pd.Series):
+        array = data.array
+        if isinstance(array, pd.Categorical):
+            return convert_to_r_object(array, encoding=encoding)
+        elif isinstance(array, pd.arrays.IntegerArray):
+            return convert_to_r_object(data.to_numpy(), encoding=encoding)
+        else:
+            msg = f"pd.Series {type(array)} not implemented"
+            raise NotImplementedError(msg)
+
+    elif isinstance(data, pd.Categorical):
+        r_type = RObjectType.INT
+        r_value = data.codes + 1
+        attributes = build_r_list({
+            "levels": np.asarray(list(data.categories)),
+            "class": "factor",
+            },
+            encoding=encoding)
+
+    elif isinstance(data, pd.DataFrame):
+        r_type = RObjectType.VEC
+        names = []
+        r_value = []
+        for column, series in data.items():
+            names.append(column)
+            r_value.append(convert_to_r_object(series, encoding=encoding))
+
+        index = data.index
+        if (isinstance(index, pd.RangeIndex)
+            and index.start == 1
+            and index.stop == data.shape[0] + 1
+            and index.step == 1
+            ):
+            row_names = np.ma.array(
+                    data=[0, -data.shape[0]],
+                    mask=[True, False],
+                )
+        else:
+            msg = f"pd.DataFrame index {type(index)} not implemented"
+            raise NotImplementedError(msg)
+
+        attributes = build_r_list({
+            "names": np.asarray(names),
+            "row.names": row_names,
+            "class": "data.frame",
+            },
+            encoding=encoding)
+
     else:
         msg = f"type {type(data)} not implemented"
         raise NotImplementedError(msg)

From 45575593818c56cd099bd5edeec3b46a137d058f Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 5 Sep 2024 17:51:43 +0300
Subject: [PATCH 003/100] Add helper function for creating unicode arrays

---
 rdata/conversion/to_r.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 7fc4c86..08aef77 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -25,6 +25,8 @@
 )
 
 if TYPE_CHECKING:
+    import numpy.typing as npt
+
     from collections.abc import Mapping
     from typing import Any, Final, Literal, Protocol
 
@@ -50,6 +52,21 @@ def __call__(self, data: Any, *, encoding: Encoding) -> RObject: # noqa: ANN401
 })
 
 
+def create_unicode_array(
+        names: Any,
+) -> npt.NDArray[Any]:
+    """
+    Create unicode array from sequence/iterator of strings.
+
+    Args:
+        names: Strings.
+
+    Returns:
+        Array.
+    """
+    return np.array(list(names), dtype=np.dtype("U"))
+
+
 def find_is_object(attributes: RObject | None):
     if attributes is None:
         return False
@@ -311,7 +328,7 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
         r_value = [convert_to_r_object(el, encoding=encoding) for el in values]
 
         if isinstance(data, dict):
-            names = np.array(list(data.keys()), dtype=np.dtype("U"))
+            names = create_unicode_array(data.keys())
             attributes = build_r_list({"names": names},
                                       encoding=encoding)
 
@@ -391,7 +408,7 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
         r_type = RObjectType.INT
         r_value = data.codes + 1
         attributes = build_r_list({
-            "levels": np.asarray(list(data.categories)),
+            "levels": create_unicode_array(data.categories),
             "class": "factor",
             },
             encoding=encoding)
@@ -410,7 +427,7 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
             and index.stop == data.shape[0] + 1
             and index.step == 1
             ):
-            row_names = np.ma.array(
+            row_names = np.ma.array(  # type: ignore [no-untyped-call]
                     data=[0, -data.shape[0]],
                     mask=[True, False],
                 )
@@ -419,7 +436,7 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
             raise NotImplementedError(msg)
 
         attributes = build_r_list({
-            "names": np.asarray(names),
+            "names": create_unicode_array(names),
             "row.names": row_names,
             "class": "data.frame",
             },

From 6eeb992ac82e1e4d749833a0c344768dc10ce570 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 5 Sep 2024 18:17:24 +0300
Subject: [PATCH 004/100] Add more pd.Series types

---
 rdata/conversion/to_r.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 08aef77..72e4180 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -398,7 +398,10 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
         array = data.array
         if isinstance(array, pd.Categorical):
             return convert_to_r_object(array, encoding=encoding)
-        elif isinstance(array, pd.arrays.IntegerArray):
+        elif isinstance(array, pd.arrays.StringArray):
+            return convert_to_r_object(create_unicode_array(array), encoding=encoding)
+        elif (isinstance(array, pd.arrays.IntegerArray)
+              or isinstance(array, pd.arrays.NumpyExtensionArray)):
             return convert_to_r_object(data.to_numpy(), encoding=encoding)
         else:
             msg = f"pd.Series {type(array)} not implemented"

From ffddf74edf088b87b4a3ae0fc5139a58d65958fb Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 5 Sep 2024 18:18:01 +0300
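Subject: [PATCH 005/100] Fix the order of symbol references

In build_r_list, build the tag symbol before converting the first
value, so that the tag is registered in the reference list ahead of
any symbol created while converting the value.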
---
 rdata/conversion/to_r.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 72e4180..dd17b3a 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -161,8 +161,8 @@ def build_r_list(
     if isinstance(data, dict):
         data = data.copy()
         key = next(iter(data))
-        value1 = convert_value(data.pop(key), encoding=encoding)
         tag = build_r_sym(key, encoding=encoding)
+        value1 = convert_value(data.pop(key), encoding=encoding)
     elif isinstance(data, list):
         value1 = convert_value(data[0], encoding=encoding)
         data = data[1:]

From eb82ff657dc6f6ea41b2bbc7051f105776878468 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Tue, 10 Sep 2024 09:10:28 +0300
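Subject: [PATCH 006/100] Add a converter class for Python-to-R conversion

Move build_r_list, build_r_sym, convert_to_r_object and
convert_to_r_object_for_rda into the new ConverterFromPythonToR
class. The encoding and the symbol reference lists become
per-instance state instead of a keyword argument and module-level
globals.

build_r_object now takes the reference as an (index, object) tuple.
The module-level convert_to_r_object and convert_to_r_object_for_rda
are no longer exported from rdata.conversion; write_rds, write_rda
and the tests use the converter class instead.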
---
 rdata/_write.py              |  12 +-
 rdata/conversion/__init__.py |   3 +-
 rdata/conversion/to_r.py     | 529 +++++++++++++++++------------------
 rdata/tests/test_write.py    |  32 ++-
 4 files changed, 282 insertions(+), 294 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index 39a8255..0630d1e 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -3,7 +3,7 @@
 
 from typing import TYPE_CHECKING
 
-from .conversion import build_r_data, convert_to_r_object, convert_to_r_object_for_rda
+from .conversion import build_r_data, ConverterFromPythonToR
 from .conversion.to_r import DEFAULT_FORMAT_VERSION
 from .unparser import unparse_file
 
@@ -52,10 +52,7 @@ def write_rds(
         >>> data = ["hello", 1, 2.2, 3.3+4.4j]
         >>> rdata.write_rds("test.rds", data)
     """
-    r_object = convert_to_r_object(
-        data,
-        encoding=encoding,
-    )
+    r_object = ConverterFromPythonToR(encoding=encoding).convert_to_r_object(data)
     r_data = build_r_data(
         r_object,
         encoding=encoding,
@@ -107,10 +104,7 @@ def write_rda(
         >>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
         >>> rdata.write_rda("test.rda", data)
     """
-    r_object = convert_to_r_object_for_rda(
-        data,
-        encoding=encoding,
-    )
+    r_object = ConverterFromPythonToR(encoding=encoding).convert_to_r_object_for_rda(data)
     r_data = build_r_data(
         r_object,
         encoding=encoding,
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index 44f6ad7..2ec4f44 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -26,6 +26,5 @@
 )
 from .to_r import (
     build_r_data as build_r_data,
-    convert_to_r_object as convert_to_r_object,
-    convert_to_r_object_for_rda as convert_to_r_object_for_rda,
+    ConverterFromPythonToR as ConverterFromPythonToR,
 )
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index dd17b3a..918b89b 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -91,7 +91,7 @@ def build_r_object(
         attributes: RObject | None = None,
         tag: RObject | None = None,
         gp: int = 0,
-        reference: int = 0,
+        reference: tuple(int, RObject | None) = (0, None),
 ) -> RObject:
     """
     Build R object.
@@ -102,7 +102,7 @@ def build_r_object(
         attributes: Same as in RObject.
         tag: Same as in RObject.
         gp: Same as in RObjectInfo.
-        reference: Same as in RObjectInfo.
+        reference: Tuple of integer and object.
 
     Returns:
         R object.
@@ -112,10 +112,8 @@ def build_r_object(
         RObjectInfo
     """
     assert r_type is not None
-    if reference == 0:
-        assert reference_name_list[reference] is None
-    else:
-        assert r_type == RObjectType.REF
+    reference_id, referenced_object = reference
+    assert (reference_id == 0) == (referenced_object == None) == (r_type != RObjectType.REF)
     is_object = find_is_object(attributes)
     return RObject(
         RObjectInfo(
@@ -124,98 +122,15 @@ def build_r_object(
             attributes=attributes is not None,
             tag=tag is not None,
             gp=gp,
-            reference=reference,
+            reference=reference_id,
          ),
          value,
          attributes,
          tag,
-         reference_obj_list[reference],
+         referenced_object,
      )
 
 
-def build_r_list(
-        data: Mapping[str, Any] | list[Any],
-        *,
-        encoding: Encoding,
-        convert_value: Converter | None = None,
-) -> RObject:
-    """
-    Build R object representing named linked list.
-
-    Args:
-        data: Non-empty dictionary or list.
-        encoding: Encoding to be used for strings within data.
-        convert_value: Function used for converting value to R object
-            (for example, convert_to_r_object).
-
-    Returns:
-        R object.
-    """
-    if convert_value is None:
-        convert_value = convert_to_r_object
-
-    if len(data) == 0:
-        msg = "data must not be empty"
-        raise ValueError(msg)
-
-    if isinstance(data, dict):
-        data = data.copy()
-        key = next(iter(data))
-        tag = build_r_sym(key, encoding=encoding)
-        value1 = convert_value(data.pop(key), encoding=encoding)
-    elif isinstance(data, list):
-        value1 = convert_value(data[0], encoding=encoding)
-        data = data[1:]
-        tag = None
-
-    if len(data) == 0:
-        value2 = build_r_object(RObjectType.NILVALUE)
-    else:
-        value2 = build_r_list(data, encoding=encoding, convert_value=convert_value)
-
-    return build_r_object(
-        RObjectType.LIST,
-        value=(value1, value2),
-        tag=tag,
-        )
-
-
-# XXX global lists
-reference_name_list = [None]
-reference_obj_list = [None]
-
-
-def build_r_sym(
-        name: str,
-        *,
-        encoding: Encoding,
-) -> RObject:
-    """
-    Build R object representing symbol.
-
-    Args:
-        name: String.
-        encoding: Encoding to be used for the name.
-
-    Returns:
-        R object.
-    """
-    # Reference to existing symbol if exists
-    if name in reference_name_list:
-        # XXX can any symbol be referenced???
-        reference = reference_name_list.index(name)
-        return build_r_object(RObjectType.REF, reference=reference)
-
-    # Create a new symbol
-    r_value = convert_to_r_object(name.encode(encoding), encoding=encoding)
-    r_object = build_r_object(RObjectType.SYM, value=r_value)
-
-    # Add to reference list
-    reference_name_list.append(name)
-    reference_obj_list.append(r_object)
-    return r_object
-
-
 def build_r_data(
         r_object: RObject,
         *,
@@ -228,7 +143,7 @@ def build_r_data(
 
     Args:
         r_object: R object.
-        encoding: Encoding to be used for strings within data.
+        encoding: Encoding saved in the metadata.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
 
@@ -252,201 +167,275 @@ def build_r_data(
     return RData(versions, extra, r_object)
 
 
-def convert_to_r_object_for_rda(
-        data: Mapping[str, Any],
-        *,
-        encoding: Encoding = "utf-8",
-) -> RObject:
+class ConverterFromPythonToR:
     """
-    Convert Python dictionary to R object for RDA file.
+    Class converting Python objects to R objects.
 
     Args:
-        data: Python dictionary with data and variable names.
         encoding: Encoding to be used for strings within data.
-
-    Returns:
-        Corresponding R object.
-
-    See Also:
-        convert_to_r_object
     """
-    if not isinstance(data, dict):
-        msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
-        raise TypeError(msg)
-    return build_r_list(data, encoding=encoding)
 
+    def __init__(self, *, encoding: Encoding = "utf-8"):
+        self.encoding = encoding
+        self.reference_name_list = [None]
+        self.reference_obj_list = [None]
+
+
+    def build_r_list(self,
+            data: Mapping[str, Any] | list[Any],
+            *,
+            convert_value: Converter | None = None,
+    ) -> RObject:
+        """
+        Build R object representing named linked list.
+
+        Args:
+            data: Non-empty dictionary or list.
+            convert_value: Function used for converting value to R object
+                (for example, convert_to_r_object).
+
+        Returns:
+            R object.
+        """
+        if convert_value is None:
+            convert_value = self.convert_to_r_object
+
+        if len(data) == 0:
+            msg = "data must not be empty"
+            raise ValueError(msg)
 
-def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
-        data: Any,  # noqa: ANN401
-        *,
-        encoding: Encoding = "utf-8",
-) -> RObject:
-    """
-    Convert Python data to R object.
-
-    Args:
-        data: Any Python object.
-        encoding: Encoding to be used for strings within data.
+        if isinstance(data, dict):
+            data = data.copy()
+            key = next(iter(data))
+            tag = self.build_r_sym(key)
+            value1 = convert_value(data.pop(key))
+        elif isinstance(data, list):
+            value1 = convert_value(data[0])
+            data = data[1:]
+            tag = None
+
+        if len(data) == 0:
+            value2 = build_r_object(RObjectType.NILVALUE)
+        else:
+            value2 = self.build_r_list(data, convert_value=convert_value)
+
+        return build_r_object(
+            RObjectType.LIST,
+            value=(value1, value2),
+            tag=tag,
+            )
+
+
+    def build_r_sym(self,
+            name: str,
+    ) -> RObject:
+        """
+        Build R object representing symbol.
+
+        Args:
+            name: String.
+
+        Returns:
+            R object.
+        """
+        # Reference to existing symbol if exists
+        if name in self.reference_name_list:
+            idx = self.reference_name_list.index(name)
+            obj = self.reference_obj_list[idx]
+            return build_r_object(RObjectType.REF, reference=(idx, obj))
+
+        # Create a new symbol
+        r_value = self.convert_to_r_object(name.encode(self.encoding))
+        r_object = build_r_object(RObjectType.SYM, value=r_value)
+
+        # Add to reference list
+        self.reference_name_list.append(name)
+        self.reference_obj_list.append(r_object)
+        return r_object
+
+
+    def convert_to_r_object_for_rda(self,
+            data: Mapping[str, Any],
+    ) -> RObject:
+        """
+        Convert Python dictionary to R object for RDA file.
+
+        Args:
+            data: Python dictionary with data and variable names.
+
+        Returns:
+            Corresponding R object.
+
+        See Also:
+            convert_to_r_object
+        """
+        if not isinstance(data, dict):
+            msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
+            raise TypeError(msg)
+        return self.build_r_list(data)
+
+
+    def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
+            data: Any,  # noqa: ANN401
+    ) -> RObject:
+        """
+        Convert Python data to R object.
+
+        Args:
+            data: Any Python object.
+
+        Returns:
+            Corresponding R object.
+        """
+        # Default args for most types (None/False/0)
+        r_type = None
+        values: list[Any] | tuple[Any, ...]
+        r_value: Any = None
+        gp = 0
+        attributes = None
+        tag = None
 
-    Returns:
-        Corresponding R object.
+        if data is None:
+            r_type = RObjectType.NILVALUE
 
-    See Also:
-        convert_to_r_data
-    """
-    # Default args for most types (None/False/0)
-    r_type = None
-    values: list[Any] | tuple[Any, ...]
-    r_value: Any = None
-    gp = 0
-    attributes = None
-    tag = None
-
-    if data is None:
-        r_type = RObjectType.NILVALUE
-
-    elif isinstance(data, RExpression):
-        r_type = RObjectType.EXPR
-        r_value = [convert_to_r_object(el, encoding=encoding) for el in data.elements]
-
-    elif isinstance(data, RLanguage):
-        r_type = RObjectType.LANG
-        values = data.elements
-        r_value = (build_r_sym(str(values[0]), encoding=encoding),
-                   build_r_list(values[1:], encoding=encoding,
-                                convert_value=build_r_sym))
-
-        if len(data.attributes) > 0:
-            # The following might work here (untested)
-            # attributes = build_r_list(data.attributes, encoding=encoding)  # noqa: ERA001,E501
-            msg = f"type {r_type} with attributes not implemented"
-            raise NotImplementedError(msg)
+        elif isinstance(data, RExpression):
+            r_type = RObjectType.EXPR
+            r_value = [self.convert_to_r_object(el) for el in data.elements]
 
-    elif isinstance(data, (list, tuple, dict)):
-        r_type = RObjectType.VEC
-        values = list(data.values()) if isinstance(data, dict) else data
-        r_value = [convert_to_r_object(el, encoding=encoding) for el in values]
+        elif isinstance(data, RLanguage):
+            r_type = RObjectType.LANG
+            values = data.elements
+            r_value = (self.build_r_sym(str(values[0])),
+                       self.build_r_list(values[1:],
+                                         convert_value=self.build_r_sym))
 
-        if isinstance(data, dict):
-            names = create_unicode_array(data.keys())
-            attributes = build_r_list({"names": names},
-                                      encoding=encoding)
-
-    elif isinstance(data, np.ndarray):
-        if data.dtype.kind in ["O"]:
-            # This is a special case handling only np.array([None])
-            if data.size != 1 or data[0] is not None:
-                msg = "general object array not implemented"
+            if len(data.attributes) > 0:
+                # The following might work here (untested)
+                # attributes = build_r_list(data.attributes)  # noqa: ERA001,E501
+                msg = f"type {r_type} with attributes not implemented"
                 raise NotImplementedError(msg)
-            r_type = RObjectType.STR
-            r_value = [build_r_object(RObjectType.CHAR)]
 
-        elif data.dtype.kind in ["S"]:
-            assert data.ndim == 1
-            r_type = RObjectType.STR
-            r_value = [convert_to_r_object(el, encoding=encoding) for el in data]
+        elif isinstance(data, (list, tuple, dict)):
+            r_type = RObjectType.VEC
+            values = list(data.values()) if isinstance(data, dict) else data
+            r_value = [self.convert_to_r_object(el) for el in values]
+
+            if isinstance(data, dict):
+                names = create_unicode_array(data.keys())
+                attributes = self.build_r_list({"names": names})
+
+        elif isinstance(data, np.ndarray):
+            if data.dtype.kind in ["O"]:
+                # This is a special case handling only np.array([None])
+                if data.size != 1 or data[0] is not None:
+                    msg = "general object array not implemented"
+                    raise NotImplementedError(msg)
+                r_type = RObjectType.STR
+                r_value = [build_r_object(RObjectType.CHAR)]
+
+            elif data.dtype.kind in ["S"]:
+                assert data.ndim == 1
+                r_type = RObjectType.STR
+                r_value = [self.convert_to_r_object(el) for el in data]
+
+            elif data.dtype.kind in ["U"]:
+                assert data.ndim == 1
+                data = np.array([s.encode(self.encoding) for s in data], dtype=np.dtype("S"))
+                return self.convert_to_r_object(data)
 
-        elif data.dtype.kind in ["U"]:
-            assert data.ndim == 1
-            data = np.array([s.encode(encoding) for s in data], dtype=np.dtype("S"))
-            return convert_to_r_object(data, encoding=encoding)
+            else:
+                r_type = {
+                    "b": RObjectType.LGL,
+                    "i": RObjectType.INT,
+                    "f": RObjectType.REAL,
+                    "c": RObjectType.CPLX,
+                }[data.dtype.kind]
+
+                if data.ndim == 0:
+                    r_value = data[np.newaxis]
+                elif data.ndim == 1:
+                    r_value = data
+                else:
+                    # R uses column-major order like Fortran
+                    r_value = np.ravel(data, order="F")
+                    attributes = self.build_r_list({"dim": np.array(data.shape)})
+
+        elif isinstance(data, (bool, int, float, complex)):
+            return self.convert_to_r_object(np.array(data))
+
+        elif isinstance(data, str):
+            r_type = RObjectType.STR
+            r_value = [self.convert_to_r_object(data.encode(self.encoding))]
+
+        elif isinstance(data, bytes):
+            r_type = RObjectType.CHAR
+            if all(chr(byte) in string.printable for byte in data):
+                gp = CharFlags.ASCII
+            elif self.encoding == "utf-8":
+                gp = CharFlags.UTF8
+            elif self.encoding == "cp1252":
+                # Note!
+                # CP1252 and Latin1 are not the same.
+                # Does CharFlags.LATIN1 mean actually CP1252
+                # as R on Windows mentions CP1252 as encoding?
+                # Or does CP1252 change to e.g. CP1250 depending on localization?
+                gp = CharFlags.LATIN1
+            else:
+                msg = f"unsupported encoding: {self.encoding}"
+                raise ValueError(msg)
+            r_value = data
+
+        elif isinstance(data, pd.Series):
+            array = data.array
+            if isinstance(array, pd.Categorical):
+                return self.convert_to_r_object(array)
+            elif isinstance(array, pd.arrays.StringArray):
+                return self.convert_to_r_object(create_unicode_array(array))
+            elif (isinstance(array, pd.arrays.IntegerArray)
+                  or isinstance(array, pd.arrays.NumpyExtensionArray)):
+                return self.convert_to_r_object(data.to_numpy())
+            else:
+                msg = f"pd.Series {type(array)} not implemented"
+                raise NotImplementedError(msg)
 
-        else:
-            r_type = {
-                "b": RObjectType.LGL,
-                "i": RObjectType.INT,
-                "f": RObjectType.REAL,
-                "c": RObjectType.CPLX,
-            }[data.dtype.kind]
-
-            if data.ndim == 0:
-                r_value = data[np.newaxis]
-            elif data.ndim == 1:
-                r_value = data
+        elif isinstance(data, pd.Categorical):
+            r_type = RObjectType.INT
+            r_value = data.codes + 1
+            attributes = self.build_r_list({
+                "levels": create_unicode_array(data.categories),
+                "class": "factor",
+                })
+
+        elif isinstance(data, pd.DataFrame):
+            r_type = RObjectType.VEC
+            names = []
+            r_value = []
+            for column, series in data.items():
+                names.append(column)
+                r_value.append(self.convert_to_r_object(series))
+
+            index = data.index
+            if (isinstance(index, pd.RangeIndex)
+                and index.start == 1
+                and index.stop == data.shape[0] + 1
+                and index.step == 1
+                ):
+                row_names = np.ma.array(  # type: ignore [no-untyped-call]
+                        data=[0, -data.shape[0]],
+                        mask=[True, False],
+                    )
             else:
-                # R uses column-major order like Fortran
-                r_value = np.ravel(data, order="F")
-                attributes = build_r_list({"dim": np.array(data.shape)},
-                                          encoding=encoding)
-
-    elif isinstance(data, (bool, int, float, complex)):
-        return convert_to_r_object(np.array(data), encoding=encoding)
-
-    elif isinstance(data, str):
-        r_type = RObjectType.STR
-        r_value = [convert_to_r_object(data.encode(encoding), encoding=encoding)]
-
-    elif isinstance(data, bytes):
-        r_type = RObjectType.CHAR
-        if all(chr(byte) in string.printable for byte in data):
-            gp = CharFlags.ASCII
-        elif encoding == "utf-8":
-            gp = CharFlags.UTF8
-        elif encoding == "cp1252":
-            # Note!
-            # CP1252 and Latin1 are not the same.
-            # Does CharFlags.LATIN1 mean actually CP1252
-            # as R on Windows mentions CP1252 as encoding?
-            # Or does CP1252 change to e.g. CP1250 depending on localization?
-            gp = CharFlags.LATIN1
-        else:
-            msg = f"unsupported encoding: {encoding}"
-            raise ValueError(msg)
-        r_value = data
-
-    elif isinstance(data, pd.Series):
-        array = data.array
-        if isinstance(array, pd.Categorical):
-            return convert_to_r_object(array, encoding=encoding)
-        elif isinstance(array, pd.arrays.StringArray):
-            return convert_to_r_object(create_unicode_array(array), encoding=encoding)
-        elif (isinstance(array, pd.arrays.IntegerArray)
-              or isinstance(array, pd.arrays.NumpyExtensionArray)):
-            return convert_to_r_object(data.to_numpy(), encoding=encoding)
-        else:
-            msg = f"pd.Series {type(array)} not implemented"
-            raise NotImplementedError(msg)
+                msg = f"pd.DataFrame index {type(index)} not implemented"
+                raise NotImplementedError(msg)
+
+            attributes = self.build_r_list({
+                "names": create_unicode_array(names),
+                "row.names": row_names,
+                "class": "data.frame",
+                })
 
-    elif isinstance(data, pd.Categorical):
-        r_type = RObjectType.INT
-        r_value = data.codes + 1
-        attributes = build_r_list({
-            "levels": create_unicode_array(data.categories),
-            "class": "factor",
-            },
-            encoding=encoding)
-
-    elif isinstance(data, pd.DataFrame):
-        r_type = RObjectType.VEC
-        names = []
-        r_value = []
-        for column, series in data.items():
-            names.append(column)
-            r_value.append(convert_to_r_object(series, encoding=encoding))
-
-        index = data.index
-        if (isinstance(index, pd.RangeIndex)
-            and index.start == 1
-            and index.stop == data.shape[0] + 1
-            and index.step == 1
-            ):
-            row_names = np.ma.array(  # type: ignore [no-untyped-call]
-                    data=[0, -data.shape[0]],
-                    mask=[True, False],
-                )
         else:
-            msg = f"pd.DataFrame index {type(index)} not implemented"
+            msg = f"type {type(data)} not implemented"
             raise NotImplementedError(msg)
 
-        attributes = build_r_list({
-            "names": create_unicode_array(names),
-            "row.names": row_names,
-            "class": "data.frame",
-            },
-            encoding=encoding)
-
-    else:
-        msg = f"type {type(data)} not implemented"
-        raise NotImplementedError(msg)
-
-    return build_r_object(r_type, value=r_value, attributes=attributes, tag=tag, gp=gp)
+        return build_r_object(r_type, value=r_value, attributes=attributes, tag=tag, gp=gp)
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 3413c6d..57c358a 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -10,6 +10,7 @@
 import pytest
 
 import rdata
+from rdata.conversion import ConverterFromPythonToR, build_r_data
 from rdata.unparser import unparse_data
 
 if TYPE_CHECKING:
@@ -127,13 +128,12 @@ def test_convert_to_r(fname: str) -> None:
             encoding = encoding.lower()  # type: ignore [assignment]
 
         try:
+            converter = ConverterFromPythonToR(encoding=encoding)
             if file_type == "rds":
-                r_obj = rdata.conversion.convert_to_r_object(
-                    py_data, encoding=encoding)
+                r_obj = converter.convert_to_r_object(py_data)
             else:
-                r_obj = rdata.conversion.convert_to_r_object_for_rda(
-                    py_data, encoding=encoding)
-            new_r_data = rdata.conversion.build_r_data(
+                r_obj = converter.convert_to_r_object_for_rda(py_data)
+            new_r_data = build_r_data(
                 r_obj,
                 encoding=encoding,
                 format_version=r_data.versions.format,
@@ -150,21 +150,24 @@ def test_convert_to_r_bad_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data = "hello"
     with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
-        rdata.conversion.convert_to_r_object_for_rda(py_data)  # type: ignore [arg-type]
+        converter = ConverterFromPythonToR()
+        converter.convert_to_r_object_for_rda(py_data)  # type: ignore [arg-type]
 
 
 def test_convert_to_r_empty_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data: dict[str, Any] = {}
     with pytest.raises(ValueError, match="(?i)data must not be empty"):
-        rdata.conversion.convert_to_r_object_for_rda(py_data)
+        converter = ConverterFromPythonToR()
+        converter.convert_to_r_object_for_rda(py_data)
 
 
 def test_unparse_bad_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data = "hello"
-    r_obj = rdata.conversion.convert_to_r_object(py_data)
-    r_data = rdata.conversion.build_r_data(r_obj)
+    converter = ConverterFromPythonToR()
+    r_obj = converter.convert_to_r_object(py_data)
+    r_data = build_r_data(r_obj)
     with pytest.raises(ValueError, match="(?i)must be dictionary-like"):
         unparse_data(r_data, file_type="rda")
 
@@ -172,20 +175,23 @@ def test_unparse_bad_rda() -> None:
 def test_convert_to_r_bad_encoding() -> None:
     """Test checking encoding."""
     with pytest.raises(LookupError, match="(?i)unknown encoding"):
-        rdata.conversion.convert_to_r_object("ä", encoding="non-existent")  # type: ignore [arg-type]
+        converter = ConverterFromPythonToR(encoding="non-existent")
+        converter.convert_to_r_object("ä")  # type: ignore [arg-type]
 
 
 def test_convert_to_r_unsupported_encoding() -> None:
     """Test checking encoding."""
     with pytest.raises(ValueError, match="(?i)unsupported encoding"):
-        rdata.conversion.convert_to_r_object("ä", encoding="cp1250")  # type: ignore [arg-type]
+        converter = ConverterFromPythonToR(encoding="cp1250")
+        converter.convert_to_r_object("ä")  # type: ignore [arg-type]
 
 
 def test_unparse_big_int() -> None:
     """Test checking too large integers."""
     big_int = 2**32
-    r_obj = rdata.conversion.convert_to_r_object(big_int)
-    r_data = rdata.conversion.build_r_data(r_obj)
+    converter = ConverterFromPythonToR()
+    r_obj = converter.convert_to_r_object(big_int)
+    r_data = build_r_data(r_obj)
     with pytest.raises(ValueError, match="(?i)not castable"):
         unparse_data(r_data, file_format="xdr")
 

From 1868d8acffb2dab8a7cbe1d0efed4baecefef4a6 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Tue, 10 Sep 2024 09:12:01 +0300
Subject: [PATCH 007/100] Fix masked values in masked array

---
 rdata/conversion/to_r.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 918b89b..3f68fac 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -10,6 +10,7 @@
 import pandas as pd
 
 from rdata.parser import (
+    R_INT_NA,
     CharFlags,
     RData,
     RExtraInfo,
@@ -421,8 +422,9 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 and index.step == 1
                 ):
                 row_names = np.ma.array(  # type: ignore [no-untyped-call]
-                        data=[0, -data.shape[0]],
+                        data=[R_INT_NA, -data.shape[0]],
                         mask=[True, False],
+                        fill_value=R_INT_NA,
                     )
             else:
                 msg = f"pd.DataFrame index {type(index)} not implemented"

From 8d9cb55b96d9e60b2bab100741c24c15f8357405 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Tue, 10 Sep 2024 09:12:40 +0300
Subject: [PATCH 008/100] Compare first string representations

---
 rdata/tests/test_write.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 57c358a..a46d8db 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -142,8 +142,8 @@ def test_convert_to_r(fname: str) -> None:
         except NotImplementedError as e:
             pytest.xfail(str(e))
 
-        assert r_data == new_r_data
         assert str(r_data) == str(new_r_data)
+        assert r_data == new_r_data
 
 
 def test_convert_to_r_bad_rda() -> None:

From 398d1e9baf6dfabc2117b0068f7c71d89c747950 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Tue, 10 Sep 2024 09:39:32 +0300
Subject: [PATCH 009/100] Fix conversion of dataframe columns

---
 rdata/conversion/to_r.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 3f68fac..6225d1b 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -387,17 +387,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = data
 
         elif isinstance(data, pd.Series):
-            array = data.array
-            if isinstance(array, pd.Categorical):
-                return self.convert_to_r_object(array)
-            elif isinstance(array, pd.arrays.StringArray):
-                return self.convert_to_r_object(create_unicode_array(array))
-            elif (isinstance(array, pd.arrays.IntegerArray)
-                  or isinstance(array, pd.arrays.NumpyExtensionArray)):
-                return self.convert_to_r_object(data.to_numpy())
-            else:
-                msg = f"pd.Series {type(array)} not implemented"
-                raise NotImplementedError(msg)
+            msg = f"pd.Series not implemented"
+            raise NotImplementedError(msg)
 
         elif isinstance(data, pd.Categorical):
             r_type = RObjectType.INT
@@ -413,7 +404,20 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = []
             for column, series in data.items():
                 names.append(column)
-                r_value.append(self.convert_to_r_object(series))
+
+                array = series.array
+                if isinstance(array, pd.Categorical):
+                    r_series = self.convert_to_r_object(array)
+                elif isinstance(array, pd.arrays.StringArray):
+                    r_series = self.convert_to_r_object(create_unicode_array(array))
+                elif (isinstance(array, pd.arrays.IntegerArray)
+                      or isinstance(array, pd.arrays.NumpyExtensionArray)):
+                    r_series = self.convert_to_r_object(array.to_numpy())
+                else:
+                    msg = f"pd.DataFrame with pd.Series {type(array)} not implemented"
+                    raise NotImplementedError(msg)
+
+                r_value.append(r_series)
 
             index = data.index
             if (isinstance(index, pd.RangeIndex)

From 9cdd37c27d6d4c93e5db30d5b24cef194586192d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Tue, 10 Sep 2024 14:03:42 +0300
Subject: [PATCH 010/100] Add support for dataframe with string index

---
 rdata/conversion/to_r.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 6225d1b..5a959f0 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -420,6 +420,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 r_value.append(r_series)
 
             index = data.index
+            attr_order = ["names", "row.names", "class"]
             if (isinstance(index, pd.RangeIndex)
                 and index.start == 1
                 and index.stop == data.shape[0] + 1
@@ -430,15 +431,24 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                         mask=[True, False],
                         fill_value=R_INT_NA,
                     )
+            elif isinstance(index, pd.Index):
+                attr_order = ["names", "class", "row.names"]
+                if index.dtype == 'object':
+                    row_names = create_unicode_array(index)
+                else:
+                    msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
+                    raise NotImplementedError(msg)
             else:
                 msg = f"pd.DataFrame index {type(index)} not implemented"
                 raise NotImplementedError(msg)
 
-            attributes = self.build_r_list({
+            attr_dict = {
                 "names": create_unicode_array(names),
                 "row.names": row_names,
                 "class": "data.frame",
-                })
+            }
+
+            attributes = self.build_r_list({k: attr_dict[k] for k in attr_order})
 
         else:
             msg = f"type {type(data)} not implemented"

From 5084d2d2bdbaef131f8abbef997f4579e436c8ee Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 07:44:32 +0300
Subject: [PATCH 011/100] Add assertions for strings

---
 rdata/conversion/to_r.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 5a959f0..6e12f6d 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -65,7 +65,11 @@ def create_unicode_array(
     Returns:
         Array.
     """
-    return np.array(list(names), dtype=np.dtype("U"))
+    name_list = []
+    for name in names:
+        assert isinstance(name, str)
+        name_list.append(name)
+    return np.array(name_list, dtype=np.dtype("U"))
 
 
 def find_is_object(attributes: RObject | None):
@@ -403,6 +407,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             names = []
             r_value = []
             for column, series in data.items():
+                assert isinstance(column, str)
                 names.append(column)
 
                 array = series.array

From af0f6fe31837d58d12be407e4fb7316a19a5bbab Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 07:46:28 +0300
Subject: [PATCH 012/100] Add conversion for rangeindex and range

---
 rdata/conversion/to_r.py | 54 +++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 6e12f6d..fc40503 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -213,20 +213,23 @@ def build_r_list(self,
             data = data.copy()
             key = next(iter(data))
             tag = self.build_r_sym(key)
-            value1 = convert_value(data.pop(key))
+            car = data.pop(key)
         elif isinstance(data, list):
-            value1 = convert_value(data[0])
+            car = data[0]
             data = data[1:]
             tag = None
 
+        if not isinstance(car, RObject):
+            car = convert_value(car)
+
         if len(data) == 0:
-            value2 = build_r_object(RObjectType.NILVALUE)
+            cdr = build_r_object(RObjectType.NILVALUE)
         else:
-            value2 = self.build_r_list(data, convert_value=convert_value)
+            cdr = self.build_r_list(data, convert_value=convert_value)
 
         return build_r_object(
             RObjectType.LIST,
-            value=(value1, value2),
+            value=(car, cdr),
             tag=tag,
             )
 
@@ -390,6 +393,27 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 raise ValueError(msg)
             r_value = data
 
+        elif isinstance(data, range):
+            if data.step != 1:
+                # R supports compact sequences only with step 1;
+                # convert the range to an array of values
+                return self.convert_to_r_object(np.array(data))
+
+            r_type = RObjectType.ALTREP
+            r_value = (
+                self.build_r_list([
+                    self.build_r_sym("compact_intseq"),
+                    self.build_r_sym("base"),
+                    RObjectType.INT.value,
+                ]),
+                self.convert_to_r_object(np.array([
+                    len(data),
+                    data.start,
+                    data.step,
+                ], dtype=float)),
+                self.convert_to_r_object(None),
+            )
+
         elif isinstance(data, pd.Series):
             msg = f"pd.Series not implemented"
             raise NotImplementedError(msg)
@@ -426,16 +450,18 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             index = data.index
             attr_order = ["names", "row.names", "class"]
-            if (isinstance(index, pd.RangeIndex)
-                and index.start == 1
-                and index.stop == data.shape[0] + 1
-                and index.step == 1
+            if isinstance(index, pd.RangeIndex):
+                if (index.start == 1
+                    and index.stop == data.shape[0] + 1
+                    and index.step == 1
                 ):
-                row_names = np.ma.array(  # type: ignore [no-untyped-call]
-                        data=[R_INT_NA, -data.shape[0]],
-                        mask=[True, False],
-                        fill_value=R_INT_NA,
-                    )
+                    row_names = np.ma.array(  # type: ignore [no-untyped-call]
+                            data=[R_INT_NA, -data.shape[0]],
+                            mask=[True, False],
+                            fill_value=R_INT_NA,
+                        )
+                else:
+                    row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
                 attr_order = ["names", "class", "row.names"]
                 if index.dtype == 'object':

From 1c71a866a73cafe3c7ee57416245eeb2adf4bd13 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 07:47:00 +0300
Subject: [PATCH 013/100] Add conversion of integer index

---
 rdata/conversion/to_r.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index fc40503..4743e9f 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -466,6 +466,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 attr_order = ["names", "class", "row.names"]
                 if index.dtype == 'object':
                     row_names = create_unicode_array(index)
+                elif np.issubdtype(index.dtype, np.integer):
+                    row_names = index.to_numpy()
                 else:
                     msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
                     raise NotImplementedError(msg)

From 8fa951e71db2ddb8ddd740a1d8b5b7551c30cf6f Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 07:47:24 +0300
Subject: [PATCH 014/100] Add unparsing altreps

---
 rdata/unparser/_unparser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index 1524317..7dd6243 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -111,6 +111,7 @@ def unparse_r_object(self, obj: RObject) -> None:  # noqa: C901, PLR0912
         elif info.type in {
             RObjectType.LIST,
             RObjectType.LANG,
+            RObjectType.ALTREP,
             # Parser treats the following equal to LIST.
             # Not tested if they work
             # RObjectType.CLO,

From b205d8d7da730a876bfb0a30f90d76a866b69189 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 08:08:09 +0300
Subject: [PATCH 015/100] Move build_r_data function under converter class

---
 rdata/_write.py              | 26 ++++++-------
 rdata/conversion/__init__.py |  1 -
 rdata/conversion/to_r.py     | 72 ++++++++++++++++++------------------
 rdata/tests/test_write.py    | 19 +++++-----
 4 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index 0630d1e..a1fd162 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -3,7 +3,7 @@
 
 from typing import TYPE_CHECKING
 
-from .conversion import build_r_data, ConverterFromPythonToR
+from .conversion import ConverterFromPythonToR
 from .conversion.to_r import DEFAULT_FORMAT_VERSION
 from .unparser import unparse_file
 
@@ -27,10 +27,7 @@ def write_rds(
     """
     Write an RDS file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -52,12 +49,13 @@ def write_rds(
         >>> data = ["hello", 1, 2.2, 3.3+4.4j]
         >>> rdata.write_rds("test.rds", data)
     """
-    r_object = ConverterFromPythonToR(encoding=encoding).convert_to_r_object(data)
-    r_data = build_r_data(
-        r_object,
+    converter = ConverterFromPythonToR(
         encoding=encoding,
         format_version=format_version,
     )
+    r_object = converter.convert_to_r_object(data)
+    r_data = converter.build_r_data(r_object)
+
     unparse_file(
         path,
         r_data,
@@ -79,10 +77,7 @@ def write_rda(
     """
     Write an RDA or RDATA file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object_for_rda`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -104,12 +99,13 @@ def write_rda(
         >>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
         >>> rdata.write_rda("test.rda", data)
     """
-    r_object = ConverterFromPythonToR(encoding=encoding).convert_to_r_object_for_rda(data)
-    r_data = build_r_data(
-        r_object,
+    converter = ConverterFromPythonToR(
         encoding=encoding,
         format_version=format_version,
     )
+    r_object = converter.convert_to_r_object_for_rda(data)
+    r_data = converter.build_r_data(r_object)
+
     unparse_file(
         path,
         r_data,
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index 2ec4f44..e802758 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -25,6 +25,5 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
-    build_r_data as build_r_data,
     ConverterFromPythonToR as ConverterFromPythonToR,
 )
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 4743e9f..c8ec7e0 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -51,6 +51,7 @@ def __call__(self, data: Any, *, encoding: Encoding) -> RObject: # noqa: ANN401
     2: 0x20300,
     3: 0x30500,
 })
+R_MINIMUM_VERSION_WITH_ENCODING: Final[int] = 3
 
 
 def create_unicode_array(
@@ -136,54 +137,55 @@ def build_r_object(
      )
 
 
-def build_r_data(
-        r_object: RObject,
-        *,
-        encoding: Encoding = "utf-8",
-        format_version: int = DEFAULT_FORMAT_VERSION,
-        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
-) -> RData:
+
+class ConverterFromPythonToR:
     """
-    Build RData object from R object.
+    Class converting Python objects to R objects.
 
     Args:
-        r_object: R object.
-        encoding: Encoding saved in the metadata.
+        encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
+    """
 
-    Returns:
-        Corresponding RData object.
+    def __init__(self, *,
+            encoding: Encoding = "utf-8",
+            format_version: int = DEFAULT_FORMAT_VERSION,
+            r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+    ) -> None:
+        self.encoding = encoding
+        self.format_version = format_version
+        self.r_version_serialized = r_version_serialized
+        self.reference_name_list = [None]
+        self.reference_obj_list = [None]
 
-    See Also:
-        convert_to_r_object
-    """
-    versions = RVersions(
-        format_version,
-        r_version_serialized,
-        R_MINIMUM_VERSIONS[format_version],
-    )
 
-    minimum_version_with_encoding = 3
-    extra = (RExtraInfo(encoding.upper())
-             if versions.format >= minimum_version_with_encoding
-             else RExtraInfo(None))
+    def build_r_data(self,
+            r_object: RObject,
+    ) -> RData:
+        """
+        Build RData object from R object.
 
-    return RData(versions, extra, r_object)
+        Args:
+            r_object: R object.
 
+        Returns:
+            Corresponding RData object.
 
-class ConverterFromPythonToR:
-    """
-    Class converting Python objects to R objects.
+        See Also:
+            convert_to_r_object
+        """
+        versions = RVersions(
+            self.format_version,
+            self.r_version_serialized,
+            R_MINIMUM_VERSIONS[self.format_version],
+        )
 
-    Args:
-        encoding: Encoding to be used for strings within data.
-    """
+        extra = (RExtraInfo(self.encoding.upper())
+                 if versions.format >= R_MINIMUM_VERSION_WITH_ENCODING
+                 else RExtraInfo(None))
 
-    def __init__(self, *, encoding: Encoding = "utf-8"):
-        self.encoding = encoding
-        self.reference_name_list = [None]
-        self.reference_obj_list = [None]
+        return RData(versions, extra, r_object)
 
 
     def build_r_list(self,
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index a46d8db..0f814c1 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -10,7 +10,7 @@
 import pytest
 
 import rdata
-from rdata.conversion import ConverterFromPythonToR, build_r_data
+from rdata.conversion import ConverterFromPythonToR
 from rdata.unparser import unparse_data
 
 if TYPE_CHECKING:
@@ -128,17 +128,16 @@ def test_convert_to_r(fname: str) -> None:
             encoding = encoding.lower()  # type: ignore [assignment]
 
         try:
-            converter = ConverterFromPythonToR(encoding=encoding)
-            if file_type == "rds":
-                r_obj = converter.convert_to_r_object(py_data)
-            else:
-                r_obj = converter.convert_to_r_object_for_rda(py_data)
-            new_r_data = build_r_data(
-                r_obj,
+            converter = ConverterFromPythonToR(
                 encoding=encoding,
                 format_version=r_data.versions.format,
                 r_version_serialized=r_data.versions.serialized,
             )
+            if file_type == "rds":
+                r_obj = converter.convert_to_r_object(py_data)
+            else:
+                r_obj = converter.convert_to_r_object_for_rda(py_data)
+            new_r_data = converter.build_r_data(r_obj)
         except NotImplementedError as e:
             pytest.xfail(str(e))
 
@@ -167,7 +166,7 @@ def test_unparse_bad_rda() -> None:
     py_data = "hello"
     converter = ConverterFromPythonToR()
     r_obj = converter.convert_to_r_object(py_data)
-    r_data = build_r_data(r_obj)
+    r_data = converter.build_r_data(r_obj)
     with pytest.raises(ValueError, match="(?i)must be dictionary-like"):
         unparse_data(r_data, file_type="rda")
 
@@ -191,7 +190,7 @@ def test_unparse_big_int() -> None:
     big_int = 2**32
     converter = ConverterFromPythonToR()
     r_obj = converter.convert_to_r_object(big_int)
-    r_data = build_r_data(r_obj)
+    r_data = converter.build_r_data(r_obj)
     with pytest.raises(ValueError, match="(?i)not castable"):
         unparse_data(r_data, file_format="xdr")
 

From 963a9bc7d2dd66af17ef7065a9220a9a38431de5 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 08:08:38 +0300
Subject: [PATCH 016/100] Convert range to array for old format

---
 rdata/conversion/to_r.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index c8ec7e0..b85b6d2 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -396,6 +396,11 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = data
 
         elif isinstance(data, range):
+            if self.format_version < 3:
+                # ALTREP support is from R version 3.5.0
+                # (minimum version for format version 3)
+                return self.convert_to_r_object(np.array(data))
+
             if data.step != 1:
                 # R supports compact sequences only with step 1;
                 # convert the range to an array of values

From 61a2ea22774504a7aaf6f88c3abb9b5ce13143bc Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 08:49:46 +0300
Subject: [PATCH 017/100] Fix ruff

---
 rdata/conversion/to_r.py  | 46 ++++++++++++++++++++++++++-------------
 rdata/tests/test_write.py |  8 +++----
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index b85b6d2..a3041de 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -26,11 +26,11 @@
 )
 
 if TYPE_CHECKING:
-    import numpy.typing as npt
-
     from collections.abc import Mapping
     from typing import Any, Final, Literal, Protocol
 
+    import numpy.typing as npt
+
     Encoding = Literal["utf-8", "cp1252"]
 
 
@@ -52,10 +52,11 @@ def __call__(self, data: Any, *, encoding: Encoding) -> RObject: # noqa: ANN401
     3: 0x30500,
 })
 R_MINIMUM_VERSION_WITH_ENCODING: Final[int] = 3
+R_MINIMUM_VERSION_WITH_ALTREP: Final[int] = 3
 
 
 def create_unicode_array(
-        names: Any,
+        names: Any,  # noqa: ANN401
 ) -> npt.NDArray[Any]:
     """
     Create unicode array from sequence/iterator of strings.
@@ -73,7 +74,7 @@ def create_unicode_array(
     return np.array(name_list, dtype=np.dtype("U"))
 
 
-def find_is_object(attributes: RObject | None):
+def find_is_object(attributes: RObject | None) -> bool:
     if attributes is None:
         return False
     info = attributes.info
@@ -119,7 +120,10 @@ def build_r_object(
     """
     assert r_type is not None
     reference_id, referenced_object = reference
-    assert (reference_id == 0) == (referenced_object == None) == (r_type != RObjectType.REF)
+    assert ((reference_id == 0)
+            == (referenced_object is None)
+            == (r_type != RObjectType.REF)
+            )
     is_object = find_is_object(attributes)
     return RObject(
         RObjectInfo(
@@ -142,17 +146,24 @@ class ConverterFromPythonToR:
     """
     Class converting Python objects to R objects.
 
-    Args:
+    Attributes:
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
     """
-
     def __init__(self, *,
             encoding: Encoding = "utf-8",
             format_version: int = DEFAULT_FORMAT_VERSION,
             r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
     ) -> None:
+        """
+        Init class.
+
+        Args:
+            encoding: Encoding to be used for strings within data.
+            format_version: File format version.
+            r_version_serialized: R version written as the creator of the object.
+        """
         self.encoding = encoding
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
@@ -321,7 +332,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             if len(data.attributes) > 0:
                 # The following might work here (untested)
-                # attributes = build_r_list(data.attributes)  # noqa: ERA001,E501
+                # attributes = build_r_list(data.attributes)  # noqa: ERA001
                 msg = f"type {r_type} with attributes not implemented"
                 raise NotImplementedError(msg)
 
@@ -350,7 +361,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             elif data.dtype.kind in ["U"]:
                 assert data.ndim == 1
-                data = np.array([s.encode(self.encoding) for s in data], dtype=np.dtype("S"))
+                data = np.array([s.encode(self.encoding) for s in data],
+                                dtype=np.dtype("S"))
                 return self.convert_to_r_object(data)
 
             else:
@@ -396,7 +408,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = data
 
         elif isinstance(data, range):
-            if self.format_version < 3:
+            if self.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
                 # ALTREP support is from R version 3.5.0
                 # (minimum version for format version 3)
                 return self.convert_to_r_object(np.array(data))
@@ -422,7 +434,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             )
 
         elif isinstance(data, pd.Series):
-            msg = f"pd.Series not implemented"
+            msg = "pd.Series not implemented"
             raise NotImplementedError(msg)
 
         elif isinstance(data, pd.Categorical):
@@ -446,8 +458,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     r_series = self.convert_to_r_object(array)
                 elif isinstance(array, pd.arrays.StringArray):
                     r_series = self.convert_to_r_object(create_unicode_array(array))
-                elif (isinstance(array, pd.arrays.IntegerArray)
-                      or isinstance(array, pd.arrays.NumpyExtensionArray)):
+                elif isinstance(array, (
+                         pd.arrays.IntegerArray,
+                         pd.arrays.NumpyExtensionArray,
+                )):
                     r_series = self.convert_to_r_object(array.to_numpy())
                 else:
                     msg = f"pd.DataFrame with pd.Series {type(array)} not implemented"
@@ -471,7 +485,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
                 attr_order = ["names", "class", "row.names"]
-                if index.dtype == 'object':
+                if index.dtype == "object":
                     row_names = create_unicode_array(index)
                 elif np.issubdtype(index.dtype, np.integer):
                     row_names = index.to_numpy()
@@ -494,4 +508,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             msg = f"type {type(data)} not implemented"
             raise NotImplementedError(msg)
 
-        return build_r_object(r_type, value=r_value, attributes=attributes, tag=tag, gp=gp)
+        return build_r_object(r_type, value=r_value,
+                              attributes=attributes,
+                              tag=tag, gp=gp)
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 0f814c1..07ac786 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -148,16 +148,16 @@ def test_convert_to_r(fname: str) -> None:
 def test_convert_to_r_bad_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data = "hello"
+    converter = ConverterFromPythonToR()
     with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
-        converter = ConverterFromPythonToR()
         converter.convert_to_r_object_for_rda(py_data)  # type: ignore [arg-type]
 
 
 def test_convert_to_r_empty_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data: dict[str, Any] = {}
+    converter = ConverterFromPythonToR()
     with pytest.raises(ValueError, match="(?i)data must not be empty"):
-        converter = ConverterFromPythonToR()
         converter.convert_to_r_object_for_rda(py_data)
 
 
@@ -173,15 +173,15 @@ def test_unparse_bad_rda() -> None:
 
 def test_convert_to_r_bad_encoding() -> None:
     """Test checking encoding."""
+    converter = ConverterFromPythonToR(encoding="non-existent")
     with pytest.raises(LookupError, match="(?i)unknown encoding"):
-        converter = ConverterFromPythonToR(encoding="non-existent")
         converter.convert_to_r_object("ä")  # type: ignore [arg-type]
 
 
 def test_convert_to_r_unsupported_encoding() -> None:
     """Test checking encoding."""
+    converter = ConverterFromPythonToR(encoding="cp1250")
     with pytest.raises(ValueError, match="(?i)unsupported encoding"):
-        converter = ConverterFromPythonToR(encoding="cp1250")
         converter.convert_to_r_object("ä")  # type: ignore [arg-type]
 
 

From 937908bef933a12b1decb21a87fd1b4a58885ee6 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 08:52:57 +0300
Subject: [PATCH 018/100] Set object flag explicitly

---
 rdata/conversion/to_r.py | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index a3041de..c6471c0 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -74,27 +74,11 @@ def create_unicode_array(
     return np.array(name_list, dtype=np.dtype("U"))
 
 
-def find_is_object(attributes: RObject | None) -> bool:
-    if attributes is None:
-        return False
-    info = attributes.info
-    if info.type != RObjectType.LIST:
-        return False
-    if not info.tag:
-        return False
-    tag = attributes.tag
-    if tag.info.type == RObjectType.REF:
-        tag = tag.referenced_object
-    if (tag.info.type == RObjectType.SYM
-        and tag.value.value == b"class"):
-        return True
-    return find_is_object(attributes.value[1])
-
-
 def build_r_object(
         r_type: RObjectType,
         *,
         value: Any = None,  # noqa: ANN401
+        is_object: bool = False,
         attributes: RObject | None = None,
         tag: RObject | None = None,
         gp: int = 0,
@@ -106,6 +90,7 @@ def build_r_object(
     Args:
         r_type: Type indentifier.
         value: Value for RObject.
+        is_object: True if RObject represents object.
         attributes: Same as in RObject.
         tag: Same as in RObject.
         gp: Same as in RObjectInfo.
@@ -124,7 +109,6 @@ def build_r_object(
             == (referenced_object is None)
             == (r_type != RObjectType.REF)
             )
-    is_object = find_is_object(attributes)
     return RObject(
         RObjectInfo(
             r_type,
@@ -312,9 +296,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         r_type = None
         values: list[Any] | tuple[Any, ...]
         r_value: Any = None
-        gp = 0
+        is_object = False
         attributes = None
         tag = None
+        gp = 0
 
         if data is None:
             r_type = RObjectType.NILVALUE
@@ -438,6 +423,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             raise NotImplementedError(msg)
 
         elif isinstance(data, pd.Categorical):
+            is_object = True
             r_type = RObjectType.INT
             r_value = data.codes + 1
             attributes = self.build_r_list({
@@ -446,6 +432,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 })
 
         elif isinstance(data, pd.DataFrame):
+            is_object = True
             r_type = RObjectType.VEC
             names = []
             r_value = []
@@ -509,5 +496,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             raise NotImplementedError(msg)
 
         return build_r_object(r_type, value=r_value,
+                              is_object=is_object,
                               attributes=attributes,
                               tag=tag, gp=gp)

From 8eda45413338efa212f9b62bd27c8a22b02cc1e7 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 09:20:16 +0300
Subject: [PATCH 019/100] Fix mypy

---
 rdata/conversion/to_r.py  | 23 ++++++++++++-----------
 rdata/tests/test_write.py |  8 ++++----
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index c6471c0..415f94b 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -37,7 +37,7 @@
     class Converter(Protocol):
         """Protocol for Py-to-R conversion."""
 
-        def __call__(self, data: Any, *, encoding: Encoding) -> RObject: # noqa: ANN401
+        def __call__(self, data: Any) -> RObject: # noqa: ANN401
             """Convert Python object to R object."""
 
 
@@ -82,7 +82,7 @@ def build_r_object(
         attributes: RObject | None = None,
         tag: RObject | None = None,
         gp: int = 0,
-        reference: tuple(int, RObject | None) = (0, None),
+        reference: tuple[int, RObject | None] = (0, None),
 ) -> RObject:
     """
     Build R object.
@@ -151,8 +151,8 @@ def __init__(self, *,
         self.encoding = encoding
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
-        self.reference_name_list = [None]
-        self.reference_obj_list = [None]
+        self.reference_name_list: list[None | str] = [None]
+        self.reference_obj_list: list[None | RObject] = [None]
 
 
     def build_r_data(self,
@@ -313,7 +313,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             values = data.elements
             r_value = (self.build_r_sym(str(values[0])),
                        self.build_r_list(values[1:],
-                                         convert_value=self.build_r_sym))
+                                         convert_value=self.build_r_sym))  # type: ignore [arg-type]
 
             if len(data.attributes) > 0:
                 # The following might work here (untested)
@@ -434,11 +434,11 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         elif isinstance(data, pd.DataFrame):
             is_object = True
             r_type = RObjectType.VEC
-            names = []
+            column_names = []
             r_value = []
             for column, series in data.items():
                 assert isinstance(column, str)
-                names.append(column)
+                column_names.append(column)
 
                 array = series.array
                 if isinstance(array, pd.Categorical):
@@ -447,7 +447,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     r_series = self.convert_to_r_object(create_unicode_array(array))
                 elif isinstance(array, (
                          pd.arrays.IntegerArray,
-                         pd.arrays.NumpyExtensionArray,
+                         pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
                 )):
                     r_series = self.convert_to_r_object(array.to_numpy())
                 else:
@@ -459,11 +459,12 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             index = data.index
             attr_order = ["names", "row.names", "class"]
             if isinstance(index, pd.RangeIndex):
+                assert isinstance(index.start, int)
                 if (index.start == 1
                     and index.stop == data.shape[0] + 1
                     and index.step == 1
                 ):
-                    row_names = np.ma.array(  # type: ignore [no-untyped-call]
+                    row_names = np.ma.array(
                             data=[R_INT_NA, -data.shape[0]],
                             mask=[True, False],
                             fill_value=R_INT_NA,
@@ -474,7 +475,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 attr_order = ["names", "class", "row.names"]
                 if index.dtype == "object":
                     row_names = create_unicode_array(index)
-                elif np.issubdtype(index.dtype, np.integer):
+                elif np.issubdtype(str(index.dtype), np.integer):
                     row_names = index.to_numpy()
                 else:
                     msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
@@ -484,7 +485,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 raise NotImplementedError(msg)
 
             attr_dict = {
-                "names": create_unicode_array(names),
+                "names": create_unicode_array(column_names),
                 "row.names": row_names,
                 "class": "data.frame",
             }
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 07ac786..86e9762 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -173,16 +173,16 @@ def test_unparse_bad_rda() -> None:
 
 def test_convert_to_r_bad_encoding() -> None:
     """Test checking encoding."""
-    converter = ConverterFromPythonToR(encoding="non-existent")
+    converter = ConverterFromPythonToR(encoding="non-existent")  # type: ignore [arg-type]
     with pytest.raises(LookupError, match="(?i)unknown encoding"):
-        converter.convert_to_r_object("ä")  # type: ignore [arg-type]
+        converter.convert_to_r_object("ä")
 
 
 def test_convert_to_r_unsupported_encoding() -> None:
     """Test checking encoding."""
-    converter = ConverterFromPythonToR(encoding="cp1250")
+    converter = ConverterFromPythonToR(encoding="cp1250")  # type: ignore [arg-type]
     with pytest.raises(ValueError, match="(?i)unsupported encoding"):
-        converter.convert_to_r_object("ä")  # type: ignore [arg-type]
+        converter.convert_to_r_object("ä")
 
 
 def test_unparse_big_int() -> None:

From efbb09d2af88fa2bbb0d02cae4923cbdaa384bcf Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 09:48:17 +0300
Subject: [PATCH 020/100] Add tests for different dataframe index types

---
 .../data/test_dataframe_int_rownames.rds      | Bin 0 -> 123 bytes
 .../data/test_dataframe_range_rownames.rds    | Bin 0 -> 163 bytes
 rdata/tests/test_rdata.py                     |  30 ++++++++++++++++++
 3 files changed, 30 insertions(+)
 create mode 100644 rdata/tests/data/test_dataframe_int_rownames.rds
 create mode 100644 rdata/tests/data/test_dataframe_range_rownames.rds

diff --git a/rdata/tests/data/test_dataframe_int_rownames.rds b/rdata/tests/data/test_dataframe_int_rownames.rds
new file mode 100644
index 0000000000000000000000000000000000000000..74772a2ea72e7b5f2c3d9973ed8f805cd58752fd
GIT binary patch
literal 123
zcmb2|=3oE==I#ec2?+^l35kr8);Op!XJ>TGUdK8?o#%-GlcBsvV>1`CgvAVpOlvlc
zghZy*$%zkGGS7LR@zrHBVwu1aIq7pT&qYzS=PF{0wV!x->Udu9)@GX3xNOd5jr}qF
ZX=eI2jtZP%o5b>{o<Wc&((we)P5@(EDE9yW

literal 0
HcmV?d00001

diff --git a/rdata/tests/data/test_dataframe_range_rownames.rds b/rdata/tests/data/test_dataframe_range_rownames.rds
new file mode 100644
index 0000000000000000000000000000000000000000..2f7ae99781f89c86daecafa56bbdf59b50432812
GIT binary patch
literal 163
zcmV;U09^kciwFP!000001B>8dU|?WoU||80tUx9MYiNj@t_6@M4CF8ZF&{{Qg9-x}
zIG8|bI|e9VVFIgTVc-O5&P&WqEe0|KkOWzh^K%T*6(;8-7NaZVN=Yn9)JrP@nt`UA
zvnan@4`%i|AV!wtOU}<NNK7t?&&(?+PAx=}Wl2gbPKAo`LcIi{|G~Y+0re8pL-rqF
RUIVN72LS(ywEQ9g0077kJ*5Bu

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index ccffead..898d6c9 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -508,6 +508,36 @@ def test_dataframe_rownames(self) -> None:
             ),
         )
 
+    def test_dataframe_int_rownames(self) -> None:
+        """Test dataframe conversion."""
+        # File created in R with
+        # df = data.frame(col1=c(10, 20, 30), row.names=c(3L, 6L, 9L)); saveRDS(df, file="test_dataframe_int_rownames.rds")  # noqa: E501
+        data = rdata.read_rda(TESTDATA_PATH / "test_dataframe_int_rownames.rds")
+
+        index = np.array([3, 6, 9], dtype=np.int32)
+        ref = pd.DataFrame(
+            {
+                "col1": pd.Series([10., 20., 30.], dtype=pd.Float64Dtype(), index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
+    def test_dataframe_range_rownames(self) -> None:
+        """Test dataframe conversion."""
+        # File created in R with
+        # df = data.frame(col1=c(10, 20, 30), row.names=2:4); saveRDS(df, file="test_dataframe_range_rownames.rds")  # noqa: E501
+        data = rdata.read_rda(TESTDATA_PATH / "test_dataframe_range_rownames.rds")
+
+        index = pd.RangeIndex(2, 5)
+        ref = pd.DataFrame(
+            {
+                "col1": pd.Series([10., 20., 30.], dtype=pd.Float64Dtype(), index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
     def test_ts(self) -> None:
         """Test time series conversion."""
         data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")

From 32a2cc6175c1a6985f427fc782bd3e238119aba5 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 10:13:40 +0300
Subject: [PATCH 021/100] Test converting expanded altrep

---
 rdata/tests/test_write.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 86e9762..a5a2127 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -113,7 +113,7 @@ def test_convert_to_r(fname: str) -> None:
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
 
-        r_data = rdata.parser.parse_data(data, expand_altrep=False)
+        r_data = rdata.parser.parse_data(data)
 
         try:
             py_data = rdata.conversion.convert(r_data)

From 1f4e8d824c3d0627366887d4d46db7f627b80d6e Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 10:14:28 +0300
Subject: [PATCH 022/100] Add only non-nil attributes to expanded altrep

---
 rdata/parser/_parser.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 902484b..6b9808f 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -941,7 +941,10 @@ def parse_R_object(  # noqa: N802, C901, PLR0912, PLR0915
                     info=altrep_info,
                     state=altrep_state,
                 )
-                attributes = altrep_attr
+                if altrep_attr.info.type != RObjectType.NILVALUE:
+                    info.attributes = True
+                    attributes_read = True
+                    attributes = altrep_attr
             else:
                 value = (altrep_info, altrep_state, altrep_attr)
 

From 237bc22cc6bf10fcb81268ddd178d7a5f3b094b6 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:04:05 +0300
Subject: [PATCH 023/100] Enable general rangeindex in dataframe

---
 rdata/conversion/_conversion.py | 51 ++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 7ad0957..fa846bf 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -394,6 +394,52 @@ def convert_array(
     return value  # type: ignore [no-any-return]
 
 
+def convert_altrep_to_range(
+    r_altrep: parser.RObject,
+) -> range:
+    """
+    Convert a R altrep to range object.
+
+    Args:
+        r_altrep: R altrep object
+
+    Returns:
+        Array.
+
+    See Also:
+        convert_array
+    """
+    if r_altrep.info.type != parser.RObjectType.ALTREP:
+        msg = "Must receive an altrep object"
+        raise TypeError(msg)
+
+    info, state, attr = r_altrep.value
+    assert attr.info.type == parser.RObjectType.NILVALUE
+
+    assert info.info.type == parser.RObjectType.LIST
+
+    class_sym = info.value[0]
+    while class_sym.info.type == parser.RObjectType.REF:
+        class_sym = class_sym.referenced_object
+
+    assert class_sym.info.type == parser.RObjectType.SYM
+    assert class_sym.value.info.type == parser.RObjectType.CHAR
+
+    altrep_name = class_sym.value.value
+    assert isinstance(altrep_name, bytes)
+
+    if altrep_name != b"compact_intseq":
+        msg = "Only compact integer sequences can be converted to range"
+        raise NotImplementedError(msg)
+
+    n = int(state.value[0])
+    start = int(state.value[1])
+    step = int(state.value[2])
+    stop = start + (n - 1) * step
+    value = range(start, stop + 1, step)
+    return value
+
+
 R_INT_MIN = -2**31
 
 
@@ -430,7 +476,7 @@ def dataframe_constructor(
             and isinstance(row_names, np.ma.MaskedArray)
             and row_names.mask[0]
         )
-        else tuple(row_names)
+        else row_names
     )
 
     return pd.DataFrame(obj, columns=obj, index=index)
@@ -820,6 +866,9 @@ def _convert_next(  # noqa: C901, PLR0912, PLR0915
 
             value = None
 
+        elif obj.info.type == parser.RObjectType.ALTREP:
+            value = convert_altrep_to_range(obj)
+
         else:
             msg = f"Type {obj.info.type} not implemented"
             raise NotImplementedError(msg)

From 6859b8cabeff5e051f868d64f00587b4ccb0d18b Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:05:42 +0300
Subject: [PATCH 024/100] Test conversion of altreps

---
 rdata/tests/test_write.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index a5a2127..0fe7934 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -98,7 +98,8 @@ def test_unparse(fname: str) -> None:
 
 
 @pytest.mark.parametrize("fname", fnames, ids=fnames)
-def test_convert_to_r(fname: str) -> None:
+@pytest.mark.parametrize("expand_altrep", [True, False])
+def test_convert_to_r(fname: str, expand_altrep: bool) -> None:
     """Test converting Python data to RData object."""
     with (TESTDATA_PATH / fname).open("rb") as f:
         # Skip test files without unique R->py->R transformation
@@ -113,7 +114,7 @@ def test_convert_to_r(fname: str) -> None:
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
 
-        r_data = rdata.parser.parse_data(data)
+        r_data = rdata.parser.parse_data(data, expand_altrep=expand_altrep)
 
         try:
             py_data = rdata.conversion.convert(r_data)
@@ -144,6 +145,18 @@ def test_convert_to_r(fname: str) -> None:
         assert str(r_data) == str(new_r_data)
         assert r_data == new_r_data
 
+        # Check futher that the resulting unparsed data is correct to ensure that
+        # Python-to-R conversion hasn't created any odd objects that can't be unparsed
+        if not expand_altrep:
+            file_type, file_format = parse_file_type_and_format(data)
+            out_data = unparse_data(
+                new_r_data, file_format=file_format, file_type=file_type)
+
+            if file_format == "ascii":
+                data = data.replace(b"\r\n", b"\n")
+
+            assert data == out_data
+
 
 def test_convert_to_r_bad_rda() -> None:
     """Test checking that data for RDA has variable names."""

From 5ac49d0cae3d9c4dec8c2abdfed4ef88afab8bfd Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:06:18 +0300
Subject: [PATCH 025/100] Change attribute order to match test files

---
 rdata/conversion/to_r.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 415f94b..b09a913 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -470,6 +470,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                             fill_value=R_INT_NA,
                         )
                 else:
+                    attr_order = ["names", "class", "row.names"]
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
                 attr_order = ["names", "class", "row.names"]

From 6ad1408ebe60e17586b8bb1397cd4e1adc5d27ef Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:18:19 +0300
Subject: [PATCH 026/100] Add comment about reordering attributes

---
 rdata/conversion/to_r.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index b09a913..01ed7aa 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -456,8 +456,12 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
                 r_value.append(r_series)
 
-            index = data.index
+            # In test files the order in which attributes are written varies.
+            # We replicate here the order matching test files, but likely
+            # R could read files with attributes in any order.
             attr_order = ["names", "row.names", "class"]
+
+            index = data.index
             if isinstance(index, pd.RangeIndex):
                 assert isinstance(index.start, int)
                 if (index.start == 1
@@ -490,7 +494,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 "row.names": row_names,
                 "class": "data.frame",
             }
-
             attributes = self.build_r_list({k: attr_dict[k] for k in attr_order})
 
         else:

From 1c458ba617d0f3f895c0ccc4d984fecde9391b8a Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:25:08 +0300
Subject: [PATCH 027/100] Fix ruff and mypy

---
 rdata/conversion/_conversion.py | 3 +--
 rdata/tests/test_rdata.py       | 4 ++--
 rdata/tests/test_write.py       | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index fa846bf..efbc41c 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -436,8 +436,7 @@ def convert_altrep_to_range(
     start = int(state.value[1])
     step = int(state.value[2])
     stop = start + (n - 1) * step
-    value = range(start, stop + 1, step)
-    return value
+    return range(start, stop + 1, step)
 
 
 R_INT_MIN = -2**31
diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 898d6c9..47f07ff 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -512,7 +512,7 @@ def test_dataframe_int_rownames(self) -> None:
         """Test dataframe conversion."""
         # File created in R with
         # df = data.frame(col1=c(10, 20, 30), row.names=c(3L, 6L, 9L)); saveRDS(df, file="test_dataframe_int_rownames.rds")  # noqa: E501
-        data = rdata.read_rda(TESTDATA_PATH / "test_dataframe_int_rownames.rds")
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_int_rownames.rds")
 
         index = np.array([3, 6, 9], dtype=np.int32)
         ref = pd.DataFrame(
@@ -527,7 +527,7 @@ def test_dataframe_range_rownames(self) -> None:
         """Test dataframe conversion."""
         # File created in R with
         # df = data.frame(col1=c(10, 20, 30), row.names=2:4); saveRDS(df, file="test_dataframe_range_rownames.rds")  # noqa: E501
-        data = rdata.read_rda(TESTDATA_PATH / "test_dataframe_range_rownames.rds")
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_range_rownames.rds")
 
         index = pd.RangeIndex(2, 5)
         ref = pd.DataFrame(
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 0fe7934..aef384d 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -99,7 +99,7 @@ def test_unparse(fname: str) -> None:
 
 @pytest.mark.parametrize("fname", fnames, ids=fnames)
 @pytest.mark.parametrize("expand_altrep", [True, False])
-def test_convert_to_r(fname: str, expand_altrep: bool) -> None:
+def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
     """Test converting Python data to RData object."""
     with (TESTDATA_PATH / fname).open("rb") as f:
         # Skip test files without unique R->py->R transformation

From 92429caef84429a1ab3886f5bab92d0e0e1a02dd Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:46:24 +0300
Subject: [PATCH 028/100] Add test for dataframe with different dtypes

---
 rdata/tests/data/test_dataframe_dtypes.rds | Bin 0 -> 217 bytes
 rdata/tests/test_rdata.py                  |  19 +++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 rdata/tests/data/test_dataframe_dtypes.rds

diff --git a/rdata/tests/data/test_dataframe_dtypes.rds b/rdata/tests/data/test_dataframe_dtypes.rds
new file mode 100644
index 0000000000000000000000000000000000000000..aeb9ffbb9d7556e11055fe05856abbaecf046e34
GIT binary patch
literal 217
zcmV;~04Dz*iwFP!000001B>8dU|?WoU||80tUx9MYiNj@t_6@M4B`MWFIWKs14y9=
z5X%8EA4tsp<4h2k<-iD~xzfM@C?^0hlz|gSGgiPDl`uvX)Hsl_j0_L}qWM8$4gwGX
z2N5VO0i|W2v;ve?g6d>oVFK%i7|xoPn44M*bptC@kU2B21SX%Blb;CVuoagSW#*+r
zc`Ql!`8iNFdvbnmK~8D~y6MR|iN!F}VS-#Ki6x18X+=Pb(Ufx*<(KQh90UqTrUoGK
T|NsBLK<Xa=q6LRfd;tIeumn?A

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 47f07ff..8636f87 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -538,6 +538,25 @@ def test_dataframe_range_rownames(self) -> None:
         )
         pd.testing.assert_frame_equal(data, ref)
 
+    def test_dataframe_dtypes(self) -> None:
+        """Test dataframe conversion."""
+        # File created in R with
+        # df = data.frame(int=c(10L, 20L, 30L), float=c(1.1, 2.2, 3.3), string=c("x", "y", "z"), bool=as.logical(c(1, 0, 1)), complex=c(4+5i, 6+7i, 8+9i)); print(df); saveRDS(df, file="test_dataframe_dtypes.rds")  # noqa: E501
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes.rds")
+
+        index = pd.RangeIndex(1, 4)
+        ref = pd.DataFrame(
+            {
+                "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype(), index=index),
+                "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series([True, False, True], dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
     def test_ts(self) -> None:
         """Test time series conversion."""
         data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")

From 5cf678d055beb89a476c0ada7c95e54142607c17 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 11:48:16 +0300
Subject: [PATCH 029/100] Add conversion of boolean pd arrays

---
 rdata/conversion/to_r.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 01ed7aa..f583d3a 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -446,8 +446,9 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 elif isinstance(array, pd.arrays.StringArray):
                     r_series = self.convert_to_r_object(create_unicode_array(array))
                 elif isinstance(array, (
-                         pd.arrays.IntegerArray,
-                         pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
+                    pd.arrays.IntegerArray,
+                    pd.arrays.BooleanArray,
+                    pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
                 )):
                     r_series = self.convert_to_r_object(array.to_numpy())
                 else:

From f379fc97127a54caa9f1e289f0ce16e2dd52f819 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 14:25:08 +0300
Subject: [PATCH 030/100] Add test for pandas dtypes

---
 rdata/tests/test_write.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index aef384d..9d24052 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -7,6 +7,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import numpy as np
+import pandas as pd
 import pytest
 
 import rdata
@@ -208,6 +210,37 @@ def test_unparse_big_int() -> None:
         unparse_data(r_data, file_format="xdr")
 
 
+def test_convert_dataframe_pandas_dtypes() -> None:
+    """Test converting dataframe with pandas dtypes."""
+    df1 = pd.DataFrame(
+        {
+            "int": np.array([10, 20, 30], dtype=np.int32),
+            "float": [1.1, 2.2, 3.3],
+            "string": ["x" ,"y", "z"],
+            "bool": [True, False, True],
+            "complex": [4+5j, 6+7j, 8+9j],
+        },
+        index=range(3),
+    )
+
+    df2 = pd.DataFrame(
+        {
+            "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype()),
+            "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype()),
+            "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype()),
+            "bool": pd.Series([True, False, True], dtype=pd.BooleanDtype()),
+            "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex),
+        },
+        index=pd.RangeIndex(3),
+    )
+
+    r_obj1 = ConverterFromPythonToR().convert_to_r_object(df1)
+    r_obj2 = ConverterFromPythonToR().convert_to_r_object(df2)
+
+    assert str(r_obj1) == str(r_obj2)
+    assert r_obj1 == r_obj2
+
+
 @pytest.mark.parametrize("compression", [*valid_compressions, "fail"])
 @pytest.mark.parametrize("file_format", [*valid_formats, None, "fail"])
 @pytest.mark.parametrize("file_type", ["rds", "rda"])

From ddabf65432b7ded9fc5bb068410832dd58207e1c Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 14:25:37 +0300
Subject: [PATCH 031/100] Add missing conversions

---
 rdata/conversion/to_r.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f583d3a..e976897 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -446,11 +446,16 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 elif isinstance(array, pd.arrays.StringArray):
                     r_series = self.convert_to_r_object(create_unicode_array(array))
                 elif isinstance(array, (
-                    pd.arrays.IntegerArray,
                     pd.arrays.BooleanArray,
+                    pd.arrays.IntegerArray,
+                    pd.arrays.FloatingArray,
                     pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
                 )):
-                    r_series = self.convert_to_r_object(array.to_numpy())
+                    np_array = array.to_numpy()
+                    if np_array.dtype.kind == "O":
+                        r_series = self.convert_to_r_object(create_unicode_array(array))
+                    else:
+                        r_series = self.convert_to_r_object(array.to_numpy())
                 else:
                     msg = f"pd.DataFrame with pd.Series {type(array)} not implemented"
                     raise NotImplementedError(msg)

From 9dd2559b00fd7f3e0f9d39ec40b29af8713b7a0b Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 14:48:53 +0300
Subject: [PATCH 032/100] Set dataframe attribute order file-by-file

---
 rdata/conversion/to_r.py  | 16 ++++++++--------
 rdata/tests/test_write.py | 21 +++++++++++++++------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index e976897..efc1f48 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -154,6 +154,11 @@ def __init__(self, *,
         self.reference_name_list: list[None | str] = [None]
         self.reference_obj_list: list[None | RObject] = [None]
 
+        # In test files the order in which dataframe attributes are written varies.
+        # R can read files with attributes in any order, but this variable
+        # is used in tests to change the attribute order to match with the test file.
+        self.df_attr_order = None
+
 
     def build_r_data(self,
             r_object: RObject,
@@ -462,11 +467,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
                 r_value.append(r_series)
 
-            # In test files the order in which attributes are written varies.
-            # We replicate here the order matching test files, but likely
-            # R could read files with attributes in any order.
-            attr_order = ["names", "row.names", "class"]
-
             index = data.index
             if isinstance(index, pd.RangeIndex):
                 assert isinstance(index.start, int)
@@ -480,10 +480,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                             fill_value=R_INT_NA,
                         )
                 else:
-                    attr_order = ["names", "class", "row.names"]
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
-                attr_order = ["names", "class", "row.names"]
                 if index.dtype == "object":
                     row_names = create_unicode_array(index)
                 elif np.issubdtype(str(index.dtype), np.integer):
@@ -500,7 +498,9 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 "row.names": row_names,
                 "class": "data.frame",
             }
-            attributes = self.build_r_list({k: attr_dict[k] for k in attr_order})
+            if self.df_attr_order is not None:
+                attr_dict = {k: attr_dict[k] for k in self.df_attr_order}
+            attributes = self.build_r_list(attr_dict)
 
         else:
             msg = f"type {type(data)} not implemented"
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 9d24052..fe67aef 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -130,20 +130,29 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
         else:
             encoding = encoding.lower()  # type: ignore [assignment]
 
+        converter = ConverterFromPythonToR(
+            encoding=encoding,
+            format_version=r_data.versions.format,
+            r_version_serialized=r_data.versions.serialized,
+        )
+        if fname in [
+            "test_dataframe_dtypes.rds",
+            "test_dataframe_int_rownames.rds",
+            "test_dataframe_range_rownames.rds",
+            "test_dataframe_rownames.rda",
+        ]:
+            converter.df_attr_order = ["names", "class", "row.names"]
+
         try:
-            converter = ConverterFromPythonToR(
-                encoding=encoding,
-                format_version=r_data.versions.format,
-                r_version_serialized=r_data.versions.serialized,
-            )
             if file_type == "rds":
                 r_obj = converter.convert_to_r_object(py_data)
             else:
                 r_obj = converter.convert_to_r_object_for_rda(py_data)
-            new_r_data = converter.build_r_data(r_obj)
         except NotImplementedError as e:
             pytest.xfail(str(e))
 
+        new_r_data = converter.build_r_data(r_obj)
+
         assert str(r_data) == str(new_r_data)
         assert r_data == new_r_data
 

From 865227183237f2a2c715aee0d208479bf0ab8135 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 15:05:03 +0300
Subject: [PATCH 033/100] Add test for dataframe with NAs

---
 .../data/test_dataframe_dtypes_with_na.rds    | Bin 0 -> 235 bytes
 rdata/tests/test_rdata.py                     |  19 ++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 rdata/tests/data/test_dataframe_dtypes_with_na.rds

diff --git a/rdata/tests/data/test_dataframe_dtypes_with_na.rds b/rdata/tests/data/test_dataframe_dtypes_with_na.rds
new file mode 100644
index 0000000000000000000000000000000000000000..17a170c0e2b53e2b70a11b4ba4c44dae95f48313
GIT binary patch
literal 235
zcmV<H02KcpiwFP!000001B>8dU|?WoU||80tUx9MYiNj@t_6@M4B`MWFAxJ|89)j}
zfLN}90Sx#+;`SeBg1{^XMkvjd1_t#XKuXvb0SN)HeijB!AkA0-V^qQzRX~FCKM+8z
z09nNdv4MdRY85|7+(7`saS(yh5>Q$ON-IEVB?yh=1Snu(0-Fu7m^CjkH?<h*K31q8
zb7o!%Og=3qKM}@ZD=sO@%u9#zSd#MdbD(VY<ow)%oYV?*)01-&i(#h21i4ZYOA__c
lihvfQDd#N8FV}-P2o&H<AfE#R<qweh2LKaNiRPpM002U$S7ZPH

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 8636f87..dfe03d8 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -557,6 +557,25 @@ def test_dataframe_dtypes(self) -> None:
         )
         pd.testing.assert_frame_equal(data, ref)
 
+    def test_dataframe_dtypes_with_na(self) -> None:
+        """Test dataframe conversion."""
+        # File created in R with
+        # df = data.frame(int=c(10L, 20L, 30L, NA), float=c(1.1, 2.2, 3.3, NA), string=c("x", "y", "z", NA), bool=as.logical(c(1, 0, 1, NA)), complex=c(4+5i, 6+7i, 8+9i, NA)); saveRDS(df, file="test_dataframe_dtypes_with_na.rds")
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes_with_na.rds")
+
+        index = pd.RangeIndex(1, 5)
+        ref = pd.DataFrame(
+            {
+                "int": pd.Series([10, 20, 30, None], dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series([1.1, 2.2, 3.3, None], dtype=pd.Float64Dtype(), index=index),
+                "string": pd.Series(["x" ,"y", "z", None], dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series([True, False, True, None], dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series([4+5j, 6+7j, 8+9j, None], dtype=complex, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
     def test_ts(self) -> None:
         """Test time series conversion."""
         data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")

From 993e2ed0f07a84aaf4576af24a4b439593cf9f29 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:02:36 +0300
Subject: [PATCH 034/100] Add dataframe column transformation for more types

---
 rdata/conversion/_conversion.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index efbc41c..24c5022 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -445,14 +445,26 @@ def convert_altrep_to_range(
 def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
 
     if isinstance(source, np.ndarray):
+        dtype: Any
         if np.issubdtype(source.dtype, np.integer):
-            return pd.Series(source, dtype=pd.Int32Dtype()).array
-
-        if np.issubdtype(source.dtype, np.bool_):
-            return pd.Series(source, dtype=pd.BooleanDtype()).array
+            dtype = pd.Int32Dtype()
+        elif np.issubdtype(source.dtype, np.floating):
+            dtype = pd.Float64Dtype()
+        elif np.issubdtype(source.dtype, np.complexfloating):
+            # There seems to be no pandas type for complex array
+            return source
+        elif np.issubdtype(source.dtype, np.bool_):
+            dtype = pd.BooleanDtype()
+        elif np.issubdtype(source.dtype, np.str_):
+            dtype = pd.StringDtype()
+        elif np.issubdtype(source.dtype, np.object_):
+            for value in source:
+                assert isinstance(value, str) or value is None
+            dtype = pd.StringDtype()
+        else:
+            return source
 
-        if np.issubdtype(source.dtype, np.str_):
-            return pd.Series(source, dtype=pd.StringDtype()).array
+        return pd.Series(source, dtype=dtype).array
 
     return source
 

From dc1950df66b485df1df2f4aaa9d2bf4affb86366 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:05:06 +0300
Subject: [PATCH 035/100] Fix NA values in dataframes

---
 rdata/conversion/to_r.py | 107 +++++++++++++++++++++++++--------------
 rdata/parser/__init__.py |   1 +
 rdata/parser/_parser.py  |   4 ++
 3 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index efc1f48..372c749 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -10,6 +10,7 @@
 import pandas as pd
 
 from rdata.parser import (
+    R_FLOAT_NA,
     R_INT_NA,
     CharFlags,
     RData,
@@ -55,23 +56,61 @@ def __call__(self, data: Any) -> RObject: # noqa: ANN401
 R_MINIMUM_VERSION_WITH_ALTREP: Final[int] = 3
 
 
-def create_unicode_array(
-        names: Any,  # noqa: ANN401
+def convert_pd_array_to_np_array(
+        pd_array: Any,
 ) -> npt.NDArray[Any]:
     """
-    Create unicode array from sequence/iterator of strings.
+    Convert pandas array object to numpy array.
 
     Args:
-        names: Strings.
+        pd_array: Pandas array.
 
     Returns:
-        Array.
+        Numpy array.
     """
-    name_list = []
-    for name in names:
-        assert isinstance(name, str)
-        name_list.append(name)
-    return np.array(name_list, dtype=np.dtype("U"))
+    if isinstance(pd_array, pd.arrays.StringArray):
+        return pd_array.to_numpy()
+    elif isinstance(pd_array, (
+        pd.arrays.BooleanArray,
+        pd.arrays.IntegerArray,
+        pd.arrays.FloatingArray,
+    )):
+        if isinstance(pd_array, pd.arrays.BooleanArray):
+            dtype = np.bool_
+            fill_value = True
+        elif isinstance(pd_array, pd.arrays.IntegerArray):
+            dtype = np.int32
+            fill_value = R_INT_NA
+        elif isinstance(pd_array, pd.arrays.FloatingArray):
+            dtype = np.float64
+            fill_value = R_FLOAT_NA
+
+        mask = pd_array.isna()
+        if np.any(mask):
+            data = np.empty(pd_array.shape, dtype=dtype)
+            data[~mask] = pd_array[~mask].to_numpy()
+            data[mask] = fill_value
+            if isinstance(pd_array, pd.arrays.FloatingArray):
+                array = data
+            else:
+                array = np.ma.array(
+                    data=data,
+                    mask=mask,
+                    fill_value=fill_value,
+                )
+        else:
+            array = pd_array.to_numpy()
+        assert array.dtype == dtype
+        return array
+
+    elif isinstance(pd_array, (
+        pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
+    )):
+        array = pd_array.to_numpy()
+        return array
+
+    msg = f"pandas array {type(array)} not implemented"
+    raise NotImplementedError(msg)
 
 
 def build_r_object(
@@ -332,17 +371,23 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = [self.convert_to_r_object(el) for el in values]
 
             if isinstance(data, dict):
-                names = create_unicode_array(data.keys())
+                names = np.array(list(data.keys()), dtype=np.dtype("U"))
                 attributes = self.build_r_list({"names": names})
 
         elif isinstance(data, np.ndarray):
             if data.dtype.kind in ["O"]:
-                # This is a special case handling only np.array([None])
-                if data.size != 1 or data[0] is not None:
-                    msg = "general object array not implemented"
-                    raise NotImplementedError(msg)
+                assert data.ndim == 1
                 r_type = RObjectType.STR
-                r_value = [build_r_object(RObjectType.CHAR)]
+                r_value = []
+                for el in data:
+                    if el is None or pd.isna(el):
+                        r_el = build_r_object(RObjectType.CHAR)
+                    elif isinstance(el, str):
+                        r_el = self.convert_to_r_object(el.encode(self.encoding))
+                    else:
+                        msg = "general object array not implemented"
+                        raise NotImplementedError(msg)
+                    r_value.append(r_el)
 
             elif data.dtype.kind in ["S"]:
                 assert data.ndim == 1
@@ -432,7 +477,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_type = RObjectType.INT
             r_value = data.codes + 1
             attributes = self.build_r_list({
-                "levels": create_unicode_array(data.categories),
+                "levels": data.categories.to_numpy(),
                 "class": "factor",
                 })
 
@@ -445,26 +490,12 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 assert isinstance(column, str)
                 column_names.append(column)
 
-                array = series.array
-                if isinstance(array, pd.Categorical):
-                    r_series = self.convert_to_r_object(array)
-                elif isinstance(array, pd.arrays.StringArray):
-                    r_series = self.convert_to_r_object(create_unicode_array(array))
-                elif isinstance(array, (
-                    pd.arrays.BooleanArray,
-                    pd.arrays.IntegerArray,
-                    pd.arrays.FloatingArray,
-                    pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
-                )):
-                    np_array = array.to_numpy()
-                    if np_array.dtype.kind == "O":
-                        r_series = self.convert_to_r_object(create_unicode_array(array))
-                    else:
-                        r_series = self.convert_to_r_object(array.to_numpy())
+                pd_array = series.array
+                if isinstance(pd_array, pd.Categorical):
+                    array = pd_array
                 else:
-                    msg = f"pd.DataFrame with pd.Series {type(array)} not implemented"
-                    raise NotImplementedError(msg)
-
+                    array = convert_pd_array_to_np_array(pd_array)
+                r_series = self.convert_to_r_object(array)
                 r_value.append(r_series)
 
             index = data.index
@@ -483,7 +514,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
                 if index.dtype == "object":
-                    row_names = create_unicode_array(index)
+                    row_names = index.to_numpy()
                 elif np.issubdtype(str(index.dtype), np.integer):
                     row_names = index.to_numpy()
                 else:
@@ -494,7 +525,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 raise NotImplementedError(msg)
 
             attr_dict = {
-                "names": create_unicode_array(column_names),
+                "names": np.array(column_names, dtype=np.dtype("U")),
                 "row.names": row_names,
                 "class": "data.frame",
             }
diff --git a/rdata/parser/__init__.py b/rdata/parser/__init__.py
index d62b6e9..683d039 100644
--- a/rdata/parser/__init__.py
+++ b/rdata/parser/__init__.py
@@ -2,6 +2,7 @@
 
 from ._parser import (
     DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
+    R_FLOAT_NA as R_FLOAT_NA,
     R_INT_NA as R_INT_NA,
     CharFlags as CharFlags,
     RData as RData,
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 6b9808f..e88fc42 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -31,6 +31,10 @@
 #: Value used to represent a missing integer in R.
 R_INT_NA: Final = -2**31
 
+#: Value used to represent a missing float in R.
+#  This is a NaN with a particular payload, but it's not the same as np.nan.
+R_FLOAT_NA: Final = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
+
 
 @runtime_checkable
 class BinaryFileLike(Protocol):

From 38c80d73685738ffc3c9be08f22bed7b86c3fca7 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:19:03 +0300
Subject: [PATCH 036/100] Fix dataframe attribute order

---
 rdata/tests/test_write.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index fe67aef..86cca4d 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -140,6 +140,7 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             "test_dataframe_int_rownames.rds",
             "test_dataframe_range_rownames.rds",
             "test_dataframe_rownames.rda",
+            "test_dataframe_dtypes_with_na.rds",
         ]:
             converter.df_attr_order = ["names", "class", "row.names"]
 

From b622c9cd48d9a724786a6c65cb84b3ac4b3d6ee3 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:28:58 +0300
Subject: [PATCH 037/100] Add NA floats to ascii parser

---
 rdata/parser/_ascii.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/rdata/parser/_ascii.py b/rdata/parser/_ascii.py
index 15f59a7..976d4df 100644
--- a/rdata/parser/_ascii.py
+++ b/rdata/parser/_ascii.py
@@ -6,7 +6,15 @@
 import numpy as np
 import numpy.typing as npt
 
-from ._parser import R_INT_NA, AltRepConstructorMap, Parser
+from ._parser import R_FLOAT_NA, R_INT_NA, AltRepConstructorMap, Parser
+
+
+def map_int_na(line: str) -> int:
+    return R_INT_NA if line == "NA" else int(line)
+
+
+def map_float_na(line: str) -> float:
+    return R_FLOAT_NA if line == "NA" else float(line)
 
 
 class ParserASCII(Parser):
@@ -42,14 +50,16 @@ def _parse_array_values(
             line = self._readline()
 
             if np.issubdtype(dtype, np.integer):
-                value = R_INT_NA if line == "NA" else int(line)
+                value = map_int_na(line)
 
             elif np.issubdtype(dtype, np.floating):
-                value = float(line)
+                value = map_float_na(line)
 
             elif np.issubdtype(dtype, np.complexfloating):
+                value1 = map_float_na(line)
                 line2 = self._readline()
-                value = complex(float(line), float(line2))
+                value2 = map_float_na(line2)
+                value = complex(value1, value2)
 
             else:
                 msg = f"Unknown dtype: {dtype}"

From c2728e281c5fdf82d9a01eb63df1111f3218c40d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:30:11 +0300
Subject: [PATCH 038/100] Add NA floats to ascii unparser

---
 rdata/unparser/_ascii.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index a20f8fc..a8bdc78 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -7,6 +7,8 @@
 
 import numpy as np
 
+from rdata.parser import R_FLOAT_NA
+
 from ._unparser import Unparser
 
 if TYPE_CHECKING:
@@ -15,6 +17,11 @@
     import numpy.typing as npt
 
 
+def is_float_na(value: float) -> bool:
+    """Check if value is NA value."""
+    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
+
+
 class UnparserASCII(Unparser):
     """Unparser for files in ASCII format."""
 
@@ -35,7 +42,7 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self._add_line("A")
 
-    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
+    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:  # noqa: C901
         # Convert boolean to int
         if np.issubdtype(array.dtype, np.bool_):
             array = array.astype(np.int32)
@@ -51,7 +58,9 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
                 line = "NA" if value is None or np.ma.is_masked(value) else str(value)  # type: ignore [no-untyped-call]
 
             elif np.issubdtype(array.dtype, np.floating):
-                if np.isnan(value):
+                if is_float_na(value):
+                    line = "NA"
+                elif np.isnan(value):
                     line = "NaN"
                 elif value == np.inf:
                     line = "Inf"

From e27217ca838f6afad63f35d467192fc22aaf8819 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 12 Sep 2024 18:46:28 +0300
Subject: [PATCH 039/100] Fix ruff

---
 rdata/conversion/to_r.py  | 15 ++++++------
 rdata/tests/test_rdata.py | 50 +++++++++++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 372c749..9de4417 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -57,7 +57,7 @@ def __call__(self, data: Any) -> RObject: # noqa: ANN401
 
 
 def convert_pd_array_to_np_array(
-        pd_array: Any,
+        pd_array: Any,  # noqa: ANN401
 ) -> npt.NDArray[Any]:
     """
     Convert pandas array object to numpy array.
@@ -70,7 +70,8 @@ def convert_pd_array_to_np_array(
     """
     if isinstance(pd_array, pd.arrays.StringArray):
         return pd_array.to_numpy()
-    elif isinstance(pd_array, (
+
+    if isinstance(pd_array, (
         pd.arrays.BooleanArray,
         pd.arrays.IntegerArray,
         pd.arrays.FloatingArray,
@@ -103,11 +104,10 @@ def convert_pd_array_to_np_array(
         assert array.dtype == dtype
         return array
 
-    elif isinstance(pd_array, (
+    if isinstance(pd_array, (
         pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
     )):
-        array = pd_array.to_numpy()
-        return array
+        return pd_array.to_numpy()
 
-    msg = f"pandas array {type(array)} not implemented"
+    msg = f"pandas array {type(pd_array)} not implemented"
     raise NotImplementedError(msg)
@@ -513,9 +513,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 else:
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
-                if index.dtype == "object":
-                    row_names = index.to_numpy()
-                elif np.issubdtype(str(index.dtype), np.integer):
+                if (index.dtype == "object"
+                    or np.issubdtype(str(index.dtype), np.integer)):
                     row_names = index.to_numpy()
                 else:
                     msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index dfe03d8..9e4d663 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -517,7 +517,9 @@ def test_dataframe_int_rownames(self) -> None:
         index = np.array([3, 6, 9], dtype=np.int32)
         ref = pd.DataFrame(
             {
-                "col1": pd.Series([10., 20., 30.], dtype=pd.Float64Dtype(), index=index),
+                "col1": pd.Series(
+                    [10., 20., 30.],
+                    dtype=pd.Float64Dtype(), index=index),
             },
             index=index,
         )
@@ -532,7 +534,9 @@ def test_dataframe_range_rownames(self) -> None:
         index = pd.RangeIndex(2, 5)
         ref = pd.DataFrame(
             {
-                "col1": pd.Series([10., 20., 30.], dtype=pd.Float64Dtype(), index=index),
+                "col1": pd.Series(
+                    [10., 20., 30.],
+                    dtype=pd.Float64Dtype(), index=index),
             },
             index=index,
         )
@@ -547,11 +551,21 @@ def test_dataframe_dtypes(self) -> None:
         index = pd.RangeIndex(1, 4)
         ref = pd.DataFrame(
             {
-                "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype(), index=index),
-                "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype(), index=index),
-                "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype(), index=index),
-                "bool": pd.Series([True, False, True], dtype=pd.BooleanDtype(), index=index),
-                "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex, index=index),
+                "int": pd.Series(
+                    [10, 20, 30],
+                    dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3],
+                    dtype=pd.Float64Dtype(), index=index),
+                "string": pd.Series(
+                    ["x" ,"y", "z"],
+                    dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series(
+                    [True, False, True],
+                    dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series(
+                    [4+5j, 6+7j, 8+9j],
+                    dtype=complex, index=index),
             },
             index=index,
         )
@@ -560,17 +574,27 @@ def test_dataframe_dtypes(self) -> None:
     def test_dataframe_dtypes_with_na(self) -> None:
         """Test dataframe conversion."""
         # File created in R with
-        # df = data.frame(int=c(10L, 20L, 30L, NA), float=c(1.1, 2.2, 3.3, NA), string=c("x", "y", "z", NA), bool=as.logical(c(1, 0, 1, NA)), complex=c(4+5i, 6+7i, 8+9i, NA)); saveRDS(df, file="test_dataframe_dtypes_with_na.rds")
+        # df = data.frame(int=c(10L, 20L, 30L, NA), float=c(1.1, 2.2, 3.3, NA), string=c("x", "y", "z", NA), bool=as.logical(c(1, 0, 1, NA)), complex=c(4+5i, 6+7i, 8+9i, NA)); saveRDS(df, file="test_dataframe_dtypes_with_na.rds")  # noqa: E501
         data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes_with_na.rds")
 
         index = pd.RangeIndex(1, 5)
         ref = pd.DataFrame(
             {
-                "int": pd.Series([10, 20, 30, None], dtype=pd.Int32Dtype(), index=index),
-                "float": pd.Series([1.1, 2.2, 3.3, None], dtype=pd.Float64Dtype(), index=index),
-                "string": pd.Series(["x" ,"y", "z", None], dtype=pd.StringDtype(), index=index),
-                "bool": pd.Series([True, False, True, None], dtype=pd.BooleanDtype(), index=index),
-                "complex": pd.Series([4+5j, 6+7j, 8+9j, None], dtype=complex, index=index),
+                "int": pd.Series(
+                    [10, 20, 30, None],
+                    dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3, None],
+                    dtype=pd.Float64Dtype(), index=index),
+                "string": pd.Series(
+                    ["x" ,"y", "z", None],
+                    dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series(
+                    [True, False, True, None],
+                    dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series(
+                    [4+5j, 6+7j, 8+9j, None],
+                    dtype=complex, index=index),
             },
             index=index,
         )

From 9da1a4286e51031de1bb164ab5408dfed04e6193 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 13:27:47 +0300
Subject: [PATCH 040/100] Define NA checker function close to the definition

---
 rdata/parser/__init__.py | 1 +
 rdata/parser/_parser.py  | 5 +++++
 rdata/unparser/_ascii.py | 7 +------
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/rdata/parser/__init__.py b/rdata/parser/__init__.py
index 683d039..98375fe 100644
--- a/rdata/parser/__init__.py
+++ b/rdata/parser/__init__.py
@@ -11,6 +11,7 @@
     RObjectInfo as RObjectInfo,
     RObjectType as RObjectType,
     RVersions as RVersions,
+    is_float_na as is_float_na,
     parse_data as parse_data,
     parse_file as parse_file,
 )
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index e88fc42..929f1ac 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -36,6 +36,11 @@
 R_FLOAT_NA: Final = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
 
 
+def is_float_na(value: float) -> bool:
+    """Check if value is bitwise equal to R_FLOAT_NA, R's missing-value float."""
+    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
+
+
 @runtime_checkable
 class BinaryFileLike(Protocol):
     """Protocol for binary files."""
diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index a8bdc78..4ea9863 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from rdata.parser import R_FLOAT_NA
+from rdata.parser import is_float_na
 
 from ._unparser import Unparser
 
@@ -17,11 +17,6 @@
     import numpy.typing as npt
 
 
-def is_float_na(value: float) -> bool:
-    """Check if value is NA value."""
-    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
-
-
 class UnparserASCII(Unparser):
     """Unparser for files in ASCII format."""
 

From 3dc01696a6c936b30ec8b79aa29bc0d3fa478298 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 16:39:54 +0300
Subject: [PATCH 041/100] Fix mypy

---
 rdata/conversion/to_r.py | 15 +++++++++------
 rdata/parser/_parser.py  |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 9de4417..c95dd7c 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -74,15 +74,17 @@ def convert_pd_array_to_np_array(
     if isinstance(pd_array, (
         pd.arrays.BooleanArray,
         pd.arrays.IntegerArray,
-        pd.arrays.FloatingArray,
+        pd.arrays.FloatingArray,  # type: ignore [attr-defined]
     )):
+        dtype: type[Any]
+        fill_value: bool | int | float
         if isinstance(pd_array, pd.arrays.BooleanArray):
             dtype = np.bool_
             fill_value = True
         elif isinstance(pd_array, pd.arrays.IntegerArray):
             dtype = np.int32
             fill_value = R_INT_NA
-        elif isinstance(pd_array, pd.arrays.FloatingArray):
+        elif isinstance(pd_array, pd.arrays.FloatingArray):  # type: ignore [attr-defined]
             dtype = np.float64
             fill_value = R_FLOAT_NA
 
@@ -91,10 +93,10 @@ def convert_pd_array_to_np_array(
             data = np.empty(pd_array.shape, dtype=dtype)
             data[~mask] = pd_array[~mask].to_numpy()
             data[mask] = fill_value
-            if isinstance(pd_array, pd.arrays.FloatingArray):
+            if isinstance(pd_array, pd.arrays.FloatingArray):  # type: ignore [attr-defined]
                 array = data
             else:
-                array = np.ma.array(
+                array = np.ma.array(  # type: ignore [no-untyped-call]
                     data=data,
                     mask=mask,
                     fill_value=fill_value,
@@ -105,7 +107,7 @@ def convert_pd_array_to_np_array(
         return array
 
     if isinstance(pd_array, (
-        pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
+        pd.arrays.NumpyExtensionArray,
     )):
         return pd_array.to_numpy()
 
@@ -196,7 +198,7 @@ def __init__(self, *,
         # In test files the order in which dataframe attributes are written varies.
         # R can read files with attributes in any order, but this variable
         # is used in tests to change the attribute order to match with the test file.
-        self.df_attr_order = None
+        self.df_attr_order: list[str] | None = None
 
 
     def build_r_data(self,
@@ -491,6 +493,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 column_names.append(column)
 
                 pd_array = series.array
+                array: pd.Categorical | npt.NDArray[Any]
                 if isinstance(pd_array, pd.Categorical):
                     array = pd_array
                 else:
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 929f1ac..aaa0123 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -29,11 +29,11 @@
 
 
 #: Value used to represent a missing integer in R.
-R_INT_NA: Final = -2**31
+R_INT_NA: Final[int] = -2**31
 
 #: Value used to represent a missing float in R.
 #  This is a NaN with a particular payload, but it's not the same as np.nan.
-R_FLOAT_NA: Final = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
+R_FLOAT_NA: Final[float] = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
 
 
 def is_float_na(value: float) -> bool:

From d4049ba14c516b81ca802d52f1f23ab79e4b0237 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 16:47:05 +0300
Subject: [PATCH 042/100] Simplify reference lists

---
 rdata/conversion/to_r.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index c95dd7c..201de14 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -192,8 +192,8 @@ def __init__(self, *,
         self.encoding = encoding
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
-        self.reference_name_list: list[None | str] = [None]
-        self.reference_obj_list: list[None | RObject] = [None]
+        self._references: dict[str | None, tuple[int, RObject | None]] \
+            = {None: (0, None)}
 
         # In test files the order in which dataframe attributes are written varies.
         # R can read files with attributes in any order, but this variable
@@ -290,18 +290,16 @@ def build_r_sym(self,
             R object.
         """
         # Reference to existing symbol if exists
-        if name in self.reference_name_list:
-            idx = self.reference_name_list.index(name)
-            obj = self.reference_obj_list[idx]
-            return build_r_object(RObjectType.REF, reference=(idx, obj))
+        if name in self._references:
+            reference = self._references[name]
+            return build_r_object(RObjectType.REF, reference=reference)
 
         # Create a new symbol
         r_value = self.convert_to_r_object(name.encode(self.encoding))
         r_object = build_r_object(RObjectType.SYM, value=r_value)
 
         # Add to reference list
-        self.reference_name_list.append(name)
-        self.reference_obj_list.append(r_object)
+        self._references[name] = (len(self._references), r_object)
         return r_object
 
 

From cb3c487576135ee1dc750c9e7c9d7ba3eebb1f8a Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 17:40:08 +0300
Subject: [PATCH 043/100] Simplify creation of R lists

---
 rdata/conversion/to_r.py | 119 ++++++++++++++++++---------------------
 1 file changed, 56 insertions(+), 63 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 201de14..6e0ec31 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -28,21 +28,13 @@
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
-    from typing import Any, Final, Literal, Protocol
+    from typing import Any, Final, Literal
 
     import numpy.typing as npt
 
     Encoding = Literal["utf-8", "cp1252"]
 
 
-    class Converter(Protocol):
-        """Protocol for Py-to-R conversion."""
-
-        def __call__(self, data: Any) -> RObject: # noqa: ANN401
-            """Convert Python object to R object."""
-
-
-
 # Default values for RVersions object
 DEFAULT_FORMAT_VERSION: Final[int] = 3
 DEFAULT_R_VERSION_SERIALIZED: Final[int] = 0x40201
@@ -166,6 +158,34 @@ def build_r_object(
      )
 
 
+def build_r_list(
+        data: list[RObject] | list[tuple[RObject, RObject]],
+) -> RObject:
+    """
+    Build R object representing (named) linked list.
+
+    Args:
+        data: Non-empty list of values or (key, value) pairs.
+
+    Returns:
+        R object.
+    """
+    if len(data) == 0:
+        msg = "data must not be empty"
+        raise ValueError(msg)
+
+    head = data[0]
+    tail = data[1:]
+    if isinstance(head, tuple):
+        tag, car = head
+    else:
+        tag = None
+        car = head
+
+    cdr = build_r_object(RObjectType.NILVALUE) if len(tail) == 0 else build_r_list(tail)
+
+    return build_r_object(RObjectType.LIST, value=(car, cdr), tag=tag)
+
 
 class ConverterFromPythonToR:
     """
@@ -229,52 +249,23 @@ def build_r_data(self,
         return RData(versions, extra, r_object)
 
 
-    def build_r_list(self,
-            data: Mapping[str, Any] | list[Any],
-            *,
-            convert_value: Converter | None = None,
+    def convert_to_r_attributes(self,
+            data: dict[str, Any],
     ) -> RObject:
         """
-        Build R object representing named linked list.
+        Convert dictionary to R attributes list.
 
         Args:
-            data: Non-empty dictionary or list.
-            convert_value: Function used for converting value to R object
-                (for example, convert_to_r_object).
+            data: Non-empty dictionary.
 
         Returns:
             R object.
         """
-        if convert_value is None:
-            convert_value = self.convert_to_r_object
-
-        if len(data) == 0:
-            msg = "data must not be empty"
-            raise ValueError(msg)
-
-        if isinstance(data, dict):
-            data = data.copy()
-            key = next(iter(data))
-            tag = self.build_r_sym(key)
-            car = data.pop(key)
-        elif isinstance(data, list):
-            car = data[0]
-            data = data[1:]
-            tag = None
-
-        if not isinstance(car, RObject):
-            car = convert_value(car)
-
-        if len(data) == 0:
-            cdr = build_r_object(RObjectType.NILVALUE)
-        else:
-            cdr = self.build_r_list(data, convert_value=convert_value)
+        converted = []
+        for key, value in data.items():
+            converted.append((self.build_r_sym(key), self.convert_to_r_object(value)))
 
-        return build_r_object(
-            RObjectType.LIST,
-            value=(car, cdr),
-            tag=tag,
-            )
+        return build_r_list(converted)
 
 
     def build_r_sym(self,
@@ -321,7 +312,7 @@ def convert_to_r_object_for_rda(self,
         if not isinstance(data, dict):
             msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
             raise TypeError(msg)
-        return self.build_r_list(data)
+        return self.convert_to_r_attributes(data)
 
 
     def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
@@ -341,7 +332,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         values: list[Any] | tuple[Any, ...]
         r_value: Any = None
         is_object = False
-        attributes = None
+        attributes: dict[str, Any] | None = None
         tag = None
         gp = 0
 
@@ -354,14 +345,12 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
         elif isinstance(data, RLanguage):
             r_type = RObjectType.LANG
-            values = data.elements
-            r_value = (self.build_r_sym(str(values[0])),
-                       self.build_r_list(values[1:],
-                                         convert_value=self.build_r_sym))  # type: ignore [arg-type]
+            symbols = [self.build_r_sym(el) for el in data.elements]
+            r_value = (symbols[0], build_r_list(symbols[1:]))
 
             if len(data.attributes) > 0:
                 # The following might work here (untested)
-                # attributes = build_r_list(data.attributes)  # noqa: ERA001
+                # attributes = data.attributes  # noqa: ERA001
                 msg = f"type {r_type} with attributes not implemented"
                 raise NotImplementedError(msg)
 
@@ -372,7 +361,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             if isinstance(data, dict):
                 names = np.array(list(data.keys()), dtype=np.dtype("U"))
-                attributes = self.build_r_list({"names": names})
+                attributes = {"names": names}
 
         elif isinstance(data, np.ndarray):
             if data.dtype.kind in ["O"]:
@@ -415,7 +404,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 else:
                     # R uses column-major order like Fortran
                     r_value = np.ravel(data, order="F")
-                    attributes = self.build_r_list({"dim": np.array(data.shape)})
+                    attributes = {"dim": np.array(data.shape)}
 
         elif isinstance(data, (bool, int, float, complex)):
             return self.convert_to_r_object(np.array(data))
@@ -455,10 +444,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             r_type = RObjectType.ALTREP
             r_value = (
-                self.build_r_list([
+                build_r_list([
                     self.build_r_sym("compact_intseq"),
                     self.build_r_sym("base"),
-                    RObjectType.INT.value,
+                    self.convert_to_r_object(RObjectType.INT.value),
                 ]),
                 self.convert_to_r_object(np.array([
                     len(data),
@@ -476,10 +465,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             is_object = True
             r_type = RObjectType.INT
             r_value = data.codes + 1
-            attributes = self.build_r_list({
+            attributes = {
                 "levels": data.categories.to_numpy(),
                 "class": "factor",
-                })
+            }
 
         elif isinstance(data, pd.DataFrame):
             is_object = True
@@ -524,20 +513,24 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 msg = f"pd.DataFrame index {type(index)} not implemented"
                 raise NotImplementedError(msg)
 
-            attr_dict = {
+            attributes = {
                 "names": np.array(column_names, dtype=np.dtype("U")),
                 "row.names": row_names,
                 "class": "data.frame",
             }
             if self.df_attr_order is not None:
-                attr_dict = {k: attr_dict[k] for k in self.df_attr_order}
-            attributes = self.build_r_list(attr_dict)
+                attributes = {k: attributes[k] for k in self.df_attr_order}
 
         else:
             msg = f"type {type(data)} not implemented"
             raise NotImplementedError(msg)
 
+        if attributes is not None:
+            r_attributes = self.convert_to_r_attributes(attributes)
+        else:
+            r_attributes = None
+
         return build_r_object(r_type, value=r_value,
                               is_object=is_object,
-                              attributes=attributes,
+                              attributes=r_attributes,
                               tag=tag, gp=gp)

From 76bdc83091a7b063acffc1d3273ab59fc7458a4e Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 17:57:55 +0300
Subject: [PATCH 044/100] Rename build_r_sym() to convert_to_r_sym()

---
 rdata/conversion/to_r.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 6e0ec31..f47f132 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -263,16 +263,19 @@ def convert_to_r_attributes(self,
         """
         converted = []
         for key, value in data.items():
-            converted.append((self.build_r_sym(key), self.convert_to_r_object(value)))
+            converted.append((
+                self.convert_to_r_sym(key),
+                self.convert_to_r_object(value),
+            ))
 
         return build_r_list(converted)
 
 
-    def build_r_sym(self,
+    def convert_to_r_sym(self,
             name: str,
     ) -> RObject:
         """
-        Build R object representing symbol.
+        Convert string to R symbol.
 
         Args:
             name: String.
@@ -345,7 +348,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
         elif isinstance(data, RLanguage):
             r_type = RObjectType.LANG
-            symbols = [self.build_r_sym(el) for el in data.elements]
+            symbols = [self.convert_to_r_sym(el) for el in data.elements]
             r_value = (symbols[0], build_r_list(symbols[1:]))
 
             if len(data.attributes) > 0:
@@ -445,8 +448,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_type = RObjectType.ALTREP
             r_value = (
                 build_r_list([
-                    self.build_r_sym("compact_intseq"),
-                    self.build_r_sym("base"),
+                    self.convert_to_r_sym("compact_intseq"),
+                    self.convert_to_r_sym("base"),
                     self.convert_to_r_object(RObjectType.INT.value),
                 ]),
                 self.convert_to_r_object(np.array([

From 87194d4674e087b0b62172d65d92824a7423f2f2 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 17:58:52 +0300
Subject: [PATCH 045/100] Simplify creation of RData object

---
 rdata/_write.py           |  6 ++----
 rdata/conversion/to_r.py  | 42 ++++++++++++++++-----------------------
 rdata/tests/test_write.py | 17 +++++-----------
 3 files changed, 24 insertions(+), 41 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index a1fd162..c56e274 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -53,8 +53,7 @@ def write_rds(
         encoding=encoding,
         format_version=format_version,
     )
-    r_object = converter.convert_to_r_object(data)
-    r_data = converter.build_r_data(r_object)
+    r_data = converter.convert_to_r_data(data)
 
     unparse_file(
         path,
@@ -103,8 +102,7 @@ def write_rda(
         encoding=encoding,
         format_version=format_version,
     )
-    r_object = converter.convert_to_r_object_for_rda(data)
-    r_data = converter.build_r_data(r_object)
+    r_data = converter.convert_to_r_data(data, file_type="rda")
 
     unparse_file(
         path,
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f47f132..785e311 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -32,6 +32,8 @@
 
     import numpy.typing as npt
 
+    from rdata.unparser import FileType
+
     Encoding = Literal["utf-8", "cp1252"]
 
 
@@ -221,14 +223,17 @@ def __init__(self, *,
         self.df_attr_order: list[str] | None = None
 
 
-    def build_r_data(self,
-            r_object: RObject,
+    def convert_to_r_data(self,
+            data: Any,  # noqa: ANN401
+            *,
+            file_type: FileType = "rds",
     ) -> RData:
         """
-        Build RData object from R object.
+        Convert Python data to R data.
 
         Args:
-            r_object: R object.
+            data: Any Python object.
+            file_type: File type.
 
         Returns:
             Corresponding RData object.
@@ -236,6 +241,14 @@ def build_r_data(self,
         See Also:
             convert_to_r_object
         """
+        if file_type == "rda":
+            if not isinstance(data, dict):
+                msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
+                raise TypeError(msg)
+            r_object = self.convert_to_r_attributes(data)
+        else:
+            r_object = self.convert_to_r_object(data)
+
         versions = RVersions(
             self.format_version,
             self.r_version_serialized,
@@ -297,27 +310,6 @@ def convert_to_r_sym(self,
         return r_object
 
 
-    def convert_to_r_object_for_rda(self,
-            data: Mapping[str, Any],
-    ) -> RObject:
-        """
-        Convert Python dictionary to R object for RDA file.
-
-        Args:
-            data: Python dictionary with data and variable names.
-
-        Returns:
-            Corresponding R object.
-
-        See Also:
-            convert_to_r_object
-        """
-        if not isinstance(data, dict):
-            msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
-            raise TypeError(msg)
-        return self.convert_to_r_attributes(data)
-
-
     def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             data: Any,  # noqa: ANN401
     ) -> RObject:
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 86cca4d..68ea003 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -145,15 +145,10 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             converter.df_attr_order = ["names", "class", "row.names"]
 
         try:
-            if file_type == "rds":
-                r_obj = converter.convert_to_r_object(py_data)
-            else:
-                r_obj = converter.convert_to_r_object_for_rda(py_data)
+            new_r_data = converter.convert_to_r_data(py_data, file_type=file_type)
         except NotImplementedError as e:
             pytest.xfail(str(e))
 
-        new_r_data = converter.build_r_data(r_obj)
-
         assert str(r_data) == str(new_r_data)
         assert r_data == new_r_data
 
@@ -175,7 +170,7 @@ def test_convert_to_r_bad_rda() -> None:
     py_data = "hello"
     converter = ConverterFromPythonToR()
     with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
-        converter.convert_to_r_object_for_rda(py_data)  # type: ignore [arg-type]
+        converter.convert_to_r_data(py_data, file_type="rda")
 
 
 def test_convert_to_r_empty_rda() -> None:
@@ -183,15 +178,14 @@ def test_convert_to_r_empty_rda() -> None:
     py_data: dict[str, Any] = {}
     converter = ConverterFromPythonToR()
     with pytest.raises(ValueError, match="(?i)data must not be empty"):
-        converter.convert_to_r_object_for_rda(py_data)
+        converter.convert_to_r_data(py_data, file_type="rda")
 
 
 def test_unparse_bad_rda() -> None:
     """Test checking that data for RDA has variable names."""
     py_data = "hello"
     converter = ConverterFromPythonToR()
-    r_obj = converter.convert_to_r_object(py_data)
-    r_data = converter.build_r_data(r_obj)
+    r_data = converter.convert_to_r_data(py_data)
     with pytest.raises(ValueError, match="(?i)must be dictionary-like"):
         unparse_data(r_data, file_type="rda")
 
@@ -214,8 +208,7 @@ def test_unparse_big_int() -> None:
     """Test checking too large integers."""
     big_int = 2**32
     converter = ConverterFromPythonToR()
-    r_obj = converter.convert_to_r_object(big_int)
-    r_data = converter.build_r_data(r_obj)
+    r_data = converter.convert_to_r_data(big_int)
     with pytest.raises(ValueError, match="(?i)not castable"):
         unparse_data(r_data, file_format="xdr")
 

From d9c3be58eb9c6b015b5704a741df7f5958c358ad Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 13 Sep 2024 18:14:08 +0300
Subject: [PATCH 046/100] Add helper functions for conversion

---
 rdata/_write.py              | 11 ++++---
 rdata/conversion/__init__.py |  2 ++
 rdata/conversion/to_r.py     | 60 ++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index c56e274..b1a42e1 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -3,7 +3,7 @@
 
 from typing import TYPE_CHECKING
 
-from .conversion import ConverterFromPythonToR
+from .conversion import convert_python_to_r_data
 from .conversion.to_r import DEFAULT_FORMAT_VERSION
 from .unparser import unparse_file
 
@@ -49,11 +49,11 @@ def write_rds(
         >>> data = ["hello", 1, 2.2, 3.3+4.4j]
         >>> rdata.write_rds("test.rds", data)
     """
-    converter = ConverterFromPythonToR(
+    r_data = convert_python_to_r_data(
+        data,
         encoding=encoding,
         format_version=format_version,
     )
-    r_data = converter.convert_to_r_data(data)
 
     unparse_file(
         path,
@@ -98,11 +98,12 @@ def write_rda(
         >>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
         >>> rdata.write_rda("test.rda", data)
     """
-    converter = ConverterFromPythonToR(
+    r_data = convert_python_to_r_data(
+        data,
         encoding=encoding,
         format_version=format_version,
+        file_type="rda",
     )
-    r_data = converter.convert_to_r_data(data, file_type="rda")
 
     unparse_file(
         path,
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index e802758..55506b3 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -26,4 +26,6 @@
 )
 from .to_r import (
     ConverterFromPythonToR as ConverterFromPythonToR,
+    convert_python_to_r_data as convert_python_to_r_data,
+    convert_python_to_r_object as convert_python_to_r_object,
 )
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 785e311..7ac4591 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -529,3 +529,63 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                               is_object=is_object,
                               attributes=r_attributes,
                               tag=tag, gp=gp)
+
+
+def convert_python_to_r_data(
+        data: Any,  # noqa: ANN401
+        *,
+        encoding: Encoding = "utf-8",
+        format_version: int = DEFAULT_FORMAT_VERSION,
+        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+        file_type: FileType = "rds",
+) -> RData:
+    """
+    Convert Python data to R data.
+
+    Args:
+        data: Any Python object.
+        encoding: Encoding to be used for strings within data.
+        format_version: File format version.
+        r_version_serialized: R version written as the creator of the object.
+        file_type: File type.
+
+    Returns:
+        Corresponding RData object.
+
+    See Also:
+        convert_python_to_r_object
+    """
+    return ConverterFromPythonToR(
+            encoding=encoding,
+            format_version=format_version,
+            r_version_serialized=r_version_serialized,
+    ).convert_to_r_data(data, file_type=file_type)
+
+
+def convert_python_to_r_object(
+        data: Any,  # noqa: ANN401
+        *,
+        encoding: Encoding = "utf-8",
+        format_version: int = DEFAULT_FORMAT_VERSION,
+        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+) -> RObject:
+    """
+    Convert Python data to R object.
+
+    Args:
+        data: Any Python object.
+        encoding: Encoding to be used for strings within data.
+        format_version: File format version.
+        r_version_serialized: R version written as the creator of the object.
+
+    Returns:
+        Corresponding RObject object.
+
+    See Also:
+        convert_python_to_r_data
+    """
+    return ConverterFromPythonToR(
+            encoding=encoding,
+            format_version=format_version,
+            r_version_serialized=r_version_serialized,
+    ).convert_to_r_object(data)

From 22fe43d9e057139bdf035284c29a9746735e61aa Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 08:19:24 +0300
Subject: [PATCH 047/100] Clarify NA values

---
 rdata/tests/test_rdata.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 9e4d663..579ab4f 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -581,19 +581,19 @@ def test_dataframe_dtypes_with_na(self) -> None:
         ref = pd.DataFrame(
             {
                 "int": pd.Series(
-                    [10, 20, 30, None],
+                    [10, 20, 30, pd.NA],
                     dtype=pd.Int32Dtype(), index=index),
                 "float": pd.Series(
-                    [1.1, 2.2, 3.3, None],
+                    [1.1, 2.2, 3.3, pd.NA],
                     dtype=pd.Float64Dtype(), index=index),
                 "string": pd.Series(
-                    ["x" ,"y", "z", None],
+                    ["x" ,"y", "z", pd.NA],
                     dtype=pd.StringDtype(), index=index),
                 "bool": pd.Series(
-                    [True, False, True, None],
+                    [True, False, True, pd.NA],
                     dtype=pd.BooleanDtype(), index=index),
                 "complex": pd.Series(
-                    [4+5j, 6+7j, 8+9j, None],
+                    [4+5j, 6+7j, 8+9j, rdata.parser.R_FLOAT_NA],
                     dtype=complex, index=index),
             },
             index=index,

From 3bd285a52f301471c9d95ec24a1feda2ead43699 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 08:19:43 +0300
Subject: [PATCH 048/100] Filter expected warnings

---
 rdata/tests/test_rdata.py | 5 ++++-
 rdata/tests/test_write.py | 7 +++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 579ab4f..41534ec 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -598,7 +598,10 @@ def test_dataframe_dtypes_with_na(self) -> None:
             },
             index=index,
         )
-        pd.testing.assert_frame_equal(data, ref)
+
+        with np.errstate(invalid="ignore"):
+            # Comparing complex arrays with R_FLOAT_NA gives warning
+            pd.testing.assert_frame_equal(data, ref)
 
     def test_ts(self) -> None:
         """Test time series conversion."""
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 68ea003..6d2db28 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -85,7 +85,8 @@ def test_unparse(fname: str) -> None:
     with (TESTDATA_PATH / fname).open("rb") as f:
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
-        r_data = rdata.parser.parse_data(data, expand_altrep=False)
+        r_data = rdata.parser.parse_data(
+            data, expand_altrep=False, extension=f".{file_type}")
 
         try:
             out_data = unparse_data(
@@ -99,6 +100,7 @@ def test_unparse(fname: str) -> None:
         assert data == out_data
 
 
+@pytest.mark.filterwarnings("ignore:Missing constructor")
 @pytest.mark.parametrize("fname", fnames, ids=fnames)
 @pytest.mark.parametrize("expand_altrep", [True, False])
 def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
@@ -116,7 +118,8 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
 
-        r_data = rdata.parser.parse_data(data, expand_altrep=expand_altrep)
+        r_data = rdata.parser.parse_data(
+            data, expand_altrep=expand_altrep, extension=f".{file_type}")
 
         try:
             py_data = rdata.conversion.convert(r_data)

From 80a0c9be7d5b3f80427af65aeceec582982c1f79 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 09:18:45 +0300
Subject: [PATCH 049/100] Add test for dataframe with NA and NaN floats

---
 .../data/test_dataframe_float_with_na_nan.rds  | Bin 0 -> 147 bytes
 rdata/tests/test_rdata.py                      |  17 +++++++++++++++++
 rdata/tests/test_write.py                      |   1 +
 3 files changed, 18 insertions(+)
 create mode 100644 rdata/tests/data/test_dataframe_float_with_na_nan.rds

diff --git a/rdata/tests/data/test_dataframe_float_with_na_nan.rds b/rdata/tests/data/test_dataframe_float_with_na_nan.rds
new file mode 100644
index 0000000000000000000000000000000000000000..fed00f4d1da15f2a404f9b2054696038d3559b71
GIT binary patch
literal 147
zcmb2|=3oE==I#ec2?+^l35kr8);Op!XJ>TGUdK8?o#%-GlcBu8=G*miIGHxfNGvo7
zmy)<r&+sxaKIDHhBacE2TigGDS&faYoH=~l%<3mT>^XAhN|J&zL!^POy8T9$*wiAw
zB*R@^>!uu;<g#~*(gMYdQ<mMIPJc~k`^;n*%zQ%h|Ns9iLH`-%rtdlU6zCQJ^?f<w

literal 0
HcmV?d00001

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 41534ec..1a0f982 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -603,6 +603,23 @@ def test_dataframe_dtypes_with_na(self) -> None:
             # Comparing complex arrays with R_FLOAT_NA gives warning
             pd.testing.assert_frame_equal(data, ref)
 
+    def test_dataframe_float_with_na_nan(self) -> None:
+        """Test dataframe conversion."""
+        # File created in R with
+        # df = data.frame(float=c(1.1, 2.2, 3.3, NA, NaN, Inf, -Inf)); saveRDS(df, file="test_dataframe_float_with_na_nan.rds")  # noqa: E501,ERA001
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_float_with_na_nan.rds")
+
+        index = pd.RangeIndex(1, 8)
+        ref = pd.DataFrame(
+            {
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3, rdata.parser.R_FLOAT_NA, np.nan, np.inf, -np.inf],
+                    dtype=float, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
     def test_ts(self) -> None:
         """Test time series conversion."""
         data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 6d2db28..3e9b5ec 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -144,6 +144,7 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             "test_dataframe_range_rownames.rds",
             "test_dataframe_rownames.rda",
             "test_dataframe_dtypes_with_na.rds",
+            "test_dataframe_float_with_na_nan.rds",
         ]:
             converter.df_attr_order = ["names", "class", "row.names"]
 

From b73d4bd5e0652a9f8b17cef740060b021a853029 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 09:20:25 +0300
Subject: [PATCH 050/100] Do not use pandas floating array

---
 rdata/conversion/_conversion.py |  8 ++++++-
 rdata/conversion/to_r.py        | 39 ++++++++++++++++++---------------
 rdata/tests/test_rdata.py       | 10 ++++-----
 3 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 24c5022..8e492e3 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -449,7 +449,13 @@ def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
         if np.issubdtype(source.dtype, np.integer):
             dtype = pd.Int32Dtype()
         elif np.issubdtype(source.dtype, np.floating):
-            dtype = pd.Float64Dtype()
+            # We return the numpy array here, which keeps
+            # R_FLOAT_NA, np.nan, and other NaNs as they were originally in the file.
+            # Users can then decide if they prefer to interpret
+            # only R_FLOAT_NA or all NaNs as "missing".
+            return source
+            # This would create an array with all NaNs as "missing":
+            # dtype = pd.Float64Dtype()  # noqa: ERA001
         elif np.issubdtype(source.dtype, np.complexfloating):
             # There seems to be no pandas type for complex array
             return source
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 7ac4591..f09ba17 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -68,42 +68,45 @@ def convert_pd_array_to_np_array(
     if isinstance(pd_array, (
         pd.arrays.BooleanArray,
         pd.arrays.IntegerArray,
-        pd.arrays.FloatingArray,  # type: ignore [attr-defined]
     )):
         dtype: type[Any]
-        fill_value: bool | int | float
+        fill_value: bool | int
         if isinstance(pd_array, pd.arrays.BooleanArray):
             dtype = np.bool_
             fill_value = True
         elif isinstance(pd_array, pd.arrays.IntegerArray):
             dtype = np.int32
             fill_value = R_INT_NA
-        elif isinstance(pd_array, pd.arrays.FloatingArray):  # type: ignore [attr-defined]
-            dtype = np.float64
-            fill_value = R_FLOAT_NA
 
         mask = pd_array.isna()
         if np.any(mask):
-            data = np.empty(pd_array.shape, dtype=dtype)
-            data[~mask] = pd_array[~mask].to_numpy()
-            data[mask] = fill_value
-            if isinstance(pd_array, pd.arrays.FloatingArray):  # type: ignore [attr-defined]
-                array = data
-            else:
-                array = np.ma.array(  # type: ignore [no-untyped-call]
-                    data=data,
-                    mask=mask,
-                    fill_value=fill_value,
-                )
+            data = pd_array.to_numpy(dtype=dtype, na_value=fill_value)
+            array = np.ma.array(  # type: ignore [no-untyped-call]
+                data=data,
+                mask=mask,
+                fill_value=fill_value,
+            )
         else:
             array = pd_array.to_numpy()
         assert array.dtype == dtype
+        assert isinstance(array, np.ndarray)  # for mypy
         return array
 
     if isinstance(pd_array, (
-        pd.arrays.NumpyExtensionArray,
+        pd.arrays.FloatingArray,  # type: ignore [attr-defined]
     )):
-        return pd_array.to_numpy()
+        # Note that this possibly maps all NaNs (not only R_FLOAT_NA)
+        # to the same `na_value` depending on how the array was built:
+        array = pd_array.to_numpy(dtype=np.float64, na_value=R_FLOAT_NA)
+        assert isinstance(array, np.ndarray)  # for mypy
+        return array
+
+    if isinstance(pd_array, (
+        pd.arrays.NumpyExtensionArray,  # type: ignore [attr-defined]
+    )):
+        array = pd_array.to_numpy()
+        assert isinstance(array, np.ndarray)  # for mypy
+        return array
 
     msg = f"pandas array {type(array)} not implemented"
     raise NotImplementedError(msg)
diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 1a0f982..d3c0a89 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -519,7 +519,7 @@ def test_dataframe_int_rownames(self) -> None:
             {
                 "col1": pd.Series(
                     [10., 20., 30.],
-                    dtype=pd.Float64Dtype(), index=index),
+                    dtype=float, index=index),
             },
             index=index,
         )
@@ -536,7 +536,7 @@ def test_dataframe_range_rownames(self) -> None:
             {
                 "col1": pd.Series(
                     [10., 20., 30.],
-                    dtype=pd.Float64Dtype(), index=index),
+                    dtype=float, index=index),
             },
             index=index,
         )
@@ -556,7 +556,7 @@ def test_dataframe_dtypes(self) -> None:
                     dtype=pd.Int32Dtype(), index=index),
                 "float": pd.Series(
                     [1.1, 2.2, 3.3],
-                    dtype=pd.Float64Dtype(), index=index),
+                    dtype=float, index=index),
                 "string": pd.Series(
                     ["x" ,"y", "z"],
                     dtype=pd.StringDtype(), index=index),
@@ -584,8 +584,8 @@ def test_dataframe_dtypes_with_na(self) -> None:
                     [10, 20, 30, pd.NA],
                     dtype=pd.Int32Dtype(), index=index),
                 "float": pd.Series(
-                    [1.1, 2.2, 3.3, pd.NA],
-                    dtype=pd.Float64Dtype(), index=index),
+                    [1.1, 2.2, 3.3, rdata.parser.R_FLOAT_NA],
+                    dtype=float, index=index),
                 "string": pd.Series(
                     ["x" ,"y", "z", pd.NA],
                     dtype=pd.StringDtype(), index=index),

From e88419e636149c80b7b94aab76f317143df02890 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 10:33:09 +0300
Subject: [PATCH 051/100] Remove unused R_INT_MIN

---
 rdata/conversion/_conversion.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 8e492e3..57644c6 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -439,9 +439,6 @@ def convert_altrep_to_range(
     return range(start, stop + 1, step)
 
 
-R_INT_MIN = -2**31
-
-
 def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
 
     if isinstance(source, np.ndarray):

From 6bb5d5bc9d0a77fc7cb26799e7ad4c9aafa55fc3 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 10:35:19 +0300
Subject: [PATCH 052/100] Change dataframe default attribute order

---
 rdata/conversion/to_r.py  |  2 +-
 rdata/tests/test_write.py | 12 +++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f09ba17..f232316 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -513,8 +513,8 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
             attributes = {
                 "names": np.array(column_names, dtype=np.dtype("U")),
-                "row.names": row_names,
                 "class": "data.frame",
+                "row.names": row_names,
             }
             if self.df_attr_order is not None:
                 attributes = {k: attributes[k] for k in self.df_attr_order}
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 3e9b5ec..edef9e7 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -139,14 +139,12 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             r_version_serialized=r_data.versions.serialized,
         )
         if fname in [
-            "test_dataframe_dtypes.rds",
-            "test_dataframe_int_rownames.rds",
-            "test_dataframe_range_rownames.rds",
-            "test_dataframe_rownames.rda",
-            "test_dataframe_dtypes_with_na.rds",
-            "test_dataframe_float_with_na_nan.rds",
+            "test_dataframe.rda",
+            "test_dataframe.rds",
+            "test_dataframe_v3.rda",
+            "test_dataframe_v3.rds",
         ]:
-            converter.df_attr_order = ["names", "class", "row.names"]
+            converter.df_attr_order = ["names", "row.names", "class"]
 
         try:
             new_r_data = converter.convert_to_r_data(py_data, file_type=file_type)

From fd813e2224886086364c989c1de47e6cd60a0ad9 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 10:42:11 +0300
Subject: [PATCH 053/100] Move NA values and related functions to a new file

---
 rdata/conversion/to_r.py  |  3 +--
 rdata/missing.py          | 17 +++++++++++++++++
 rdata/parser/__init__.py  |  3 ---
 rdata/parser/_ascii.py    |  4 +++-
 rdata/parser/_parser.py   | 15 ++-------------
 rdata/tests/test_rdata.py |  7 ++++---
 rdata/unparser/_ascii.py  |  2 +-
 rdata/unparser/_xdr.py    |  2 +-
 8 files changed, 29 insertions(+), 24 deletions(-)
 create mode 100644 rdata/missing.py

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f232316..a2add50 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -9,9 +9,8 @@
 import numpy as np
 import pandas as pd
 
+from rdata.missing import R_FLOAT_NA, R_INT_NA
 from rdata.parser import (
-    R_FLOAT_NA,
-    R_INT_NA,
     CharFlags,
     RData,
     RExtraInfo,
diff --git a/rdata/missing.py b/rdata/missing.py
new file mode 100644
index 0000000..0a931b4
--- /dev/null
+++ b/rdata/missing.py
@@ -0,0 +1,17 @@
+"""Utilities for missing (NA) values in R."""
+
+from typing import Final
+
+import numpy as np
+
+#: Value used to represent a missing integer in R.
+R_INT_NA: Final[int] = -2**31
+
+#: Value used to represent a missing float in R.
+#  This is a NaN with a particular payload, but it's not the same as np.nan.
+R_FLOAT_NA: Final[float] = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
+
+
+def is_float_na(value: float) -> bool:
+    """Check if value is NA value."""
+    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
diff --git a/rdata/parser/__init__.py b/rdata/parser/__init__.py
index 98375fe..48421e6 100644
--- a/rdata/parser/__init__.py
+++ b/rdata/parser/__init__.py
@@ -2,8 +2,6 @@
 
 from ._parser import (
     DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
-    R_FLOAT_NA as R_FLOAT_NA,
-    R_INT_NA as R_INT_NA,
     CharFlags as CharFlags,
     RData as RData,
     RExtraInfo as RExtraInfo,
@@ -11,7 +9,6 @@
     RObjectInfo as RObjectInfo,
     RObjectType as RObjectType,
     RVersions as RVersions,
-    is_float_na as is_float_na,
     parse_data as parse_data,
     parse_file as parse_file,
 )
diff --git a/rdata/parser/_ascii.py b/rdata/parser/_ascii.py
index 976d4df..f37c9e5 100644
--- a/rdata/parser/_ascii.py
+++ b/rdata/parser/_ascii.py
@@ -6,7 +6,9 @@
 import numpy as np
 import numpy.typing as npt
 
-from ._parser import R_FLOAT_NA, R_INT_NA, AltRepConstructorMap, Parser
+from rdata.missing import R_FLOAT_NA, R_INT_NA
+
+from ._parser import AltRepConstructorMap, Parser
 
 
 def map_int_na(line: str) -> int:
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index aaa0123..b5e1570 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -23,24 +23,13 @@
 import numpy as np
 import numpy.typing as npt
 
+from rdata.missing import R_INT_NA
+
 if TYPE_CHECKING:
     from ._ascii import ParserASCII
     from ._xdr import ParserXDR
 
 
-#: Value used to represent a missing integer in R.
-R_INT_NA: Final[int] = -2**31
-
-#: Value used to represent a missing float in R.
-#  This is a NaN with a particular payload, but it's not the same as np.nan.
-R_FLOAT_NA: Final[float] = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
-
-
-def is_float_na(value: float) -> bool:
-    """Check if value is NA value."""
-    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
-
-
 @runtime_checkable
 class BinaryFileLike(Protocol):
     """Protocol for binary files."""
diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index d3c0a89..0a6ff54 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -13,6 +13,7 @@
 import xarray
 
 import rdata
+from rdata.missing import R_FLOAT_NA
 
 TESTDATA_PATH = rdata.TESTDATA_PATH
 
@@ -584,7 +585,7 @@ def test_dataframe_dtypes_with_na(self) -> None:
                     [10, 20, 30, pd.NA],
                     dtype=pd.Int32Dtype(), index=index),
                 "float": pd.Series(
-                    [1.1, 2.2, 3.3, rdata.parser.R_FLOAT_NA],
+                    [1.1, 2.2, 3.3, R_FLOAT_NA],
                     dtype=float, index=index),
                 "string": pd.Series(
                     ["x" ,"y", "z", pd.NA],
@@ -593,7 +594,7 @@ def test_dataframe_dtypes_with_na(self) -> None:
                     [True, False, True, pd.NA],
                     dtype=pd.BooleanDtype(), index=index),
                 "complex": pd.Series(
-                    [4+5j, 6+7j, 8+9j, rdata.parser.R_FLOAT_NA],
+                    [4+5j, 6+7j, 8+9j, R_FLOAT_NA],
                     dtype=complex, index=index),
             },
             index=index,
@@ -613,7 +614,7 @@ def test_dataframe_float_with_na_nan(self) -> None:
         ref = pd.DataFrame(
             {
                 "float": pd.Series(
-                    [1.1, 2.2, 3.3, rdata.parser.R_FLOAT_NA, np.nan, np.inf, -np.inf],
+                    [1.1, 2.2, 3.3, R_FLOAT_NA, np.nan, np.inf, -np.inf],
                     dtype=float, index=index),
             },
             index=index,
diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index 4ea9863..2fbd376 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from rdata.parser import is_float_na
+from rdata.missing import is_float_na
 
 from ._unparser import Unparser
 
diff --git a/rdata/unparser/_xdr.py b/rdata/unparser/_xdr.py
index 8bea3f0..e2031e2 100644
--- a/rdata/unparser/_xdr.py
+++ b/rdata/unparser/_xdr.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from rdata.parser import R_INT_NA
+from rdata.missing import R_INT_NA
 
 from ._unparser import Unparser
 

From c63dfe774060e99aa810c9f1bb81043839e99c03 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 11:37:37 +0300
Subject: [PATCH 054/100] Add helper functions for handling NA values

---
 rdata/missing.py         | 91 +++++++++++++++++++++++++++++++++++++---
 rdata/parser/_parser.py  | 14 +------
 rdata/unparser/_ascii.py |  4 +-
 3 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/rdata/missing.py b/rdata/missing.py
index 0a931b4..8ce1e7b 100644
--- a/rdata/missing.py
+++ b/rdata/missing.py
@@ -1,17 +1,98 @@
 """Utilities for missing (NA) values in R."""
 
-from typing import Final
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 
 import numpy as np
 
+if TYPE_CHECKING:
+    from typing import Any, Final
+
+    import numpy.typing as npt
+
+
 #: Value used to represent a missing integer in R.
-R_INT_NA: Final[int] = -2**31
+R_INT_NA: Final[int] = np.int32(-2**31)  # type: ignore [assignment]
 
 #: Value used to represent a missing float in R.
 #  This is a NaN with a particular payload, but it's not the same as np.nan.
 R_FLOAT_NA: Final[float] = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
 
 
-def is_float_na(value: float) -> bool:
-    """Check if value is NA value."""
-    return np.array(value).tobytes() == np.array(R_FLOAT_NA).tobytes()
+def get_na_value(dtype: np.dtype[Any]) -> Any:  # noqa: ANN401
+    """
+    Get NA value for a given type.
+
+    Args:
+        dtype: NumPy dtype.
+
+    Returns:
+        NA value of given dtype.
+    """
+    if dtype == np.int32:
+        return R_INT_NA
+    if dtype == np.float64:
+        return R_FLOAT_NA
+    msg = f"NA for numpy dtype {dtype} not implemented"
+    raise NotImplementedError(msg)
+
+
+def is_na(
+    array: np.int32 | np.float64 | npt.NDArray[np.int32 | np.float64],
+) -> bool | npt.NDArray[np.bool_]:
+    """
+    Check if the array elements are NA.
+
+    Args:
+        array: NumPy array or single value.
+
+    Returns:
+        Boolean mask of NA values in the array.
+    """
+    if isinstance(array, np.ndarray):
+        dtype = array.dtype
+        na = get_na_value(dtype)
+        if dtype == np.int32:
+            # Use the native dtype for comparison when possible;
+            # slightly faster than the steps below
+            return array == na  # type: ignore [no-any-return]
+        raw_dtype = f"V{array.dtype.itemsize}"
+        return array.view(raw_dtype) == np.array(na).view(raw_dtype)  # type: ignore [no-any-return]
+
+    if isinstance(array, (np.int32, np.float64)):
+        return is_na(np.array(array))
+
+    msg = f"NA for {type(array)} not implemented"
+    raise NotImplementedError(msg)
+
+
+def mask_na_values(
+    array: npt.NDArray[Any],
+    *,
+    fill_value: Any | None = None,  # noqa: ANN401
+) -> npt.NDArray[Any] | np.ma.MaskedArray[Any, Any]:
+    """
+    Mask NA elements of the array.
+
+    Args:
+        array: NumPy array.
+        fill_value: Fill value for the masked array.
+            Defaults to the NA value.
+
+    Returns:
+        NumPy masked array with NA values as the mask
+        or the original array if there are no NA elements.
+    """
+    mask = is_na(array)
+    if np.any(mask):
+        if fill_value is None:
+            fill_value = get_na_value(array.dtype)
+
+        array[mask] = fill_value
+        return np.ma.array(  # type: ignore [no-untyped-call,no-any-return]
+            data=array,
+            mask=mask,
+            fill_value=fill_value,
+        )
+    return array
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index b5e1570..82b080a 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -23,7 +23,7 @@
 import numpy as np
 import numpy.typing as npt
 
-from rdata.missing import R_INT_NA
+from rdata.missing import R_INT_NA, mask_na_values
 
 if TYPE_CHECKING:
     from ._ascii import ParserASCII
@@ -606,17 +606,7 @@ def parse_nullable_int_array(
     ) -> npt.NDArray[np.int32] | np.ma.MaskedArray[Any, Any]:
         """Parse an integer array."""
         data = self._parse_array(np.int32)
-        mask = (data == R_INT_NA)
-        data[mask] = fill_value
-
-        if np.any(mask):
-            return np.ma.array(  # type: ignore [no-untyped-call,no-any-return]
-                data=data,
-                mask=mask,
-                fill_value=fill_value,
-            )
-
-        return data
+        return mask_na_values(data, fill_value=fill_value)
 
     def parse_double_array(self) -> npt.NDArray[np.float64]:
         """Parse a double array."""
diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index 2fbd376..36b83ae 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from rdata.missing import is_float_na
+from rdata.missing import is_na
 
 from ._unparser import Unparser
 
@@ -53,7 +53,7 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:  # noqa: C901
                 line = "NA" if value is None or np.ma.is_masked(value) else str(value)  # type: ignore [no-untyped-call]
 
             elif np.issubdtype(array.dtype, np.floating):
-                if is_float_na(value):
+                if is_na(value):
                     line = "NA"
                 elif np.isnan(value):
                     line = "NaN"

From b8b6948f82e60a5b832a8c5d9c00b82b2251dc33 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 13:01:43 +0300
Subject: [PATCH 055/100] Add comment on setting mask

---
 rdata/conversion/_conversion.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 57644c6..bd80070 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -453,6 +453,9 @@ def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
             return source
             # This would create an array with all NaNs as "missing":
             # dtype = pd.Float64Dtype()  # noqa: ERA001
+            # This would create an array with only R_FLOAT_NA as "missing":
+            # from rdata.missing import is_na  # noqa: ERA001
+            # return pd.arrays.FloatingArray(source, is_na(source))  # noqa: ERA001
         elif np.issubdtype(source.dtype, np.complexfloating):
             # There seems to be no pandas type for complex array
             return source

From e773101a2bd6e9a429b226a42b465bef515f6400 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 13:39:33 +0300
Subject: [PATCH 056/100] Add tests for missing value functionality

---
 rdata/tests/test_missing.py | 90 +++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 rdata/tests/test_missing.py

diff --git a/rdata/tests/test_missing.py b/rdata/tests/test_missing.py
new file mode 100644
index 0000000..ac3e22b
--- /dev/null
+++ b/rdata/tests/test_missing.py
@@ -0,0 +1,90 @@
+"""Tests of missing value functionality."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+import pytest
+
+from rdata.missing import R_FLOAT_NA, R_INT_NA, is_na, mask_na_values
+
+
+def test_int_is_na() -> None:
+    """Test checking NA values in int array."""
+    array = np.array([1, 2, R_INT_NA], dtype=np.int32)
+    ref_mask = np.array([0, 0, 1], dtype=np.bool_)
+
+    mask = is_na(array)
+    np.testing.assert_array_equal(mask, ref_mask)
+
+
+def test_float_is_na() -> None:
+    """Test checking NA values in float array."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+
+    mask = is_na(array)
+    np.testing.assert_array_equal(mask, ref_mask)
+
+
+@pytest.mark.parametrize("value", [R_INT_NA, R_FLOAT_NA])
+def test_value_is_na(value: Any) -> None:  # noqa: ANN401
+    """Test checking single NA values."""
+    assert is_na(value)
+
+
+@pytest.mark.parametrize("value", [
+    np.int32(0), 0, np.float64(0.0), 0.0, np.nan,
+])
+def test_value_is_not_na(value: Any) -> None:  # noqa: ANN401
+    """Test checking single non-NA values."""
+    assert not is_na(value)
+
+
+def test_int64() -> None:
+    """Test checking int64."""
+    with pytest.raises(NotImplementedError):
+        is_na(2**32)
+    with pytest.raises(NotImplementedError):
+        is_na(-2**32)
+
+
+def test_wrong_type() -> None:
+    """Test checking unsupported type."""
+    with pytest.raises(NotImplementedError):
+        is_na("test")
+
+
+def test_masked_array() -> None:
+    """Test checking masked array creation."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+    ref_data = array.copy()
+
+    masked = mask_na_values(array)
+    assert isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked.data, ref_data)
+    np.testing.assert_array_equal(masked.mask, ref_mask)
+
+
+def test_masked_array_fill() -> None:
+    """Test checking masked array creation with a custom fill value."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+    ref_data = array.copy()
+    ref_data[ref_mask] = 42
+
+    masked = mask_na_values(array, fill_value=42)
+    assert isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked.data, ref_data)
+    np.testing.assert_array_equal(masked.mask, ref_mask)
+
+
+def test_nonmasked_array() -> None:
+    """Test checking masked array no-op."""
+    array = np.array([1, 2, np.nan, np.nan], dtype=np.float64)
+
+    masked = mask_na_values(array)
+    assert not isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked, array)

From 828f9daef4f7c5ff0aa94504c5581379409baf81 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 13:39:51 +0300
Subject: [PATCH 057/100] Include checking int and float values

---
 rdata/missing.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/rdata/missing.py b/rdata/missing.py
index 8ce1e7b..8e6702f 100644
--- a/rdata/missing.py
+++ b/rdata/missing.py
@@ -39,7 +39,7 @@ def get_na_value(dtype: np.dtype[Any]) -> Any:  # noqa: ANN401
 
 
 def is_na(
-    array: np.int32 | np.float64 | npt.NDArray[np.int32 | np.float64],
+    array: Any | npt.NDArray[Any],  # noqa: ANN401
 ) -> bool | npt.NDArray[np.bool_]:
     """
     Check if the array elements are NA.
@@ -60,7 +60,13 @@ def is_na(
         raw_dtype = f"V{array.dtype.itemsize}"
         return array.view(raw_dtype) == np.array(na).view(raw_dtype)  # type: ignore [no-any-return]
 
-    if isinstance(array, (np.int32, np.float64)):
+    if isinstance(array, int):
+        try:
+            return is_na(np.array(array, dtype=np.int32))
+        except OverflowError:
+            return is_na(np.array(array))
+
+    if isinstance(array, (float, np.int32, np.float64)):
         return is_na(np.array(array))
 
     msg = f"NA for {type(array)} not implemented"

From 0fe903f2d1208a687d379ecae6185bd859783e03 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 14:29:12 +0300
Subject: [PATCH 058/100] Include ascii format in testing too large ints

---
 rdata/tests/test_write.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index edef9e7..7e92312 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -206,13 +206,14 @@ def test_convert_to_r_unsupported_encoding() -> None:
         converter.convert_to_r_object("ä")
 
 
-def test_unparse_big_int() -> None:
+@pytest.mark.parametrize("file_format", valid_formats)
+def test_unparse_big_int(file_format: FileFormat) -> None:
     """Test checking too large integers."""
     big_int = 2**32
     converter = ConverterFromPythonToR()
     r_data = converter.convert_to_r_data(big_int)
     with pytest.raises(ValueError, match="(?i)not castable"):
-        unparse_data(r_data, file_format="xdr")
+        unparse_data(r_data, file_format=file_format)
 
 
 def test_convert_dataframe_pandas_dtypes() -> None:

From 2755b3c39d393b5fef6e8943822eae1293fe6f8b Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 14:30:38 +0300
Subject: [PATCH 059/100] Fix datatype conversions in ascii unparser

---
 rdata/unparser/_ascii.py    | 12 +++++-------
 rdata/unparser/_unparser.py | 26 +++++++++++++++++++++++++-
 rdata/unparser/_xdr.py      | 27 +++++----------------------
 3 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index 36b83ae..3219d5b 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import string
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import numpy as np
 
@@ -37,11 +37,9 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self._add_line("A")
 
-    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:  # noqa: C901
-        # Convert boolean to int
-        if np.issubdtype(array.dtype, np.bool_):
-            array = array.astype(np.int32)
-
+    def _unparse_array_values_raw(self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
         # Convert complex to pairs of floats
         if np.issubdtype(array.dtype, np.complexfloating):
             assert array.dtype == np.complex128
@@ -50,7 +48,7 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:  # noqa: C901
         # Unparse data
         for value in array:
             if np.issubdtype(array.dtype, np.integer):
-                line = "NA" if value is None or np.ma.is_masked(value) else str(value)  # type: ignore [no-untyped-call]
+                line = "NA" if is_na(value) else str(value)
 
             elif np.issubdtype(array.dtype, np.floating):
                 if is_na(value):
diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index 7dd6243..0b57705 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 
+from rdata.missing import R_INT_NA
 from rdata.parser import (
     RData,
     RExtraInfo,
@@ -69,9 +70,32 @@ def unparse_array(self, array: npt.NDArray[Any]) -> None:
         self.unparse_int(array.size)
         self._unparse_array_values(array)
 
-    @abc.abstractmethod
     def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
         """Unparse the values of an array."""
+        # Convert boolean to int
+        if np.issubdtype(array.dtype, np.bool_):
+            array = array.astype(np.int32)
+
+        # Flatten masked values and convert int arrays to int32
+        if np.issubdtype(array.dtype, np.integer):
+            if np.ma.is_masked(array):  # type: ignore [no-untyped-call]
+                mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
+                array = np.ma.getdata(array).copy()  # type: ignore [no-untyped-call]
+                array[mask] = R_INT_NA
+            info = np.iinfo(np.int32)
+            if not all(info.min <= val <= info.max for val in array):
+                msg = "Integer array not castable to int32"
+                raise ValueError(msg)
+            array = array.astype(np.int32)
+
+        assert array.dtype in (np.int32, np.float64, np.complex128)
+        self._unparse_array_values_raw(array)
+
+    @abc.abstractmethod
+    def _unparse_array_values_raw(self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
+        """Unparse the values of an array as such."""
 
     @abc.abstractmethod
     def unparse_string(self, value: bytes) -> None:
diff --git a/rdata/unparser/_xdr.py b/rdata/unparser/_xdr.py
index e2031e2..664d95b 100644
--- a/rdata/unparser/_xdr.py
+++ b/rdata/unparser/_xdr.py
@@ -2,17 +2,14 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-
-from rdata.missing import R_INT_NA
+from typing import TYPE_CHECKING
 
 from ._unparser import Unparser
 
 if TYPE_CHECKING:
     import io
 
+    import numpy as np
     import numpy.typing as npt
 
 
@@ -30,23 +27,9 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self.file.write(b"X\n")
 
-    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
-        # Convert boolean to int
-        if np.issubdtype(array.dtype, np.bool_):
-            array = array.astype(np.int32)
-
-        # Flatten masked values and convert int arrays to int32
-        if np.issubdtype(array.dtype, np.integer):
-            if np.ma.is_masked(array):  # type: ignore [no-untyped-call]
-                mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
-                array = np.ma.getdata(array).copy()  # type: ignore [no-untyped-call]
-                array[mask] = R_INT_NA
-            info = np.iinfo(np.int32)
-            if not all(info.min <= val <= info.max for val in array):
-                msg = "Integer array not castable to int32"
-                raise ValueError(msg)
-            array = array.astype(np.int32)
-
+    def _unparse_array_values_raw(self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
         # Convert to big endian if needed
         array = array.astype(array.dtype.newbyteorder(">"))
 

From 6040baeb30d791332a4421021de00db07bb546e7 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 14:42:10 +0300
Subject: [PATCH 060/100] Include testing negative end

---
 rdata/tests/test_write.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 7e92312..3d6a9e3 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -207,11 +207,11 @@ def test_convert_to_r_unsupported_encoding() -> None:
 
 
 @pytest.mark.parametrize("file_format", valid_formats)
-def test_unparse_big_int(file_format: FileFormat) -> None:
+@pytest.mark.parametrize("value", [-2**31 - 1, 2**31])
+def test_unparse_big_int(file_format: FileFormat, value: int) -> None:
     """Test checking too large integers."""
-    big_int = 2**32
     converter = ConverterFromPythonToR()
-    r_data = converter.convert_to_r_data(big_int)
+    r_data = converter.convert_to_r_data(value)
     with pytest.raises(ValueError, match="(?i)not castable"):
         unparse_data(r_data, file_format=file_format)
 

From 7c605661b77fbd48777a06dbd3ef415faffbd8f1 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 14:42:41 +0300
Subject: [PATCH 061/100] Speed up range check

---
 rdata/unparser/_unparser.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index 0b57705..b41647e 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -82,11 +82,13 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
                 mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
                 array = np.ma.getdata(array).copy()  # type: ignore [no-untyped-call]
                 array[mask] = R_INT_NA
-            info = np.iinfo(np.int32)
-            if not all(info.min <= val <= info.max for val in array):
-                msg = "Integer array not castable to int32"
-                raise ValueError(msg)
-            array = array.astype(np.int32)
+
+            if array.dtype != np.int32:
+                info = np.iinfo(np.int32)
+                if np.any((array < info.min) | (array > info.max)):
+                    msg = "Integer array not castable to int32"
+                    raise ValueError(msg)
+                array = array.astype(np.int32)
 
         assert array.dtype in (np.int32, np.float64, np.complex128)
         self._unparse_array_values_raw(array)

From d450f7306cd13f2159b70ce10e460ab45c58d76c Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 15:38:32 +0300
Subject: [PATCH 062/100] Move duplicated code to a function

---
 rdata/conversion/_conversion.py | 14 +++-----------
 rdata/parser/_parser.py         | 29 +++++++++++++++++------------
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index bd80070..eb1dc5b 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -14,6 +14,8 @@
 import xarray
 from typing_extensions import override
 
+from rdata.parser._parser import get_altrep_name
+
 from .. import parser
 
 ConversionFunction = Callable[[Union[parser.RData, parser.RObject]], Any]
@@ -416,17 +418,7 @@ def convert_altrep_to_range(
     info, state, attr = r_altrep.value
     assert attr.info.type == parser.RObjectType.NILVALUE
 
-    assert info.info.type == parser.RObjectType.LIST
-
-    class_sym = info.value[0]
-    while class_sym.info.type == parser.RObjectType.REF:
-        class_sym = class_sym.referenced_object
-
-    assert class_sym.info.type == parser.RObjectType.SYM
-    assert class_sym.value.info.type == parser.RObjectType.CHAR
-
-    altrep_name = class_sym.value.value
-    assert isinstance(altrep_name, bytes)
+    altrep_name = get_altrep_name(info)
 
     if altrep_name != b"compact_intseq":
         msg = "Only compact integer sequences can be converted to range"
diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 82b080a..44661bc 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -538,6 +538,22 @@ def wrap_constructor(
     return new_info, value
 
 
+def get_altrep_name(info: RObject) -> bytes:
+    """Get the name of the ALTREP object."""
+    assert info.info.type == RObjectType.LIST
+
+    class_sym = info.value[0]
+    while class_sym.info.type == RObjectType.REF:
+        class_sym = class_sym.referenced_object
+
+    assert class_sym.info.type == RObjectType.SYM
+    assert class_sym.value.info.type == RObjectType.CHAR
+
+    altrep_name = class_sym.value.value
+    assert isinstance(altrep_name, bytes)
+    return altrep_name
+
+
 default_altrep_map_dict: Final[Mapping[bytes, AltRepConstructor]] = {
     b"deferred_string": deferred_string_constructor,
     b"compact_intseq": compact_intseq_constructor,
@@ -666,18 +682,7 @@ def expand_altrep_to_object(
         state: RObject,
     ) -> tuple[RObjectInfo, Any]:
         """Expand alternative representation to normal object."""
-        assert info.info.type == RObjectType.LIST
-
-        class_sym = info.value[0]
-        while class_sym.info.type == RObjectType.REF:
-            class_sym = class_sym.referenced_object
-
-        assert class_sym.info.type == RObjectType.SYM
-        assert class_sym.value.info.type == RObjectType.CHAR
-
-        altrep_name = class_sym.value.value
-        assert isinstance(altrep_name, bytes)
-
+        altrep_name = get_altrep_name(info)
         constructor = self.altrep_constructor_dict[altrep_name]
         return constructor(state)
 

From 4d3e38f323d49451735f458a91883c059ea89e62 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 17:25:41 +0300
Subject: [PATCH 063/100] Speed up range check

---
 rdata/unparser/_unparser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index b41647e..ae15914 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -85,7 +85,7 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
 
             if array.dtype != np.int32:
                 info = np.iinfo(np.int32)
-                if np.any((array < info.min) | (array > info.max)):
+                if np.any(array > info.max) or np.any(array < info.min):
                     msg = "Integer array not castable to int32"
                     raise ValueError(msg)
                 array = array.astype(np.int32)

From ac948de64e23a45606952ab5ccf03e383d7f70ad Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 16 Sep 2024 17:52:38 +0300
Subject: [PATCH 064/100] Fix docstring

---
 rdata/conversion/_conversion.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index eb1dc5b..8530b8a 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -406,10 +406,7 @@ def convert_altrep_to_range(
         r_altrep: R altrep object
 
     Returns:
-        Array.
-
-    See Also:
-        convert_array
+        Range object.
     """
     if r_altrep.info.type != parser.RObjectType.ALTREP:
         msg = "Must receive an altrep object"

From 957887c60a3f322e523d6ae2ac9916f147f57a38 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Thu, 19 Sep 2024 09:48:31 +0300
Subject: [PATCH 065/100] Simplify the definition of the NA value

---
 rdata/missing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/missing.py b/rdata/missing.py
index 8e6702f..b60a6f8 100644
--- a/rdata/missing.py
+++ b/rdata/missing.py
@@ -17,7 +17,7 @@
 
 #: Value used to represent a missing float in R.
 #  This is a NaN with a particular payload, but it's not the same as np.nan.
-R_FLOAT_NA: Final[float] = np.frombuffer(b"\x7f\xf0\x00\x00\x00\x00\x07\xa2", dtype=">f8").astype("=f8")[0]  # noqa: E501
+R_FLOAT_NA: Final[float] = np.uint64(0x7ff00000000007a2).view(np.float64)  # type: ignore [assignment]
 
 
 def get_na_value(dtype: np.dtype[Any]) -> Any:  # noqa: ANN401

From c3fe17c66e407de9db8f3e31b1acf9daacdd4b28 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <34502776+trossi@users.noreply.github.com>
Date: Wed, 2 Oct 2024 09:57:27 +0300
Subject: [PATCH 066/100] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Ramos Carreño <carlosramosca@hotmail.com>
---
 rdata/conversion/to_r.py  | 14 +++++++++-----
 rdata/tests/test_write.py |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index a2add50..8415504 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -50,7 +50,7 @@
 
 
 def convert_pd_array_to_np_array(
-        pd_array: Any,  # noqa: ANN401
+    pd_array: pd.api.extensions.ExtensionArray,
 ) -> npt.NDArray[Any]:
     """
     Convert pandas array object to numpy array.
@@ -527,10 +527,14 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         else:
             r_attributes = None
 
-        return build_r_object(r_type, value=r_value,
-                              is_object=is_object,
-                              attributes=r_attributes,
-                              tag=tag, gp=gp)
+        return build_r_object(
+            r_type,
+            value=r_value,
+            is_object=is_object,
+            attributes=r_attributes,
+            tag=tag,
+            gp=gp,
+        )
 
 
 def convert_python_to_r_data(
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 3d6a9e3..8620d3a 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -154,7 +154,7 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
         assert str(r_data) == str(new_r_data)
         assert r_data == new_r_data
 
-        # Check futher that the resulting unparsed data is correct to ensure that
+        # Check further that the resulting unparsed data is correct to ensure that
         # Python-to-R conversion hasn't created any odd objects that can't be unparsed
         if not expand_altrep:
             file_type, file_format = parse_file_type_and_format(data)

From 74a11ba76c834d8db20d3686a17e762dce1ffd84 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:01:52 +0300
Subject: [PATCH 067/100] Fix indentation

---
 rdata/conversion/to_r.py | 78 ++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 8415504..da7427d 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -112,14 +112,14 @@ def convert_pd_array_to_np_array(
 
 
 def build_r_object(
-        r_type: RObjectType,
-        *,
-        value: Any = None,  # noqa: ANN401
-        is_object: bool = False,
-        attributes: RObject | None = None,
-        tag: RObject | None = None,
-        gp: int = 0,
-        reference: tuple[int, RObject | None] = (0, None),
+    r_type: RObjectType,
+    *,
+    value: Any = None,  # noqa: ANN401
+    is_object: bool = False,
+    attributes: RObject | None = None,
+    tag: RObject | None = None,
+    gp: int = 0,
+    reference: tuple[int, RObject | None] = (0, None),
 ) -> RObject:
     """
     Build R object.
@@ -163,7 +163,7 @@ def build_r_object(
 
 
 def build_r_list(
-        data: list[RObject] | list[tuple[RObject, RObject]],
+    data: list[RObject] | list[tuple[RObject, RObject]],
 ) -> RObject:
     """
     Build R object representing (named) linked list.
@@ -201,9 +201,9 @@ class ConverterFromPythonToR:
         r_version_serialized: R version written as the creator of the object.
     """
     def __init__(self, *,
-            encoding: Encoding = "utf-8",
-            format_version: int = DEFAULT_FORMAT_VERSION,
-            r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+        encoding: Encoding = "utf-8",
+        format_version: int = DEFAULT_FORMAT_VERSION,
+        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
     ) -> None:
         """
         Init class.
@@ -226,9 +226,9 @@ def __init__(self, *,
 
 
     def convert_to_r_data(self,
-            data: Any,  # noqa: ANN401
-            *,
-            file_type: FileType = "rds",
+        data: Any,  # noqa: ANN401
+        *,
+        file_type: FileType = "rds",
     ) -> RData:
         """
         Convert Python data to R data.
@@ -265,7 +265,7 @@ def convert_to_r_data(self,
 
 
     def convert_to_r_attributes(self,
-            data: dict[str, Any],
+        data: dict[str, Any],
     ) -> RObject:
         """
         Convert dictionary to R attributes list.
@@ -287,7 +287,7 @@ def convert_to_r_attributes(self,
 
 
     def convert_to_r_sym(self,
-            name: str,
+        name: str,
     ) -> RObject:
         """
         Convert string to R symbol.
@@ -313,7 +313,7 @@ def convert_to_r_sym(self,
 
 
     def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
-            data: Any,  # noqa: ANN401
+        data: Any,  # noqa: ANN401
     ) -> RObject:
         """
         Convert Python data to R object.
@@ -493,10 +493,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     and index.step == 1
                 ):
                     row_names = np.ma.array(
-                            data=[R_INT_NA, -data.shape[0]],
-                            mask=[True, False],
-                            fill_value=R_INT_NA,
-                        )
+                        data=[R_INT_NA, -data.shape[0]],
+                        mask=[True, False],
+                        fill_value=R_INT_NA,
+                    )
                 else:
                     row_names = range(index.start, index.stop, index.step)
             elif isinstance(index, pd.Index):
@@ -538,12 +538,12 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
 
 
 def convert_python_to_r_data(
-        data: Any,  # noqa: ANN401
-        *,
-        encoding: Encoding = "utf-8",
-        format_version: int = DEFAULT_FORMAT_VERSION,
-        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
-        file_type: FileType = "rds",
+    data: Any,  # noqa: ANN401
+    *,
+    encoding: Encoding = "utf-8",
+    format_version: int = DEFAULT_FORMAT_VERSION,
+    r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+    file_type: FileType = "rds",
 ) -> RData:
     """
     Convert Python data to R data.
@@ -562,18 +562,18 @@ def convert_python_to_r_data(
         convert_python_to_r_object
     """
     return ConverterFromPythonToR(
-            encoding=encoding,
-            format_version=format_version,
-            r_version_serialized=r_version_serialized,
+        encoding=encoding,
+        format_version=format_version,
+        r_version_serialized=r_version_serialized,
     ).convert_to_r_data(data, file_type=file_type)
 
 
 def convert_python_to_r_object(
-        data: Any,  # noqa: ANN401
-        *,
-        encoding: Encoding = "utf-8",
-        format_version: int = DEFAULT_FORMAT_VERSION,
-        r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+    data: Any,  # noqa: ANN401
+    *,
+    encoding: Encoding = "utf-8",
+    format_version: int = DEFAULT_FORMAT_VERSION,
+    r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
 ) -> RObject:
     """
     Convert Python data to R object.
@@ -591,7 +591,7 @@ def convert_python_to_r_object(
         convert_python_to_r_data
     """
     return ConverterFromPythonToR(
-            encoding=encoding,
-            format_version=format_version,
-            r_version_serialized=r_version_serialized,
+        encoding=encoding,
+        format_version=format_version,
+        r_version_serialized=r_version_serialized,
     ).convert_to_r_object(data)

From 5449199e291928a02cbd397a92edd6548b7293a6 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:02:53 +0300
Subject: [PATCH 068/100] Fix mypy

---
 rdata/conversion/to_r.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index da7427d..3b849bd 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -77,7 +77,7 @@ def convert_pd_array_to_np_array(
             dtype = np.int32
             fill_value = R_INT_NA
 
-        mask = pd_array.isna()
+        mask = pd_array.isna()  # type: ignore [no-untyped-call]
         if np.any(mask):
             data = pd_array.to_numpy(dtype=dtype, na_value=fill_value)
             array = np.ma.array(  # type: ignore [no-untyped-call]

From 74bf9d8f60a80a3c1b7de1c32cc16cb22981d65a Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:03:09 +0300
Subject: [PATCH 069/100] Comment code

---
 rdata/missing.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/rdata/missing.py b/rdata/missing.py
index b60a6f8..2cad764 100644
--- a/rdata/missing.py
+++ b/rdata/missing.py
@@ -57,13 +57,18 @@ def is_na(
             # Use the native dtype for comparison when possible;
             # slightly faster than the steps below
             return array == na  # type: ignore [no-any-return]
-        raw_dtype = f"V{array.dtype.itemsize}"
+        # Convert dtype to unsigned integer to perform byte-by-byte
+        # equality comparison to distinguish different NaN values
+        raw_dtype = f"u{array.dtype.itemsize}"
         return array.view(raw_dtype) == np.array(na).view(raw_dtype)  # type: ignore [no-any-return]
 
     if isinstance(array, int):
         try:
+            # Python built-in integers have arbitrary precision, so
+            # we try to cast the value to a 32-bit int if possible
             return is_na(np.array(array, dtype=np.int32))
         except OverflowError:
+            # Proceed with larger integer (in case it is supported at some point)
             return is_na(np.array(array))
 
     if isinstance(array, (float, np.int32, np.float64)):

From da9c940b54fe363c5402f1c148243f2e0133579b Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:05:36 +0300
Subject: [PATCH 070/100] Raise NotImplementedError for untested code

---
 rdata/parser/_parser.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index 44661bc..8e7db16 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -935,9 +935,8 @@ def parse_R_object(  # noqa: N802, C901, PLR0912, PLR0915
                     state=altrep_state,
                 )
                 if altrep_attr.info.type != RObjectType.NILVALUE:
-                    info.attributes = True
-                    attributes_read = True
-                    attributes = altrep_attr
+                    msg = "altrep attributes not implemented"
+                    raise NotImplementedError(msg)
             else:
                 value = (altrep_info, altrep_state, altrep_attr)
 

From 3acc18067b3ab2885acfcf5e11f17b20b58c6813 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:39:01 +0300
Subject: [PATCH 071/100] Separate pandas types to constructor functions

---
 rdata/conversion/to_r.py | 199 ++++++++++++++++++++++++++-------------
 1 file changed, 133 insertions(+), 66 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 3b849bd..f3ed1f5 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -27,7 +27,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
-    from typing import Any, Final, Literal
+    from typing import Any, Final, Literal, Protocol
 
     import numpy.typing as npt
 
@@ -35,6 +35,24 @@
 
     Encoding = Literal["utf-8", "cp1252"]
 
+    class ConversionFunction(Protocol):
+        """Protocol for Py-to-R conversion function."""
+
+        def __call__(self, data: Any) -> RObject:  # noqa: ANN401
+            """Convert Python object to R object."""
+
+
+    class ConstructorFunction(Protocol):
+        """Protocol for Py-to-R constructor function."""
+
+        def __call__(self,
+            data: Any,  # noqa: ANN401
+            convert_to_r_object: ConversionFunction,
+        ) -> tuple[RObjectType, Any, dict[str, Any]]:
+            """Convert Python object to R object components."""
+
+    ConstructorDict = Mapping[type, ConstructorFunction]
+
 
 # Default values for RVersions object
 DEFAULT_FORMAT_VERSION: Final[int] = 3
@@ -49,6 +67,100 @@
 R_MINIMUM_VERSION_WITH_ALTREP: Final[int] = 3
 
 
+def categorical_constructor(
+    data: pd.Categorical,
+    convert_to_r_object: ConversionFunction,  # noqa: ARG001
+) -> tuple[RObjectType, Any, dict[str, Any]]:
+    """
+    Construct R object components from pandas categorical.
+
+    Args:
+        data: Pandas categorical.
+        convert_to_r_object: Conversion function.
+
+    Returns:
+        Components of the R object.
+    """
+    assert isinstance(data, pd.Categorical)
+    r_type = RObjectType.INT
+    r_value = data.codes + 1
+    attributes = {
+        "levels": data.categories.to_numpy(),
+        "class": "factor",
+    }
+    return r_type, r_value, attributes
+
+
+def dataframe_constructor(
+    data: pd.DataFrame,
+    convert_to_r_object: ConversionFunction,
+) -> tuple[RObjectType, Any, dict[str, Any]]:
+    """
+    Construct R object components from pandas dataframe.
+
+    Args:
+        data: Pandas dataframe.
+        convert_to_r_object: Conversion function.
+
+    Returns:
+        Components of the R object.
+    """
+    assert isinstance(data, pd.DataFrame)
+    r_type = RObjectType.VEC
+    column_names = []
+    r_value = []
+    for column, series in data.items():
+        assert isinstance(column, str)
+        column_names.append(column)
+
+        pd_array = series.array
+        array: pd.Categorical | npt.NDArray[Any]
+        if isinstance(pd_array, pd.Categorical):
+            array = pd_array
+        else:
+            array = convert_pd_array_to_np_array(pd_array)
+        r_series = convert_to_r_object(array)
+        r_value.append(r_series)
+
+    index = data.index
+    if isinstance(index, pd.RangeIndex):
+        assert isinstance(index.start, int)
+        if (index.start == 1
+            and index.stop == data.shape[0] + 1
+            and index.step == 1
+        ):
+            row_names = np.ma.array(
+                data=[R_INT_NA, -data.shape[0]],
+                mask=[True, False],
+                fill_value=R_INT_NA,
+            )
+        else:
+            row_names = range(index.start, index.stop, index.step)
+    elif isinstance(index, pd.Index):
+        if (index.dtype == "object"
+            or np.issubdtype(str(index.dtype), np.integer)):
+            row_names = index.to_numpy()
+        else:
+            msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
+            raise NotImplementedError(msg)
+    else:
+        msg = f"pd.DataFrame index {type(index)} not implemented"
+        raise NotImplementedError(msg)
+
+    attributes = {
+        "names": np.array(column_names, dtype=np.dtype("U")),
+        "class": "data.frame",
+        "row.names": row_names,
+    }
+    return r_type, r_value, attributes
+
+
+DEFAULT_CONSTRUCTOR_DICT: Final[ConstructorDict] = MappingProxyType({
+    pd.Categorical: categorical_constructor,
+    pd.DataFrame: dataframe_constructor,
+})
+
+
 def convert_pd_array_to_np_array(
     pd_array: pd.api.extensions.ExtensionArray,
 ) -> npt.NDArray[Any]:
@@ -196,11 +308,14 @@ class ConverterFromPythonToR:
     Class converting Python objects to R objects.
 
     Attributes:
+        constructor_dict: Dictionary mapping Python types to constructors.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
     """
-    def __init__(self, *,
+    def __init__(self,
+        constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+        *,
         encoding: Encoding = "utf-8",
         format_version: int = DEFAULT_FORMAT_VERSION,
         r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
@@ -209,10 +324,12 @@ def __init__(self, *,
         Init class.
 
         Args:
+            constructor_dict: Dictionary mapping Python types to constructors.
             encoding: Encoding to be used for strings within data.
             format_version: File format version.
             r_version_serialized: R version written as the creator of the object.
         """
+        self.constructor_dict = constructor_dict
         self.encoding = encoding
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
@@ -454,75 +571,25 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 self.convert_to_r_object(None),
             )
 
-        elif isinstance(data, pd.Series):
-            msg = "pd.Series not implemented"
-            raise NotImplementedError(msg)
-
-        elif isinstance(data, pd.Categorical):
-            is_object = True
-            r_type = RObjectType.INT
-            r_value = data.codes + 1
-            attributes = {
-                "levels": data.categories.to_numpy(),
-                "class": "factor",
-            }
-
-        elif isinstance(data, pd.DataFrame):
-            is_object = True
-            r_type = RObjectType.VEC
-            column_names = []
-            r_value = []
-            for column, series in data.items():
-                assert isinstance(column, str)
-                column_names.append(column)
-
-                pd_array = series.array
-                array: pd.Categorical | npt.NDArray[Any]
-                if isinstance(pd_array, pd.Categorical):
-                    array = pd_array
-                else:
-                    array = convert_pd_array_to_np_array(pd_array)
-                r_series = self.convert_to_r_object(array)
-                r_value.append(r_series)
-
-            index = data.index
-            if isinstance(index, pd.RangeIndex):
-                assert isinstance(index.start, int)
-                if (index.start == 1
-                    and index.stop == data.shape[0] + 1
-                    and index.step == 1
-                ):
-                    row_names = np.ma.array(
-                        data=[R_INT_NA, -data.shape[0]],
-                        mask=[True, False],
-                        fill_value=R_INT_NA,
-                    )
-                else:
-                    row_names = range(index.start, index.stop, index.step)
-            elif isinstance(index, pd.Index):
-                if (index.dtype == "object"
-                    or np.issubdtype(str(index.dtype), np.integer)):
-                    row_names = index.to_numpy()
-                else:
-                    msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
-                    raise NotImplementedError(msg)
-            else:
-                msg = f"pd.DataFrame index {type(index)} not implemented"
+        else:
+            # Check available constructors
+            for t, constructor in self.constructor_dict.items():
+                if isinstance(data, t):
+                    r_type, r_value, attributes \
+                        = constructor(data, self.convert_to_r_object)
+                    break
+
+            if r_type is None:
+                msg = f"type {type(data)} not implemented"
                 raise NotImplementedError(msg)
 
-            attributes = {
-                "names": np.array(column_names, dtype=np.dtype("U")),
-                "class": "data.frame",
-                "row.names": row_names,
-            }
-            if self.df_attr_order is not None:
+            # Fix for test files where dataframe attribute order varies
+            assert isinstance(attributes, dict)
+            if isinstance(data, pd.DataFrame) and self.df_attr_order is not None:
                 attributes = {k: attributes[k] for k in self.df_attr_order}
 
-        else:
-            msg = f"type {type(data)} not implemented"
-            raise NotImplementedError(msg)
-
         if attributes is not None:
+            is_object = "class" in attributes
             r_attributes = self.convert_to_r_attributes(attributes)
         else:
             r_attributes = None

From 4557d992df6715643b215c1d2529d3972e8327c9 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:41:33 +0300
Subject: [PATCH 072/100] Separate string builders to functions

---
 rdata/conversion/to_r.py | 94 +++++++++++++++++++++++++++-------------
 1 file changed, 64 insertions(+), 30 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f3ed1f5..6f568d0 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -303,6 +303,63 @@ def build_r_list(
     return build_r_object(RObjectType.LIST, value=(car, cdr), tag=tag)
 
 
+def build_r_str(
+    data: str,
+    *,
+    encoding: Encoding,
+) -> RObject:
+    """
+    Build R object representing string.
+
+    Args:
+        data: String.
+        encoding: Encoding used for strings.
+
+    Returns:
+        R object.
+    """
+    value = [build_r_char(data, encoding=encoding)]
+    return build_r_object(RObjectType.STR, value=value)
+
+
+def build_r_char(
+    data: str | bytes | None,
+    *,
+    encoding: Encoding,
+) -> RObject:
+    """
+    Build R object representing characters.
+
+    Args:
+        data: String or bytestring, or None for a CHAR object without value.
+        encoding: Encoding used for strings.
+
+    Returns:
+        R object.
+    """
+    if data is None:
+        return build_r_object(RObjectType.CHAR)
+
+    if isinstance(data, str):
+        data = data.encode(encoding)
+
+    if all(chr(byte) in string.printable for byte in data):
+        gp = CharFlags.ASCII
+    elif encoding == "utf-8":
+        gp = CharFlags.UTF8
+    elif encoding == "cp1252":
+        # Note!
+        # CP1252 and Latin1 are not the same.
+        # Does CharFlags.LATIN1 mean actually CP1252
+        # as R on Windows mentions CP1252 as encoding?
+        # Or does CP1252 change to e.g. CP1250 depending on localization?
+        gp = CharFlags.LATIN1
+    else:
+        msg = f"unsupported encoding: {encoding}"
+        raise ValueError(msg)
+    return build_r_object(RObjectType.CHAR, value=data, gp=gp)
+
+
 class ConverterFromPythonToR:
     """
     Class converting Python objects to R objects.
@@ -448,7 +505,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         is_object = False
         attributes: dict[str, Any] | None = None
         tag = None
-        gp = 0
 
         if data is None:
             r_type = RObjectType.NILVALUE
@@ -484,24 +540,19 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 r_value = []
                 for el in data:
                     if el is None or pd.isna(el):
-                        r_el = build_r_object(RObjectType.CHAR)
+                        r_el = build_r_char(None, encoding=self.encoding)
                     elif isinstance(el, str):
-                        r_el = self.convert_to_r_object(el.encode(self.encoding))
+                        r_el = build_r_char(el, encoding=self.encoding)
                     else:
                         msg = "general object array not implemented"
                         raise NotImplementedError(msg)
                     r_value.append(r_el)
 
-            elif data.dtype.kind in ["S"]:
+            elif data.dtype.kind in ["S", "U"]:
                 assert data.ndim == 1
                 r_type = RObjectType.STR
-                r_value = [self.convert_to_r_object(el) for el in data]
-
-            elif data.dtype.kind in ["U"]:
-                assert data.ndim == 1
-                data = np.array([s.encode(self.encoding) for s in data],
-                                dtype=np.dtype("S"))
-                return self.convert_to_r_object(data)
+                r_value = [build_r_char(el, encoding=self.encoding)
+                           for el in data]
 
             else:
                 r_type = {
@@ -524,26 +575,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             return self.convert_to_r_object(np.array(data))
 
         elif isinstance(data, str):
-            r_type = RObjectType.STR
-            r_value = [self.convert_to_r_object(data.encode(self.encoding))]
+            return build_r_str(data, encoding=self.encoding)
 
         elif isinstance(data, bytes):
-            r_type = RObjectType.CHAR
-            if all(chr(byte) in string.printable for byte in data):
-                gp = CharFlags.ASCII
-            elif self.encoding == "utf-8":
-                gp = CharFlags.UTF8
-            elif self.encoding == "cp1252":
-                # Note!
-                # CP1252 and Latin1 are not the same.
-                # Does CharFlags.LATIN1 mean actually CP1252
-                # as R on Windows mentions CP1252 as encoding?
-                # Or does CP1252 change to e.g. CP1250 depending on localization?
-                gp = CharFlags.LATIN1
-            else:
-                msg = f"unsupported encoding: {self.encoding}"
-                raise ValueError(msg)
-            r_value = data
+            return build_r_char(data, encoding=self.encoding)
 
         elif isinstance(data, range):
             if self.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
@@ -600,7 +635,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             is_object=is_object,
             attributes=r_attributes,
             tag=tag,
-            gp=gp,
         )
 
 

From 3e330a3d4942268d31e0f0d5cb5b9097e1d5c76d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 10:42:06 +0300
Subject: [PATCH 073/100] Remove unused variable

---
 rdata/conversion/to_r.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 6f568d0..f1729e8 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -504,7 +504,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         r_value: Any = None
         is_object = False
         attributes: dict[str, Any] | None = None
-        tag = None
 
         if data is None:
             r_type = RObjectType.NILVALUE
@@ -634,7 +633,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             value=r_value,
             is_object=is_object,
             attributes=r_attributes,
-            tag=tag,
         )
 
 

From 0d4596593f6a66e545cbb89a37c59780a433a95d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 11:55:42 +0300
Subject: [PATCH 074/100] Convert all built-in types via numpy type

---
 rdata/conversion/to_r.py | 41 +++++++++++-----------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f1729e8..b8a9c9d 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -303,25 +303,6 @@ def build_r_list(
     return build_r_object(RObjectType.LIST, value=(car, cdr), tag=tag)
 
 
-def build_r_str(
-    data: str,
-    *,
-    encoding: Encoding,
-) -> RObject:
-    """
-    Build R object representing string.
-
-    Args:
-        data: String.
-        encoding: Encoding used for strings.
-
-    Returns:
-        R object.
-    """
-    value = [build_r_char(data, encoding=encoding)]
-    return build_r_object(RObjectType.STR, value=value)
-
-
 def build_r_char(
     data: str | bytes | None,
     *,
@@ -533,6 +514,10 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 attributes = {"names": names}
 
         elif isinstance(data, np.ndarray):
+            # Promote 0-dimensional array to 1-dimensional array
+            if data.ndim == 0:
+                data = data[np.newaxis]
+
             if data.dtype.kind in ["O"]:
                 assert data.ndim == 1
                 r_type = RObjectType.STR
@@ -547,7 +532,11 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                         raise NotImplementedError(msg)
                     r_value.append(r_el)
 
-            elif data.dtype.kind in ["S", "U"]:
+            elif data.dtype.kind in ["S"]:  # bytes object is converted to this dtype
+                assert data.size == 1
+                return build_r_char(data[0], encoding=self.encoding)
+
+            elif data.dtype.kind in ["U"]:
                 assert data.ndim == 1
                 r_type = RObjectType.STR
                 r_value = [build_r_char(el, encoding=self.encoding)
@@ -561,24 +550,16 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                     "c": RObjectType.CPLX,
                 }[data.dtype.kind]
 
-                if data.ndim == 0:
-                    r_value = data[np.newaxis]
-                elif data.ndim == 1:
+                if data.ndim == 1:
                     r_value = data
                 else:
                     # R uses column-major order like Fortran
                     r_value = np.ravel(data, order="F")
                     attributes = {"dim": np.array(data.shape)}
 
-        elif isinstance(data, (bool, int, float, complex)):
+        elif isinstance(data, (bool, int, float, complex, str, bytes)):
             return self.convert_to_r_object(np.array(data))
 
-        elif isinstance(data, str):
-            return build_r_str(data, encoding=self.encoding)
-
-        elif isinstance(data, bytes):
-            return build_r_char(data, encoding=self.encoding)
-
         elif isinstance(data, range):
             if self.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
                 # ALTREP support is from R version 3.5.0

From de5a408d66bacc4d9795652bc6f6b694f523fbe8 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 12:14:54 +0300
Subject: [PATCH 075/100] Raise error for non-string dictionary keys

---
 rdata/conversion/to_r.py  | 3 +++
 rdata/tests/test_write.py | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index b8a9c9d..1df39c6 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -510,6 +510,9 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             r_value = [self.convert_to_r_object(el) for el in values]
 
             if isinstance(data, dict):
+                if not all(isinstance(key, str) for key in data):
+                    msg = "dictionary keys must be strings"
+                    raise ValueError(msg)
                 names = np.array(list(data.keys()), dtype=np.dtype("U"))
                 attributes = {"names": names}
 
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 8620d3a..a446779 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -206,6 +206,13 @@ def test_convert_to_r_unsupported_encoding() -> None:
         converter.convert_to_r_object("ä")
 
 
+def test_convert_to_r_nonstr_dict_keys() -> None:
+    """Test checking non-string dict keys."""
+    converter = ConverterFromPythonToR()
+    with pytest.raises(ValueError, match="(?i)keys must be strings"):
+        converter.convert_to_r_object({"a": 1, 2: 2})
+
+
 @pytest.mark.parametrize("file_format", valid_formats)
 @pytest.mark.parametrize("value", [-2**31 - 1, 2**31])
 def test_unparse_big_int(file_format: FileFormat, value: int) -> None:

From 5d8187cb0deccbe73984241c936bfeb0e6512986 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 12:23:10 +0300
Subject: [PATCH 076/100] Raise error for non-string rda variable names

---
 rdata/conversion/to_r.py  |  5 +++++
 rdata/tests/test_write.py | 12 +++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 1df39c6..aef8054 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -402,6 +402,9 @@ def convert_to_r_data(self,
             if not isinstance(data, dict):
                 msg = f"for RDA file, data must be a dictionary, not type {type(data)}"
                 raise TypeError(msg)
+            if not all(isinstance(key, str) for key in data):
+                msg = "for RDA file, dictionary keys must be strings"
+                raise ValueError(msg)
             r_object = self.convert_to_r_attributes(data)
         else:
             r_object = self.convert_to_r_object(data)
@@ -453,6 +456,8 @@ def convert_to_r_sym(self,
         Returns:
             R object.
         """
+        assert isinstance(name, str)
+
         # Reference to existing symbol if exists
         if name in self._references:
             reference = self._references[name]
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index a446779..dd732a1 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -167,12 +167,18 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             assert data == out_data
 
 
-def test_convert_to_r_bad_rda() -> None:
+def test_convert_to_r_rda_missing_names() -> None:
     """Test checking that data for RDA has variable names."""
-    py_data = "hello"
     converter = ConverterFromPythonToR()
     with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
-        converter.convert_to_r_data(py_data, file_type="rda")
+        converter.convert_to_r_data("hello", file_type="rda")
+
+
+def test_convert_to_r_rda_nonstr_names() -> None:
+    """Test checking that RDA variable names are strings."""
+    converter = ConverterFromPythonToR()
+    with pytest.raises(ValueError, match="(?i)keys must be strings"):
+        converter.convert_to_r_data({1: "hello"}, file_type="rda")
 
 
 def test_convert_to_r_empty_rda() -> None:

From 59f4269f5b1dd0f3a97985d6049d2a921f295a50 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 12:39:15 +0300
Subject: [PATCH 077/100] Use shorthand function

---
 rdata/tests/test_write.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index dd732a1..000704c 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -12,7 +12,7 @@
 import pytest
 
 import rdata
-from rdata.conversion import ConverterFromPythonToR
+from rdata.conversion import ConverterFromPythonToR, convert_python_to_r_object
 from rdata.unparser import unparse_data
 
 if TYPE_CHECKING:
@@ -253,8 +253,8 @@ def test_convert_dataframe_pandas_dtypes() -> None:
         index=pd.RangeIndex(3),
     )
 
-    r_obj1 = ConverterFromPythonToR().convert_to_r_object(df1)
-    r_obj2 = ConverterFromPythonToR().convert_to_r_object(df2)
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
 
     assert str(r_obj1) == str(r_obj2)
     assert r_obj1 == r_obj2

From 540a59ad8f7feac709d2925f9cedac354af4ed6d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 2 Oct 2024 12:40:27 +0300
Subject: [PATCH 078/100] Add constructor_dict to helper functions

---
 rdata/_write.py              | 15 ++++++++++++---
 rdata/conversion/__init__.py |  2 ++
 rdata/conversion/to_r.py     | 17 +++++++++++------
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index b1a42e1..3a03128 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -3,15 +3,18 @@
 
 from typing import TYPE_CHECKING
 
-from .conversion import convert_python_to_r_data
-from .conversion.to_r import DEFAULT_FORMAT_VERSION
+from .conversion import (
+    DEFAULT_CONSTRUCTOR_DICT,
+    DEFAULT_FORMAT_VERSION,
+    convert_python_to_r_data,
+)
 from .unparser import unparse_file
 
 if TYPE_CHECKING:
     import os
     from typing import Any
 
-    from .conversion.to_r import Encoding
+    from .conversion.to_r import ConstructorDict, Encoding
     from .unparser import Compression, FileFormat
 
 
@@ -23,6 +26,7 @@ def write_rds(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
+    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
 ) -> None:
     """
     Write an RDS file.
@@ -37,6 +41,7 @@ def write_rds(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
+        constructor_dict: Dictionary mapping Python types to R classes.
 
     See Also:
         :func:`write_rda`: Similar function that writes an RDA or RDATA file.
@@ -53,6 +58,7 @@ def write_rds(
         data,
         encoding=encoding,
         format_version=format_version,
+        constructor_dict=constructor_dict,
     )
 
     unparse_file(
@@ -72,6 +78,7 @@ def write_rda(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
+    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
 ) -> None:
     """
     Write an RDA or RDATA file.
@@ -86,6 +93,7 @@ def write_rda(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
+        constructor_dict: Dictionary mapping Python types to R classes.
 
     See Also:
         :func:`write_rds`: Similar function that writes an RDS file.
@@ -102,6 +110,7 @@ def write_rda(
         data,
         encoding=encoding,
         format_version=format_version,
+        constructor_dict=constructor_dict,
         file_type="rda",
     )
 
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index 55506b3..3d3a699 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -25,6 +25,8 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
+    DEFAULT_CONSTRUCTOR_DICT as DEFAULT_CONSTRUCTOR_DICT,
+    DEFAULT_FORMAT_VERSION as DEFAULT_FORMAT_VERSION,
     ConverterFromPythonToR as ConverterFromPythonToR,
     convert_python_to_r_data as convert_python_to_r_data,
     convert_python_to_r_object as convert_python_to_r_object,
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index aef8054..d0eca0d 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -346,31 +346,30 @@ class ConverterFromPythonToR:
     Class converting Python objects to R objects.
 
     Attributes:
-        constructor_dict: Dictionary mapping Python types to R classes.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
+        constructor_dict: Dictionary mapping Python types to R classes.
     """
-    def __init__(self,
-        constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
-        *,
+    def __init__(self, *,
         encoding: Encoding = "utf-8",
         format_version: int = DEFAULT_FORMAT_VERSION,
         r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+        constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
     ) -> None:
         """
         Init class.
 
         Args:
-            constructor_dict: Dictionary mapping Python types to R classes.
             encoding: Encoding to be used for strings within data.
             format_version: File format version.
             r_version_serialized: R version written as the creator of the object.
+            constructor_dict: Dictionary mapping Python types to R classes.
         """
-        self.constructor_dict = constructor_dict
         self.encoding = encoding
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
+        self.constructor_dict = constructor_dict
         self._references: dict[str | None, tuple[int, RObject | None]] \
             = {None: (0, None)}
 
@@ -631,6 +630,7 @@ def convert_python_to_r_data(
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
     r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
     file_type: FileType = "rds",
 ) -> RData:
     """
@@ -641,6 +641,7 @@ def convert_python_to_r_data(
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
+        constructor_dict: Dictionary mapping Python types to R classes.
         file_type: File type.
 
     Returns:
@@ -653,6 +654,7 @@ def convert_python_to_r_data(
         encoding=encoding,
         format_version=format_version,
         r_version_serialized=r_version_serialized,
+        constructor_dict=constructor_dict,
     ).convert_to_r_data(data, file_type=file_type)
 
 
@@ -662,6 +664,7 @@ def convert_python_to_r_object(
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
     r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
+    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
 ) -> RObject:
     """
     Convert Python data to R object.
@@ -671,6 +674,7 @@ def convert_python_to_r_object(
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
+        constructor_dict: Dictionary mapping Python types to R classes.
 
     Returns:
         Corresponding RObject object.
@@ -682,4 +686,5 @@ def convert_python_to_r_object(
         encoding=encoding,
         format_version=format_version,
         r_version_serialized=r_version_serialized,
+        constructor_dict=constructor_dict,
     ).convert_to_r_object(data)

From df8b3910fc73b43555e1cdb4a2302e5ec0986216 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 14:30:47 +0300
Subject: [PATCH 079/100] Recreate test files in common attribute order

---
 rdata/conversion/to_r.py               |  10 ----------
 rdata/tests/data/test_dataframe.rda    | Bin 175 -> 176 bytes
 rdata/tests/data/test_dataframe.rds    | Bin 152 -> 153 bytes
 rdata/tests/data/test_dataframe_v3.rda | Bin 186 -> 187 bytes
 rdata/tests/data/test_dataframe_v3.rds | Bin 164 -> 164 bytes
 rdata/tests/test_rdata.py              |   6 ++++++
 rdata/tests/test_write.py              |   7 -------
 7 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index d0eca0d..435ff69 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -373,11 +373,6 @@ def __init__(self, *,
         self._references: dict[str | None, tuple[int, RObject | None]] \
             = {None: (0, None)}
 
-        # In test files the order in which dataframe attributes are written varies.
-        # R can read files with attributes in any order, but this variable
-        # is used in tests to change the attribute order to match with the test file.
-        self.df_attr_order: list[str] | None = None
-
 
     def convert_to_r_data(self,
         data: Any,  # noqa: ANN401
@@ -605,11 +600,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
                 msg = f"type {type(data)} not implemented"
                 raise NotImplementedError(msg)
 
-            # Fix for test files where dataframe attribute order varies
-            assert isinstance(attributes, dict)
-            if isinstance(data, pd.DataFrame) and self.df_attr_order is not None:
-                attributes = {k: attributes[k] for k in self.df_attr_order}
-
         if attributes is not None:
             is_object = "class" in attributes
             r_attributes = self.convert_to_r_attributes(attributes)
diff --git a/rdata/tests/data/test_dataframe.rda b/rdata/tests/data/test_dataframe.rda
index bd83517e593aa88d3253e5941d95aa3dd87dbc85..61cbf300f618bd1ad0c0f45fe8558a065ac3bfe7 100644
GIT binary patch
literal 176
zcmV;h08jrPiwFP!0000016@xs4#F@DbmD;2NFXHs(SbMc0;X=rlynMIN<&%sI1O%^
zAT@*Svvt0+FX!f@8w~&wlt7TkP)SY#6jWUI-VFYFCr_QDhXKH2WCQNFVr0gCj@9PY
zn$g&S(P7b#Q$ZQr3jU_GG-Uc;q@&jNj?u(XT4E{j<P%HBq??+N?8_``7z#5zzgee0
eSHhL+-*pzBuaF;3A%t(bZ2AEc%<0QY0RR9edQkuX

literal 175
zcmV;g08sxQiwFP!0000016@$T4#FS|?dUWaiN?hL*uih`3q0*+>SQN$39xzf#~l_1
zCUR)|3h%u(y_~y~>?8mPKn{TnYr(048XB&9r>wi)$<biwp#bn0*?>E)7@4sT@u%Es
zH7jiy9pdl$T2ThKg1_lC*-ZbL4Ai^PFj_dukyu(hdBt*K@=eW5dQyd~G2^Sv(lA53
dIeG6tD-nIMl??Z2(?Q~9%NOEo@hD0G0019OQYrud

diff --git a/rdata/tests/data/test_dataframe.rds b/rdata/tests/data/test_dataframe.rds
index b5f238242691b4ef5071cd1149776e91b592ae93..bdfbdba8fd5174cfb34d5be2c40b19212b28df10 100644
GIT binary patch
literal 153
zcmV;K0A~LmiwFP!0000016_{64#FT1M3-GOX_F=<{zt#Z>ssjvZ9;3k`r{QBC_U_9
zV1_Jj_SOObf;9w4V(FSQP?v(pl$>NSXA6zoXYW?$XK|N8$ghU0xvTlp;CZg&2V~@7
znS!{VNLP|Zy!h#K<Idf~%Kq)jH;k&ReelBl7zCogO_)A?g{Lj>D@TlROPl=x#0|t9
HBLM&aYk)~Q

literal 152
zcmV;J0B8RniwFP!0000016@u*4#F@DOOt}MNt2NH$KJy=jP6vJ)QxfF<D;bQx<d|5
z?JBl&YXJbk8UiGtbj=#5OG03ZPOzA@g+|`9cdPTWuuDnEzlJNhtNqj9d9Le6$jHMo
z1z|sdt|T4t;-}M%d+sh)_HU<bO_)A?wWbdI%7`&;Wf7ygs(tXn{TRfOF8c$z(z<jb
G0RRB@BuZca

diff --git a/rdata/tests/data/test_dataframe_v3.rda b/rdata/tests/data/test_dataframe_v3.rda
index 01e2824b76b627252c23de483057d35dee3690c9..b5955f46da0266c4aad32ad784e62e1d438a1f05 100644
GIT binary patch
literal 187
zcmV;s07U;EiwFP!0000016@zS3c@f9P1jafK@t28ck$u}h#w#dyGfkgsXEx|y!zv=
zrdunlhomp*doRi9Q0`Qz0DyyF9JnM1@^U_I_W&ZJf`NdX-j&szz24Dc-Qq)&VnbbG
z@Z~7?%(3<{Q!b3|wdn}0$KR>TFoIjcmzBXT(*GtkR<>yg4HW4}o?1L<#dKou4MmUU
pr6(D8$uQ;5EW({L<GgL&MKt$YWFJb-xo_BP_yHbiYHCvf007u;S&#q#

literal 186
zcmV;r07d^FiwFP!0000023=6W3c@f9&DL$Of+F}I11Elget>w|P2%iMorAW{t3U2)
z+O2{;Bz-S^FA2Gvnmung05D($MNSZk<9SyfYJe;ymOut2y=x=1x!%!Wr?3}f*;17h
zY~}q<>A8zPXCcKXq$afSzjHOg0B(^!Zzbv=|265bHG?8FQN$xz+~Pqi#uJym3mH*9
oMPw`0@T!8<%}C#rthJw6$ZWI~clW54gZRy+FC~#dXH)?I0G2XRrvLx|

diff --git a/rdata/tests/data/test_dataframe_v3.rds b/rdata/tests/data/test_dataframe_v3.rds
index 6c2ada71662ce503d8b8490e3154ae34c70ae192..8e2492d66cc9b8feba05da9e8f0ec44a3061bc9b 100644
GIT binary patch
delta 73
zcmV-P0Ji_60i*$tG6Kdkku)?sO?vF6!4vvY@kqrjUih>;ar52Cf^N2XO$HWLKXAgP
f_hiD1D?hxOVji!MA9_LvYrO0SYD{WRECB!j*sUMu

delta 73
zcmV-P0Ji_60i*$tG6H5iku)?sm-N_8gD3Q*;*pB4c;VCX#BJ|J7Id>^w#pChrdVT!
f{Lm9ZShG+Dc2)hr37g)NC0_Oe6q6JtECB!jCoCbk

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 2ab8a4d..6972bcd 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -454,6 +454,9 @@ def test_encodings_v3(self) -> None:
 
     def test_dataframe(self) -> None:
         """Test dataframe conversion."""
+        # Files created in R with
+        # test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe.rda", version=2)  # noqa: E501
+        # test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe_v3.rda")  # noqa: E501
         for f in ("test_dataframe.rda", "test_dataframe_v3.rda"):
             with self.subTest(file=f):
                 data = rdata.read_rda(TESTDATA_PATH / f)
@@ -476,6 +479,9 @@ def test_dataframe(self) -> None:
 
     def test_dataframe_rds(self) -> None:
         """Test dataframe conversion."""
+        # Files created in R with
+        # df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe.rds", version=2)  # noqa: E501
+        # df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe_v3.rds")  # noqa: E501
         for f in ("test_dataframe.rds", "test_dataframe_v3.rds"):
             with self.subTest(file=f):
                 data = rdata.read_rds(TESTDATA_PATH / f)
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 000704c..96dc96c 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -138,13 +138,6 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             format_version=r_data.versions.format,
             r_version_serialized=r_data.versions.serialized,
         )
-        if fname in [
-            "test_dataframe.rda",
-            "test_dataframe.rds",
-            "test_dataframe_v3.rda",
-            "test_dataframe_v3.rds",
-        ]:
-            converter.df_attr_order = ["names", "row.names", "class"]
 
         try:
             new_r_data = converter.convert_to_r_data(py_data, file_type=file_type)

From 1a00c1db63de5951ae929efc34a628fc7575309f Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 14:46:50 +0300
Subject: [PATCH 080/100] Skip altreps with attributes in test

---
 rdata/tests/test_write.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 96dc96c..575f23e 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -108,10 +108,14 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
     with (TESTDATA_PATH / fname).open("rb") as f:
         # Skip test files without unique R->py->R transformation
         if fname in [
-            "test_encodings.rda",     # encoding not kept in Python
-            "test_encodings_v3.rda",  # encoding not kept in Python
-            "test_list_attrs.rda",    # attributes not kept in Python
-            "test_file.rda",          # attributes not kept in Python
+            # encoding not kept in Python
+            "test_encodings.rda",
+            "test_encodings_v3.rda",
+            # attributes not kept in Python
+            "test_list_attrs.rda",
+            "test_file.rda",
+            "test_altrep_wrap_real_attributes.rds",
+            "test_altrep_wrap_real_class_attribute.rds",
         ]:
             pytest.skip("ambiguous R->py->R transformation")
 

From ff6b6a9bf2aef6727fabe10c0738aa6e7216f75d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 14:47:21 +0300
Subject: [PATCH 081/100] Fix ruff

---
 rdata/unparser/_ascii.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index 6bb694b..b9ce4f4 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -13,7 +13,7 @@
 
 if TYPE_CHECKING:
     import io
-    from typing import Any, Final
+    from typing import Final
 
     import numpy.typing as npt
 

From daf1e3a4cdbdc904612d70fd143bded24ac47c47 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 14:49:12 +0300
Subject: [PATCH 082/100] Filter expected warnings

---
 rdata/tests/test_rdata.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdata/tests/test_rdata.py b/rdata/tests/test_rdata.py
index 6972bcd..d06df71 100644
--- a/rdata/tests/test_rdata.py
+++ b/rdata/tests/test_rdata.py
@@ -808,6 +808,7 @@ def test_altrep_wrap_real_attributes(self) -> None:
         data = rdata.conversion.convert(parsed)
         np.testing.assert_equal(data, [1., 2., 3.])
 
+    @pytest.mark.filterwarnings("ignore:Missing constructor")
     def test_altrep_wrap_real_class_attribute(self) -> None:
         """Test alternative representation of wrap_real with class attribute."""
         # File created in R with

From 943e69747fbede54cd3e7398c4f7689fe7e95bea Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 15:13:25 +0300
Subject: [PATCH 083/100] Pass converter object to constructor functions

---
 rdata/conversion/to_r.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 435ff69..c2cb188 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -35,19 +35,21 @@
 
     Encoding = Literal["utf-8", "cp1252"]
 
-    class ConversionFunction(Protocol):
-        """Protocol for Py-to-R conversion function."""
-
-        def __call__(self, data: Any) -> RObject:  # noqa: ANN401
-            """Convert Python object to R object."""
+    class Converter(Protocol):
+        """Protocol for class converting Python objects to R objects."""
 
+        def convert_to_r_object(
+            self,
+            data: Any,  # noqa: ANN401
+        ) -> RObject:
+            """Convert Python data to R object."""
 
     class ConstructorFunction(Protocol):
         """Protocol for Py-to-R constructor function."""
 
         def __call__(self,
             data: Any,  # noqa: ANN401
-            convert_to_r_object: ConversionFunction,
+            converter: Converter,
         ) -> tuple[RObjectType, Any, dict[str, Any]]:
             """Convert Python object to R object components."""
 
@@ -69,14 +71,14 @@ def __call__(self,
 
 def categorical_constructor(
     data: pd.Categorical,
-    convert_to_r_object: ConversionFunction,  # noqa: ARG001
+    converter: Converter,  # noqa: ARG001
 ) -> tuple[RObjectType, Any, dict[str, Any]]:
     """
     Construct R object components from pandas categorical.
 
     Args:
         data: Pandas categorical.
-        convert_to_r_object: Conversion function.
+        converter: Python-to-R converter.
 
     Returns:
         Components of the R object.
@@ -93,14 +95,14 @@ def categorical_constructor(
 
 def dataframe_constructor(
     data: pd.DataFrame,
-    convert_to_r_object: ConversionFunction,
+    converter: Converter,
 ) -> tuple[RObjectType, Any, dict[str, Any]]:
     """
     Construct R object components from pandas dataframe.
 
     Args:
         data: Pandas dataframe.
-        convert_to_r_object: Conversion function.
+        converter: Python-to-R converter.
 
     Returns:
         Components of the R object.
@@ -119,7 +121,7 @@ def dataframe_constructor(
             array = pd_array
         else:
             array = convert_pd_array_to_np_array(pd_array)
-        r_series = convert_to_r_object(array)
+        r_series = converter.convert_to_r_object(array)
         r_value.append(r_series)
 
     index = data.index
@@ -592,8 +594,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             # Check available constructors
             for t, constructor in self.constructor_dict.items():
                 if isinstance(data, t):
-                    r_type, r_value, attributes \
-                        = constructor(data, self.convert_to_r_object)
+                    r_type, r_value, attributes = constructor(data, self)
                     break
 
             if r_type is None:

From 8a269ae91c0da35f92846ee62211fc74931f5c5f Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Fri, 25 Oct 2024 15:34:26 +0300
Subject: [PATCH 084/100] Allow constructor functions without converter

---
 rdata/conversion/to_r.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index c2cb188..06f141c 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import inspect
 import string
 from types import MappingProxyType
 from typing import TYPE_CHECKING
@@ -26,7 +27,7 @@
 )
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
+    from collections.abc import Callable, Mapping
     from typing import Any, Final, Literal, Protocol
 
     import numpy.typing as npt
@@ -44,16 +45,11 @@ def convert_to_r_object(
         ) -> RObject:
             """Convert Python data to R object."""
 
-    class ConstructorFunction(Protocol):
-        """Protocol for Py-to-R constructor function."""
+    ConstructorReturnValue = tuple[RObjectType, Any, dict[str, Any]]
+    ConstructorFunction1 = Callable[[Any], ConstructorReturnValue]
+    ConstructorFunction2 = Callable[[Any, Converter], ConstructorReturnValue]
 
-        def __call__(self,
-            data: Any,  # noqa: ANN401
-            converter: Converter,
-        ) -> tuple[RObjectType, Any, dict[str, Any]]:
-            """Convert Python object to R object components."""
-
-    ConstructorDict = Mapping[type, ConstructorFunction]
+    ConstructorDict = Mapping[type, ConstructorFunction1 | ConstructorFunction2]
 
 
 # Default values for RVersions object
@@ -71,14 +67,12 @@ def __call__(self,
 
 def categorical_constructor(
     data: pd.Categorical,
-    converter: Converter,  # noqa: ARG001
-) -> tuple[RObjectType, Any, dict[str, Any]]:
+) -> ConstructorReturnValue:
     """
     Construct R object components from pandas categorical.
 
     Args:
         data: Pandas categorical.
-        converter: Python-to-R converter.
 
     Returns:
         Components of the R object.
@@ -96,7 +90,7 @@ def categorical_constructor(
 def dataframe_constructor(
     data: pd.DataFrame,
     converter: Converter,
-) -> tuple[RObjectType, Any, dict[str, Any]]:
+) -> ConstructorReturnValue:
     """
     Construct R object components from pandas dataframe.
 
@@ -594,7 +588,16 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             # Check available constructors
             for t, constructor in self.constructor_dict.items():
                 if isinstance(data, t):
-                    r_type, r_value, attributes = constructor(data, self)
+                    n_params = len(inspect.signature(constructor).parameters)
+                    args: tuple[Any] | tuple[Any, Converter]
+                    if n_params == 1:
+                        args = (data,)
+                    elif n_params == 2:  # noqa: PLR2004
+                        args = (data, self)
+                    else:
+                        msg = "constructor function has wrong call signature"
+                        raise ValueError(msg)
+                    r_type, r_value, attributes = constructor(*args)
                     break
 
             if r_type is None:

From 9718161022b8f6946d635138040051fee962c133 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 08:48:36 +0300
Subject: [PATCH 085/100] Convert only pandas rangeindex to altrep

---
 rdata/conversion/to_r.py | 84 +++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 06f141c..335eb6f 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -38,14 +38,15 @@
 
     class Converter(Protocol):
         """Protocol for class converting Python objects to R objects."""
+        format_version: int
 
-        def convert_to_r_object(
-            self,
-            data: Any,  # noqa: ANN401
-        ) -> RObject:
+        def convert_to_r_sym(self, name: str) -> RObject:
+            """Convert string to R symbol."""
+
+        def convert_to_r_object(self, data: Any) -> RObject:  # noqa: ANN401
             """Convert Python data to R object."""
 
-    ConstructorReturnValue = tuple[RObjectType, Any, dict[str, Any]]
+    ConstructorReturnValue = tuple[RObjectType, Any, dict[str, Any] | None]
     ConstructorFunction1 = Callable[[Any], ConstructorReturnValue]
     ConstructorFunction2 = Callable[[Any, Converter], ConstructorReturnValue]
 
@@ -131,7 +132,7 @@ def dataframe_constructor(
                 fill_value=R_INT_NA,
             )
         else:
-            row_names = range(index.start, index.stop, index.step)
+            row_names = index
     elif isinstance(index, pd.Index):
         if (index.dtype == "object"
             or np.issubdtype(str(index.dtype), np.integer)):
@@ -151,9 +152,54 @@ def dataframe_constructor(
     return r_type, r_value, attributes
 
 
+def rangeindex_constructor(
+    data: pd.RangeIndex,
+    converter: Converter,
+) -> ConstructorReturnValue:
+    """
+    Construct R object components from pandas rangeindex.
+
+    Args:
+        data: Pandas rangeindex.
+        converter: Python-to-R converter.
+
+    Returns:
+        Components of the R object.
+    """
+    assert isinstance(data, pd.RangeIndex)
+    if converter.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
+        # ALTREP support is from R version 3.5.0
+        # (minimum version for format version 3)
+        return RObjectType.INT, np.array(data), None
+
+    assert isinstance(data.step, int)
+    if data.step != 1:
+        # R supports compact sequences only with step 1;
+        # convert the range to an array of values
+        return RObjectType.INT, np.array(data), None
+
+    r_type = RObjectType.ALTREP
+    r_value = (
+        build_r_list([
+            converter.convert_to_r_sym("compact_intseq"),
+            converter.convert_to_r_sym("base"),
+            converter.convert_to_r_object(RObjectType.INT.value),
+        ]),
+        converter.convert_to_r_object(np.array([
+            len(data),
+            data.start,
+            data.step,
+        ], dtype=float)),
+        converter.convert_to_r_object(None),
+    )
+    attributes = None
+    return r_type, r_value, attributes
+
+
 DEFAULT_CONSTRUCTOR_DICT: Final[ConstructorDict] = MappingProxyType({
     pd.Categorical: categorical_constructor,
     pd.DataFrame: dataframe_constructor,
+    pd.RangeIndex: rangeindex_constructor,
 })
 
 
@@ -558,32 +604,6 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
         elif isinstance(data, (bool, int, float, complex, str, bytes)):
             return self.convert_to_r_object(np.array(data))
 
-        elif isinstance(data, range):
-            if self.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
-                # ALTREP support is from R version 3.5.0
-                # (minimum version for format version 3)
-                return self.convert_to_r_object(np.array(data))
-
-            if data.step != 1:
-                # R supports compact sequences only with step 1;
-                # convert the range to an array of values
-                return self.convert_to_r_object(np.array(data))
-
-            r_type = RObjectType.ALTREP
-            r_value = (
-                build_r_list([
-                    self.convert_to_r_sym("compact_intseq"),
-                    self.convert_to_r_sym("base"),
-                    self.convert_to_r_object(RObjectType.INT.value),
-                ]),
-                self.convert_to_r_object(np.array([
-                    len(data),
-                    data.start,
-                    data.step,
-                ], dtype=float)),
-                self.convert_to_r_object(None),
-            )
-
         else:
             # Check available constructors
             for t, constructor in self.constructor_dict.items():

From a7c7066a8e65e58c4f406039ba7641dd469681f9 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 09:08:16 +0300
Subject: [PATCH 086/100] Use more robust indexing

---
 rdata/tests/test_write.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 575f23e..6e543a8 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -239,15 +239,16 @@ def test_convert_dataframe_pandas_dtypes() -> None:
         index=range(3),
     )
 
+    index = pd.RangeIndex(3)
     df2 = pd.DataFrame(
         {
-            "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype()),
-            "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype()),
-            "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype()),
-            "bool": pd.Series([True, False, True], dtype=pd.BooleanDtype()),
-            "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex),
+            "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype(), index=index),
+            "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype(), index=index),
+            "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype(), index=index),
+            "bool": pd.Series([1, 0, 1], dtype=pd.BooleanDtype(), index=index),
+            "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex, index=index),
         },
-        index=pd.RangeIndex(3),
+        index=index,
     )
 
     r_obj1 = convert_python_to_r_object(df1)

From 5a430aaf09461f3a2596f9f615bbeda70c7b97a3 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 09:08:37 +0300
Subject: [PATCH 087/100] Add tests for rangeindex

---
 rdata/parser/_parser.py   |  3 +++
 rdata/tests/test_write.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/rdata/parser/_parser.py b/rdata/parser/_parser.py
index dbf2cc5..8c90486 100644
--- a/rdata/parser/_parser.py
+++ b/rdata/parser/_parser.py
@@ -369,6 +369,9 @@ def __eq__(self, other: object) -> bool:
             return False
 
         # Compare value field
+        if not isinstance(other.value, type(self.value)):
+            return False
+
         if isinstance(self.value, np.ndarray):
             if not np.array_equal(self.value, other.value, equal_nan=True):
                 return False
diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 6e543a8..9034055 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -258,6 +258,34 @@ def test_convert_dataframe_pandas_dtypes() -> None:
     assert r_obj1 == r_obj2
 
 
+def test_convert_dataframe_rangeindex() -> None:
+    """Test converting dataframe with rangeindex."""
+    data = {"data": np.array([10, 20, 30], dtype=np.int32)}
+
+    df1 = pd.DataFrame(data, index=pd.RangeIndex(3))
+    df2 = pd.DataFrame(data, index=pd.Index([0, 1, 2]))
+
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
+
+    assert str(r_obj1) != str(r_obj2)
+    assert r_obj1 != r_obj2
+
+
+def test_convert_dataframe_rangeindex_flattened() -> None:
+    """Test converting dataframe with rangeindex."""
+    data = {"data": np.array([10, 20, 30], dtype=np.int32)}
+
+    df1 = pd.DataFrame(data, index=pd.RangeIndex(3, 8, 2))
+    df2 = pd.DataFrame(data, index=pd.Index([3, 5, 7]))
+
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
+
+    assert str(r_obj1) == str(r_obj2)
+    assert r_obj1 == r_obj2
+
+
 @pytest.mark.parametrize("compression", [*valid_compressions, "fail"])
 @pytest.mark.parametrize("file_format", [*valid_formats, None, "fail"])
 @pytest.mark.parametrize("file_type", ["rds", "rda"])

From 87f4c652a5b7f33895164d4810bd5ccd188bf589 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 09:14:37 +0300
Subject: [PATCH 088/100] Remove conversion of altrep to range

---
 rdata/conversion/_conversion.py | 37 ---------------------------------
 1 file changed, 37 deletions(-)

diff --git a/rdata/conversion/_conversion.py b/rdata/conversion/_conversion.py
index 8530b8a..dcd5305 100644
--- a/rdata/conversion/_conversion.py
+++ b/rdata/conversion/_conversion.py
@@ -14,8 +14,6 @@
 import xarray
 from typing_extensions import override
 
-from rdata.parser._parser import get_altrep_name
-
 from .. import parser
 
 ConversionFunction = Callable[[Union[parser.RData, parser.RObject]], Any]
@@ -396,38 +394,6 @@ def convert_array(
     return value  # type: ignore [no-any-return]
 
 
-def convert_altrep_to_range(
-    r_altrep: parser.RObject,
-) -> range:
-    """
-    Convert a R altrep to range object.
-
-    Args:
-        r_altrep: R altrep object
-
-    Returns:
-        Range object.
-    """
-    if r_altrep.info.type != parser.RObjectType.ALTREP:
-        msg = "Must receive an altrep object"
-        raise TypeError(msg)
-
-    info, state, attr = r_altrep.value
-    assert attr.info.type == parser.RObjectType.NILVALUE
-
-    altrep_name = get_altrep_name(info)
-
-    if altrep_name != b"compact_intseq":
-        msg = "Only compact integer sequences can be converted to range"
-        raise NotImplementedError(msg)
-
-    n = int(state.value[0])
-    start = int(state.value[1])
-    step = int(state.value[2])
-    stop = start + (n - 1) * step
-    return range(start, stop + 1, step)
-
-
 def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
 
     if isinstance(source, np.ndarray):
@@ -872,9 +838,6 @@ def _convert_next(  # noqa: C901, PLR0912, PLR0915
 
             value = None
 
-        elif obj.info.type == parser.RObjectType.ALTREP:
-            value = convert_altrep_to_range(obj)
-
         else:
             msg = f"Type {obj.info.type} not implemented"
             raise NotImplementedError(msg)

From 25a14af66554caec8fb4d888e8667f0994563bc2 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 11:00:52 +0200
Subject: [PATCH 089/100] Clarify skip message

---
 rdata/tests/test_write.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rdata/tests/test_write.py b/rdata/tests/test_write.py
index 9034055..e733518 100644
--- a/rdata/tests/test_write.py
+++ b/rdata/tests/test_write.py
@@ -106,7 +106,7 @@ def test_unparse(fname: str) -> None:
 def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
     """Test converting Python data to RData object."""
     with (TESTDATA_PATH / fname).open("rb") as f:
-        # Skip test files without unique R->py->R transformation
+        # Skip test files without unique transformation
         if fname in [
             # encoding not kept in Python
             "test_encodings.rda",
@@ -117,7 +117,7 @@ def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
             "test_altrep_wrap_real_attributes.rds",
             "test_altrep_wrap_real_class_attribute.rds",
         ]:
-            pytest.skip("ambiguous R->py->R transformation")
+            pytest.skip("ambiguous R-to-Python-to-R transformation")
 
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)

From 798408927426ee0a7269f5a84d43a381fd62ccd1 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 28 Oct 2024 12:29:18 +0200
Subject: [PATCH 090/100] Fix ruff formatting

---
 rdata/_write.py              |  1 +
 rdata/conversion/__init__.py |  1 +
 rdata/conversion/to_r.py     | 67 +++++++++++++++++++-----------------
 rdata/parser/_ascii.py       |  7 ++--
 rdata/parser/_xdr.py         |  6 ++--
 rdata/unparser/__init__.py   | 40 +++++++++++----------
 rdata/unparser/_ascii.py     |  5 +--
 rdata/unparser/_unparser.py  |  3 +-
 rdata/unparser/_xdr.py       |  3 +-
 9 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index 3a03128..a534af0 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -1,4 +1,5 @@
 """Functions to perform conversion and unparsing in one step."""
+
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index 3d3a699..ab13caa 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -1,4 +1,5 @@
 """Utilities for converting R objects to Python ones."""
+
 from ._conversion import (
     DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,
     Converter as Converter,
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 335eb6f..7ecf7f5 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -38,6 +38,7 @@
 
     class Converter(Protocol):
         """Protocol for class converting Python objects to R objects."""
+
         format_version: int
 
         def convert_to_r_sym(self, name: str) -> RObject:
@@ -122,10 +123,7 @@ def dataframe_constructor(
     index = data.index
     if isinstance(index, pd.RangeIndex):
         assert isinstance(index.start, int)
-        if (index.start == 1
-            and index.stop == data.shape[0] + 1
-            and index.step == 1
-        ):
+        if index.start == 1 and index.stop == data.shape[0] + 1 and index.step == 1:
             row_names = np.ma.array(
                 data=[R_INT_NA, -data.shape[0]],
                 mask=[True, False],
@@ -134,8 +132,7 @@ def dataframe_constructor(
         else:
             row_names = index
     elif isinstance(index, pd.Index):
-        if (index.dtype == "object"
-            or np.issubdtype(str(index.dtype), np.integer)):
+        if index.dtype == "object" or np.issubdtype(str(index.dtype), np.integer):
             row_names = index.to_numpy()
         else:
             msg = f"pd.DataFrame pd.Index {index.dtype} not implemented"
@@ -296,10 +293,11 @@ def build_r_object(
     """
     assert r_type is not None
     reference_id, referenced_object = reference
-    assert ((reference_id == 0)
-            == (referenced_object is None)
-            == (r_type != RObjectType.REF)
-            )
+    assert (
+        (reference_id == 0)
+        == (referenced_object is None)
+        == (r_type != RObjectType.REF)
+    )
     return RObject(
         RObjectInfo(
             r_type,
@@ -308,12 +306,12 @@ def build_r_object(
             tag=tag is not None,
             gp=gp,
             reference=reference_id,
-         ),
-         value,
-         attributes,
-         tag,
-         referenced_object,
-     )
+        ),
+        value,
+        attributes,
+        tag,
+        referenced_object,
+    )
 
 
 def build_r_list(
@@ -393,7 +391,10 @@ class ConverterFromPythonToR:
         r_version_serialized: R version written as the creator of the object.
         constructor_dict: Dictionary mapping Python types to R classes.
     """
-    def __init__(self, *,
+
+    def __init__(
+        self,
+        *,
         encoding: Encoding = "utf-8",
         format_version: int = DEFAULT_FORMAT_VERSION,
         r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
@@ -412,11 +413,12 @@ def __init__(self, *,
         self.format_version = format_version
         self.r_version_serialized = r_version_serialized
         self.constructor_dict = constructor_dict
-        self._references: dict[str | None, tuple[int, RObject | None]] \
-            = {None: (0, None)}
+        self._references: dict[str | None, tuple[int, RObject | None]] = {
+            None: (0, None),
+        }
 
-
-    def convert_to_r_data(self,
+    def convert_to_r_data(
+        self,
         data: Any,  # noqa: ANN401
         *,
         file_type: FileType = "rds",
@@ -451,14 +453,16 @@ def convert_to_r_data(self,
             R_MINIMUM_VERSIONS[self.format_version],
         )
 
-        extra = (RExtraInfo(self.encoding.upper())
-                 if versions.format >= R_MINIMUM_VERSION_WITH_ENCODING
-                 else RExtraInfo(None))
+        extra = (
+            RExtraInfo(self.encoding.upper())
+            if versions.format >= R_MINIMUM_VERSION_WITH_ENCODING
+            else RExtraInfo(None)
+        )
 
         return RData(versions, extra, r_object)
 
-
-    def convert_to_r_attributes(self,
+    def convert_to_r_attributes(
+        self,
         data: dict[str, Any],
     ) -> RObject:
         """
@@ -479,8 +483,8 @@ def convert_to_r_attributes(self,
 
         return build_r_list(converted)
 
-
-    def convert_to_r_sym(self,
+    def convert_to_r_sym(
+        self,
         name: str,
     ) -> RObject:
         """
@@ -507,8 +511,8 @@ def convert_to_r_sym(self,
         self._references[name] = (len(self._references), r_object)
         return r_object
 
-
-    def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
+    def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
+        self,
         data: Any,  # noqa: ANN401
     ) -> RObject:
         """
@@ -583,8 +587,7 @@ def convert_to_r_object(self,  # noqa: C901, PLR0912, PLR0915
             elif data.dtype.kind in ["U"]:
                 assert data.ndim == 1
                 r_type = RObjectType.STR
-                r_value = [build_r_char(el, encoding=self.encoding)
-                           for el in data]
+                r_value = [build_r_char(el, encoding=self.encoding) for el in data]
 
             else:
                 r_type = {
diff --git a/rdata/parser/_ascii.py b/rdata/parser/_ascii.py
index 23d22b1..a79d2ba 100644
--- a/rdata/parser/_ascii.py
+++ b/rdata/parser/_ascii.py
@@ -40,11 +40,10 @@ def _readline(self) -> str:
         return self.file.readline()[:-1]
 
     def _parse_array_values(
-            self,
-            dtype: npt.DTypeLike,
-            length: int,
+        self,
+        dtype: npt.DTypeLike,
+        length: int,
     ) -> npt.NDArray[Any]:
-
         array = np.empty(length, dtype=dtype)
         value: int | float | complex
 
diff --git a/rdata/parser/_xdr.py b/rdata/parser/_xdr.py
index 6d265dd..fe5211a 100644
--- a/rdata/parser/_xdr.py
+++ b/rdata/parser/_xdr.py
@@ -26,9 +26,9 @@ def __init__(
         self.file = io.BytesIO(data)
 
     def _parse_array_values(
-            self,
-            dtype: npt.DTypeLike,
-            length: int,
+        self,
+        dtype: npt.DTypeLike,
+        length: int,
     ) -> npt.NDArray[Any]:
         dtype = np.dtype(dtype)
         buffer = self.file.read(length * dtype.itemsize)
diff --git a/rdata/unparser/__init__.py b/rdata/unparser/__init__.py
index 0fdc243..02a189e 100644
--- a/rdata/unparser/__init__.py
+++ b/rdata/unparser/__init__.py
@@ -25,12 +25,12 @@
 
 
 def unparse_file(
-        path: os.PathLike[Any] | str,
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
-        compression: Compression = "gzip",
+    path: os.PathLike[Any] | str,
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
+    compression: Compression = "gzip",
 ) -> None:
     """
     Unparse RData object to a file.
@@ -59,11 +59,11 @@ def unparse_file(
 
 
 def unparse_fileobj(
-        fileobj: IO[Any],
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
+    fileobj: IO[Any],
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
 ) -> None:
     """
     Unparse RData object to a file object.
@@ -78,9 +78,11 @@ def unparse_fileobj(
 
     if file_format == "ascii":
         from ._ascii import UnparserASCII as Unparser
+
         rda_magic = "RDA"
     elif file_format == "xdr":
         from ._xdr import UnparserXDR as Unparser
+
         rda_magic = "RDX"
     else:
         msg = f"Unknown file format: {file_format}"
@@ -89,9 +91,11 @@ def unparse_fileobj(
     # Check that RData object for rda file is of correct kind
     if file_type == "rda":
         r_object = r_data.object
-        if not (r_object.info.type is RObjectType.LIST
-                and r_object.tag is not None
-                and r_object.tag.info.type is RObjectType.SYM):
+        if not (
+            r_object.info.type is RObjectType.LIST
+            and r_object.tag is not None
+            and r_object.tag.info.type is RObjectType.SYM
+        ):
             msg = "r_data object must be dictionary-like for rda file"
             raise ValueError(msg)
 
@@ -104,10 +108,10 @@ def unparse_fileobj(
 
 
 def unparse_data(
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
 ) -> bytes:
     """
     Unparse RData object to a bytestring.
diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index b9ce4f4..0881631 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -35,7 +35,7 @@ def escape(b: bytes) -> str:
         byte_to_str[byte] = escape(bytes([byte]))
 
     # Update mapping for special characters
-    byte_to_str[b'"'[0]] = r'\"'
+    byte_to_str[b'"'[0]] = r"\""
     byte_to_str[b"'"[0]] = r"\'"
     byte_to_str[b"?"[0]] = r"\?"
     byte_to_str[b" "[0]] = r"\040"
@@ -68,7 +68,8 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self._add_line("A")
 
-    def _unparse_array_values_raw(self,
+    def _unparse_array_values_raw(
+        self,
         array: npt.NDArray[np.int32 | np.float64 | np.complex128],
     ) -> None:
         # Convert complex to pairs of floats
diff --git a/rdata/unparser/_unparser.py b/rdata/unparser/_unparser.py
index 6079e89..b17630e 100644
--- a/rdata/unparser/_unparser.py
+++ b/rdata/unparser/_unparser.py
@@ -94,7 +94,8 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
         self._unparse_array_values_raw(array)
 
     @abc.abstractmethod
-    def _unparse_array_values_raw(self,
+    def _unparse_array_values_raw(
+        self,
         array: npt.NDArray[np.int32 | np.float64 | np.complex128],
     ) -> None:
         """Unparse the values of an array as such."""
diff --git a/rdata/unparser/_xdr.py b/rdata/unparser/_xdr.py
index 0c6d3fc..255c182 100644
--- a/rdata/unparser/_xdr.py
+++ b/rdata/unparser/_xdr.py
@@ -27,7 +27,8 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self.file.write(b"X\n")
 
-    def _unparse_array_values_raw(self,
+    def _unparse_array_values_raw(
+        self,
         array: npt.NDArray[np.int32 | np.float64 | np.complex128],
     ) -> None:
         # Convert to big endian if needed

From 9570204680b62608ea79ce636e8a007a57baf9fe Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 14:25:34 +0200
Subject: [PATCH 091/100] Fix docstring

---
 rdata/conversion/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index ab13caa..c5855df 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -1,4 +1,4 @@
-"""Utilities for converting R objects to Python ones."""
+"""Utilities for converting between R and Python objects."""
 
 from ._conversion import (
     DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,

From b057ee18cebd814f405b08792f17bb8d1645a6bd Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 14:35:58 +0200
Subject: [PATCH 092/100] Always include converter in constructor functions

This reverts commit 8a269ae91c0da35f92846ee62211fc74931f5c5f.
---
 rdata/conversion/to_r.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 7ecf7f5..62e5d6e 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import inspect
 import string
 from types import MappingProxyType
 from typing import TYPE_CHECKING
@@ -48,10 +47,8 @@ def convert_to_r_object(self, data: Any) -> RObject:  # noqa: ANN401
             """Convert Python data to R object."""
 
     ConstructorReturnValue = tuple[RObjectType, Any, dict[str, Any] | None]
-    ConstructorFunction1 = Callable[[Any], ConstructorReturnValue]
-    ConstructorFunction2 = Callable[[Any, Converter], ConstructorReturnValue]
-
-    ConstructorDict = Mapping[type, ConstructorFunction1 | ConstructorFunction2]
+    ConstructorFunction = Callable[[Any, Converter], ConstructorReturnValue]
+    ConstructorDict = Mapping[type, ConstructorFunction]
 
 
 # Default values for RVersions object
@@ -69,12 +66,14 @@ def convert_to_r_object(self, data: Any) -> RObject:  # noqa: ANN401
 
 def categorical_constructor(
     data: pd.Categorical,
+    converter: Converter,  # noqa: ARG001
 ) -> ConstructorReturnValue:
     """
     Construct R object components from pandas categorical.
 
     Args:
         data: Pandas categorical.
+        converter: Python-to-R converter.
 
     Returns:
         Components of the R object.
@@ -611,16 +610,7 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
             # Check available constructors
             for t, constructor in self.constructor_dict.items():
                 if isinstance(data, t):
-                    n_params = len(inspect.signature(constructor).parameters)
-                    args: tuple[Any] | tuple[Any, Converter]
-                    if n_params == 1:
-                        args = (data,)
-                    elif n_params == 2:  # noqa: PLR2004
-                        args = (data, self)
-                    else:
-                        msg = "constructor function has wrong call signature"
-                        raise ValueError(msg)
-                    r_type, r_value, attributes = constructor(*args)
+                    r_type, r_value, attributes = constructor(data, self)
                     break
 
             if r_type is None:

From fb76598967117def95e004bcc865df6058aeef57 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:01:56 +0200
Subject: [PATCH 093/100] Return R object from constructors

---
 rdata/conversion/to_r.py | 67 +++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 62e5d6e..ee80ae6 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -40,14 +40,16 @@ class Converter(Protocol):
 
         format_version: int
 
+        def convert_to_r_attributes(self, data: dict[str, Any]) -> RObject:
+            """Convert dictionary to R attributes list."""
+
         def convert_to_r_sym(self, name: str) -> RObject:
             """Convert string to R symbol."""
 
         def convert_to_r_object(self, data: Any) -> RObject:  # noqa: ANN401
             """Convert Python data to R object."""
 
-    ConstructorReturnValue = tuple[RObjectType, Any, dict[str, Any] | None]
-    ConstructorFunction = Callable[[Any, Converter], ConstructorReturnValue]
+    ConstructorFunction = Callable[[Any, Converter], RObject]
     ConstructorDict = Mapping[type, ConstructorFunction]
 
 
@@ -66,8 +68,8 @@ def convert_to_r_object(self, data: Any) -> RObject:  # noqa: ANN401
 
 def categorical_constructor(
     data: pd.Categorical,
-    converter: Converter,  # noqa: ARG001
-) -> ConstructorReturnValue:
+    converter: Converter,
+) -> RObject:
     """
     Construct R object components from pandas categorical.
 
@@ -79,19 +81,23 @@ def categorical_constructor(
         Components of the R object.
     """
     assert isinstance(data, pd.Categorical)
-    r_type = RObjectType.INT
-    r_value = data.codes + 1
-    attributes = {
+    r_attributes = converter.convert_to_r_attributes({
         "levels": data.categories.to_numpy(),
         "class": "factor",
-    }
-    return r_type, r_value, attributes
+    })
+
+    return build_r_object(
+        RObjectType.INT,
+        value=data.codes + 1,
+        is_object=True,
+        attributes=r_attributes,
+    )
 
 
 def dataframe_constructor(
     data: pd.DataFrame,
     converter: Converter,
-) -> ConstructorReturnValue:
+) -> RObject:
     """
     Construct R object components from pandas dataframe.
 
@@ -103,7 +109,6 @@ def dataframe_constructor(
         Components of the R object.
     """
     assert isinstance(data, pd.DataFrame)
-    r_type = RObjectType.VEC
     column_names = []
     r_value = []
     for column, series in data.items():
@@ -140,18 +145,24 @@ def dataframe_constructor(
         msg = f"pd.DataFrame index {type(index)} not implemented"
         raise NotImplementedError(msg)
 
-    attributes = {
+    r_attributes = converter.convert_to_r_attributes({
         "names": np.array(column_names, dtype=np.dtype("U")),
         "class": "data.frame",
         "row.names": row_names,
-    }
-    return r_type, r_value, attributes
+    })
+
+    return build_r_object(
+        RObjectType.VEC,
+        value=r_value,
+        is_object=True,
+        attributes=r_attributes,
+    )
 
 
 def rangeindex_constructor(
     data: pd.RangeIndex,
     converter: Converter,
-) -> ConstructorReturnValue:
+) -> RObject:
     """
     Construct R object components from pandas rangeindex.
 
@@ -166,15 +177,20 @@ def rangeindex_constructor(
     if converter.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
         # ALTREP support is from R version 3.5.0
         # (minimum version for format version 3)
-        return RObjectType.INT, np.array(data), None
+        return build_r_object(
+            RObjectType.INT,
+            value=np.array(data),
+        )
 
     assert isinstance(data.step, int)
     if data.step != 1:
         # R supports compact sequences only with step 1;
         # convert the range to an array of values
-        return RObjectType.INT, np.array(data), None
+        return build_r_object(
+            RObjectType.INT,
+            value=np.array(data),
+        )
 
-    r_type = RObjectType.ALTREP
     r_value = (
         build_r_list([
             converter.convert_to_r_sym("compact_intseq"),
@@ -188,8 +204,11 @@ def rangeindex_constructor(
         ], dtype=float)),
         converter.convert_to_r_object(None),
     )
-    attributes = None
-    return r_type, r_value, attributes
+
+    return build_r_object(
+        RObjectType.ALTREP,
+        value=r_value,
+    )
 
 
 DEFAULT_CONSTRUCTOR_DICT: Final[ConstructorDict] = MappingProxyType({
@@ -610,12 +629,10 @@ def convert_to_r_object(  # noqa: C901, PLR0912, PLR0915
             # Check available constructors
             for t, constructor in self.constructor_dict.items():
                 if isinstance(data, t):
-                    r_type, r_value, attributes = constructor(data, self)
-                    break
+                    return constructor(data, self)
 
-            if r_type is None:
-                msg = f"type {type(data)} not implemented"
-                raise NotImplementedError(msg)
+            msg = f"type {type(data)} not implemented"
+            raise NotImplementedError(msg)
 
         if attributes is not None:
             is_object = "class" in attributes

From ad30ca3aa64e5da292759e46debfa4300377dd4d Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:17:06 +0200
Subject: [PATCH 094/100] Fix docstring

---
 rdata/_write.py          |  6 ++++--
 rdata/conversion/to_r.py | 12 ++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index a534af0..b655b26 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -42,7 +42,8 @@ def write_rds(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
-        constructor_dict: Dictionary mapping Python types to R classes.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
 
     See Also:
         :func:`write_rda`: Similar function that writes an RDA or RDATA file.
@@ -94,7 +95,8 @@ def write_rda(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
-        constructor_dict: Dictionary mapping Python types to R classes.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
 
     See Also:
         :func:`write_rds`: Similar function that writes an RDS file.
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index ee80ae6..ff24b65 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -407,7 +407,8 @@ class ConverterFromPythonToR:
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
-        constructor_dict: Dictionary mapping Python types to R classes.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
     """
 
     def __init__(
@@ -425,7 +426,8 @@ def __init__(
             encoding: Encoding to be used for strings within data.
             format_version: File format version.
             r_version_serialized: R version written as the creator of the object.
-            constructor_dict: Dictionary mapping Python types to R classes.
+            constructor_dict: Dictionary mapping Python classes to
+                functions converting them to R classes.
         """
         self.encoding = encoding
         self.format_version = format_version
@@ -665,7 +667,8 @@ def convert_python_to_r_data(
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
-        constructor_dict: Dictionary mapping Python types to R classes.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
         file_type: File type.
 
     Returns:
@@ -698,7 +701,8 @@ def convert_python_to_r_object(
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
         r_version_serialized: R version written as the creator of the object.
-        constructor_dict: Dictionary mapping Python types to R classes.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
 
     Returns:
         Corresponding RObject object.

From 8a4758a5935a154513809ccebc98f2c3921bb77c Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:24:01 +0200
Subject: [PATCH 095/100] Do not expose DEFAULT_CONSTRUCTOR_DICT

---
 rdata/_write.py              | 4 +++-
 rdata/conversion/__init__.py | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index b655b26..225f76f 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -5,10 +5,12 @@
 from typing import TYPE_CHECKING
 
 from .conversion import (
-    DEFAULT_CONSTRUCTOR_DICT,
     DEFAULT_FORMAT_VERSION,
     convert_python_to_r_data,
 )
+from .conversion.to_r import (
+    DEFAULT_CONSTRUCTOR_DICT,
+)
 from .unparser import unparse_file
 
 if TYPE_CHECKING:
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index c5855df..e6b5909 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -26,7 +26,6 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
-    DEFAULT_CONSTRUCTOR_DICT as DEFAULT_CONSTRUCTOR_DICT,
     DEFAULT_FORMAT_VERSION as DEFAULT_FORMAT_VERSION,
     ConverterFromPythonToR as ConverterFromPythonToR,
     convert_python_to_r_data as convert_python_to_r_data,

From 760684fbeb1f06e911409f85a4303f294a6185d7 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:41:41 +0200
Subject: [PATCH 096/100] Do not expose DEFAULT_FORMAT_VERSION

---
 rdata/_write.py              | 2 +-
 rdata/conversion/__init__.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index 225f76f..cce2fc9 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -5,11 +5,11 @@
 from typing import TYPE_CHECKING
 
 from .conversion import (
-    DEFAULT_FORMAT_VERSION,
     convert_python_to_r_data,
 )
 from .conversion.to_r import (
     DEFAULT_CONSTRUCTOR_DICT,
+    DEFAULT_FORMAT_VERSION,
 )
 from .unparser import unparse_file
 
diff --git a/rdata/conversion/__init__.py b/rdata/conversion/__init__.py
index e6b5909..2d5a0ec 100644
--- a/rdata/conversion/__init__.py
+++ b/rdata/conversion/__init__.py
@@ -26,7 +26,6 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
-    DEFAULT_FORMAT_VERSION as DEFAULT_FORMAT_VERSION,
     ConverterFromPythonToR as ConverterFromPythonToR,
     convert_python_to_r_data as convert_python_to_r_data,
     convert_python_to_r_object as convert_python_to_r_object,

From be97c4c8254dff13f9937219e86f6d9a4991fe60 Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:47:56 +0200
Subject: [PATCH 097/100] Remove asserts encoded in type hints

---
 rdata/conversion/to_r.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index ff24b65..f3e3d54 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -80,7 +80,6 @@ def categorical_constructor(
     Returns:
         Components of the R object.
     """
-    assert isinstance(data, pd.Categorical)
     r_attributes = converter.convert_to_r_attributes({
         "levels": data.categories.to_numpy(),
         "class": "factor",
@@ -108,7 +107,6 @@ def dataframe_constructor(
     Returns:
         Components of the R object.
     """
-    assert isinstance(data, pd.DataFrame)
     column_names = []
     r_value = []
     for column, series in data.items():
@@ -173,7 +171,6 @@ def rangeindex_constructor(
     Returns:
         Components of the R object.
     """
-    assert isinstance(data, pd.RangeIndex)
     if converter.format_version < R_MINIMUM_VERSION_WITH_ALTREP:
         # ALTREP support is from R version 3.5.0
         # (minimum version for format version 3)
@@ -516,8 +513,6 @@ def convert_to_r_sym(
         Returns:
             R object.
         """
-        assert isinstance(name, str)
-
         # Reference to existing symbol if exists
         if name in self._references:
             reference = self._references[name]

From d755d089da8a91868eb8eb10f99842bc9037c94b Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 15:56:15 +0200
Subject: [PATCH 098/100] Add comment on default row names

---
 rdata/conversion/to_r.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index f3e3d54..05f2b22 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -126,6 +126,7 @@ def dataframe_constructor(
     if isinstance(index, pd.RangeIndex):
         assert isinstance(index.start, int)
         if index.start == 1 and index.stop == data.shape[0] + 1 and index.step == 1:
+            # Construct default row names stored as [R_INT_NA, -len]
             row_names = np.ma.array(
                 data=[R_INT_NA, -data.shape[0]],
                 mask=[True, False],

From a086138ddc57819ba8539bae9b5009a3bd6ad3df Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Wed, 22 Jan 2025 17:08:57 +0200
Subject: [PATCH 099/100] Fix ruff

---
 rdata/unparser/_ascii.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/rdata/unparser/_ascii.py b/rdata/unparser/_ascii.py
index 0881631..bab0461 100644
--- a/rdata/unparser/_ascii.py
+++ b/rdata/unparser/_ascii.py
@@ -93,8 +93,7 @@ def _unparse_array_values_raw(
                     line = "-Inf"
                 else:
                     line = str(value)
-                    if line.endswith(".0"):
-                        line = line[:-2]
+                    line = line.removesuffix(".0")
 
             else:
                 msg = f"Unknown dtype: {array.dtype}"

From e50eb9ca752d23d0f5af9d52acc8f225a45f2dfe Mon Sep 17 00:00:00 2001
From: Tuomas Rossi <tuomas.rossi@csc.fi>
Date: Mon, 27 Jan 2025 11:00:09 +0200
Subject: [PATCH 100/100] Rename default constructors to DEFAULT_CLASS_MAP

---
 rdata/_write.py          | 6 +++---
 rdata/conversion/to_r.py | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/rdata/_write.py b/rdata/_write.py
index cce2fc9..696546f 100644
--- a/rdata/_write.py
+++ b/rdata/_write.py
@@ -8,7 +8,7 @@
     convert_python_to_r_data,
 )
 from .conversion.to_r import (
-    DEFAULT_CONSTRUCTOR_DICT,
+    DEFAULT_CLASS_MAP,
     DEFAULT_FORMAT_VERSION,
 )
 from .unparser import unparse_file
@@ -29,7 +29,7 @@ def write_rds(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
-    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
 ) -> None:
     """
     Write an RDS file.
@@ -82,7 +82,7 @@ def write_rda(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
-    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
 ) -> None:
     """
     Write an RDA or RDATA file.
diff --git a/rdata/conversion/to_r.py b/rdata/conversion/to_r.py
index 05f2b22..9ec55d3 100644
--- a/rdata/conversion/to_r.py
+++ b/rdata/conversion/to_r.py
@@ -209,7 +209,7 @@ def rangeindex_constructor(
     )
 
 
-DEFAULT_CONSTRUCTOR_DICT: Final[ConstructorDict] = MappingProxyType({
+DEFAULT_CLASS_MAP: Final[ConstructorDict] = MappingProxyType({
     pd.Categorical: categorical_constructor,
     pd.DataFrame: dataframe_constructor,
     pd.RangeIndex: rangeindex_constructor,
@@ -415,7 +415,7 @@ def __init__(
         encoding: Encoding = "utf-8",
         format_version: int = DEFAULT_FORMAT_VERSION,
         r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
-        constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+        constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
     ) -> None:
         """
         Init class.
@@ -652,7 +652,7 @@ def convert_python_to_r_data(
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
     r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
-    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
     file_type: FileType = "rds",
 ) -> RData:
     """
@@ -687,7 +687,7 @@ def convert_python_to_r_object(
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
     r_version_serialized: int = DEFAULT_R_VERSION_SERIALIZED,
-    constructor_dict: ConstructorDict = DEFAULT_CONSTRUCTOR_DICT,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
 ) -> RObject:
     """
     Convert Python data to R object.