-
Notifications
You must be signed in to change notification settings - Fork 480
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SNOW-1000283: Add support for Structured Types. (#1853)
- Loading branch information
1 parent
3cced62
commit 6a2a5b6
Showing
13 changed files
with
513 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
src/snowflake/connector/nanoarrow_cpp/ArrowIterator/ArrayConverter.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// | ||
// Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. | ||
// | ||
|
||
#include "ArrayConverter.hpp" | ||
|
||
#include <memory> | ||
|
||
#include "CArrowChunkIterator.hpp" | ||
#include "CArrowIterator.hpp" | ||
#include "SnowflakeType.hpp" | ||
|
||
namespace sf { | ||
// Logger shared by all ArrayConverter instances.
Logger* ArrayConverter::logger{
    new Logger("snowflake.connector.ArrayConverter")};
|
||
void ArrayConverter::generateError(const std::string& msg) const { | ||
logger->error(__FILE__, __func__, __LINE__, msg.c_str()); | ||
PyErr_SetString(PyExc_Exception, msg.c_str()); | ||
} | ||
|
||
// Builds a converter for an Arrow list column.
//
// schemaView: schema view of the list column; its schema must have exactly
//             one child, which describes the item type.
// array:      array view of the list column; children[0] is handed to the
//             item converter.
// context, useNumpy: forwarded unchanged to getConverterFromSchema.
//
// If the schema does not have exactly one child, a Python exception is set
// through generateError and the item converter is left unset.
ArrayConverter::ArrayConverter(ArrowSchemaView* schemaView,
                               ArrowArrayView* array, PyObject* context,
                               bool useNumpy)
    : m_array(array) {
  if (schemaView->schema->n_children != 1) {
    // n_children is a 64-bit integer in the Arrow C data interface. Cast it so
    // the variadic argument matches the %lld conversion instead of being read
    // back as a plain int.
    std::string errorInfo = Logger::formatString(
        "[Snowflake Exception] invalid arrow schema for array items expected 1 "
        "schema child, but got %lld",
        static_cast<long long>(schemaView->schema->n_children));
    this->generateError(errorInfo);
    return;
  }

  ArrowSchema* item_schema = schemaView->schema->children[0];
  ArrowArrayView* item_array = array->children[0];
  m_item_converter = getConverterFromSchema(item_schema, item_array, context,
                                            useNumpy, logger);
}
|
||
// Converts one row of the list column into a Python object.
//
// Returns a new reference to None when the row is null, otherwise a new
// Python list holding the converted items of that row. Returns nullptr when
// the list cannot be allocated or an item conversion fails; the partially
// built list is released in that case.
PyObject* ArrayConverter::toPyObject(int64_t rowIndex) const {
  if (ArrowArrayViewIsNull(m_array, rowIndex)) {
    Py_RETURN_NONE;
  }

  // Array item offsets are stored in the second array buffer.
  // Infer the start and end of this row's slice by looking at the
  // current and next offset. If there isn't another offset, use
  // the end of the child array instead.
  const int32_t* offsets = m_array->buffer_views[1].data.as_int32;
  const int64_t start = offsets[rowIndex];
  int64_t end = m_array->children[0]->length;
  if (rowIndex + 1 < m_array->length) {
    end = offsets[rowIndex + 1];
  }

  PyObject* list = PyList_New(static_cast<Py_ssize_t>(end - start));
  if (list == nullptr) {
    return nullptr;
  }

  for (int64_t i = start; i < end; i++) {
    PyObject* item = m_item_converter->toPyObject(i);
    if (item == nullptr) {
      // Never hand back a list that contains NULL slots.
      Py_DECREF(list);
      return nullptr;
    }
    // PyList_SetItem steals the reference to item, even when it fails.
    if (PyList_SetItem(list, static_cast<Py_ssize_t>(i - start), item) != 0) {
      Py_DECREF(list);
      return nullptr;
    }
  }
  return list;
}
|
||
} // namespace sf |
33 changes: 33 additions & 0 deletions
33
src/snowflake/connector/nanoarrow_cpp/ArrowIterator/ArrayConverter.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// | ||
// Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. | ||
// | ||
|
||
#ifndef PC_ARRAYCONVERTER_HPP | ||
#define PC_ARRAYCONVERTER_HPP | ||
|
||
#include <memory> | ||
|
||
#include "IColumnConverter.hpp" | ||
#include "logging.hpp" | ||
#include "nanoarrow.h" | ||
#include "nanoarrow.hpp" | ||
|
||
namespace sf { | ||
|
||
class ArrayConverter : public IColumnConverter { | ||
public: | ||
explicit ArrayConverter(ArrowSchemaView* schemaView, ArrowArrayView* array, | ||
PyObject* context, bool useNumpy); | ||
|
||
PyObject* toPyObject(int64_t rowIndex) const override; | ||
|
||
private: | ||
void generateError(const std::string& msg) const; | ||
|
||
ArrowArrayView* m_array; | ||
std::shared_ptr<sf::IColumnConverter> m_item_converter; | ||
static Logger* logger; | ||
}; | ||
|
||
} // namespace sf | ||
#endif // PC_ARRAYCONVERTER_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
src/snowflake/connector/nanoarrow_cpp/ArrowIterator/MapConverter.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// | ||
// Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. | ||
// | ||
|
||
#include "MapConverter.hpp" | ||
|
||
#include <memory> | ||
|
||
#include "CArrowChunkIterator.hpp" | ||
#include "CArrowIterator.hpp" | ||
#include "SnowflakeType.hpp" | ||
|
||
namespace sf { | ||
// Logger shared by all MapConverter instances.
Logger* MapConverter::logger{new Logger("snowflake.connector.MapConverter")};
|
||
void MapConverter::generateError(const std::string& msg) const { | ||
logger->error(__FILE__, __func__, __LINE__, msg.c_str()); | ||
PyErr_SetString(PyExc_Exception, msg.c_str()); | ||
} | ||
|
||
// Builds a converter for an Arrow map column.
//
// schemaView: schema view of the map column; its schema must have exactly
//             one child (the entries), which in turn must have exactly two
//             children (key and value).
// array:      array view of the map column; children[0]->children[0] and
//             children[0]->children[1] are handed to the key and value
//             converters respectively.
// context, useNumpy: forwarded unchanged to getConverterFromSchema.
//
// If the schema shape is invalid, a Python exception is set through
// generateError and the key/value converters are left unset.
MapConverter::MapConverter(ArrowSchemaView* schemaView, ArrowArrayView* array,
                           PyObject* context, bool useNumpy)
    : m_array(array) {
  if (schemaView->schema->n_children != 1) {
    // n_children is a 64-bit integer in the Arrow C data interface. Cast it so
    // the variadic argument matches the %lld conversion.
    std::string errorInfo = Logger::formatString(
        "[Snowflake Exception] invalid arrow schema for map entries expected 1 "
        "schema child, but got %lld",
        static_cast<long long>(schemaView->schema->n_children));
    this->generateError(errorInfo);
    return;
  }

  ArrowSchema* entries = schemaView->schema->children[0];

  if (entries->n_children != 2) {
    std::string errorInfo = Logger::formatString(
        "[Snowflake Exception] invalid arrow schema for map key/value pair "
        "expected 2 entries, but got %lld",
        static_cast<long long>(entries->n_children));
    this->generateError(errorInfo);
    return;
  }

  ArrowSchema* key_schema = entries->children[0];
  ArrowArrayView* key_array = array->children[0]->children[0];
  m_key_converter =
      getConverterFromSchema(key_schema, key_array, context, useNumpy, logger);

  ArrowSchema* value_schema = entries->children[1];
  ArrowArrayView* value_array = array->children[0]->children[1];
  m_value_converter = getConverterFromSchema(value_schema, value_array, context,
                                             useNumpy, logger);
}
|
||
// Converts one row of the map column into a Python object.
//
// Returns a new reference to None when the row is null, otherwise a new
// Python dict built from the row's key/value entries. Returns nullptr when
// the dict cannot be allocated, a key or value conversion fails, or the
// insertion fails; the partially built dict is released in that case.
PyObject* MapConverter::toPyObject(int64_t rowIndex) const {
  if (ArrowArrayViewIsNull(m_array, rowIndex)) {
    Py_RETURN_NONE;
  }

  // Map ArrowArrays have two child arrays that contain the keys and values.
  // The offsets for how many items belong to each row are stored in the parent
  // array's offset buffer. The start and end of a row's slice have to be
  // inferred from the offsets for each row.
  const int32_t* offsets = m_array->buffer_views[1].data.as_int32;
  const int64_t start = offsets[rowIndex];
  int64_t end = m_array->children[0]->length;
  if (rowIndex + 1 < m_array->length) {
    end = offsets[rowIndex + 1];
  }

  PyObject* dict = PyDict_New();
  if (dict == nullptr) {
    return nullptr;
  }

  for (int64_t i = start; i < end; i++) {
    PyObject* key = m_key_converter->toPyObject(i);
    PyObject* value =
        (key != nullptr) ? m_value_converter->toPyObject(i) : nullptr;

    const bool inserted = key != nullptr && value != nullptr &&
                          PyDict_SetItem(dict, key, value) == 0;

    // PyDict_SetItem does not steal references: the dict holds its own, so
    // the ones returned by the converters must be released here or they leak.
    Py_XDECREF(key);
    Py_XDECREF(value);

    if (!inserted) {
      Py_DECREF(dict);
      return nullptr;
    }
  }
  return dict;
}
|
||
} // namespace sf |
34 changes: 34 additions & 0 deletions
34
src/snowflake/connector/nanoarrow_cpp/ArrowIterator/MapConverter.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// | ||
// Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. | ||
// | ||
|
||
#ifndef PC_MAPCONVERTER_HPP | ||
#define PC_MAPCONVERTER_HPP | ||
|
||
#include <memory> | ||
|
||
#include "IColumnConverter.hpp" | ||
#include "logging.hpp" | ||
#include "nanoarrow.h" | ||
#include "nanoarrow.hpp" | ||
|
||
namespace sf { | ||
|
||
class MapConverter : public IColumnConverter { | ||
public: | ||
explicit MapConverter(ArrowSchemaView* schemaView, ArrowArrayView* array, | ||
PyObject* context, bool useNumpy); | ||
|
||
PyObject* toPyObject(int64_t rowIndex) const override; | ||
|
||
private: | ||
void generateError(const std::string& msg) const; | ||
|
||
ArrowArrayView* m_array; | ||
std::shared_ptr<sf::IColumnConverter> m_key_converter; | ||
std::shared_ptr<sf::IColumnConverter> m_value_converter; | ||
static Logger* logger; | ||
}; | ||
|
||
} // namespace sf | ||
#endif // PC_MAPCONVERTER_HPP |
50 changes: 50 additions & 0 deletions
50
src/snowflake/connector/nanoarrow_cpp/ArrowIterator/ObjectConverter.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// | ||
// Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. | ||
// | ||
|
||
#include "ObjectConverter.hpp" | ||
|
||
#include <memory> | ||
|
||
#include "CArrowChunkIterator.hpp" | ||
#include "CArrowIterator.hpp" | ||
#include "SnowflakeType.hpp" | ||
|
||
namespace sf { | ||
// Logger shared by all ObjectConverter instances. It is named after this
// class so its messages are not attributed to BinaryConverter.
Logger* ObjectConverter::logger =
    new Logger("snowflake.connector.ObjectConverter");
|
||
// Builds a converter for an Arrow struct-like column: for every child of the
// schema it records the child's name and creates a converter over the
// matching child of `array`. context and useNumpy are forwarded unchanged to
// getConverterFromSchema.
ObjectConverter::ObjectConverter(ArrowSchemaView* schemaView,
                                 ArrowArrayView* array, PyObject* context,
                                 bool useNumpy) {
  m_array = array;
  m_converters.clear();
  m_property_names.clear();

  const ArrowSchema* object_schema = schemaView->schema;
  m_propertyCount = object_schema->n_children;

  for (int idx = 0; idx < object_schema->n_children; idx++) {
    ArrowSchema* field_schema = object_schema->children[idx];
    m_property_names.push_back(field_schema->name);
    m_converters.push_back(getConverterFromSchema(
        field_schema, array->children[idx], context, useNumpy, logger));
  }
}
|
||
// Converts one row of the object column into a Python object.
//
// Returns a new reference to None when the row is null, otherwise a new
// Python dict mapping each property name to that property's converted value
// for the row. Returns nullptr when the dict cannot be allocated, a value
// conversion fails, or the insertion fails; the partially built dict is
// released in that case.
PyObject* ObjectConverter::toPyObject(int64_t rowIndex) const {
  if (ArrowArrayViewIsNull(m_array, rowIndex)) {
    Py_RETURN_NONE;
  }

  PyObject* dict = PyDict_New();
  if (dict == nullptr) {
    return nullptr;
  }

  for (int i = 0; i < m_propertyCount; i++) {
    PyObject* value = m_converters[i]->toPyObject(rowIndex);
    if (value == nullptr) {
      Py_DECREF(dict);
      return nullptr;
    }

    const int status = PyDict_SetItemString(dict, m_property_names[i], value);
    // PyDict_SetItemString does not steal the reference: the dict holds its
    // own, so the one returned by the converter must be released here.
    Py_DECREF(value);

    if (status != 0) {
      Py_DECREF(dict);
      return nullptr;
    }
  }
  return dict;
}
|
||
} // namespace sf |
Oops, something went wrong.