Source code for apptools.io.h5.table_node
# (C) Copyright 2005-2024 Enthought, Inc., Austin, TX
# All rights reserved.
#
# This software is provided without warranty under the terms of the BSD
# license included in LICENSE.txt and may be redistributed only under
# the conditions described in the aforementioned license. The license
# is also available online at http://www.enthought.com/licenses/BSD.txt
#
# Thanks for using Enthought open source!
import numpy as np
from tables.table import Table as PyTablesTable
class _TableRowAccessor(object):
"""A simple object which provides read access to the rows in a Table."""
def __init__(self, h5_table):
self._h5_table = h5_table
def __getitem__(self, key):
return self._h5_table[key]
[docs]class H5TableNode(object):
"""A wrapper for PyTables Table nodes.
Parameters
----------
node : tables.Table instance
An H5 node which is a pytables.Table or H5TableNode instance
"""
def __init__(self, node):
# Avoid a circular import
from .file import H5Attrs
assert self.is_table_node(node)
self._h5_table = node._h5_table if hasattr(node, "_h5_table") else node
self.attrs = H5Attrs(self._h5_table._v_attrs)
# --------------------------------------------------------------------------
# Creation methods
# --------------------------------------------------------------------------
[docs] @classmethod
def add_to_h5file(cls, h5, node_path, description, **kwargs):
"""Add table node to an H5 file at the specified path.
Parameters
----------
h5 : H5File
The H5 file where the table node will be stored.
node_path : str
Path to node where data is stored (e.g. '/path/to/my_table')
description : list of tuples or numpy dtype object
The description of the columns in the table. This is either a list
of (column name, dtype, [, shape or itemsize]) tuples or a numpy
record array dtype. For more information, see the documentation for
`Table` in PyTables.
**kwargs : dict
Additional keyword arguments to pass to pytables.File.create_table
"""
if isinstance(description, (tuple, list)):
description = np.dtype(description)
cls._create_pytables_node(h5, node_path, description, **kwargs)
node = h5[node_path]
return cls(node)
[docs] @classmethod
def is_table_node(cls, pytables_node):
"""Return True if pytables_node is a pytables.Table or a H5TableNode.
"""
return isinstance(pytables_node, (PyTablesTable, H5TableNode))
# --------------------------------------------------------------------------
# Public interface
# --------------------------------------------------------------------------
[docs] def append(self, data):
"""Add some data to the table.
Parameters
----------
data : dict
A dictionary of column name -> values items
"""
rows = list(zip(*[data[name] for name in self.keys()]))
self._h5_table.append(rows)
def __getitem__(self, col_or_cols):
"""Return one or more columns of data from the table.
Parameters
----------
col_or_cols : str or list of str
A single column name or a list of column names
Return
------
data : ndarray
An array of column data with the column order matching that of
`col_or_cols`.
"""
if isinstance(col_or_cols, str):
return self._h5_table.col(col_or_cols)
column_data = [self._h5_table.col(name) for name in col_or_cols]
return np.column_stack(column_data)
@property
def ix(self):
"""Return an object which provides access to row data."""
return _TableRowAccessor(self._h5_table)
[docs] def keys(self):
return self._h5_table.colnames
[docs] def to_dataframe(self):
"""Return table data as a pandas `DataFrame`.
XXX: This does not work if the table contains a multidimensional column
This method requires pandas to have been installed in the environment.
"""
from pandas import DataFrame
# Slicing rows gives a numpy struct array, which DataFrame understands.
return DataFrame(self.ix[:])
# --------------------------------------------------------------------------
# Object interface
# --------------------------------------------------------------------------
def __repr__(self):
return repr(self._h5_table)
def __len__(self):
return self._h5_table.nrows
# --------------------------------------------------------------------------
# Private interface
# --------------------------------------------------------------------------
def _f_remove(self):
"""Implement the PyTables `Node._f_remove` method so that H5File
doesn't choke when trying to remove our node.
"""
self._h5_table._f_remove()
self._h5_table = None
@classmethod
def _create_pytables_node(cls, h5, node_path, description, **kwargs):
path, name = h5.split_path(node_path)
pyt_file = h5._h5
pyt_file.create_table(path, name, description, **kwargs)