Source code for marv_robotics.bag

# Copyright 2016 - 2018  Ternaris.
# SPDX-License-Identifier: AGPL-3.0-only

import heapq
import re
import sys
import warnings
from collections import defaultdict, namedtuple
from contextlib import ExitStack, contextmanager
from itertools import groupby
from logging import getLogger
from os import walk
from pathlib import Path

import capnp  # noqa: F401,TC002  pylint: disable=unused-import
from rosbags import rosbag1, rosbag2, serde
from rosbags.serde.messages import MSGDEFCACHE
from rosbags.typesys import get_types_from_idl, get_types_from_msg, register_types, types
from rosbags.typesys.msg import normalize_msgtype

import marv_api as marv
import marv_nodes
from marv_api import DatasetInfo, ReaderError

from .bag_capnp import Bagmeta, Message  # pylint: disable=import-error


class Baginfo(namedtuple('Baginfo', 'filename basename prefix timestamp idx')):

    @classmethod
    def parse(cls, filename):
        assert filename.endswith('.bag'), filename
        basename = filename[:-4]
        parts = basename.rsplit('_', 2)
        if parts[-1].isnumeric():
            idx = int(parts.pop())
        else:
            idx = None

        if re.match(r'\d{4}(?:-\d{2}){5}', parts[-1]):  # noqa: FS003
            timestamp = parts.pop()
        else:
            timestamp = None

        if parts:
            prefix = parts[0]
        else:
            prefix = None
        return cls(filename, basename, prefix, timestamp, idx)
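

# Example (not part of the original module): a minimal sketch of what
# Baginfo.parse extracts from typical `rosbag record` filenames. The
# filenames are hypothetical; the expected values follow directly from the
# parsing logic above.
def _example_baginfo_parse():
    info = Baginfo.parse('foo_2018-01-12-14-05-12_0.bag')
    assert info.prefix == 'foo'
    assert info.timestamp == '2018-01-12-14-05-12'
    assert info.idx == 0

    # Without timestamp and index, only the prefix remains.
    info = Baginfo.parse('solo.bag')
    assert (info.prefix, info.timestamp, info.idx) == ('solo', None, None)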


def is_rosbag2(dirpath):
    metadata = dirpath / 'metadata.yaml'
    if not metadata.exists():
        return False

    content = metadata.read_text()
    return content.startswith('rosbag2_bagfile_information:')


def _scan_rosbag2(log, dirpath, dirnames, filenames):
    if not is_rosbag2(dirpath):
        return None

    try:
        reader = rosbag2.Reader(dirpath)
    except rosbag2.ReaderError as exc:
        log.warning('Rosbag2: %s %r', dirpath, exc)
        return None

    if dirnames:
        log.warning('Ignoring subdirectories of dataset %s: %r', dirpath, dirnames[:])
        dirnames[:] = []

        # Already created, we're only called because of ignored files
        if 'metadata.yaml' not in filenames:
            return None

    filenames = set(filenames)
    setfiles = {'metadata.yaml'} | {x.name for x in reader.paths}

    if extra := filenames - setfiles:
        log.warning('Ignoring files not listed in metadata.yaml %s: %r', dirpath, sorted(extra))

    if missing := setfiles - filenames:
        log.error(
            'Refusing to create rosbag2 dataset %s missing files: %r',
            dirpath,
            sorted(missing),
        )
        return None

    return DatasetInfo(dirpath.name, sorted(setfiles))


def _add_message_types(msgpath):
    typs = {}
    for root, dirnames, files in walk(msgpath):
        if '.rosbags_ignore' in files:
            dirnames.clear()
            continue
        dirnames.sort()
        for fname in sorted(files):
            path = Path(root, fname)
            if path.suffix == '.idl':
                typs.update(get_types_from_idl(path.read_text(encoding='utf-8')))
            elif path.suffix == '.msg':
                name = path.relative_to(path.parents[2]).with_suffix('')
                if '/msg/' not in str(name):
                    name = name.parent / 'msg' / name.name
                typs.update(get_types_from_msg(path.read_text(encoding='utf-8'), str(name)))
    register_types(typs)


def dirscan(dirpath, dirnames, filenames):
    """Scan for directories containing bags (ROS1 and ROS2).

    For rosbag2 datasets this scanner behaves identically to the default
    :py:func:`scan` below. For ROS1 bag files it looks for directories
    containing at least one bag file and creates a dataset with all files
    contained therein, ignoring further subdirectories, including rosbag2
    datasets; warnings are logged if any such subdirectories are ignored.
    """
    log = getLogger(f'{__name__}.dirscan')
    dirpath = Path(dirpath)
    dataset = _scan_rosbag2(log, dirpath, dirnames, filenames)
    if dataset:
        return [dataset]

    if not any(x.endswith('.bag') for x in filenames):
        return []

    if dirnames:
        log.warning('Ignoring subdirectories of dataset %s: %r', dirpath, dirnames[:])
        dirnames[:] = []

    return [DatasetInfo(dirpath.name, filenames)]

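
# Example (not part of the original module): how the file-system walker would
# invoke dirscan for a single directory containing ROS1 bags; the path and
# filenames are hypothetical.
def _example_dirscan():
    infos = dirscan('/data/run1', [], ['part_0.bag', 'part_1.bag', 'notes.txt'])
    # Expected: one dataset named 'run1' containing all three files.
    return infos
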

def scan(dirpath, dirnames, filenames):  # pylint: disable=unused-argument
    """Scan for sets of ROS bag files (ROS1 and ROS2).

    Find rosbag2 datasets and log warnings if they contain additional
    files not listed in metadata.yaml.

    The remainder applies to sets of ROS1 bag files: bags suffixed with
    a consecutive index are grouped into sets::

        foo_0.bag
        foo_1.bag
        foo_3.bag
        foo_4.bag

    results in::

        foo [foo_0.bag, foo_1.bag]
        foo_3 [foo_3.bag]
        foo_4 [foo_4.bag]

    In this example the bag with index 2 is missing, which results in
    foo_3 and foo_4 being individual sets with one bag each.

    The timestamps used by ``rosbag record`` are stripped from the name
    given to sets, but are kept for the remaining individual sets in
    case a bag is missing::

        foo_2018-01-12-14-05-12_0.bag
        foo_2018-01-12-14-45-23_1.bag
        foo_2018-01-12-14-55-42_3.bag

    results in::

        foo [foo_2018-01-12-14-05-12_0.bag, foo_2018-01-12-14-45-23_1.bag]
        foo_2018-01-12-14-55-42_3 [foo_2018-01-12-14-55-42_3.bag]

    For more information on scanners see :any:`marv_api.scanner`.

    Args:
        dirpath (str): The path to the directory currently being scanned.
        dirnames (list[str]): Sorted list of subdirectories of the directory
            currently being scanned. Change this in-place to control further
            traversal.
        filenames (list[str]): Sorted list of files within the directory
            currently being scanned.

    Returns:
        A list of :class:`marv_api.DatasetInfo` instances mapping sets of
        files to dataset names. Absolute filenames must start with
        :paramref:`.dirpath`, relative filenames are automatically prefixed
        with it.

    See :ref:`cfg_c_scanner` config key.

    """
    log = getLogger(f'{__name__}.scan')
    dataset = _scan_rosbag2(log, Path(dirpath), dirnames, filenames)
    if dataset:
        return [dataset]

    groups = groupby(
        [Baginfo.parse(x) for x in reversed(filenames) if x.endswith('.bag')],
        lambda x: x.prefix,
    )
    bags = []
    datasets = []
    for prefix, group in groups:
        group = list(group)
        prev_idx = None
        for bag in group:
            expected_idx = bag.idx if prev_idx is None else prev_idx - 1
            if bag.idx != expected_idx or \
                    bags and (bags[0].timestamp is None) != (bag.timestamp is None):
                datasets[0:0] = [DatasetInfo(x.basename, [x.filename]) for x in bags]
                bags[:] = []
            bags.insert(0, bag)
            prev_idx = bag.idx
            if bag.idx == 0:
                datasets.insert(0, DatasetInfo(prefix or bag.timestamp,
                                               [x.filename for x in bags]))
                bags[:] = []
            elif bag.idx is None:
                assert len(bags) == 1, bags
                datasets.insert(0, DatasetInfo(bag.basename, [bag.filename]))
                bags[:] = []
        datasets[0:0] = [DatasetInfo(x.basename, [x.filename]) for x in bags]
        bags[:] = []
    return datasets

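
# Example (not part of the original module): the grouping described in the
# docstring above, reproduced with hypothetical filenames.
def _example_scan_grouping():
    files = ['foo_0.bag', 'foo_1.bag', 'foo_3.bag', 'foo_4.bag']
    # Expected datasets, since index 2 is missing:
    #   foo   [foo_0.bag, foo_1.bag]
    #   foo_3 [foo_3.bag]
    #   foo_4 [foo_4.bag]
    return scan('/data', [], files)
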

def _read_bagmeta2(path):
    reader = rosbag2.Reader(Path(path).parent)
    return {
        'start_time': reader.start_time,
        'end_time': reader.end_time,
        'duration': reader.duration,
        'msg_count': reader.message_count,
        'msg_types': sorted({x.msgtype for x in reader.topics.values()}),
        'topics': sorted(reader.topics.keys()),
        'connections': [
            {
                'topic': x['topic_metadata']['name'],
                'datatype': x['topic_metadata']['type'],
                'msg_count': x['message_count'],
                'serialization_format': x['topic_metadata']['serialization_format'],
            }
            for x in reader.metadata['topics_with_message_count']
        ],
    }


@contextmanager
def open_rosbag1(path):
    try:
        with rosbag1.Reader(path) as bag:
            yield bag
    except rosbag1.ReaderError:
        raise ReaderError(
            (
                f'Unindexed bag file: {path}\n'
                '  File was not copied in full or recording did not finish properly\n'
                '  Use `rosbag reindex` to index what is there.'
            ),
        ) from None

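
# Example (not part of the original module): open_rosbag1 is used like
# rosbag1.Reader as a context manager, but surfaces unindexed bags as marv
# ReaderError with a hint to run `rosbag reindex`. The path is hypothetical.
def _example_open_rosbag1():
    with open_rosbag1('/data/run1/part_0.bag') as bag:
        return bag.duration, len(bag.connections)
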

@marv.node(Bagmeta)
@marv.input('dataset', marv_nodes.dataset)
def bagmeta(dataset):
    """Extract meta information from bag file.

    In case of multiple connections for one topic, they are assumed to all
    be of the same message type and to either all be latching or none. A
    topic's message type and latching mode, and a message type's md5sum are
    assumed not to change across split bags.
    """
    # pylint: disable=too-many-locals
    dataset = yield marv.pull(dataset)
    files = list(dataset.files)

    if metadatapath := next((x.path for x in files if x.path.endswith('metadata.yaml')), None):
        meta = _read_bagmeta2(metadatapath)
        if meta:
            yield marv.push(meta)
            return

    paths = [x.path for x in files if x.path.endswith('.bag')]

    bags = []
    start_time = sys.maxsize
    end_time = 0
    connections = {}
    for path in paths:
        with open_rosbag1(path) as bag:
            try:
                _start_time = int(bag.start_time)
                _end_time = int(bag.end_time)
            except ValueError:
                _start_time = sys.maxsize
                _end_time = 0

            start_time = _start_time if _start_time < start_time else start_time
            end_time = _end_time if _end_time > end_time else end_time

            _connections = [
                {
                    'topic': x.topic,
                    'datatype': x.msgtype,
                    'md5sum': x.md5sum,
                    'msg_def': x.msgdef,
                    'msg_count': len(x.indexes),
                    'latching': bool(x.latching),
                }
                for x in bag.connections.values()
            ]
            _start_time = _start_time if _start_time != sys.maxsize else 0
            bags.append(
                {
                    'start_time': _start_time,
                    'end_time': _end_time,
                    'duration': _end_time - _start_time,
                    'msg_count': sum(x['msg_count'] for x in _connections),
                    'connections': _connections,
                    'version': 200,
                },
            )

            for _con in _connections:
                key = (_con['topic'], _con['datatype'], _con['md5sum'])
                con = connections.get(key)
                if con:
                    con['msg_count'] += _con['msg_count']
                    con['latching'] = con['latching'] or _con['latching']
                else:
                    connections[key] = _con.copy()

    connections = sorted(
        connections.values(),
        key=lambda x: (x['topic'], x['datatype'], x['md5sum']),
    )
    start_time = start_time if start_time != sys.maxsize else 0
    yield marv.push(
        {
            'start_time': start_time,
            'end_time': end_time,
            'duration': end_time - start_time,
            'msg_count': sum(x['msg_count'] for x in bags),
            'msg_types': sorted({x['datatype'] for x in connections}),
            'topics': sorted({x['topic'] for x in connections}),
            'connections': connections,
            'bags': bags,
        },
    )


def read_messages(paths, topics=None, start_time=None, end_time=None, wipe_typesys=False):
    """Iterate chronologically over raw bag messages for topics from paths."""
    # pylint: disable=too-many-locals
    if wipe_typesys:
        backup = types.FIELDDEFS.copy()
        for key in list(types.FIELDDEFS.keys()):
            if key not in [
                'builtin_interfaces/msg/Time',
                'builtin_interfaces/msg/Duration',
                'std_msgs/msg/Header',
            ]:
                types.FIELDDEFS.pop(key)
        MSGDEFCACHE.clear()

    with ExitStack() as stack:
        if wipe_typesys:
            stack.callback(
                lambda: (
                    types.FIELDDEFS.clear()
                    or types.FIELDDEFS.update(backup)
                    or MSGDEFCACHE.clear()
                ),
            )
        bags = [stack.enter_context(open_rosbag1(path)) for path in paths]
        if wipe_typesys:
            typs = {}
            for bag in bags:
                for rconn in bag.connections.values():
                    typs.update(get_types_from_msg(rconn.msgdef, rconn.msgtype))
            register_types(typs)
        gens = [
            bag.messages(
                connections=[x for x in bag.connections.values() if x.topic in topics],
                start=start_time,
                stop=end_time,
            )
            for bag in bags
        ]
        prev_time = 0
        for connection, time, data in heapq.merge(*gens, key=lambda x: x[1]):
            assert time >= prev_time, (repr(time), repr(prev_time))
            yield connection, time, data
            prev_time = time

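
# Example (not part of the original module): chronologically merged iteration
# over two split bags for one topic; paths and topic are hypothetical.
def _example_read_messages():
    paths = ['/data/run1/part_0.bag', '/data/run1/part_1.bag']
    for connection, timestamp, raw in read_messages(paths, topics=['/imu']):
        print(connection.topic, timestamp, len(raw))
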

@marv.node(Message, group='ondemand')
@marv.input('dataset', marv_nodes.dataset)
@marv.input('bagmeta', bagmeta)
def raw_messages(dataset, bagmeta):  # noqa: C901
    # pylint: disable=redefined-outer-name,too-many-branches,too-many-statements
    """Stream messages from a set of bag files."""
    # pylint: disable=too-many-locals
    bagmeta, dataset = yield marv.pull_all(bagmeta, dataset)
    try:
        rosbag_path = Path(dataset.files[0].path).parent
        reader = rosbag2.Reader(rosbag_path)
        msgpath = rosbag_path / 'messages'
        if not msgpath.exists():
            try:
                msgpath = yield marv.get_resource_path('messages')
            except marv.ResourceNotFoundError:
                msgpath = None
        if msgpath and msgpath.exists():
            _add_message_types(msgpath)
    except rosbag2.ReaderError:
        reader = None
    connections = bagmeta.connections
    requested = yield marv.get_requested()

    # Selectors are:
    # - '/topic' -> one individual stream, no group
    # - '/topic1,/topic2' -> one group with two streams
    # - '*:sensor_msgs/Imu' -> one group with one stream per matching connection
    # - '*:sensor_msgs/Imu,*:sensor_msgs/msg/Imu'
    #   -> one group with one stream per matching connection
    individuals = []
    groups = []
    for name in (x.name for x in requested):
        if re.search(r'[:,]', name):
            groups.append(name)
        else:
            individuals.append(name)

    def make_header(topic):
        # TODO: topic with more than one type is not supported
        con = next((x for x in connections if x.topic == topic), None)
        # TODO: start/end_time per topic?
        return {
            'start_time': bagmeta.start_time,
            'end_time': bagmeta.end_time,
            'msg_count': con.msg_count if con else 0,
            'msg_type': con.datatype if con else '',
            'msg_type_def': con.msg_def if con else '',
            'msg_type_md5sum': con.md5sum if con else '',
            'rosbag2': reader is not None,
            'topic': topic,
        }

    deprecated_names = set()
    bytopic = defaultdict(list)
    for name in groups:
        topics = set()
        for selector in name.split(','):
            try:
                reqtop, reqtype = selector.split(':')
            except ValueError:
                reqtop, reqtype = selector, '*'

            if reqtype != '*':
                norm = normalize_msgtype(reqtype)
                if reqtype != norm:
                    deprecated_names.add((reqtype, norm))
                reqtype = norm

            # TODO: topic with more than one type is not supported
            topics.update(
                con.topic
                for con in connections
                if reqtop in ('*', con.topic) and reqtype in ('*', con.datatype)
            )
        group = yield marv.create_group(name)
        for topic in sorted(topics):
            stream = yield group.create_stream(f'{name}.{topic}', **make_header(topic))
            bytopic[topic].append(stream)
        yield group.finish()

    for old, new in deprecated_names:
        warnings.warn(
            (
                f'marv.select: Change all occurrences of {old} to {new}.'
                ' Support for old ROS1 names will be removed in 21.12.0'
            ),
            FutureWarning,
        )

    bagtopics = bagmeta.topics
    for topic in individuals:
        stream = yield marv.create_stream(topic, **make_header(topic))
        if topic not in bagtopics:
            yield stream.finish()
        bytopic[topic].append(stream)

    if not bytopic:
        return

    if not reader:
        paths = [x.path for x in dataset.files if x.path.endswith('.bag')]
        # TODO: topic with more than one type is not supported
        for conn, timestamp, data in read_messages(paths, topics=list(bytopic), wipe_typesys=True):
            dct = {'data': data, 'timestamp': timestamp}
            for stream in bytopic[conn.topic]:
                yield stream.msg(dct)
        return

    with reader:
        connections = [x for x in reader.connections.values() if x.topic in bytopic]
        for conn, timestamp, data in reader.messages(connections=connections):
            dct = {'data': data, 'timestamp': timestamp}
            for stream in bytopic[conn.topic]:
                yield stream.msg(dct)


messages = raw_messages  # pylint: disable=invalid-name


def make_deserialize(stream):
    """Create appropriate deserialize function for rosbag1 and rosbag2 streams."""
    deserialize_cdr = serde.deserialize_cdr
    ros1_to_cdr = serde.ros1_to_cdr
    typename = stream.msg_type

    if stream.rosbag2:
        return lambda data: deserialize_cdr(data, typename)

    return lambda data: deserialize_cdr(ros1_to_cdr(data, typename), typename)

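
# Example (not part of the original module): a sketch of a consumer node
# using make_deserialize on a selected topic stream. The node name, output
# schema, and topic are hypothetical; marv.select is assumed available as
# referenced by the deprecation warning in raw_messages above.
@marv.node(Message)
@marv.input('stream', marv.select(messages, '/chatter'))
def example_consumer(stream):
    deserialize = make_deserialize(stream)
    while msg := (yield marv.pull(stream)):
        rosmsg = deserialize(msg.data)  # works for rosbag1 and rosbag2 data alike
        print(rosmsg)
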

def get_float_seconds(stamp):
    """Get floating point seconds from ROS1 and ROS2 message header stamp."""
    if hasattr(stamp, 'to_sec'):
        return stamp.to_sec()
    return stamp.sec + stamp.nanosec * 1e-9


def make_get_timestamp(log, stream=None):
    """Make utility to get message header timestamp in nanoseconds.

    Falls back to the bag message timestamp if the header stamp is zero
    or unavailable.

    Args:
        log: Logger instance.
        stream: Obsolete, previously needed argument.

    Returns:
        Function retrieving timestamp from message.

    """
    fallback = None
    if stream is not None:
        warnings.warn(
            (
                'Use make_get_timestamp(log) instead of make_get_timestamp(log, stream). '
                'Obsolete stream argument will be removed in 21.12.0'
            ),
            FutureWarning,
            stacklevel=1,
        )

    def stamp_to_nanosec(stamp):
        return stamp.sec * 10**9 + stamp.nanosec

    def get_timestamp(rosmsg, bagmsg):
        """Return header timestamp, falling back to bagmsg timestamp if zero or unavailable.

        Args:
            rosmsg: Deserialized ROS message.
            bagmsg: Bag message as streamed by marv_robotics.bag.messages.

        Returns:
            Message timestamp.

        """
        nonlocal fallback
        if fallback is None:
            if hasattr(rosmsg, 'header'):
                fallback = stamp_to_nanosec(rosmsg.header.stamp) == 0 and bagmsg.timestamp > 600e9
                if fallback:
                    log.warning('Header time is zero, will use message time instead.')
            else:
                fallback = True

        if fallback:
            return bagmsg.timestamp
        return stamp_to_nanosec(rosmsg.header.stamp)

    return get_timestamp

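
# Example (not part of the original module): pairing make_get_timestamp with
# make_deserialize inside a node body; the stream argument and its topic are
# hypothetical.
def _example_timestamps(stream):
    log = getLogger(f'{__name__}.example')
    deserialize = make_deserialize(stream)
    get_timestamp = make_get_timestamp(log)
    while msg := (yield marv.pull(stream)):
        rosmsg = deserialize(msg.data)
        # Header stamp in nanoseconds, or bag message time as fallback.
        print(get_timestamp(rosmsg, msg))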