//!
//! This module aims to make multi-gpu setups easier to handle for compositors.
//!
//! Its main entry point is the [`GpuManager`]. Initializing this with a
//! [`GraphicsApi`] implementation will allow you to create [`MultiRenderer`]s.
//!
//! smithay provides the following graphics apis:
//! - [`gbm::GbmGlesBackend`]
//!
//! A [`MultiRenderer`] gets created using two [`DrmNode`]s to identify gpus.
//! One gpu will be referred to as the render-gpu, the other as the target-gpu.
//!
//! Note: The render- and target-gpu may be identical to allow the multigpu
//! module to be used on single-gpu systems as well, avoiding the need to support multiple code-paths.
//! Doing so will not result in worse performance compared to rendering without the multi-gpu module.
//!
//! A [`MultiRenderer`] will support the [`Renderer`]-trait as well
//! as the other corresponding traits of the [`renderer`](crate::backend::renderer)-module,
//! if the [`GraphicsApi`] allows it.
//!
//! Any rendering operations will take place on the render-gpu transparently.
//! Output will be redirected to the target gpu, as such any [`Bind`]- and [`Offscreen`]-implementations
//! will be allocated on the target-gpu.
//!
//! The `ImportMem`-implementation will upload the texture on the render-gpu, failing *if*
//! the render-gpu does not support the format as normal. Be careful which formats you announce
//! when mixing gpus that support different memory-formats.
//!
//! In contrast the `ImportDma`-implementations will transparently create copies
//! of client buffers, if necessary, given it is unclear if a dmabuf can be imported on any given gpu.
//! The implementation strives for the best possible performance for a given setup,
//! when choosing a copy-path.
//!
//! Any `ExportMem`-implementations will originate from the render-gpu, which again
//! needs to support the requested format directly. No paths across other gpus are tested.
//!
//! *Note*: This module will not keep you from selecting sub-optimal configurations.
//! Any heuristics for which render-gpu to use for a given set of client buffers
//! and desired target-gpu are up to be implemented by the compositor. The module only
//! reduces the amount of necessary setup operations.
//!
use aliasable::boxed::AliasableBox;
use std::{
    any::Any,
    collections::HashMap,
    fmt,
    sync::{Arc, Mutex},
};

use super::{
    sync::{self, SyncPoint},
    Bind, Blit, BlitFrame, Color32F, ContextId, DebugFlags, ErasedContextId, ExportMem, Frame, ImportDma,
    ImportMem, Offscreen, Renderer, RendererSuper, Texture, TextureFilter, TextureMapping,
};
#[cfg(feature = "wayland_frontend")]
use super::{ImportDmaWl, ImportMemWl};

#[cfg(feature = "wayland_frontend")]
use crate::{
    backend::renderer::{buffer_type, BufferType},
    wayland::{compositor::SurfaceData, dmabuf::get_dmabuf, shm},
};
use crate::{
    backend::{
        allocator::{
            dmabuf::{AnyError, Dmabuf},
            format::FormatSet,
            Allocator, Buffer as BufferTrait, Format, Fourcc, Modifier,
        },
        drm::DrmNode,
        SwapBuffersError,
    },
    utils::{Buffer as BufferCoords, Physical, Rectangle, Size, Transform},
};
use tracing::{debug, info, info_span, instrument, trace, trace_span, warn};
#[cfg(feature = "wayland_frontend")]
use wayland_server::protocol::{wl_buffer, wl_shm, wl_surface::WlSurface};

#[cfg(all(feature = "backend_gbm", feature = "backend_egl", feature = "renderer_gl"))]
pub mod gbm;

/// Tracks available gpus from a given [`GraphicsApi`]
///
/// Owns the api, the list of devices it enumerated and a cache slot per pair of
/// device nodes that is lent to the [`MultiRenderer`]s created from it.
#[derive(Debug)]
pub struct GpuManager<A: GraphicsApi> {
    // The graphics api used to (re-)enumerate `devices`.
    api: A,
    // Devices as filled in by the last call to `api.enumerate`.
    devices: Vec<A::Device>,
    // One slot per `(render node, target node)` pair; a mutable reference to the slot
    // becomes the `cached_buffer` of the created `MultiRenderer`'s target data.
    // NOTE(review): the meaning of the `bool` is not visible in this part of the file — confirm.
    dmabuf_cache: HashMap<(DrmNode, DrmNode), Option<(bool, Dmabuf)>>,
    // Parent span of the tracing spans created by the instrumented methods.
    span: tracing::Span,
}

/// Errors generated by [`GpuManager`] and [`MultiRenderer`].
///
/// `R` is the [`GraphicsApi`] used for the render device, `T` the one used for the target device.
#[derive(thiserror::Error)]
pub enum Error<R: GraphicsApi, T: GraphicsApi>
where
    R::Error: 'static,
    T::Error: 'static,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// The graphics api of the render device errored on device enumeration
    #[error("The render graphics api failed enumerating devices {0:?}")]
    RenderApiError(#[source] R::Error),
    /// The graphics api of the target device errored on device enumeration
    #[error("The target graphics api failed enumerating devices {0:?}")]
    TargetApiError(#[source] T::Error),
    /// The graphics api has found no node matching the drm node
    #[error("The graphics api has found no node matching {0:?}")]
    NoDevice(DrmNode),
    /// The device requested did not match the expected
    #[error("The devices requested {0:?} did not match the expected")]
    MismatchedDevice(DrmNode),
    /// The device has gone missing
    #[error("The device has gone missing")]
    DeviceMissing,
    /// Error on the rendering device
    #[error("Error on the rendering device: {0:}")]
    Render(#[source] <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error),
    /// Error on the target device
    #[error("Error on the target device: {0:}")]
    Target(#[source] <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error),
    /// Failed to import buffer using the api on any device
    #[error("Failed to import buffer")]
    ImportFailed,
    /// Buffer allocation failed
    ///
    /// The wrapped allocator error is neither part of the display message nor marked as `#[source]`.
    #[error("Failed to allocate buffer")]
    AllocatorError(AnyError),
}

impl<R: GraphicsApi, T: GraphicsApi> Error<R, T> {
    fn transpose(self) -> Error<T, R> {
        match self {
            Error::Render(err) => Error::Target(err),
            Error::Target(err) => Error::Render(err),
            Error::RenderApiError(err) => Error::TargetApiError(err),
            Error::TargetApiError(err) => Error::RenderApiError(err),
            Error::NoDevice(d) => Error::NoDevice(d),
            Error::MismatchedDevice(d) => Error::MismatchedDevice(d),
            Error::DeviceMissing => Error::DeviceMissing,
            Error::ImportFailed => Error::ImportFailed,
            Error::AllocatorError(a) => Error::AllocatorError(a),
        }
    }
}

impl<R: GraphicsApi> Error<R, R> {
    fn generalize<T: GraphicsApi>(self) -> Error<R, T> {
        match self {
            Error::Render(err) => Error::Render(err),
            Error::Target(err) => Error::Render(err),
            Error::RenderApiError(err) => Error::RenderApiError(err),
            Error::TargetApiError(err) => Error::RenderApiError(err),
            Error::NoDevice(d) => Error::NoDevice(d),
            Error::MismatchedDevice(d) => Error::MismatchedDevice(d),
            Error::DeviceMissing => Error::DeviceMissing,
            Error::ImportFailed => Error::ImportFailed,
            Error::AllocatorError(a) => Error::AllocatorError(a),
        }
    }
}

impl<R: GraphicsApi, T: GraphicsApi> fmt::Debug for Error<R, T>
where
    R::Error: 'static,
    T::Error: 'static,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::RenderApiError(err) => write!(f, "Error::RenderApiError({err:?})"),
            Error::TargetApiError(err) => write!(f, "Error::TargetApiError({err:?})"),
            Error::NoDevice(dev) => write!(f, "Error::NoDevice({dev:?})"),
            Error::MismatchedDevice(dev) => write!(f, "Error::MismatchedDevice({dev:?})"),
            Error::DeviceMissing => write!(f, "Error::DeviceMissing"),
            Error::Render(err) => write!(f, "Error::Render({err:?})"),
            Error::Target(err) => write!(f, "Error::Target({err:?})"),
            Error::ImportFailed => write!(f, "Error::ImportFailed"),
            Error::AllocatorError(err) => write!(f, "Error::AllocationError({err})"),
        }
    }
}

impl<R: GraphicsApi + 'static, T: GraphicsApi + 'static> From<Error<R, T>> for SwapBuffersError
where
    R::Error: Into<SwapBuffersError> + Send + Sync,
    T::Error: Into<SwapBuffersError> + Send + Sync,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: Into<SwapBuffersError> + Send + Sync,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: Into<SwapBuffersError> + Send + Sync,
{
    #[inline]
    fn from(err: Error<R, T>) -> SwapBuffersError {
        match err {
            x @ Error::NoDevice(_) | x @ Error::DeviceMissing | x @ Error::AllocatorError(_) => {
                SwapBuffersError::ContextLost(Box::new(x))
            }
            x @ Error::MismatchedDevice(_) | x @ Error::ImportFailed => {
                SwapBuffersError::TemporaryFailure(Box::new(x))
            }
            Error::RenderApiError(x) => x.into(),
            Error::TargetApiError(x) => x.into(),
            Error::Render(x) => x.into(),
            Error::Target(x) => x.into(),
        }
    }
}

impl<A: GraphicsApi> AsRef<A> for GpuManager<A> {
    /// Borrows the [`GraphicsApi`] this manager was created with.
    fn as_ref(&self) -> &A {
        let Self { api, .. } = self;
        api
    }
}

impl<A: GraphicsApi> AsMut<A> for GpuManager<A> {
    /// Mutably borrows the [`GraphicsApi`] this manager was created with.
    fn as_mut(&mut self) -> &mut A {
        let Self { api, .. } = self;
        api
    }
}

impl<A: GraphicsApi> GpuManager<A> {
    /// Create a new [`GpuManager`] for a given [`GraphicsApi`].
    pub fn new(api: A) -> Result<GpuManager<A>, Error<A, A>> {
        let span = info_span!("renderer_multi", backend = A::identifier());

        let mut devices = Vec::new();
        api.enumerate(&mut devices).map_err(Error::RenderApiError)?;

        Ok(GpuManager {
            api,
            devices,
            dmabuf_cache: HashMap::new(),
            span,
        })
    }

    /// Get all devices enumerated by the API.
    pub fn devices(&mut self) -> Result<impl Iterator<Item = &A::Device>, A::Error> {
        if self.api.needs_enumeration() {
            self.api.enumerate(&mut self.devices)?;
        }
        Ok(self.devices.iter())
    }

    /// Get all devices enumerated by the API.
    pub fn devices_mut(&mut self) -> Result<impl Iterator<Item = &mut A::Device>, A::Error> {
        if self.api.needs_enumeration() {
            self.api.enumerate(&mut self.devices)?;
        }
        Ok(self.devices.iter_mut())
    }

    /// Create a [`MultiRenderer`] from a single device.
    ///
    /// This a convenience function to deal with the same types even, if you only need one device.
    /// Because no copies are necessary in these cases, all extra arguments can be omitted.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    pub fn single_renderer<'api>(
        &'api mut self,
        device: &DrmNode,
    ) -> Result<MultiRenderer<'api, 'api, A, A>, Error<A, A>> {
        if !self.devices.iter().any(|dev| dev.node() == device) || self.api.needs_enumeration() {
            self.api
                .enumerate(&mut self.devices)
                .map_err(Error::RenderApiError)?;
        }

        if !self.devices.iter().any(|dev| dev.node() == device) {
            return Err(Error::NoDevice(*device));
        }

        let (mut render, others) = self
            .devices
            .iter_mut()
            .partition::<Vec<_>, _>(|dev| dev.node() == device);

        Ok(MultiRenderer {
            render: render.remove(0),
            target: None,
            other_renderers: others,
            span: tracing::Span::current(),
        })
    }

    /// Create a [`MultiRenderer`].
    ///
    /// - `render_device` should referr to the gpu node rendering operations will take place upon.
    /// - `target_device` should referr to the gpu node the composited buffer will end up upon
    /// - `allocator` should referr to an `Allocator`, that works guaranteed with the `render_device`
    ///     to do offscreen composition on. Dma copies will be used, if buffers returned by the allocator
    ///     also work on the `target_device`.
    /// - `copy_format` denotes the format buffers will be allocated in for offscreen rendering.
    ///
    /// It is valid to pass the same devices for both, but you *should* use [`GpuManager::single_renderer`] in those cases.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    pub fn renderer<'api>(
        &'api mut self,
        render_device: &DrmNode,
        target_device: &DrmNode,
        copy_format: Fourcc,
    ) -> Result<MultiRenderer<'api, 'api, A, A>, Error<A, A>>
    where
        <A::Device as ApiDevice>::Renderer: Bind<Dmabuf>,
    {
        if !self.devices.iter().any(|device| device.node() == render_device)
            || !self.devices.iter().any(|device| device.node() == target_device)
            || self.api.needs_enumeration()
        {
            self.api
                .enumerate(&mut self.devices)
                .map_err(Error::RenderApiError)?;
        }

        if !self.devices.iter().any(|device| device.node() == render_device) {
            return Err(Error::NoDevice(*render_device));
        }
        if !self.devices.iter().any(|device| device.node() == target_device) {
            return Err(Error::NoDevice(*target_device));
        }

        let (mut render, others) = self
            .devices
            .iter_mut()
            .partition::<Vec<_>, _>(|device| device.node() == render_device);
        if target_device != render_device {
            let (mut target, others) = others
                .into_iter()
                .partition::<Vec<_>, _>(|device| device.node() == target_device);

            Ok(MultiRenderer {
                render: render.remove(0),
                target: Some(TargetData {
                    device: target.remove(0),
                    cached_buffer: self
                        .dmabuf_cache
                        .entry((*render_device, *target_device))
                        .or_default(),
                    format: copy_format,
                }),
                other_renderers: others,
                span: tracing::Span::current(),
            })
        } else {
            Ok(MultiRenderer {
                render: render.remove(0),
                target: None,
                other_renderers: others,
                span: tracing::Span::current(),
            })
        }
    }

    /// Create a [`MultiRenderer`] from two different [`GraphicsApi`]s.
    ///
    /// - `render_api` should be the [`GpuManager`] used for the `render_device`.
    /// - `target_api` should be the [`GpuManager`] used for the `target_device`.
    /// - `render_device` should referr to the gpu node rendering operations will take place upon.
    /// - `target_device` should referr to the gpu node the composited buffer will end up upon
    /// - `allocator` should referr to an `Allocator`, that works guaranteed with the `render_device`
    ///     to do offscreen composition on. Dma copies will be used, if buffers returned by the allocator
    ///     also work on the `target_device`.
    /// - `copy_format` denotes the format buffers will be allocated in for offscreen rendering.
    #[instrument(level = "trace", skip(render_api, target_api), follows_from = [&render_api.span, &target_api.span])]
    #[profiling::function]
    pub fn cross_renderer<'render, 'target, B: GraphicsApi, Alloc: Allocator>(
        render_api: &'render mut Self,
        target_api: &'target mut GpuManager<B>,
        render_device: &DrmNode,
        target_device: &DrmNode,
        copy_format: Fourcc,
    ) -> Result<MultiRenderer<'render, 'target, A, B>, Error<A, B>>
    where
        <A::Device as ApiDevice>::Renderer: Bind<Dmabuf>,
        <B::Device as ApiDevice>::Renderer: ImportDma,
    {
        if !render_api
            .devices
            .iter()
            .any(|device| device.node() == render_device)
            || render_api.api.needs_enumeration()
        {
            render_api
                .api
                .enumerate(&mut render_api.devices)
                .map_err(Error::RenderApiError)?;
        }

        if !target_api
            .devices
            .iter()
            .any(|device| device.node() == target_device)
            || target_api.api.needs_enumeration()
        {
            target_api
                .api
                .enumerate(&mut target_api.devices)
                .map_err(Error::TargetApiError)?;
        }

        if !render_api
            .devices
            .iter()
            .any(|device| device.node() == render_device)
        {
            return Err(Error::NoDevice(*render_device));
        }
        if !target_api
            .devices
            .iter()
            .any(|device| device.node() == target_device)
        {
            return Err(Error::NoDevice(*target_device));
        }

        let (mut render, others) = render_api
            .devices
            .iter_mut()
            .partition::<Vec<_>, _>(|device| device.node() == render_device);
        if target_device != render_device {
            let target = target_api
                .devices
                .iter_mut()
                .find(|device| device.node() == target_device)
                .unwrap();

            Ok(MultiRenderer {
                render: render.remove(0),
                target: Some(TargetData {
                    device: target,
                    cached_buffer: target_api
                        .dmabuf_cache
                        .entry((*render_device, *target_device))
                        .or_default(),
                    format: copy_format,
                }),
                other_renderers: others,
                span: tracing::Span::current(),
            })
        } else {
            Ok(MultiRenderer {
                render: render.remove(0),
                target: None,
                other_renderers: others,
                span: tracing::Span::current(),
            })
        }
    }

    /// Function for optimizing buffer imports across multiple gpus.
    ///
    /// If you are using [`MultiRenderer`]s to do rendering of your client buffers,
    /// you can call `early_import` on commit to start necessary copy processes early.
    ///
    /// - `target` refers to the gpu node, that the buffer needs to be accessible on later.
    ///    *Note*: Usually this will be the **render**ing gpu of a [`MultiRenderer`]
    /// - `surface` is the wayland surface, whose buffer and subsurfaces buffers shall be imported
    ///
    /// Note: This will do nothing, if you are not using
    /// [`crate::backend::renderer::utils::on_commit_buffer_handler`]
    /// to let smithay handle buffer management.
    ///
    /// Returns [`Error::DeviceMissing`], if no devices are known at all, otherwise
    /// the error of the last failed buffer import of the traversal, if any.
    #[cfg(feature = "wayland_frontend")]
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    pub fn early_import(&mut self, target: DrmNode, surface: &WlSurface) -> Result<(), Error<A, A>>
    where
        A: 'static,
        <A::Device as ApiDevice>::Renderer: ImportMemWl + ImportDmaWl + ExportMem,
        <<A::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    {
        use crate::{
            backend::renderer::utils::RendererSurfaceStateUserData,
            wayland::compositor::{with_surface_tree_upward, TraversalAction},
        };

        if self.devices.is_empty() {
            return Err(Error::DeviceMissing);
        }

        let mut outcome = Ok(());
        with_surface_tree_upward(
            surface,
            (),
            |_surface, states, _| {
                // surfaces without renderer state are not managed by smithay's buffer handling
                let state_data = match states.data_map.get::<RendererSurfaceStateUserData>() {
                    Some(state_data) => state_data,
                    None => return TraversalAction::SkipChildren,
                };
                let mut guard = state_data.lock().unwrap();
                let state = &mut *guard;

                // surfaces that already hold textures are not descended into
                if !state.textures.is_empty() {
                    return TraversalAction::SkipChildren;
                }

                // Import a new buffer if available
                if let Some(buffer) = state.buffer.as_ref() {
                    // This is an optimistic optimization: contrary to many much more defensive
                    // damage-tracking algorithms only the most recent set of damage is imported here.
                    // If more is needed on rendering - which cannot be known at this point -
                    // import_missing will be called later to receive the rest.
                    // FIXME: We should be able to get rid of this allocation here
                    let buffer_damage = state.damage().raw().take(1).flatten().cloned().fold(
                        Vec::<Rectangle<i32, BufferCoords>>::new(),
                        |merged, mut current| {
                            // replace with drain_filter, when that becomes stable to reuse the original Vec's memory
                            let (touching, mut separate): (Vec<_>, Vec<_>) = merged
                                .into_iter()
                                .partition(|other| other.overlaps_or_touches(current));

                            for rect in touching {
                                current = current.merge(rect);
                            }
                            separate.push(current);
                            separate
                        },
                    );

                    if let Err(err) = self.early_import_buffer(target, buffer, states, &buffer_damage) {
                        outcome = Err(err);
                    }
                }

                // Once an import failed we are not displayed, so our children are neither
                if outcome.is_ok() {
                    TraversalAction::DoChildren(())
                } else {
                    TraversalAction::SkipChildren
                }
            },
            |_, _, _| {},
            |_, _, _| true,
        );

        outcome
    }

    /// Imports a single client buffer ahead of rendering and, if a memory copy towards
    /// `target_node` is to be expected, already starts exporting the damaged regions.
    ///
    /// Only dmabuf-backed buffers are processed; all other buffer types return `Ok(())`
    /// without doing any work.
    ///
    /// - `target_node` is the gpu node the buffer has to be accessible on later.
    /// - `buffer` is the client buffer to import.
    /// - `surface` is the surface state handed to `MultiTexture::from_surface` and whose
    ///   `data_map` receives the texture, if it is missing.
    /// - `damage` are the damaged regions of the buffer.
    ///
    /// Returns [`Error::DeviceMissing`], if no known device matches `target_node`, and forwards
    /// errors of `import_on_src_node` as well as errors of the texture export
    /// (the latter wrapped in [`Error::Target`]).
    #[cfg(feature = "wayland_frontend")]
    #[profiling::function]
    fn early_import_buffer(
        &mut self,
        target_node: DrmNode,
        buffer: &wl_buffer::WlBuffer,
        surface: &SurfaceData,
        damage: &[Rectangle<i32, BufferCoords>],
    ) -> Result<(), Error<A, A>>
    where
        A: 'static,
        <A::Device as ApiDevice>::Renderer: ImportMemWl + ImportDmaWl + ExportMem,
        <<A::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    {
        match buffer_type(buffer) {
            Some(BufferType::Dma) => {
                // NOTE(review): relies on `get_dmabuf` succeeding for every buffer classified as `Dma` — confirm
                let dmabuf = get_dmabuf(buffer).unwrap();
                let mut texture = MultiTexture::from_surface(Some(surface), dmabuf.size(), dmabuf.format());

                if !self.devices.iter().any(|device| target_node == *device.node()) {
                    return Err(Error::DeviceMissing);
                }

                // cannot panic: the check above found at least one device
                let mut devices = self.devices.iter_mut();
                let first = devices.next().unwrap();

                // presumably imports the dmabuf on the device it originates from and
                // returns the node of that device — verify against `import_on_src_node`
                let src_node = import_on_src_node(dmabuf, Some(damage), &mut texture, first, None, devices)?;
                if src_node != target_node {
                    // context ids of both renderers are the keys into the per-gpu texture map
                    let target_id = self
                        .devices
                        .iter()
                        .find_map(|dev| (*dev.node() == target_node).then(|| dev.renderer().context_id()))
                        .unwrap();
                    let src_id = self
                        .devices
                        .iter()
                        .find_map(|dev| {
                            (*dev.node() == src_node).then(|| dev.renderer().context_id().erased())
                        })
                        .unwrap();

                    // the lock is held until the explicit drop before `insert_mapping` below
                    let texture_internal = texture.0.lock().unwrap();
                    {
                        let target_texture = texture_internal.textures.get(&target_id.erased());
                        if !matches!(
                            target_texture,
                            Some(GpuSingleTexture::Mem {
                                external_shadow: None,
                                ..
                            })
                        ) {
                            // if we don't have a mem texture, either
                            // - we don't have any import, in that case let's figure this out during first import
                            // - we have a dma shadow copy, in that case we can't copy without invalidating the shadow buffer
                            //   if the damage doesn't match up with what will be requested during the next import.
                            // - this also applies if it is an external copy, so we additionally need a shadow buffer before reading from memory.
                            return Ok(());
                        }
                    }

                    // if we do need to do a memory copy, we start with the export early

                    // NOTE(review): assumes `import_on_src_node` left a `Direct` texture for the source gpu — confirm
                    let src_texture = match texture_internal.textures.get(&src_id).unwrap() {
                        GpuSingleTexture::Direct(tex) => tex
                            .downcast_ref::<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>(
                            )
                            .unwrap(),
                        _ => unreachable!(),
                    };

                    let format = src_texture
                        .format()
                        // Check the target device supports the format, if not use 8-bit format
                        .filter(|format| {
                            self.devices
                                .iter()
                                .find(|device| target_node == *device.node())
                                .unwrap()
                                .renderer()
                                .mem_formats()
                                .any(|fmt| fmt == *format)
                        })
                        // The GL spec *always* supports this format.
                        // TODO: Re-evaluate this, once we support vulkan
                        .unwrap_or(Fourcc::Abgr8888);

                    let src_device = self
                        .devices
                        .iter_mut()
                        .find(|device| src_node == *device.node())
                        .unwrap();

                    let mappings = {
                        // clip the damage to the texture and merge overlapping or touching rectangles,
                        // so no region gets exported twice
                        let damage = damage
                            .iter()
                            .flat_map(|rect| rect.intersection(Rectangle::from_size(src_texture.size())))
                            .fold(Vec::<Rectangle<i32, BufferCoords>>::new(), |damage, mut rect| {
                                // replace with drain_filter, when that becomes stable to reuse the original Vec's memory
                                let (overlapping, mut new_damage): (Vec<_>, Vec<_>) = damage
                                    .into_iter()
                                    .partition(|other| other.overlaps_or_touches(rect));

                                for overlap in overlapping {
                                    rect = rect.merge(overlap);
                                }
                                new_damage.push(rect);
                                new_damage
                            });
                        // export every merged region on the source gpu; the first failure aborts the import
                        damage
                            .iter()
                            .copied()
                            .map(|damage| {
                                let mapping = src_device
                                    .renderer_mut()
                                    .copy_texture(src_texture, damage, format)
                                    .map_err(Error::Target)?;
                                Ok((damage, mapping))
                            })
                            .collect::<Result<Vec<_>, Error<A, A>>>()?
                    };

                    // release the guard (and with it the borrow of `src_texture`) before touching `texture` again
                    std::mem::drop(texture_internal);
                    texture.insert_mapping::<A, A, _>(
                        src_node,
                        &target_id,
                        texture.size(),
                        mappings.into_iter(),
                    );
                    // attach the texture state to the surface, unless it already carries one
                    surface.data_map.insert_if_missing_threadsafe(|| texture.0);
                }

                Ok(())
            }
            #[cfg(all(
                feature = "wayland_frontend",
                feature = "backend_egl",
                feature = "use_system_lib"
            ))]
            Some(BufferType::Egl) => {
                // we need specialization for requiring ImportEGL
                // or require ImportAll, which will block this function for all
                // renderers that cannot import egl buffers, so we just don't
                // and sadly go the slow path
                Ok(())
            }
            Some(BufferType::Shm) => {
                // we just need to upload in import_shm_buffer
                Ok(())
            }
            Some(BufferType::SinglePixel) => {
                // no need to do anything
                Ok(())
            }
            None => {
                // welp, nothing we can do
                Ok(())
            }
        }
    }
}

/// A graphics api, that supports enumerating graphics devices
pub trait GraphicsApi {
    /// Devices this api produces
    type Device: ApiDevice;
    /// Errors this api returns
    type Error: std::error::Error;

    /// Enumerate available devices by:
    /// - removing gone devices from list
    /// - adding new devices to list
    ///
    /// Existing devices are guaranteed to not be recreated
    fn enumerate(&self, list: &mut Vec<Self::Device>) -> Result<(), Self::Error>;
    /// Method to force a re-enumeration, e.g. to free resources
    ///
    /// [`GpuManager`] queries this before handing out devices or renderers and calls
    /// [`GraphicsApi::enumerate`] again, if it returns `true`.
    /// The default implementation always returns `false`.
    fn needs_enumeration(&self) -> bool {
        false
    }
    /// Unique name for representing the api type in log messages
    fn identifier() -> &'static str;
}

/// A device produced by a [`GraphicsApi`].
///
/// Bundles a [`Renderer`] with an allocator and the [`DrmNode`] identifying the gpu.
pub trait ApiDevice: fmt::Debug {
    /// The [`Renderer`] this device contains
    type Renderer: Renderer;

    /// Returns a reference to the underlying renderer
    fn renderer(&self) -> &Self::Renderer;
    /// Returns a mutable reference to the underlying renderer
    fn renderer_mut(&mut self) -> &mut Self::Renderer;
    /// Returns an allocator for buffers renderers of this device can sample from and render to
    fn allocator(&mut self) -> &mut dyn Allocator<Buffer = Dmabuf, Error = AnyError>;

    /// Returns a [`DrmNode`] representing the graphics device
    ///
    /// [`GpuManager`] compares this node against the nodes it is asked for to select devices.
    fn node(&self) -> &DrmNode;

    /// Returns whether the underlying renderer can in principle do cross-device imports.
    /// (With no guarantee on being able to import a specific buffer.)
    fn can_do_cross_device_imports(&self) -> bool;
}

/// Renderer, that transparently copies rendering results to another gpu,
/// as well as transparently importing client buffers residing on different gpus.
///
/// Created by [`GpuManager::single_renderer`], [`GpuManager::renderer`] or
/// [`GpuManager::cross_renderer`]; it borrows its devices from the manager(s).
pub struct MultiRenderer<'render, 'target, R: GraphicsApi, T: GraphicsApi> {
    // Device matching the requested render node.
    render: &'render mut R::Device,
    // Target device data; `None` if no distinct target gpu was requested.
    target: Option<TargetData<'target, T>>,
    // Remaining devices of the render api, that are neither render nor target device.
    other_renderers: Vec<&'render mut R::Device>,
    // Tracing span that was current when the renderer was created.
    span: tracing::Span,
}

impl<R: GraphicsApi, T: GraphicsApi> fmt::Debug for MultiRenderer<'_, '_, R, T> {
    /// Formats the renderer as a struct of its devices; the tracing span is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            render,
            target,
            other_renderers,
            span: _,
        } = self;

        let mut builder = f.debug_struct("MultiRenderer");
        builder.field("render", render);
        builder.field("target", target);
        builder.field("other_renderers", other_renderers);
        builder.finish()
    }
}

impl<R: GraphicsApi, T: GraphicsApi> AsRef<<R::Device as ApiDevice>::Renderer>
    for MultiRenderer<'_, '_, R, T>
{
    fn as_ref(&self) -> &<R::Device as ApiDevice>::Renderer {
        self.render.renderer()
    }
}

impl<R: GraphicsApi, T: GraphicsApi> AsMut<<R::Device as ApiDevice>::Renderer>
    for MultiRenderer<'_, '_, R, T>
{
    fn as_mut(&mut self) -> &mut <R::Device as ApiDevice>::Renderer {
        self.render.renderer_mut()
    }
}

/// A Framebuffer of a [`MultiRenderer`].
///
/// Wraps either a framebuffer of the render-gpu renderer or one of the target-gpu renderer.
pub struct MultiFramebuffer<'buffer, R: GraphicsApi, T: GraphicsApi>(MultiFramebufferInternal<'buffer, R, T>);
// Which renderer the wrapped framebuffer belongs to.
enum MultiFramebufferInternal<'buffer, R: GraphicsApi, T: GraphicsApi> {
    // Framebuffer of the render device; produced by `bind` when the renderer has no target.
    Render(<<R::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>),
    // Framebuffer of the target device; produced by `bind` when the renderer has a target.
    Target(<<T::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>),
}

impl<'buffer, R: GraphicsApi, T: GraphicsApi> fmt::Debug for MultiFramebuffer<'buffer, R, T>
where
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>: fmt::Debug,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>: fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.0 {
            MultiFramebufferInternal::Render(framebuffer) => framebuffer.fmt(f),
            MultiFramebufferInternal::Target(framebuffer) => framebuffer.fmt(f),
        }
    }
}

impl<R: GraphicsApi, T: GraphicsApi> Texture for MultiFramebuffer<'_, R, T> {
    fn size(&self) -> Size<i32, BufferCoords> {
        match &self.0 {
            MultiFramebufferInternal::Render(framebuffer) => framebuffer.size(),
            MultiFramebufferInternal::Target(framebuffer) => framebuffer.size(),
        }
    }

    fn width(&self) -> u32 {
        match &self.0 {
            MultiFramebufferInternal::Render(framebuffer) => framebuffer.width(),
            MultiFramebufferInternal::Target(framebuffer) => framebuffer.width(),
        }
    }

    fn height(&self) -> u32 {
        match &self.0 {
            MultiFramebufferInternal::Render(framebuffer) => framebuffer.height(),
            MultiFramebufferInternal::Target(framebuffer) => framebuffer.height(),
        }
    }

    fn format(&self) -> Option<Fourcc> {
        match &self.0 {
            MultiFramebufferInternal::Render(framebuffer) => framebuffer.format(),
            MultiFramebufferInternal::Target(framebuffer) => framebuffer.format(),
        }
    }
}

/// [`Frame`] implementation of a [`MultiRenderer`].
///
/// Leaking the frame will potentially keep it from doing necessary copies
/// of the internal framebuffer for some multi-gpu configurations. The result would
/// be no updated framebuffer contents.
/// Additionally, all problems related to the Frame-implementation of the underlying
/// [`GraphicsApi`] will be present.
pub struct MultiFrame<'render, 'target, 'frame, 'buffer, R: GraphicsApi, T: GraphicsApi>
where
    'buffer: 'frame,
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    // DRM node of the render device, captured when the frame was created.
    node: DrmNode,

    // NOTE: the declaration order of `frame`, `framebuffer` and `target` matters. Fields are
    // dropped in declaration order and the SAFETY arguments of the lifetime extension rely on
    // the frame being dropped before the framebuffer, and the framebuffer before `target`.

    // Inner frame of the render-gpu renderer. Only `None` after `finish_internal` took it and
    // before `flush_frame` (if called) started a new one.
    frame: Option<<<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>>,
    // Render-side framebuffer bound to the intermediate dmabuf, kept on the heap so the inner
    // frame can refer to it. Only set when rendering into a target-gpu framebuffer.
    framebuffer:
        Option<AliasableBox<<<R::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'frame>>>,
    // Target-gpu state needed to copy the result over when the frame is finished.
    target: Option<TargetFrameData<'target, 'frame, 'buffer, T>>,
    // Raw pointer to the `render` field of the `MultiRenderer` that created this frame.
    // It is only dereferenced mutably once the inner frame has been finished.
    render: *mut &'render mut R::Device,

    // Transform and size this frame was started with.
    dst_transform: Transform,
    size: Size<i32, Physical>,
    // Damage collected for this frame; starts empty and is consumed by `finish_internal`.
    // NOTE(review): it is filled outside of this part of the file — presumably by the drawing
    // operations of the `Frame` implementation.
    damage: Vec<Rectangle<i32, Physical>>,
    // Entered tracing span covering the lifetime of the frame.
    span: tracing::span::EnteredSpan,
}

// Target-gpu state of a `MultiRenderer`.
struct TargetData<'target, T: GraphicsApi> {
    // Device of the target-gpu.
    device: &'target mut T::Device,
    // Cached intermediate buffer the render-gpu draws into. The flag is `true` if the buffer was
    // created as a shared dmabuf (and is imported on the target device for a gpu copy) and
    // `false` if it is a render-gpu-only buffer used for the cpu-copy fallback.
    cached_buffer: &'target mut Option<(bool, Dmabuf)>,
    // Fourcc code used when allocating the intermediate buffer.
    format: Fourcc,
}

// Target-gpu state of a single `MultiFrame`.
struct TargetFrameData<'target, 'frame, 'buffer, T: GraphicsApi> {
    // Device of the target-gpu, borrowed from the `MultiRenderer` for the duration of the frame.
    device: &'frame mut &'target mut T::Device,
    // The user-provided target framebuffer the final result is copied into.
    framebuffer: &'frame mut <<T::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>,
    // The intermediate buffer imported on the target device; `Some` selects the gpu-copy path,
    // `None` the cpu-copy path.
    texture: Option<<<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>,
    // Fourcc code of the intermediate buffer.
    format: Fourcc,
}

impl<'frame, 'buffer, R: GraphicsApi + 'frame, T: GraphicsApi> fmt::Debug
    for MultiFrame<'_, '_, 'frame, 'buffer, R, T>
where
    'buffer: 'frame,
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: fmt::Debug,
    R::Device: fmt::Debug,
    T::Device: fmt::Debug,
{
    /// Formats the bookkeeping state of the frame.
    ///
    /// The inner `frame`, the intermediate `framebuffer` and the tracing `span` are not printed,
    /// so the output is marked as non-exhaustive.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // SAFETY: `self.render` points at the `render` field of the `MultiRenderer` this frame
        // was created from. That renderer is mutably borrowed for `'frame` and this frame cannot
        // outlive `'frame`, so the pointee is alive. Only a shared reference is created here.
        // NOTE(review): the inner frame may still hold a mutable borrow into the device's
        // renderer while this runs — confirm that merely formatting the device is acceptable.
        let render = unsafe { &*self.render };

        f.debug_struct("MultiFrame")
            .field("node", &self.node)
            .field("render", render)
            .field("target", &self.target)
            .field("dst_transform", &self.dst_transform)
            .field("size", &self.size)
            .field("damage", &self.damage)
            .finish_non_exhaustive()
    }
}

impl<T: GraphicsApi> fmt::Debug for TargetData<'_, T>
where
    T::Device: fmt::Debug,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TargetData")
            .field("device", self.device)
            .field("cached_buffer", &self.cached_buffer)
            .field("format", &self.format)
            .finish()
    }
}

impl<T: GraphicsApi> fmt::Debug for TargetFrameData<'_, '_, '_, T>
where
    T::Device: fmt::Debug,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TargetFrameData")
            .field("device", self.device)
            .field("format", &self.format)
            .finish_non_exhaustive()
    }
}

// These casts are ok, because the frame cannot outlive the MultiFrame,
// see MultiRenderer::render for how this hack works and why it is necessary.

impl<'frame, 'buffer, R: GraphicsApi, T: GraphicsApi>
    AsRef<<<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>>
    for MultiFrame<'_, '_, 'frame, 'buffer, R, T>
where
    'buffer: 'frame,
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Borrows the inner frame of the render-gpu renderer.
    ///
    /// # Panics
    ///
    /// Panics if the inner frame is currently not present (it is taken while finishing).
    fn as_ref(&self) -> &<<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer> {
        let inner_frame = self.frame.as_ref();
        inner_frame.unwrap()
    }
}

impl<'frame, 'buffer, R: GraphicsApi, T: GraphicsApi>
    AsMut<<<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>>
    for MultiFrame<'_, '_, 'frame, 'buffer, R, T>
where
    'buffer: 'frame,
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Mutably borrows the inner frame of the render-gpu renderer.
    ///
    /// # Panics
    ///
    /// Panics if the inner frame is currently not present (it is taken while finishing).
    fn as_mut(
        &mut self,
    ) -> &mut <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer> {
        let inner_frame = self.frame.as_mut();
        inner_frame.unwrap()
    }
}

impl<R: GraphicsApi, T: GraphicsApi, Target> Offscreen<Target> for MultiRenderer<'_, '_, R, T>
where
    <T::Device as ApiDevice>::Renderer: Offscreen<Target>,
    <R::Device as ApiDevice>::Renderer: Offscreen<Target>,
    // We need these because the Bind-impl does and Offscreen requires Bind
    <T::Device as ApiDevice>::Renderer: Bind<Target>,
    <R::Device as ApiDevice>::Renderer: Bind<Target>,
    // We need these because the Renderer-impl does and Offscreen requires Bind, which requires Unbind, which requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Creates an offscreen buffer on the target device if one is set,
    /// otherwise on the render device.
    ///
    /// Errors of the device that was used are wrapped in the matching
    /// [`Error::Target`] / [`Error::Render`] variant.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn create_buffer(
        &mut self,
        format: Fourcc,
        size: Size<i32, BufferCoords>,
    ) -> Result<Target, <Self as RendererSuper>::Error> {
        match self.target.as_mut() {
            Some(target) => {
                let target_renderer = target.device.renderer_mut();
                target_renderer
                    .create_buffer(format, size)
                    .map_err(Error::Target)
            }
            None => {
                let render_renderer = self.render.renderer_mut();
                render_renderer
                    .create_buffer(format, size)
                    .map_err(Error::Render)
            }
        }
    }
}

impl<R: GraphicsApi, T: GraphicsApi, Target> Bind<Target> for MultiRenderer<'_, '_, R, T>
where
    <T::Device as ApiDevice>::Renderer: Bind<Target>,
    <R::Device as ApiDevice>::Renderer: Bind<Target>,
    // We need this because the Renderer-impl does and Bind requires Unbind, which requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    #[instrument(level = "trace", parent = &self.span, skip(self, bind))]
    #[profiling::function]
    fn bind<'a>(
        &mut self,
        bind: &'a mut Target,
    ) -> Result<<Self as RendererSuper>::Framebuffer<'a>, <Self as RendererSuper>::Error> {
        if let Some(target) = self.target.as_mut() {
            target
                .device
                .renderer_mut()
                .bind(bind)
                .map(MultiFramebufferInternal::Target)
                .map(MultiFramebuffer)
                .map_err(Error::Target)
        } else {
            self.render
                .renderer_mut()
                .bind(bind)
                .map(MultiFramebufferInternal::Render)
                .map(MultiFramebuffer)
                .map_err(Error::Render)
        }
    }

    fn supported_formats(&self) -> Option<FormatSet> {
        if let Some(target) = self.target.as_ref() {
            Bind::<Target>::supported_formats(target.device.renderer())
        } else {
            Bind::<Target>::supported_formats(self.render.renderer())
        }
    }
}

/// Upper bound on the number of separately downloaded rectangles in the cpu-copy path.
/// If merging the damage leaves more rectangles than this, the whole buffer is copied instead.
const MAX_CPU_COPIES: usize = 3; // TODO, benchmark this

impl<'render, 'target, R: GraphicsApi, T: GraphicsApi> RendererSuper for MultiRenderer<'render, 'target, R, T>
where
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    // Errors of either api (and of the multi-gpu machinery itself) are wrapped in `Error<R, T>`.
    type Error = Error<R, T>;
    // Textures are tracked per renderer context inside a shared `MultiTexture`.
    type TextureId = MultiTexture;
    // A framebuffer of either the render or the target device.
    type Framebuffer<'buffer> = MultiFramebuffer<'buffer, R, T>;
    // The frame keeps the renderer's own `'render` / `'target` lifetimes in addition to the
    // per-frame ones.
    type Frame<'frame, 'buffer>
        = MultiFrame<'render, 'target, 'frame, 'buffer, R, T>
    where
        'buffer: 'frame,
        Self: 'frame;
}

impl<'render, 'target, R: GraphicsApi, T: GraphicsApi> Renderer for MultiRenderer<'render, 'target, R, T>
where
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Returns the context id of the render-gpu renderer, mapped to [`MultiTexture`].
    fn context_id(&self) -> ContextId<MultiTexture> {
        self.render.renderer().context_id().map()
    }

    /// Sets the downscale filter on the render-gpu renderer.
    fn downscale_filter(&mut self, filter: TextureFilter) -> Result<(), Self::Error> {
        self.render
            .renderer_mut()
            .downscale_filter(filter)
            .map_err(Error::Render)
    }
    /// Sets the upscale filter on the render-gpu renderer.
    fn upscale_filter(&mut self, filter: TextureFilter) -> Result<(), Self::Error> {
        self.render
            .renderer_mut()
            .upscale_filter(filter)
            .map_err(Error::Render)
    }

    /// Sets the debug flags on the render-gpu renderer.
    fn set_debug_flags(&mut self, flags: DebugFlags) {
        self.render.renderer_mut().set_debug_flags(flags)
    }
    /// Returns the debug flags of the render-gpu renderer.
    fn debug_flags(&self) -> DebugFlags {
        self.render.renderer().debug_flags()
    }

    /// Starts a new frame.
    ///
    /// Without a target (or for a framebuffer of the render device) the frame renders directly
    /// into `framebuffer`. For a framebuffer of the target device the frame renders into a
    /// cached intermediate dmabuf on the render device; the result is copied into `framebuffer`
    /// when the returned [`MultiFrame`] is finished or dropped.
    ///
    /// # Panics
    ///
    /// Panics if `framebuffer` belongs to the target device but this renderer has no target.
    #[instrument(level = "trace", parent = &self.span, skip(self, framebuffer))]
    #[profiling::function]
    fn render<'frame, 'buffer>(
        &'frame mut self,
        framebuffer: &'frame mut Self::Framebuffer<'buffer>,
        size: Size<i32, Physical>,
        dst_transform: Transform,
    ) -> Result<MultiFrame<'render, 'target, 'frame, 'buffer, R, T>, Self::Error>
    where
        'buffer: 'frame,
    {
        // With a target set: make sure an intermediate buffer exists and prepare the render-side
        // framebuffer (and, for the gpu-copy path, the target-side texture) for it.
        let target_state = if let Some(target) = self.target.as_mut() {
            let buffer_size = size.to_logical(1).to_buffer(1, Transform::Normal);

            // Throw away the cached buffer if its size or format no longer matches.
            if let Some((_, dmabuf)) = &target.cached_buffer {
                if dmabuf.size() != buffer_size || BufferTrait::format(dmabuf).code != target.format {
                    *target.cached_buffer = None;
                }
            };

            if target.cached_buffer.is_none() {
                // Preferred: a dmabuf both devices can use; it is flagged `true` (direct).
                match create_shared_dma_framebuffer::<R, T>(buffer_size, self.render, target) {
                    Ok(dmabuf) => {
                        *target.cached_buffer = Some((true, dmabuf));
                    }
                    Err(err) => {
                        warn!(
                            "Error importing dmabuf from {} to {}: {}",
                            *self.render.node(),
                            *target.device.node(),
                            err
                        );
                        info!("Falling back to cpu-copy.");
                        // Fallback: a buffer only the render device needs to be able to bind,
                        // using every modifier it supports for the target format.
                        let modifiers = Bind::<Dmabuf>::supported_formats(self.render.renderer())
                            .unwrap_or_default()
                            .into_iter()
                            .filter(|format| format.code == target.format)
                            .map(|f| f.modifier)
                            .collect::<Vec<_>>();
                        let mut dmabuf = self
                            .render
                            .allocator()
                            .create_buffer(
                                buffer_size.w as u32,
                                buffer_size.h as u32,
                                target.format,
                                &modifiers,
                            )
                            .map_err(Error::AllocatorError)?;

                        {
                            // make sure we mark this as a framebuffer on render first (some GL drivers don't like us to do this later).
                            let mut render_framebuffer = self
                                .render
                                .renderer_mut()
                                .bind(&mut dmabuf)
                                .map_err(Error::Render)?;
                            self.render
                                .renderer_mut()
                                .render(&mut render_framebuffer, size, dst_transform)
                                .map_err(Error::Render)?;
                            // drop everything
                        }

                        // Flagged `false`: the target device will not import this buffer.
                        *target.cached_buffer = Some((false, dmabuf));
                    }
                }
            };

            // try to import on target node
            // (the unwrap cannot fail: the cache was filled above or an error was returned)
            let (direct, ref mut dmabuf) = target.cached_buffer.as_mut().unwrap();
            // TODO: We could cache that texture all the way back to the GpuManager in a HashMap<WeakDmabuf, Texture>.
            // Only shared (direct) buffers are imported; `None` selects the cpu-copy path later.
            let texture = (*direct)
                .then(|| {
                    target
                        .device
                        .renderer_mut()
                        .import_dmabuf(dmabuf, Some(&[Rectangle::from_size(buffer_size)]))
                        .map_err(Error::Target)
                })
                .transpose()?;
            // Render-side framebuffer for the intermediate buffer.
            let framebuffer = self.render.renderer_mut().bind(dmabuf).map_err(Error::Render)?;

            Some((&mut target.device, framebuffer, texture, target.format))
        } else {
            None
        };

        let node = *self.render.node();
        // Raw pointer to our `render` field. The `MultiFrame` uses it to reach the render device
        // again once the inner frame (which borrows the device's renderer) has been finished.
        let ptr = &mut self.render as *mut _;

        let mut target = None;
        let mut new_framebuffer = None;
        let frame = match &mut framebuffer.0 {
            // Render-device framebuffer: render straight into it, no copy needed.
            MultiFramebufferInternal::Render(framebuffer) => self
                .render
                .renderer_mut()
                .render(framebuffer, size, dst_transform)
                .map_err(Error::Render)?,
            // Target-device framebuffer: render into the intermediate buffer instead.
            MultiFramebufferInternal::Target(target_framebuffer) => {
                let (target_device, render_framebuffer, texture, format) = target_state.unwrap();
                target = Some(TargetFrameData {
                    device: target_device,
                    framebuffer: target_framebuffer,
                    texture,
                    format,
                });
                // Move the framebuffer to the heap so its address stays stable while the frame
                // refers to it.
                let mut render_framebuffer = AliasableBox::from_unique(Box::new(render_framebuffer));

                // We extend the lifetime to 'frame, because this is self-referential.
                // SAFETY:
                //  - We drop the framebuffer before `target`, which contains the referenced dmabuf
                //  - We drop the frame before the framebuffer as we store both in `MultiFrame`
                //  - `Frame` can't store an invalid pointer into the framebuffer, as the framebuffer is moved
                //    to the heap and won't be moved by the compiler thanks to `AliasableBox`.
                let frame = unsafe {
                    std::mem::transmute::<
                        <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'_, '_>,
                        <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>,
                    >(
                        self.render
                            .renderer_mut()
                            .render(&mut *render_framebuffer, size, dst_transform)
                            .map_err(Error::Render)?,
                    )
                };
                new_framebuffer = Some(render_framebuffer);
                frame
            }
        };

        // `direct` records whether the gpu-copy path (imported texture) will be used.
        let span = trace_span!(
            parent: &self.span,
            "renderer_multi_frame",
            direct = target.as_ref().is_some_and(|t| t.texture.is_some()),
        )
        .entered();

        Ok(MultiFrame {
            node,
            frame: Some(frame),
            framebuffer: new_framebuffer,
            render: ptr, // this is fine, as long as we have the frame, this ptr is valid
            target,
            dst_transform,
            size,
            damage: Vec::new(),
            span,
        })
    }

    /// Waits for `sync` on the render-gpu renderer.
    #[profiling::function]
    fn wait(&mut self, sync: &sync::SyncPoint) -> Result<(), Self::Error> {
        self.render.renderer_mut().wait(sync).map_err(Error::Render)
    }

    /// Cleans up the texture cache of the target device (if any) and then of the render device.
    ///
    /// Returns early with the target error if the target cleanup fails.
    #[profiling::function]
    fn cleanup_texture_cache(&mut self) -> Result<(), Self::Error> {
        if let Some(target) = self.target.as_mut() {
            target
                .device
                .renderer_mut()
                .cleanup_texture_cache()
                .map_err(Error::Target)?;
        }
        self.render
            .renderer_mut()
            .cleanup_texture_cache()
            .map_err(Error::Render)?;
        Ok(())
    }
}

fn create_shared_dma_framebuffer<R, T: GraphicsApi>(
    buffer_size: Size<i32, BufferCoords>,
    src: &mut R::Device,
    target: &mut TargetData<'_, T>,
) -> Result<Dmabuf, Error<R, T>>
where
    R: GraphicsApi + 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    if !target.device.can_do_cross_device_imports() {
        return Err(Error::ImportFailed);
    }

    let target_formats = ImportDma::dmabuf_formats(target.device.renderer())
        .iter()
        .filter(|format| format.code == target.format)
        .copied()
        .collect::<FormatSet>();
    let render_formats = Bind::<Dmabuf>::supported_formats(src.renderer()).unwrap_or_default();
    let formats = target_formats.intersection(&render_formats);
    let target_modifiers = formats
        .map(|format| format.modifier)
        .filter(|modifier| *modifier != Modifier::Invalid)
        .collect::<Vec<_>>();

    if target_modifiers.is_empty() {
        return Err(Error::ImportFailed);
    }

    info!(
        "Found dma-copy set for {:?} <-> {:?}: {:?}@{:?}",
        src.node(),
        target.device.node(),
        target.format,
        target_modifiers,
    );

    let mut dmabuf = src
        .allocator()
        .create_buffer(
            buffer_size.w as u32,
            buffer_size.h as u32,
            target.format,
            &target_modifiers,
        )
        .map_err(Error::AllocatorError)?;

    // verify we can bind on src and import on target

    src.renderer_mut().bind(&mut dmabuf).map_err(Error::Render)?;

    target
        .device
        .renderer_mut()
        .import_dmabuf(&dmabuf, Some(&[Rectangle::from_size(buffer_size)]))
        .map_err(Error::Target)?;

    Ok(dmabuf)
}

impl<'frame, 'buffer, R: GraphicsApi, T: GraphicsApi> MultiFrame<'_, '_, 'frame, 'buffer, R, T>
where
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    // Pushes everything rendered so far to the target framebuffer and keeps the frame usable.
    //
    // Only does something when a target is set: the inner frame is finished (which performs the
    // copy, see `finish_internal`) and a new inner frame is started on the same intermediate
    // framebuffer. On error the frame is left without an inner frame.
    fn flush_frame(&mut self) -> Result<(), Error<R, T>> {
        if self.target.is_some() {
            let _ = self.finish_internal()?;
            // now the frame is gone, lets use our unholy ptr till the end of this call:
            // SAFETY:
            // - The renderer will never be invalid because the lifetime of the frame must be shorter than the renderer.
            // - The pointer can't be aliased because of the following:
            //   - Creating a frame requires an `&mut` reference to the renderer, making the mutable borrow safe.
            //   - The mutable reference is used in a function which mutably borrows the frame, that either being `.finish()`
            //      (which takes ownership of the frame) or dropping the frame.
            let render = unsafe { &mut *self.render };

            // We extend the lifetime to 'frame, because this is self-referential.
            // SAFETY:
            //  - We drop the framebuffer before `target`, which contains the referenced dmabuf
            //  - We drop the frame before the framebuffer as we store both in `MultiFrame`
            //  - `Frame` can't store an invalid pointer into the framebuffer, as the framebuffer is moved
            //    to the heap and won't be moved by the compiler thanks to `AliasableBox`.
            let frame = unsafe {
                std::mem::transmute::<
                    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'_, '_>,
                    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>,
                >(
                    render
                        .renderer_mut()
                        .render(self.framebuffer.as_mut().unwrap(), self.size, self.dst_transform)
                        .map_err(Error::Render)?,
                )
            };
            self.frame = Some(frame);
        }
        Ok(())
    }

    // Finishes the inner frame and, if a target is set, copies the damaged regions of the
    // intermediate buffer into the target framebuffer.
    //
    // Copy paths:
    // - gpu copy, if the intermediate buffer was imported as a texture on the target device,
    // - cpu copy otherwise (download on the render device, upload on the target device).
    //
    // Returns the sync point of the last submitted work. If the inner frame was already taken,
    // nothing happens and a signaled sync point is returned.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn finish_internal(&mut self) -> Result<sync::SyncPoint, Error<R, T>> {
        if let Some(frame) = self.frame.take() {
            let sync = frame.finish().map_err(Error::Render)?;

            // now the frame is gone, lets use our unholy ptr till the end of this call:
            // SAFETY:
            // - The renderer will never be invalid because the lifetime of the frame must be shorter than the renderer.
            // - The pointer can't be aliased because of the following:
            //   - Creating a frame requires an `&mut` reference to the renderer, making the mutable borrow safe.
            //   - The mutable reference is used in a function which mutably borrows the frame, that either being `.finish()`
            //      (which takes ownership of the frame) or dropping the frame.
            let render = unsafe { &mut *self.render };

            // Convert the collected damage into buffer coordinates, applying the frame transform.
            // The damage list is emptied by this.
            let damage_area = self.dst_transform.transform_size(self.size.to_logical(1));
            let mut damage = std::mem::take(&mut self.damage)
                .into_iter()
                .map(|rect| rect.to_logical(1).to_buffer(1, self.dst_transform, &damage_area))
                .collect::<Vec<_>>();

            let buffer_size = self.size.to_logical(1).to_buffer(1, Transform::Normal);
            if let Some(target) = self.target.as_mut() {
                if let Some(texture) = target.texture.as_ref() {
                    // try gpu copy
                    // The target frame uses `Transform::Normal`, so map the damage back accordingly.
                    let damage = damage
                        .iter()
                        .map(|rect| rect.to_logical(1, Transform::Normal, &buffer_size).to_physical(1))
                        .collect::<Vec<_>>();
                    let mut frame = target
                        .device
                        .renderer_mut()
                        .render(target.framebuffer, self.size, Transform::Normal)
                        .map_err(Error::Target)?;
                    // Do not sample the intermediate buffer before the render-gpu is done with it.
                    frame.wait(&sync).map_err(Error::Target)?;
                    frame
                        .clear(Color32F::TRANSPARENT, &damage)
                        .map_err(Error::Target)?;
                    frame
                        .render_texture_from_to(
                            texture,
                            Rectangle::from_size(buffer_size).to_f64(),
                            Rectangle::from_size(self.size),
                            &damage,
                            &[Rectangle::from_size(self.size)],
                            Transform::Normal,
                            1.0,
                        )
                        .map_err(Error::Target)?;
                    let sync = frame.finish().map_err(Error::Target)?;
                    render
                        .renderer_mut()
                        .cleanup_texture_cache()
                        .map_err(Error::Render)?;

                    return Ok(sync);
                }

                // Download in the target format if the target device can import it from memory.
                let format = if target
                    .device
                    .renderer()
                    .mem_formats()
                    .any(|fmt| fmt == target.format)
                {
                    target.format
                } else {
                    // GL mandates this to be supported for downloading, this will truncate 10-bit data
                    Fourcc::Abgr8888
                };

                // cpu copy
                // Drop consecutive duplicates, rectangles away from the buffer and empty rectangles.
                damage.dedup();
                damage.retain(|rect| rect.overlaps_or_touches(Rectangle::from_size(buffer_size)));
                damage.retain(|rect| rect.size.h > 0 && rect.size.w > 0);

                let mut copy_rects = // merge overlapping rectangles
                    damage.iter().cloned().fold(Vec::new(), |new_damage, mut rect| {
                        // replace with drain_filter, when that becomes stable to reuse the original Vec's memory
                        let (overlapping, mut new_damage): (Vec<_>, Vec<_>) = new_damage
                            .into_iter()
                            .partition(|other: &Rectangle<i32, BufferCoords>| other.overlaps_or_touches(rect));

                        for overlap in overlapping {
                            rect = rect.merge(overlap);
                        }
                        new_damage.push(rect);
                        new_damage
                    });
                // Too many separate downloads: copy the whole buffer in one go instead.
                if copy_rects.len() > MAX_CPU_COPIES {
                    copy_rects = Vec::from([Rectangle::from_size(buffer_size)]);
                }

                // Start all downloads on the render device first ...
                let mut mappings = Vec::new();
                for rect in copy_rects {
                    let mapping = (
                        ExportMem::copy_framebuffer(
                            render.renderer_mut(),
                            self.framebuffer.as_ref().unwrap(),
                            rect,
                            format,
                        )
                        .map_err(Error::Render)?,
                        rect,
                    );
                    mappings.push(mapping);
                }

                // Nothing was damaged, so there is nothing to copy.
                if mappings.is_empty() {
                    render
                        .renderer_mut()
                        .cleanup_texture_cache()
                        .map_err(Error::Render)?;
                    return Ok(sync::SyncPoint::signaled());
                }

                // ... then map each download and upload it as a texture on the target device.
                let textures = mappings
                    .into_iter()
                    .map(|(mapping, rect)| {
                        let slice = ExportMem::map_texture(render.renderer_mut(), &mapping)
                            .map_err(Error::Render::<R, T>)?;
                        let texture = target
                            .device
                            .renderer_mut()
                            .import_memory(slice, TextureMapping::format(&mapping), rect.size, false)
                            .map_err(Error::Target)?;
                        Ok((texture, rect))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                let mut frame = target
                    .device
                    .renderer_mut()
                    .render(target.framebuffer, self.size, Transform::Normal)
                    .map_err(Error::Target)?;
                for (texture, rect) in textures {
                    // Draw only the damaged parts that fall inside this uploaded rectangle.
                    for damage_rect in damage.iter().filter_map(|dmg_rect| dmg_rect.intersection(rect)) {
                        let dst = damage_rect
                            .to_logical(1, Transform::Normal, &buffer_size)
                            .to_physical(1);
                        // The texture only covers `rect`, so the source is relative to its origin.
                        let src = Rectangle::new(damage_rect.loc - rect.loc, damage_rect.size).to_f64();
                        let damage = &[Rectangle::from_size(dst.size)];
                        frame
                            .clear(Color32F::TRANSPARENT, &[dst])
                            .map_err(Error::Target)?;
                        frame
                            .render_texture_from_to(
                                &texture,
                                src,
                                dst,
                                damage,
                                &[Rectangle::from_size(self.size)],
                                Transform::Normal,
                                1.0,
                            )
                            .map_err(Error::Target)?;
                    }
                }
                let sync = frame.finish().map_err(Error::Target)?;
                render
                    .renderer_mut()
                    .cleanup_texture_cache()
                    .map_err(Error::Render)?;
                return Ok(sync);
            }

            // No target: the inner frame rendered directly into the final framebuffer.
            render
                .renderer_mut()
                .cleanup_texture_cache()
                .map_err(Error::Render)?;
            return Ok(sync);
        }

        Ok(sync::SyncPoint::signaled())
    }
}

impl<R: GraphicsApi, T: GraphicsApi> Drop for MultiFrame<'_, '_, '_, '_, R, T>
where
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Finishes the frame if that has not happened yet.
    ///
    /// Errors cannot be propagated from `drop`, so they are only logged.
    fn drop(&mut self) {
        match self.finish_internal() {
            Ok(_) => {}
            Err(err) => {
                warn!("Ignored error finishing MultiFrame on drop: {}", err);
            }
        }
    }
}

/// [`Texture`]s produced by a [`MultiRenderer`].
///
/// Cloning is cheap: all clones share the same reference-counted,
/// mutex-protected internal state.
#[derive(Debug, Clone)]
pub struct MultiTexture(Arc<Mutex<MultiTextureInternal>>);
/// Shared state behind a [`MultiTexture`].
#[derive(Debug)]
struct MultiTextureInternal {
    /// Per-renderer-context texture state, keyed by the erased context id of the renderer.
    textures: HashMap<ErasedContextId, GpuSingleTexture>,
    /// Size this texture was created (or last reset) with.
    size: Size<i32, BufferCoords>,
    /// Format recorded from inserted textures / mappings; `None` until something was inserted.
    format: Option<Fourcc>,
    // Only read by `MultiTexture::from_surface`, which is gated on the `wayland_frontend` feature.
    #[allow(dead_code)]
    buffer_format: Format,
}
// SAFETY: We require `Send` for textures of renderers suitable for the MultiRenderer.
//  Type erasure just forces us to do this instead.
// NOTE(review): the erased values are stored as `Box<dyn Any + 'static>` without a `Send` bound,
//  so soundness relies on every insertion path only storing `Send` values — confirm.
unsafe impl Send for MultiTextureInternal {}

/// Type-erased texture mappings, each paired with the buffer region it belongs to.
type DamageAnyTextureMappings = Vec<(Rectangle<i32, BufferCoords>, Box<dyn Any + 'static>)>;

/// Type-erased texture state stored per renderer context inside a [`MultiTexture`].
#[derive(Debug)]
enum GpuSingleTexture {
    /// A type-erased texture that was inserted as-is (see `MultiTexture::insert_texture`).
    Direct(Box<dyn Any + 'static>),
    /// A type-erased texture that is accompanied by a dmabuf.
    Dma {
        /// The type-erased texture.
        texture: Box<dyn Any + 'static>,
        /// NOTE(review): presumably the dmabuf backing `texture` — confirm against the code creating this variant.
        dmabuf: Dmabuf,
        /// Pending sync point; handed out (and cleared) by `MultiTexture::needs_synchronization`.
        sync: Option<SyncPoint>,
    },
    /// Texture state that is built up from memory mappings.
    Mem {
        /// Optional dmabuf together with a type-erased texture.
        /// NOTE(review): presumably a shadow copy of an externally imported buffer — confirm.
        external_shadow: Option<(Dmabuf, Box<dyn Any + 'static>)>,
        /// The type-erased texture, if one exists already.
        texture: Option<Box<dyn Any + 'static>>,
        /// The node the mappings were created on, together with the mappings themselves.
        mappings: Option<(DrmNode, DamageAnyTextureMappings)>,
    },
}

impl MultiTexture {
    /// Returns the [`MultiTexture`] to use for a surface buffer of the given `size` and `buffer_format`.
    ///
    /// If `surface` is given and its `data_map` already holds shared texture state, that state
    /// is reused; otherwise fresh, empty state is created. Reused state whose size or buffer
    /// format no longer matches is reset, which drops all per-renderer textures stored so far.
    #[cfg(feature = "wayland_frontend")]
    #[profiling::function]
    fn from_surface(
        surface: Option<&crate::wayland::compositor::SurfaceData>,
        size: Size<i32, BufferCoords>,
        buffer_format: Format,
    ) -> MultiTexture {
        let internal = surface
            .and_then(|surface| {
                surface
                    .data_map
                    .get::<Arc<Mutex<MultiTextureInternal>>>()
                    .cloned()
            })
            .unwrap_or_else(|| {
                Arc::new(Mutex::new(MultiTextureInternal {
                    textures: HashMap::new(),
                    size,
                    format: None,
                    buffer_format,
                }))
            });
        {
            let mut internal = internal.lock().unwrap();
            // Cached textures are only valid for the exact size/format they were created for.
            if internal.size != size || internal.buffer_format != buffer_format {
                internal.textures.clear();
                internal.format = None;
                internal.size = size;
                internal.buffer_format = buffer_format;
            }
        }
        MultiTexture(internal)
    }

    /// Creates a new, empty [`MultiTexture`] with the given `size` and `buffer_format`.
    fn new(size: Size<i32, BufferCoords>, buffer_format: Format) -> MultiTexture {
        MultiTexture(Arc::new(Mutex::new(MultiTextureInternal {
            textures: HashMap::new(),
            size,
            format: None,
            buffer_format,
        })))
    }

    /// Attempt to get the texture stored for the renderer context `render_id`,
    /// as the `TextureId` type of the renderer belonging to the graphics api `A`.
    ///
    /// Will return `None` if either:
    ///
    /// - No texture state is stored for the context identified by `render_id`
    /// - The stored state does not (yet) contain a texture
    /// - The stored texture is not of the `TextureId` type of `A`'s renderer
    pub fn get<A: GraphicsApi + 'static>(
        &self,
        render_id: &ContextId<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>,
    ) -> Option<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>
    where
        <<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send + 'static,
    {
        let tex = self.0.lock().unwrap();
        tex.textures
            .get(&render_id.erased())
            .and_then(|texture| match texture {
                GpuSingleTexture::Direct(texture) => Some(texture),
                GpuSingleTexture::Dma { texture, .. } => Some(texture),
                GpuSingleTexture::Mem { texture, .. } => texture.as_ref(),
            })
            .and_then(|texture| {
                // Explicitly deref through the `Box`, so the downcast inspects the boxed value
                // and not the `Box` itself.
                <dyn Any>::downcast_ref::<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>(
                    &**texture,
                )
            })
            .cloned()
    }

    /// Takes the pending sync point of the texture stored for `render_id`, if any.
    ///
    /// Only `Dma` entries carry a sync point; it is removed from the entry by this call,
    /// so a subsequent call returns `None` until a new one is stored.
    fn needs_synchronization<A: GraphicsApi + 'static>(
        &self,
        render_id: &ContextId<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>,
    ) -> Option<SyncPoint> {
        let mut tex = self.0.lock().unwrap();
        tex.textures
            .get_mut(&render_id.erased())
            .and_then(|texture| match texture {
                GpuSingleTexture::Direct(_) => None,
                GpuSingleTexture::Dma { sync, .. } => sync.take(),
                GpuSingleTexture::Mem { .. } => None,
            })
    }

    /// Stores `texture` as the `Direct` texture for the renderer context `render_id`,
    /// replacing any state previously stored for that context.
    ///
    /// If other textures are already stored and the format of `texture` differs from the
    /// recorded one, the texture is rejected and only a warning is logged.
    fn insert_texture<A: GraphicsApi + 'static>(
        &mut self,
        render_id: &ContextId<<<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>,
        texture: <<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId,
    ) where
        <<A::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
    {
        let mut tex = self.0.lock().unwrap();
        let format = texture.format();
        if format != tex.format && !tex.textures.is_empty() {
            warn!(has = ?tex.format, got = ?format, "Multi-SubTexture with wrong format!");
            return;
        }
        tex.format = format;

        let render_id = render_id.erased();
        trace!(
            "Inserting into: {:p} for {:?}: {:?}",
            Arc::as_ptr(&self.0),
            render_id,
            tex
        );
        tex.textures
            .insert(render_id, GpuSingleTexture::Direct(Box::new(texture) as Box<_>));
    }

    /// Records `new_mappings` (created on the node `source`) for the renderer context `render_id`.
    ///
    /// Existing `Mem` state for the context is carried over where it is still valid:
    /// - the previous texture is kept if it is of `R`'s `TextureId` type and has the given `size`,
    /// - previous mappings are kept if they came from the same `source` node and their region
    ///   is not fully contained in the region of one of the new mappings.
    ///
    /// While a previous texture is kept, new mappings with a format different from the recorded
    /// one are dropped with a warning. Any non-`Mem` state stored for the context is discarded.
    #[cfg(feature = "wayland_frontend")]
    fn insert_mapping<
        R: GraphicsApi + 'static,
        T: GraphicsApi + 'static,
        I: Iterator<
            Item = (
                Rectangle<i32, BufferCoords>,
                <<T::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping,
            ),
        >,
    >(
        &mut self,
        source: DrmNode,
        render_id: &ContextId<<<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>,
        size: Size<i32, BufferCoords>,
        new_mappings: I,
    ) where
        <T::Device as ApiDevice>::Renderer: ExportMem,
        <<T::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    {
        let mut tex_ref = self.0.lock().unwrap();
        let tex = &mut *tex_ref;

        let render_id = render_id.erased();
        let (old_texture, old_mapping, external_shadow) = tex
            .textures
            .remove(&render_id)
            .map(|single| match single {
                GpuSingleTexture::Mem {
                    texture,
                    mappings,
                    external_shadow,
                } => (texture, mappings, external_shadow),
                _ => (None, None, None),
            })
            .unwrap_or((None, None, None));
        let old_texture = old_texture.filter(|candidate| {
            // `candidate` is a `&Box<dyn Any>`. Passing it directly would unsize the *box* into
            // a `&dyn Any` (because `Box<dyn Any>: Any`), making the downcast fail every time and
            // the old texture never being reused. Deref explicitly to inspect the boxed value.
            <dyn Any>::downcast_ref::<<<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>(
                &**candidate,
            )
            .map(|texture| texture.size())
                == Some(size)
        });

        let mut mappings = old_mapping
            .filter(|(old_src, _)| *old_src == source)
            .map(|(_, mappings)| mappings)
            .unwrap_or_default();

        // don't keep old mappings that are superseded by new ones
        let format = tex.format;
        let new_mappings = new_mappings
            .filter(|(_, mapping)| {
                let mapping_fmt = TextureMapping::format(mapping);
                if old_texture.is_some() && Some(mapping_fmt) != format {
                    warn!(has = ?format, got = ?mapping_fmt, "Multi-SubTexture Mapping with wrong format!");
                    false
                } else {
                    tex.format = Some(mapping_fmt);
                    true
                }
            })
            .map(|(r, m)| (r, Box::new(m) as Box<dyn Any + 'static>))
            .collect::<Vec<_>>();
        mappings.retain(|(region, _)| {
            !new_mappings
                .iter()
                .any(|(new_region, _)| new_region.contains_rect(*region))
        });
        mappings.extend(new_mappings);

        tex.textures.insert(
            render_id,
            GpuSingleTexture::Mem {
                mappings: Some((source, mappings)),
                texture: old_texture,
                external_shadow,
            },
        );
    }
}

impl Texture for MultiTexture {
    /// Size recorded in the shared texture state.
    fn size(&self) -> Size<i32, BufferCoords> {
        let internal = self.0.lock().unwrap();
        internal.size
    }

    /// Width component of [`Self::size`], cast to `u32`.
    fn width(&self) -> u32 {
        self.size().w as u32
    }

    /// Height component of [`Self::size`], cast to `u32`.
    fn height(&self) -> u32 {
        self.size().h as u32
    }

    /// Format recorded in the shared texture state, if any.
    fn format(&self) -> Option<Fourcc> {
        let internal = self.0.lock().unwrap();
        internal.format
    }
}

impl<R: GraphicsApi, T: GraphicsApi> Frame for MultiFrame<'_, '_, '_, '_, R, T>
where
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    type Error = Error<R, T>;
    type TextureId = MultiTexture;

    /// Context id of the wrapped frame, mapped to the [`MultiTexture`] texture type.
    fn context_id(&self) -> ContextId<MultiTexture> {
        let inner = self.frame.as_ref().unwrap();
        inner.context_id().map()
    }

    /// Records `at` as damage and forwards the clear to the wrapped frame.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn clear(&mut self, color: Color32F, at: &[Rectangle<i32, Physical>]) -> Result<(), Error<R, T>> {
        self.damage.extend(at);
        let inner = self.frame.as_mut().unwrap();
        inner.clear(color, at).map_err(Error::Render)
    }

    /// Records `damage` (offset by the location of `dst`) and forwards the draw to the wrapped frame.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn draw_solid(
        &mut self,
        dst: Rectangle<i32, Physical>,
        damage: &[Rectangle<i32, Physical>],
        color: Color32F,
    ) -> Result<(), Self::Error> {
        let offset = dst.loc;
        let shifted = damage.iter().map(|rect| {
            let mut moved = *rect;
            moved.loc += offset;
            moved
        });
        self.damage.extend(shifted);

        let inner = self.frame.as_mut().unwrap();
        inner.draw_solid(dst, damage, color).map_err(Error::Render)
    }

    /// Renders the sub-texture stored for the wrapped frame's context.
    ///
    /// A pending sync point of the texture is waited for first (falling back to a blocking
    /// wait if the frame cannot import it). If no texture is stored for this context,
    /// a warning is logged and nothing is rendered.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn render_texture_from_to(
        &mut self,
        texture: &MultiTexture,
        src: Rectangle<f64, BufferCoords>,
        dst: Rectangle<i32, Physical>,
        damage: &[Rectangle<i32, Physical>],
        opaque_regions: &[Rectangle<i32, Physical>],
        src_transform: Transform,
        alpha: f32,
    ) -> Result<(), Error<R, T>> {
        let render_id = self.frame.as_mut().unwrap().context_id();
        let pending_sync = texture.needs_synchronization::<R>(&render_id);

        match texture.get::<R>(&render_id) {
            Some(inner_texture) => {
                let offset = dst.loc;
                self.damage.extend(damage.iter().map(|rect| {
                    let mut moved = *rect;
                    moved.loc += offset;
                    moved
                }));

                let inner = self.frame.as_mut().unwrap();
                if let Some(sync) = pending_sync {
                    if let Err(err) = inner.wait(&sync) {
                        trace!(?err, "Failed to import sync point, blocking");
                        let _ = sync.wait(); // ignore interrupt errors
                    }
                }
                inner
                    .render_texture_from_to(
                        &inner_texture,
                        src,
                        dst,
                        damage,
                        opaque_regions,
                        src_transform,
                        alpha,
                    )
                    .map_err(Error::Render)
            }
            None => {
                warn!(
                    "Failed to render texture {:?}, import for wrong devices {:?}? {:?}",
                    Arc::as_ptr(&texture.0),
                    self.node,
                    texture.0.lock().unwrap(),
                );
                Ok(())
            }
        }
    }

    /// Transformation of the wrapped frame.
    fn transformation(&self) -> Transform {
        let inner = self.frame.as_ref().unwrap();
        inner.transformation()
    }

    /// Makes the wrapped frame wait for `sync`.
    #[profiling::function]
    fn wait(&mut self, sync: &sync::SyncPoint) -> Result<(), Self::Error> {
        let inner = self.frame.as_mut().unwrap();
        inner.wait(sync).map_err(Error::Render)
    }

    /// Finishes the frame through `finish_internal`.
    #[profiling::function]
    fn finish(mut self) -> Result<sync::SyncPoint, Self::Error> {
        self.finish_internal()
    }
}

#[cfg(feature = "wayland_frontend")]
impl<R: GraphicsApi, T: GraphicsApi> ImportMemWl for MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: ImportMemWl,
    // We need this because the Renderer-impl does and ImportMem requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Imports the shm `buffer` on the render-gpu's renderer and wraps the result in a [`MultiTexture`].
    ///
    /// Fails with `Error::Render` if the renderer's import fails and with `Error::ImportFailed`
    /// if the buffer contents cannot be accessed or its shm format has no fourcc equivalent.
    // NOTE(review): unlike `import_dma_buffer`, nothing here stores the texture state in
    // `surface.data_map`, so `from_surface` only reuses state another path has inserted — confirm
    // this is intended.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn import_shm_buffer(
        &mut self,
        buffer: &wl_buffer::WlBuffer,
        surface: Option<&crate::wayland::compositor::SurfaceData>,
        damage: &[Rectangle<i32, BufferCoords>],
    ) -> Result<<Self as RendererSuper>::TextureId, <Self as RendererSuper>::Error> {
        let shm_texture = self
            .render
            .renderer_mut()
            .import_shm_buffer(buffer, surface, damage)
            .map_err(Error::Render)?;
        // Read size and format from the buffer; shm buffers are described with a linear modifier.
        let (dimensions, format) = shm::with_buffer_contents(buffer, |_, _, data| {
            Ok((
                (data.width, data.height).into(),
                shm::shm_format_to_fourcc(data.format)
                    .map(|code| Format {
                        code,
                        modifier: Modifier::Linear,
                    })
                    .ok_or(Error::ImportFailed)?,
            ))
        })
        // First `?`: failure to access the buffer; second `?`: error produced inside the closure.
        .map_err(|_| Error::ImportFailed)??;
        let mut texture = MultiTexture::from_surface(surface, dimensions, format);
        texture.insert_texture::<R>(&self.render.renderer().context_id(), shm_texture);
        Ok(texture)
    }

    /// The shm formats supported by the render-gpu's renderer.
    fn shm_formats(&self) -> Box<dyn Iterator<Item = wl_shm::Format>> {
        ImportMemWl::shm_formats(self.render.renderer())
    }
}

impl<R: GraphicsApi, T: GraphicsApi> ImportMem for MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: ImportMem,
    // We need this because the Renderer-impl does and ImportMem requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Uploads `data` through the render-gpu's renderer and wraps the resulting texture
    /// in a fresh [`MultiTexture`] (recorded with a linear modifier).
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn import_memory(
        &mut self,
        data: &[u8],
        format: Fourcc,
        size: Size<i32, BufferCoords>,
        flipped: bool,
    ) -> Result<<Self as RendererSuper>::TextureId, <Self as RendererSuper>::Error> {
        let mem_texture = self
            .render
            .renderer_mut()
            .import_memory(data, format, size, flipped)
            .map_err(Error::Render)?;

        let buffer_format = Format {
            code: format,
            modifier: Modifier::Linear,
        };
        let mut texture = MultiTexture::new(size, buffer_format);
        let render_id = self.render.renderer().context_id();
        texture.insert_texture::<R>(&render_id, mem_texture);
        Ok(texture)
    }

    /// Updates `region` of the render-gpu's sub-texture of `texture` with `data`.
    ///
    /// Fails with `Error::MismatchedDevice` if `texture` holds no texture for the render-gpu's renderer.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn update_memory(
        &mut self,
        texture: &<Self as RendererSuper>::TextureId,
        data: &[u8],
        region: Rectangle<i32, BufferCoords>,
    ) -> Result<(), <Self as RendererSuper>::Error> {
        let render_id = self.render.renderer().context_id();
        let mem_texture = match texture.get::<R>(&render_id) {
            Some(found) => found,
            None => return Err(Error::MismatchedDevice(*self.render.node())),
        };
        let renderer = self.render.renderer_mut();
        renderer
            .update_memory(&mem_texture, data, region)
            .map_err(Error::Render)
    }

    /// The memory formats supported by the render-gpu's renderer.
    fn mem_formats(&self) -> Box<dyn Iterator<Item = Fourcc>> {
        let renderer = self.render.renderer();
        ImportMem::mem_formats(renderer)
    }
}

#[cfg(feature = "wayland_frontend")]
impl<R: GraphicsApi, T: GraphicsApi> ImportDmaWl for MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: ImportDmaWl + ImportMem + ExportMem,
    <T::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem,
    <<R::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    <<T::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    T: 'static,
    // We need this because the Renderer-impl does and ImportDma requires Renderer
    R: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Imports the dmabuf attached to `buffer`, reusing the texture state cached on `surface`.
    ///
    /// On success the shared texture state is stored in the surface's `data_map`
    /// (unless an entry is already present), so later imports can find it again.
    ///
    /// Panics if `buffer` is not a dmabuf-backed buffer.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn import_dma_buffer(
        &mut self,
        buffer: &wl_buffer::WlBuffer,
        surface: Option<&SurfaceData>,
        damage: &[Rectangle<i32, BufferCoords>],
    ) -> Result<<Self as RendererSuper>::TextureId, <Self as RendererSuper>::Error> {
        let dmabuf = get_dmabuf(buffer).expect("import_dma_buffer without checking buffer type?");
        let texture = MultiTexture::from_surface(surface, dmabuf.size(), dmabuf.format());
        // Keep a handle to the shared state, `texture` itself is moved into the import below.
        let shared_state = Arc::clone(&texture.0);

        let imported = self.import_dmabuf_internal(dmabuf, texture, Some(damage));
        if let (Ok(_), Some(surface)) = (&imported, surface) {
            surface.data_map.insert_if_missing_threadsafe(|| shared_state);
        }
        imported
    }
}

impl<R: GraphicsApi, T: GraphicsApi> ImportDma for MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem,
    <<R::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    <<T::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    T: 'static,
    // We need this because the Renderer-impl does and ImportDma requires Renderer
    R: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// The dmabuf formats supported by the render-gpu's renderer.
    fn dmabuf_formats(&self) -> FormatSet {
        let renderer = self.render.renderer();
        ImportDma::dmabuf_formats(renderer)
    }

    /// Whether the render-gpu's renderer supports the dmabuf `format`.
    fn has_dmabuf_format(&self, format: Format) -> bool {
        let renderer = self.render.renderer();
        ImportDma::has_dmabuf_format(renderer, format)
    }

    /// Imports `dmabuf` into a fresh [`MultiTexture`] via `import_dmabuf_internal`.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn import_dmabuf(
        &mut self,
        dmabuf: &Dmabuf,
        damage: Option<&[Rectangle<i32, BufferCoords>]>,
    ) -> Result<<Self as RendererSuper>::TextureId, <Self as RendererSuper>::Error> {
        let (size, format) = (dmabuf.size(), dmabuf.format());
        let fresh_texture = MultiTexture::new(size, format);
        self.import_dmabuf_internal(dmabuf, fresh_texture, damage)
    }
}

/// Imports `dmabuf` on the device it belongs to and stores the result in `texture`.
///
/// If the dmabuf already knows its node, the import is attempted on exactly the device with
/// that node (`render`, then `target`, then one of `others`). If the node is unknown, every
/// device that reports it can do cross-device imports is tried in that same order, and the
/// node of the first device that succeeds is recorded on the dmabuf.
///
/// Returns the node the dmabuf was imported on.
///
/// Fails with `Error::DeviceMissing` if no matching (or no succeeding) device is found, and
/// with `Error::Render` / `Error::Target` if the import on the device of a known node fails.
fn import_on_src_node<'a, R, T>(
    dmabuf: &Dmabuf,
    damage: Option<&[Rectangle<i32, BufferCoords>]>,
    texture: &mut MultiTexture,
    render: &mut R::Device,
    mut target: Option<&mut T::Device>,
    mut others: impl Iterator<Item = &'a mut R::Device>,
) -> Result<DrmNode, Error<R, T>>
where
    R: GraphicsApi + 'static,
    <R as GraphicsApi>::Device: 'a,
    <<R as GraphicsApi>::Device as ApiDevice>::Renderer: Renderer + ImportDma,
    <<<R as GraphicsApi>::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
    T: GraphicsApi + 'static,
    <<T as GraphicsApi>::Device as ApiDevice>::Renderer: Renderer + ImportDma,
    <<<T as GraphicsApi>::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
{
    match dmabuf.node() {
        // The dmabuf knows where it lives: import on that device, errors are fatal.
        Some(node) => {
            if node == *render.node() {
                let renderer = render.renderer_mut();
                let imported = renderer.import_dmabuf(dmabuf, damage).map_err(Error::Render)?;
                texture.insert_texture::<R>(&renderer.context_id(), imported);
            } else if target.as_ref().is_some_and(|target| node == *target.node()) {
                // `unwrap` cannot fail, the condition above only holds for `Some`.
                let renderer = target.unwrap().renderer_mut();
                let imported = renderer.import_dmabuf(dmabuf, damage).map_err(Error::Target)?;
                texture.insert_texture::<T>(&renderer.context_id(), imported);
            } else if let Some(other) = others.find(|other| node == *other.node()) {
                let renderer = other.renderer_mut();
                let imported = renderer.import_dmabuf(dmabuf, damage).map_err(Error::Render)?;
                texture.insert_texture::<R>(&renderer.context_id(), imported);
            } else {
                return Err(Error::DeviceMissing);
            };

            Ok(node)
        }
        // Unknown origin: probe the devices, failed imports are only logged at debug level.
        None => {
            // try them all
            let node = if let Some(imported) = render
                .can_do_cross_device_imports()
                .then(|| {
                    render
                        .renderer_mut()
                        .import_dmabuf(dmabuf, damage)
                        .inspect_err(|err| {
                            debug!(?err, "failed to import dmabuf on render node {0}", render.node())
                        })
                        .ok()
                })
                .flatten()
            {
                texture.insert_texture::<R>(&render.renderer().context_id(), imported);
                *render.node()
            } else if let Some(imported) = target.as_mut().and_then(|target| {
                target
                    .can_do_cross_device_imports()
                    .then(|| {
                        target
                            .renderer_mut()
                            .import_dmabuf(dmabuf, damage)
                            .inspect_err(|err| {
                                debug!(?err, "failed to import dmabuf on target node {0}", target.node())
                            })
                            .ok()
                    })
                    .flatten()
            }) {
                // `unwrap` cannot fail, an import can only have succeeded on an existing target.
                let target = target.as_ref().unwrap();
                texture.insert_texture::<T>(&target.renderer().context_id(), imported);
                *target.node()
            } else if let Some((other, imported)) = others.find_map(|other| {
                other
                    .can_do_cross_device_imports()
                    .then(|| {
                        other
                            .renderer_mut()
                            .import_dmabuf(dmabuf, damage)
                            .inspect_err(|err| {
                                debug!(?err, "failed to import dmabuf on other node {0}", other.node())
                            })
                            .ok()
                            .map(|imported| (other, imported))
                    })
                    .flatten()
            }) {
                texture.insert_texture::<R>(&other.renderer().context_id(), imported);
                *other.node()
            } else {
                return Err(Error::DeviceMissing);
            };
            // Remember the node, so the next import of this dmabuf takes the direct path above.
            dmabuf.set_node(node);

            Ok(node)
        }
    }
}

fn dma_shadow_copy<S, T>(
    src_texture: &<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId,
    damage: Option<&[Rectangle<i32, BufferCoords>]>,
    slot: &mut Option<(Dmabuf, Box<dyn Any + 'static>, Option<SyncPoint>)>,
    src: &mut S::Device,
    mut target: Option<&mut T::Device>,
) -> Result<(), Error<S, T>>
where
    S: GraphicsApi,
    T: GraphicsApi,
    <S::Device as ApiDevice>::Renderer: Renderer + ImportDma + Bind<Dmabuf>,
    <T::Device as ApiDevice>::Renderer: Renderer + ImportDma,
    <<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
{
    if target
        .as_ref()
        .is_some_and(|target| !target.can_do_cross_device_imports())
    {
        return Err(Error::ImportFailed);
    }

    let format = src_texture.format().unwrap_or(Fourcc::Abgr8888);
    let read_formats = if let Some(target) = target.as_ref() {
        ImportDma::dmabuf_formats(target.renderer())
    } else {
        ImportDma::dmabuf_formats(src.renderer())
    }
    .iter()
    .filter(|f| f.code == format)
    .copied()
    .collect::<FormatSet>();
    let write_formats = Bind::<Dmabuf>::supported_formats(src.renderer()).unwrap_or_default();
    let modifiers = read_formats
        .intersection(&write_formats)
        .map(|f| f.modifier)
        .filter(|m| *m != Modifier::Invalid)
        .collect::<Vec<_>>();

    if modifiers.is_empty() {
        return Err(Error::ImportFailed);
    }

    let ((shadow_buffer, _, existing_sync_point), is_new_buffer) = if slot
        .as_ref()
        .is_some_and(|(buffer, _, _)| buffer.format().code == format)
    {
        (slot.as_mut().unwrap(), false)
    } else {
        let shadow_buffer = src
            .allocator()
            .create_buffer(src_texture.width(), src_texture.height(), format, &modifiers)
            .map_err(Error::AllocatorError)?;

        let target_texture = if let Some(target) = target.as_mut() {
            Box::<<<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>::new(
                target
                    .renderer_mut()
                    .import_dmabuf(&shadow_buffer, None)
                    .map_err(Error::Target)?,
            ) as Box<dyn Any + 'static>
        } else {
            Box::<<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>::new(
                src.renderer_mut()
                    .import_dmabuf(&shadow_buffer, None)
                    .map_err(Error::Render)?,
            ) as Box<dyn Any + 'static>
        };
        (slot.get_or_insert((shadow_buffer, target_texture, None)), true)
    };

    let src_renderer = src.renderer_mut();
    if let Some(sync) = existing_sync_point.take() {
        if let Err(err) = src_renderer.wait(&sync) {
            debug!(?err, "Unable to wait for existing sync_point, blocking..");
            let _ = sync.wait(); // ignore interrupt errors
        }
    }
    let mut framebuffer = src_renderer.bind(shadow_buffer).map_err(Error::Render)?;
    let shadow_size = Size::from((src_texture.width() as i32, src_texture.height() as i32));
    let mut frame = src_renderer
        .render(&mut framebuffer, shadow_size, Transform::Normal)
        .map_err(Error::Render)?;

    let damage_slice = [Rectangle::from_size(shadow_size)];
    let damage = unsafe {
        std::mem::transmute::<Option<&[Rectangle<i32, BufferCoords>]>, Option<&[Rectangle<i32, Physical>]>>(
            damage,
        )
    } // TODO: use bytemuck for stuff like this?
    .filter(|_| !is_new_buffer)
    .unwrap_or(&damage_slice);

    frame
        .clear(Color32F::TRANSPARENT, damage)
        .map_err(Error::Render)?;
    frame
        .render_texture_from_to(
            src_texture,
            Rectangle::from_size(src_texture.size()).to_f64(),
            Rectangle::from_size(shadow_size),
            damage,
            &[],
            Transform::Normal,
            1.0,
        )
        .map_err(Error::Render)?;
    *existing_sync_point = Some(frame.finish().map_err(Error::Render)?);

    // shadow buffer contains our copy and is readable by target and the original buffer was never migrated
    Ok(())
}

/// A boxed texture mapping of `S`'s renderer, paired with the buffer region it was copied from.
type BoxedTextureMappingAndDamage<S> = (
    Box<<<<S as GraphicsApi>::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping>,
    Rectangle<i32, BufferCoords>,
);

/// State of a memory copy: optional mappings created on the source `S`
/// and the optional texture on the target `T` they are uploaded into.
type MemTexture<S, T> = (
    Option<Vec<BoxedTextureMappingAndDamage<S>>>,
    Option<Box<<<<T as GraphicsApi>::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>>,
);

/// Copies `src_texture` from the `src` device into a texture on the `target` device
/// by going through texture mappings (`copy_texture` + `map_texture` on `src`,
/// `import_memory` / `update_memory` on `target`).
///
/// `slot` carries the state between calls: mappings handed in are consumed, the target
/// texture is created on first use and updated in place afterwards. With an empty `slot`
/// the whole texture is copied and a new target texture is stored.
///
/// `damage` is clamped to the texture and overlapping / touching rectangles are merged;
/// `None` means the whole texture.
///
/// Fails with `Error::Render` for failures on `src` and `Error::Target` for failures on `target`.
fn mem_copy<S, T>(
    src_texture: &<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId,
    damage: Option<&[Rectangle<i32, BufferCoords>]>,
    slot: &mut Option<MemTexture<S, T>>,
    src: &mut S::Device,
    target: &mut T::Device,
) -> Result<(), Error<S, T>>
where
    S: GraphicsApi,
    T: GraphicsApi,
    <S::Device as ApiDevice>::Renderer: Renderer + ExportMem + Bind<Dmabuf>,
    <T::Device as ApiDevice>::Renderer: Renderer + ImportMem,
{
    let format = src_texture
        .format()
        // Check the target device supports the format, if not use 8-bit format
        .filter(|format| target.renderer().mem_formats().any(|fmt| fmt == *format))
        // The GL spec *always* supports this format.
        // TODO: Re-evaluate this, once we support vulkan
        .unwrap_or(Fourcc::Abgr8888);

    let texture_rect = Rectangle::from_size((src_texture.width() as i32, src_texture.height() as i32).into());
    // Clamp the damage to the texture and merge rectangles that overlap or touch.
    let damage = damage.map(|damage| {
        damage
            .iter()
            .flat_map(|rect| rect.intersection(texture_rect))
            .fold(Vec::<Rectangle<i32, BufferCoords>>::new(), |damage, mut rect| {
                // replace with drain_filter, when that becomes stable to reuse the original Vec's memory
                let (overlapping, mut new_damage): (Vec<_>, Vec<_>) = damage
                    .into_iter()
                    .partition(|other| other.overlaps_or_touches(rect));

                for overlap in overlapping {
                    rect = rect.merge(overlap);
                }
                new_damage.push(rect);
                new_damage
            })
    });

    if slot.is_some() {
        let (mapping, texture) = slot.as_mut().unwrap();
        // `take` consumes the stored mappings, they are not written back to the slot.
        let mappings = match mapping.take() {
            Some(mut mappings) => {
                // Mappings in another format cannot be uploaded into the same texture.
                mappings.retain(|(mapping, _)| TextureMapping::format(&**mapping) == format);

                // Damage that is not yet covered by one of the existing mappings.
                let damage_slice = [texture_rect];
                let new_damage = damage
                    .as_deref()
                    .unwrap_or(&damage_slice)
                    .iter()
                    .filter(|rect| !mappings.iter().any(|(_, region)| region.contains_rect(**rect)))
                    .copied()
                    .collect::<Vec<_>>();

                // Without a target texture the first upload has to provide the full buffer,
                // so anything but a single up-to-date full-size mapping is replaced by one.
                if texture.is_none()
                    && (mappings.len() != 1
                        || <dyn TextureMapping>::size(&*mappings[0].0) != texture_rect.size
                        || !new_damage.is_empty())
                {
                    let mapping = src
                        .renderer_mut()
                        .copy_texture(src_texture, texture_rect, format)
                        .map_err(Error::Render)?;
                    trace!("Creating mapping for: {:?}", damage);
                    mappings = vec![(Box::new(mapping), texture_rect)];
                } else {
                    // Otherwise only the uncovered damage needs additional mappings.
                    mappings.extend(
                        new_damage
                            .into_iter()
                            .map(|damage| {
                                let mapping = src
                                    .renderer_mut()
                                    .copy_texture(src_texture, damage, format)
                                    .map_err(Error::Render)?;
                                trace!("Creating mapping for: {:?}", damage);
                                Ok((Box::new(mapping), damage))
                            })
                            .collect::<Result<Vec<_>, Error<S, T>>>()?,
                    );
                }

                mappings
            }
            None => {
                // No mappings were handed in: copy the whole texture.
                let mapping = src
                    .renderer_mut()
                    .copy_texture(src_texture, texture_rect, format)
                    .map_err(Error::Render)?;
                trace!("Creating mapping for: {:?}", damage);
                vec![(Box::new(mapping), texture_rect)]
            }
        };

        // Upload every mapping: into the existing target texture, or as a new texture if none exists yet.
        for (mapping, damage) in mappings {
            let data = src.renderer_mut().map_texture(&mapping).map_err(Error::Render)?;
            if let Some(texture) = texture.as_mut() {
                trace!(
                    "Updating texture {:?} with mapping at {:?}",
                    texture.size(),
                    damage,
                );
                target
                    .renderer_mut()
                    .update_memory(texture, data, damage)
                    .map_err(Error::Target)?;
            } else {
                trace!("Importing mapping as full buffer {:?}", mapping.size());
                let target_texture = target
                    .renderer_mut()
                    .import_memory(data, format, texture_rect.size, false)
                    .map_err(Error::Target)?;
                *texture = Some(Box::new(target_texture));
            }
        }
    } else {
        // Nothing to reuse: copy the whole texture and create the target texture from it.
        let mapping = src
            .renderer_mut()
            .copy_texture(src_texture, texture_rect, format)
            .map_err(Error::Render)?;
        trace!("Importing mapping as full buffer {:?}", mapping.size());
        let data = src.renderer_mut().map_texture(&mapping).map_err(Error::Render)?;

        let target_texture = target
            .renderer_mut()
            .import_memory(data, format, texture_rect.size, false)
            .map_err(Error::Target)?;
        *slot = Some((None, Some(Box::new(target_texture))));
    };

    Ok(())
}

/// Copies `src_texture` from the `src` device into a texture usable by the `target`
/// device, reusing whatever state is cached in `target_texture`.
///
/// The copy path depends on the cached entry:
/// - A `Dma` entry is refreshed through `dma_shadow_copy`.
/// - A `Mem` entry is refreshed through `mem_copy`; if it carries an `external_shadow`
///   that shadow is refreshed first and then used as the source of the memory copy.
/// - Without an entry `dma_shadow_copy` is attempted first. If it fails, the copy falls
///   back to `mem_copy`, creating an `external_shadow` beforehand when the source
///   renderer reports that it cannot read `src_texture` directly.
///
/// A cached `Direct` entry, or a `Mem` entry whose mappings were recorded for a node
/// other than `src`'s, is discarded before any of the above.
///
/// `damage` is forwarded unchanged to the copy helpers.
///
/// The cached entry is taken out of `target_texture` while the copy runs. It is written
/// back from the helper's slot once the final helper returns (successfully or not); if
/// the function bails out early through `?`, `target_texture` is left as `None`.
fn texture_copy<S, T>(
    src: &mut S::Device,
    target: &mut T::Device,
    src_texture: &<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId,
    target_texture: &mut Option<GpuSingleTexture>,
    damage: Option<&[Rectangle<i32, BufferCoords>]>,
) -> Result<(), Error<S, T>>
where
    S: GraphicsApi,
    T: GraphicsApi,
    <S::Device as ApiDevice>::Renderer: Renderer + ImportDma + ExportMem + Bind<Dmabuf>,
    <<S::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    <T::Device as ApiDevice>::Renderer: Renderer + ImportDma + ImportMem,
    <<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: 'static,
{
    // we need to reset in a few cases
    // - the cached entry is a `Direct` texture, which this function never updates
    // - the cached memory mappings were recorded for a different source node
    if matches!(&target_texture, Some(GpuSingleTexture::Direct(_)))
        || matches!(
            &target_texture,
            Some(GpuSingleTexture::Mem { mappings: Some((node, _)), .. }) if node != src.node()
        )
    {
        *target_texture = None;
    }

    match target_texture.take() {
        // `Direct` entries were cleared by the reset above.
        Some(GpuSingleTexture::Direct(_)) => unreachable!(),
        Some(GpuSingleTexture::Mem {
            mut external_shadow,
            texture,
            mappings,
        }) => {
            // Refresh the existing shadow first. The helper is invoked with `S` for both
            // apis and without a target device.
            // NOTE(review): presumably this performs a copy local to the source device - confirm
            // against `dma_shadow_copy`.
            if let Some((dmabuf, texture)) = external_shadow.take() {
                let mut slot = Some((dmabuf, texture, None));
                dma_shadow_copy::<S, S>(src_texture, damage, &mut slot, src, None)
                    .map_err(Error::generalize::<T>)?;
                external_shadow = slot.map(|(dmabuf, texture, sync_point)| {
                    // Wait for the shadow copy before it is read back below; if the
                    // renderer-side wait fails, fall back to waiting on the sync point itself.
                    if let Some(sync) = sync_point {
                        // ignore interrupt errors
                        src.renderer_mut().wait(&sync).unwrap_or_else(|_| {
                            let _ = sync.wait();
                        });
                    }
                    (dmabuf, texture)
                });
            }

            // Downcast the type-erased mappings and texture back to the concrete types
            // `mem_copy` works with. Note that the tuple order flips from
            // `(damage, mapping)` (cache) to `(mapping, damage)` (slot).
            let mut slot = Some((
                mappings.map(|(_, mappings)| mappings.into_iter().map(|(damage, mapping)|
                    (mapping.downcast::<<<S::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping>().unwrap(), damage)
                ).collect::<Vec<_>>()),
                texture.map(|texture| texture.downcast::<<<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>().unwrap())
            ));

            // Read from the shadow texture if there is one, from the original texture otherwise.
            let src_texture = external_shadow
                .as_ref()
                .map(|(_, texture)| {
                    texture
                        .downcast_ref::<<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>()
                        .unwrap()
                })
                .unwrap_or(src_texture);
            let res = mem_copy::<S, T>(src_texture, damage, &mut slot, src, target);

            // Store the slot back in type-erased form, whatever the outcome of the copy.
            // The mappings are tagged with the source node so that a later call with a
            // different source resets them (see the check at the top).
            *target_texture = slot.map(|(mappings, texture)| GpuSingleTexture::Mem {
                external_shadow,
                texture: texture.map(|texture| texture as Box<dyn Any + 'static>),
                mappings: mappings.map(|mappings| {
                    (
                        *src.node(),
                        mappings
                            .into_iter()
                            .map(|(mapping, damage)| (damage, mapping as Box<dyn Any + 'static>))
                            .collect(),
                    )
                }),
            });
            res
        }
        Some(GpuSingleTexture::Dma {
            texture,
            dmabuf,
            sync,
        }) => {
            // Reuse the cached dmabuf path; the slot is written back even if the copy failed.
            let mut slot = Some((dmabuf, texture, sync));
            let res = dma_shadow_copy::<S, T>(src_texture, damage, &mut slot, src, Some(target));
            *target_texture = slot.map(|(dmabuf, texture, sync)| GpuSingleTexture::Dma {
                texture,
                dmabuf,
                sync,
            });
            res
        }
        None => {
            // Nothing cached (or the cache was reset): try the dmabuf path first.
            let mut slot = None;
            match dma_shadow_copy::<S, T>(src_texture, damage, &mut slot, src, Some(target)) {
                Ok(()) => {
                    *target_texture = slot.map(|(dmabuf, texture, sync)| GpuSingleTexture::Dma {
                        texture: texture as Box<dyn Any + 'static>,
                        dmabuf,
                        sync,
                    });
                    Ok(())
                }
                Err(err) => {
                    trace!(?err, "Dma shadow copy failed, falling back to cpu");

                    // If the source renderer cannot read the texture directly, create a
                    // shadow copy first and read from that instead.
                    let mut external_shadow = None;
                    if !ExportMem::can_read_texture(src.renderer_mut(), src_texture).map_err(Error::Render)? {
                        let mut slot = None;
                        dma_shadow_copy::<S, S>(src_texture, damage, &mut slot, src, None)
                            .map_err(Error::generalize::<T>)?;
                        external_shadow = slot.map(|(dmabuf, texture, sync_point)| {
                            // Wait for the shadow copy before it is read back below; if the
                            // renderer-side wait fails, fall back to the sync point itself.
                            if let Some(sync) = sync_point {
                                // ignore interrupt errors
                                src.renderer_mut().wait(&sync).unwrap_or_else(|_| {
                                    let _ = sync.wait();
                                });
                            }
                            (dmabuf, texture as Box<dyn Any + 'static>)
                        });
                    };

                    let mut slot = None;
                    // Read from the shadow texture if one was created, from the original otherwise.
                    let src_texture = external_shadow
                        .as_ref()
                        .map(|(_, texture)| {
                            texture
                                .downcast_ref::<<<S::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>()
                                .unwrap()
                        })
                        .unwrap_or(src_texture);

                    let res = mem_copy::<S, T>(src_texture, damage, &mut slot, src, target);
                    // Cache the result as a `Mem` entry, tagged with the source node.
                    *target_texture = slot.map(|(mappings, texture)| GpuSingleTexture::Mem {
                        texture: texture.map(|texture| texture as Box<dyn Any + 'static>),
                        mappings: mappings.map(|mappings| {
                            (
                                *src.node(),
                                mappings
                                    .into_iter()
                                    .map(|(mapping, damage)| (damage, mapping as Box<dyn Any + 'static>))
                                    .collect(),
                            )
                        }),
                        external_shadow,
                    });
                    res
                }
            }
        }
    }
}

// Internal dmabuf import helper of the `MultiRenderer`.
impl<R: GraphicsApi, T: GraphicsApi> MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem,
    <<R::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    <<T::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: 'static,
    T: 'static,
    // We need this because the Renderer-impl does and ImportDma requires Renderer
    R: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Imports `dmabuf` into `texture` so that it can be used on the render device.
    ///
    /// `import_on_src_node` reports the node the buffer was imported on. If that is the
    /// render node, `texture` is returned as it is. Otherwise the texture imported on
    /// that node (the target device or one of `other_renderers`) is copied to the render
    /// device through `texture_copy`, and the copy is cached in `texture` under the
    /// render renderer's context id.
    ///
    /// `damage` is forwarded to the import and copy helpers.
    ///
    /// # Errors
    ///
    /// Propagates errors of the import and of the copy. Returns `Error::DeviceMissing`
    /// if neither the target device nor any of `other_renderers` matches the source node.
    /// When the copy fails, the previously cached entry for the render device is dropped.
    #[profiling::function]
    fn import_dmabuf_internal(
        &mut self,
        dmabuf: &Dmabuf,
        mut texture: MultiTexture,
        damage: Option<&[Rectangle<i32, BufferCoords>]>,
    ) -> Result<<Self as RendererSuper>::TextureId, <Self as RendererSuper>::Error> {
        let src_node = import_on_src_node::<R, T>(
            dmabuf,
            damage,
            &mut texture,
            self.render,
            self.target.as_mut().map(|target| &mut *target.device),
            self.other_renderers.iter_mut().map(|d| &mut **d),
        )?;

        if src_node == *self.render.node() {
            // when we are on the same node, we are done
            Ok(texture)
        } else {
            // else we need to copy
            let mut texture_internal = texture.0.lock().unwrap();
            let target_id = self.render.renderer().context_id().erased();
            // Take the render device's cached entry out of the map; it is only put back
            // below if the copy succeeds.
            let mut target_texture = texture_internal.textures.remove(&target_id);

            let res = if let Some(target) = self
                .target
                .as_mut()
                .filter(|target| src_node == *target.device.node())
            {
                // The buffer was imported on the target device: copy target -> render.
                let src_id = target.device.renderer().context_id().erased();
                // The entry for the source node is expected to exist and to be `Direct`;
                // anything else panics.
                let src_texture = match texture_internal.textures.get(&src_id).unwrap() {
                    GpuSingleTexture::Direct(tex) => tex
                        .downcast_ref::<<<T::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>()
                        .unwrap(),
                    _ => unreachable!(),
                };

                // `texture_copy::<T, R>` yields an `Error<T, R>`, hence the `transpose`.
                texture_copy::<T, R>(
                    target.device,
                    self.render,
                    src_texture,
                    &mut target_texture,
                    damage,
                )
                .map_err(Error::transpose)
            } else if let Some(other) = self
                .other_renderers
                .iter_mut()
                .find(|other| src_node == *other.node())
            {
                // The buffer was imported on one of the additional render-api devices.
                let src_id = other.renderer().context_id().erased();
                let src_texture = match texture_internal.textures.get(&src_id).unwrap() {
                    GpuSingleTexture::Direct(tex) => tex
                        .downcast_ref::<<<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId>()
                        .unwrap(),
                    _ => unreachable!(),
                };

                texture_copy::<R, R>(other, self.render, src_texture, &mut target_texture, damage)
                    .map_err(Error::generalize::<T>)
            } else {
                Err(Error::DeviceMissing)
            };

            // Only cache the copied texture if the copy succeeded.
            if let Some(target_texture) = target_texture.filter(|_| res.is_ok()) {
                texture_internal.textures.insert(target_id, target_texture);
            }

            // Release the guard before `texture` is moved into the result.
            std::mem::drop(texture_internal);
            res.map(|_| texture)
        }
    }
}

/// [`TextureMapping`]s produced by [`ExportMem`]-implementations of
/// [`MultiRenderer`]s.
///
/// This is an opaque wrapper around the mapping type of either graphics api `A`
/// or graphics api `B`, depending on which renderer created the mapping.
pub struct MultiTextureMapping<A: GraphicsApi, B: GraphicsApi>(TextureMappingInternal<A, B>)
where
    <A::Device as ApiDevice>::Renderer: ExportMem,
    <B::Device as ApiDevice>::Renderer: ExportMem;
/// Private payload of a `MultiTextureMapping`: the mapping of exactly one of the two apis.
enum TextureMappingInternal<A: GraphicsApi, B: GraphicsApi>
where
    <A::Device as ApiDevice>::Renderer: ExportMem,
    <B::Device as ApiDevice>::Renderer: ExportMem,
{
    /// A mapping created by the renderer of api `A`.
    Either(<<A::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping),
    /// A mapping created by the renderer of api `B`.
    Or(<<B::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping),
}
impl<A: GraphicsApi, B: GraphicsApi> fmt::Debug for MultiTextureMapping<A, B>
where
    <A::Device as ApiDevice>::Renderer: ExportMem,
    <B::Device as ApiDevice>::Renderer: ExportMem,
    <<A::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: fmt::Debug,
    <<B::Device as ApiDevice>::Renderer as ExportMem>::TextureMapping: fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.0 {
            TextureMappingInternal::Either(ref mapping) => mapping.fmt(f),
            TextureMappingInternal::Or(ref mapping) => mapping.fmt(f),
        }
    }
}

impl<A: GraphicsApi, B: GraphicsApi> Texture for MultiTextureMapping<A, B>
where
    <A::Device as ApiDevice>::Renderer: ExportMem,
    <B::Device as ApiDevice>::Renderer: ExportMem,
{
    fn size(&self) -> Size<i32, BufferCoords> {
        match self {
            MultiTextureMapping::<A, B>(TextureMappingInternal::Either(x)) => x.size(),
            MultiTextureMapping::<A, B>(TextureMappingInternal::Or(x)) => x.size(),
        }
    }

    fn width(&self) -> u32 {
        match self {
            MultiTextureMapping::<A, B>(TextureMappingInternal::Either(x)) => x.width(),
            MultiTextureMapping::<A, B>(TextureMappingInternal::Or(x)) => x.width(),
        }
    }
    fn height(&self) -> u32 {
        match self {
            MultiTextureMapping::<A, B>(TextureMappingInternal::Either(x)) => x.height(),
            MultiTextureMapping::<A, B>(TextureMappingInternal::Or(x)) => x.height(),
        }
    }
    fn format(&self) -> Option<Fourcc> {
        match self {
            MultiTextureMapping::<A, B>(TextureMappingInternal::Either(x)) => Texture::format(x),
            MultiTextureMapping::<A, B>(TextureMappingInternal::Or(x)) => Texture::format(x),
        }
    }
}
// Both `TextureMapping` queries are answered by the wrapped mapping.
impl<A: GraphicsApi, B: GraphicsApi> TextureMapping for MultiTextureMapping<A, B>
where
    <A::Device as ApiDevice>::Renderer: ExportMem,
    <B::Device as ApiDevice>::Renderer: ExportMem,
{
    fn flipped(&self) -> bool {
        match &self.0 {
            TextureMappingInternal::Either(mapping) => mapping.flipped(),
            TextureMappingInternal::Or(mapping) => mapping.flipped(),
        }
    }

    fn format(&self) -> Fourcc {
        // Fully qualified: `Texture` also has a method called `format`.
        match &self.0 {
            TextureMappingInternal::Either(mapping) => TextureMapping::format(mapping),
            TextureMappingInternal::Or(mapping) => TextureMapping::format(mapping),
        }
    }
}

impl<R: GraphicsApi, T: GraphicsApi> ExportMem for MultiRenderer<'_, '_, R, T>
where
    <T::Device as ApiDevice>::Renderer: ExportMem,
    <R::Device as ApiDevice>::Renderer: ExportMem,
    // We need this because the Renderer-impl does and ExportMem requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    // `Either` carries mappings of the target renderer, `Or` those of the render renderer.
    type TextureMapping = MultiTextureMapping<T, R>;

    /// Copies `region` of `framebuffer` using the renderer the framebuffer belongs to.
    ///
    /// Panics if the framebuffer belongs to the target device but no target is set.
    #[instrument(level = "trace", parent = &self.span, skip(self, framebuffer))]
    #[profiling::function]
    fn copy_framebuffer(
        &mut self,
        framebuffer: &MultiFramebuffer<'_, R, T>,
        region: Rectangle<i32, BufferCoords>,
        format: Fourcc,
    ) -> Result<Self::TextureMapping, <Self as RendererSuper>::Error> {
        match &framebuffer.0 {
            MultiFramebufferInternal::Target(target_fb) => {
                let target_renderer = self.target.as_mut().unwrap().device.renderer_mut();
                match target_renderer.copy_framebuffer(target_fb, region, format) {
                    Ok(mapping) => Ok(MultiTextureMapping(TextureMappingInternal::Either(mapping))),
                    Err(err) => Err(Error::Target(err)),
                }
            }
            MultiFramebufferInternal::Render(render_fb) => {
                let render_renderer = self.render.renderer_mut();
                match render_renderer.copy_framebuffer(render_fb, region, format) {
                    Ok(mapping) => Ok(MultiTextureMapping(TextureMappingInternal::Or(mapping))),
                    Err(err) => Err(Error::Render(err)),
                }
            }
        }
    }

    /// Copies `region` of `texture` on the render device.
    ///
    /// Fails with `Error::MismatchedDevice` if `texture` yields no texture for the
    /// render renderer's context.
    #[instrument(level = "trace", parent = &self.span, skip(self))]
    #[profiling::function]
    fn copy_texture(
        &mut self,
        texture: &Self::TextureId,
        region: Rectangle<i32, BufferCoords>,
        format: Fourcc,
    ) -> Result<Self::TextureMapping, Self::Error> {
        let Some(tex) = texture.get::<R>(&self.render.renderer().context_id()) else {
            return Err(Error::MismatchedDevice(*self.render.node()));
        };

        match self.render.renderer_mut().copy_texture(&tex, region, format) {
            Ok(mapping) => Ok(MultiTextureMapping(TextureMappingInternal::Or(mapping))),
            Err(err) => Err(Error::Render(err)),
        }
    }

    /// Asks the render renderer whether it can read back `texture`.
    ///
    /// Fails with `Error::MismatchedDevice` if `texture` yields no texture for the
    /// render renderer's context.
    fn can_read_texture(&mut self, texture: &Self::TextureId) -> Result<bool, Self::Error> {
        let Some(tex) = texture.get::<R>(&self.render.renderer().context_id()) else {
            return Err(Error::MismatchedDevice(*self.render.node()));
        };

        match self.render.renderer_mut().can_read_texture(&tex) {
            Ok(readable) => Ok(readable),
            Err(err) => Err(Error::Render(err)),
        }
    }

    /// Maps `texture_mapping` through the renderer that created it.
    ///
    /// Panics if the mapping stems from the target device but no target is set.
    #[instrument(level = "trace", parent = &self.span, skip(self, texture_mapping))]
    #[profiling::function]
    fn map_texture<'c>(
        &mut self,
        texture_mapping: &'c Self::TextureMapping,
    ) -> Result<&'c [u8], <Self as RendererSuper>::Error> {
        let MultiTextureMapping(inner) = texture_mapping;
        match inner {
            TextureMappingInternal::Either(target_mapping) => {
                let target_renderer = self.target.as_mut().unwrap().device.renderer_mut();
                target_renderer.map_texture(target_mapping).map_err(Error::Target)
            }
            TextureMappingInternal::Or(render_mapping) => {
                let render_renderer = self.render.renderer_mut();
                render_renderer.map_texture(render_mapping).map_err(Error::Render)
            }
        }
    }
}

impl<'frame, 'buffer, R: GraphicsApi, T: GraphicsApi> BlitFrame<MultiFramebuffer<'buffer, R, T>>
    for MultiFrame<'_, '_, 'frame, 'buffer, R, T>
where
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Frame<'frame, 'buffer>:
        BlitFrame<<<R::Device as ApiDevice>::Renderer as RendererSuper>::Framebuffer<'buffer>>,
    <T::Device as ApiDevice>::Renderer: Blit,
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Blits from the framebuffer this frame renders to into `to`.
    ///
    /// The frame is flushed first. With a target device the blit runs on the target
    /// renderer and is waited for; otherwise it is forwarded to the render frame.
    /// Panics if `to` does not belong to the device that performs the blit.
    #[instrument(level = "trace", parent = &self.span, skip(self, to))]
    #[profiling::function]
    fn blit_to(
        &mut self,
        to: &mut MultiFramebuffer<'buffer, R, T>,
        src: Rectangle<i32, Physical>,
        dst: Rectangle<i32, Physical>,
        filter: TextureFilter,
    ) -> Result<(), Self::Error> {
        self.flush_frame()?;

        match self.target.as_mut() {
            Some(target) => {
                let MultiFramebufferInternal::Target(to_fb) = &mut to.0 else {
                    unreachable!()
                };
                let sync = target
                    .device
                    .renderer_mut()
                    .blit(target.framebuffer, to_fb, src, dst, filter)
                    .map_err(Error::Target)?;
                // Block until the blit has finished before reporting success.
                target.device.renderer_mut().wait(&sync).map_err(Error::Target)
            }
            None => {
                let MultiFramebufferInternal::Render(to_fb) = &mut to.0 else {
                    unreachable!()
                };
                let frame = self.frame.as_mut().unwrap();
                frame.blit_to(to_fb, src, dst, filter).map_err(Error::Render)
            }
        }
    }

    /// Blits from `from` into the framebuffer this frame renders to.
    ///
    /// The frame is flushed first. With a target device the blit runs on the target
    /// renderer and is waited for; otherwise it is forwarded to the render frame.
    /// Panics if `from` does not belong to the device that performs the blit.
    #[instrument(level = "trace", parent = &self.span, skip(self, from))]
    #[profiling::function]
    fn blit_from(
        &mut self,
        from: &MultiFramebuffer<'buffer, R, T>,
        src: Rectangle<i32, Physical>,
        dst: Rectangle<i32, Physical>,
        filter: TextureFilter,
    ) -> Result<(), Self::Error> {
        self.flush_frame()?;

        match self.target.as_mut() {
            Some(target) => {
                let MultiFramebufferInternal::Target(from_fb) = &from.0 else {
                    unreachable!()
                };
                let sync = target
                    .device
                    .renderer_mut()
                    .blit(from_fb, target.framebuffer, src, dst, filter)
                    .map_err(Error::Target)?;
                // Block until the blit has finished before reporting success.
                target.device.renderer_mut().wait(&sync).map_err(Error::Target)
            }
            None => {
                let MultiFramebufferInternal::Render(from_fb) = &from.0 else {
                    unreachable!()
                };
                let frame = self.frame.as_mut().unwrap();
                frame.blit_from(from_fb, src, dst, filter).map_err(Error::Render)
            }
        }
    }
}

impl<R: GraphicsApi, T: GraphicsApi> Blit for MultiRenderer<'_, '_, R, T>
where
    <R::Device as ApiDevice>::Renderer: Blit,
    <T::Device as ApiDevice>::Renderer: Blit,
    // We need this because the Renderer-impl does and Blit requires Renderer
    R: 'static,
    R::Error: 'static,
    T::Error: 'static,
    <R::Device as ApiDevice>::Renderer: Bind<Dmabuf> + ExportMem + ImportDma + ImportMem,
    <T::Device as ApiDevice>::Renderer: ImportDma + ImportMem,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::TextureId: Clone + Send,
    <<R::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
    <<T::Device as ApiDevice>::Renderer as RendererSuper>::Error: 'static,
{
    /// Blits `src` of `from` onto `dst` of `to`.
    ///
    /// With a target device the blit runs on the target renderer, otherwise on the
    /// render renderer. Panics if either framebuffer does not belong to that device.
    #[instrument(level = "trace", parent = &self.span, skip(self, from, to))]
    #[profiling::function]
    fn blit(
        &mut self,
        from: &MultiFramebuffer<'_, R, T>,
        to: &mut MultiFramebuffer<'_, R, T>,
        src: Rectangle<i32, Physical>,
        dst: Rectangle<i32, Physical>,
        filter: TextureFilter,
    ) -> Result<SyncPoint, <Self as RendererSuper>::Error> {
        match self.target.as_mut() {
            Some(target) => {
                let MultiFramebufferInternal::Target(from_fb) = &from.0 else {
                    unreachable!()
                };
                let MultiFramebufferInternal::Target(to_fb) = &mut to.0 else {
                    unreachable!()
                };
                let target_renderer = target.device.renderer_mut();
                target_renderer
                    .blit(from_fb, to_fb, src, dst, filter)
                    .map_err(Error::Target)
            }
            None => {
                let MultiFramebufferInternal::Render(from_fb) = &from.0 else {
                    unreachable!()
                };
                let MultiFramebufferInternal::Render(to_fb) = &mut to.0 else {
                    unreachable!()
                };
                let render_renderer = self.render.renderer_mut();
                render_renderer
                    .blit(from_fb, to_fb, src, dst, filter)
                    .map_err(Error::Render)
            }
        }
    }
}
