//! This crate provides an implementation of
//! [elastic tabstops](http://nickgravgaard.com/elastictabstops/index.html).
//! It is a minimal port of Go's
//! [tabwriter](http://golang.org/pkg/text/tabwriter/) package.
//! Namely, its main mode of operation is to wrap a `Writer` and implement
//! elastic tabstops for the text written to the wrapped `Writer`.
//!
//! This package is also bundled with a program, `tabwriter`,
//! that exposes this functionality at the command line.
//!
//! Here's an example that shows basic alignment:
//!
//! ```rust
//! use std::io::Write;
//! use qsv_tabwriter::TabWriter;
//!
//! let mut tw = TabWriter::new(vec![]);
//! write!(&mut tw, "
//! Bruce Springsteen\tBorn to Run
//! Bob Seger\tNight Moves
//! Metallica\tBlack
//! The Boss\tDarkness on the Edge of Town
//! ").unwrap();
//! tw.flush().unwrap();
//!
//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
//! assert_eq!(&*written, "
//! Bruce Springsteen  Born to Run
//! Bob Seger          Night Moves
//! Metallica          Black
//! The Boss           Darkness on the Edge of Town
//! ");
//! ```
//!
//! Note that `flush` **must** be called or else `TabWriter` may never write
//! anything. This is because elastic tabstops requires knowing about future
//! lines in order to align output. More precisely, all text considered in a
//! single alignment must fit into memory.
//!
//! Here's another example that demonstrates how *only* contiguous columns
//! are aligned:
//!
//! ```rust
//! use std::io::Write;
//! use qsv_tabwriter::TabWriter;
//!
//! let mut tw = TabWriter::new(vec![]).padding(1);
//! write!(&mut tw, "
//!fn foobar() {{
//!    let mut x = 1+1;\t// addition
//!    x += 1;\t// increment in place
//!    let y = x * x * x * x;\t// multiply!
//!
//!    y += 1;\t// this is another group
//!    y += 2 * 2;\t// that is separately aligned
//!}}
//!").unwrap();
//! tw.flush().unwrap();
//!
//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
//! assert_eq!(&*written, "
//!fn foobar() {
//!    let mut x = 1+1;       // addition
//!    x += 1;                // increment in place
//!    let y = x * x * x * x; // multiply!
//!
//!    y += 1;     // this is another group
//!    y += 2 * 2; // that is separately aligned
//!}
//!");
//! ```

#![deny(missing_docs)]

use std::cmp;
use std::error;
use std::fmt;
use std::io::{self, BufWriter, Write};
use std::mem;
use std::str;

#[cfg(test)]
mod test;

/// `TabWriter` wraps an arbitrary writer and aligns tabbed output.
///
/// Elastic tabstops work by aligning *contiguous* tabbed delimited fields
/// known as *column blocks*. When a line appears that breaks all contiguous
/// blocks, all buffered output will be flushed to the underlying writer.
/// Otherwise, output will stay buffered until `flush` is explicitly called.
#[derive(Debug)]
pub struct TabWriter<W: io::Write> {
    // Destination for aligned output; the caller's writer wrapped in a
    // `BufWriter`.
    w: BufWriter<W>,
    // Raw bytes of every cell received since the last reset. Cells refer to
    // their text by offset into this buffer.
    buf: io::Cursor<Vec<u8>>,
    // One entry per input line, each holding that line's terminated cells.
    // The last entry is the line currently being written.
    lines: Vec<Vec<Cell>>,
    // The cell currently being accumulated (not yet terminated by a tab or
    // newline).
    curcell: Cell,
    // Lower bound on the width of every aligned column.
    minwidth: usize,
    // Number of padding characters emitted after each aligned column.
    padding: usize,
    // How cell text is positioned inside its column.
    alignment: Alignment,
    // When true, ANSI escape codes are stripped before measuring cell width.
    ansi: bool,
    // When true, leading empty cells are emitted as tabs instead of padding.
    tab_indent: bool,
}

/// `Alignment` represents how a `TabWriter` should align text within its cell.
///
/// This is a plain field-less enum, so it is `Copy`: a single value can be
/// handed to several writers or kept around after configuring one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Text should be aligned with the left edge of the cell
    Left,
    /// Text should be centered within the cell
    Center,
    /// Text should be aligned with the right edge of the cell
    Right,
    /// Like Left, but the last whitespace is a tab
    /// This produces a valid TSV file
    LeftEndTab,
    /// Like Left, but adds a comment line at the top that comma-delimited
    /// enumerates the starting position of each column (Fixed Width Format).
    /// Positions are 1-indexed.
    LeftFwf,
}

/// Coarse alignment used while emitting cells.
///
/// It is derived from the public `Alignment` so the per-cell match only has
/// three arms: `Alignment::Left`, `Alignment::LeftEndTab` and
/// `Alignment::LeftFwf` all map to `Left`.
enum MainAlignment {
    /// All extra space goes to the right of the cell text.
    Left,
    /// All extra space goes to the left of the cell text.
    Right,
    /// Extra space is split between both sides of the cell text.
    Center,
}

/// A single tab- or newline-terminated field of input.
///
/// A cell does not own its text; it records where the text lives inside the
/// writer's byte buffer together with its measured display width.
#[derive(Debug)]
struct Cell {
    start: usize, // offset into TabWriter.buf
    width: usize, // in characters
    size: usize,  // in bytes
}

impl<W: io::Write> TabWriter<W> {
    /// Create a new `TabWriter` from an existing `Writer`.
    ///
    /// All output written to `Writer` is passed through `TabWriter`.
    /// Contiguous column blocks indicated by tabs are aligned.
    ///
    /// The given writer is wrapped in a `BufWriter` with a 64 KiB buffer.
    ///
    /// Note that `flush` must be called to guarantee that `TabWriter` will
    /// write to the given writer.
    pub fn new(w: W) -> Self {
        Self {
            w: BufWriter::with_capacity(65536, w),
            buf: io::Cursor::new(Vec::with_capacity(1024)),
            lines: vec![vec![]],
            curcell: Cell::new(0),
            minwidth: 2,
            padding: 2,
            alignment: Alignment::Left,
            ansi: cfg!(feature = "ansi_formatting"),
            tab_indent: false,
        }
    }

    /// Set the minimum width of each column. That is, all columns will have
    /// *at least* the size given here. If a column is smaller than `minwidth`,
    /// then it is padded with spaces.
    ///
    /// The default minimum width is `2`.
    #[must_use]
    pub const fn minwidth(mut self, minwidth: usize) -> Self {
        self.minwidth = minwidth;
        self
    }

    /// Set the padding between columns. All columns will be separated by
    /// *at least* the number of spaces indicated by `padding`. If `padding`
    /// is zero, then columns may run up against each other without any
    /// separation.
    ///
    /// The default padding is `2`.
    #[must_use]
    pub const fn padding(mut self, padding: usize) -> Self {
        self.padding = padding;
        self
    }

    /// Set the alignment of text within cells. This will affect future
    /// flushes.
    ///
    /// The default alignment is `Alignment::Left`.
    #[must_use]
    pub const fn alignment(mut self, alignment: Alignment) -> Self {
        self.alignment = alignment;
        self
    }

    /// Ignore ANSI escape codes when computing the number of display columns.
    ///
    /// This is disabled by default. (But is enabled by default when the
    /// deprecated `ansi_formatting` crate feature is enabled.)
    #[must_use]
    pub const fn ansi(mut self, yes: bool) -> Self {
        self.ansi = yes;
        self
    }

    /// Always use tabs for indentation columns (i.e., padding of
    /// leading empty cells on the left).
    ///
    /// This is disabled by default.
    #[must_use]
    pub const fn tab_indent(mut self, yes: bool) -> Self {
        self.tab_indent = yes;
        self
    }

    /// Unwraps this `TabWriter`, returning the underlying writer.
    ///
    /// Pending cells are aligned and written first, then the internal
    /// `BufWriter` is drained into the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns an error if aligning and writing the pending cells fails, or
    /// if draining the internal `BufWriter` into the underlying writer fails.
    /// In both cases the error is wrapped in an `IntoInnerError` together
    /// with the original `TabWriter`, so no buffered data is lost.
    // The error type intentionally carries the whole writer back to the
    // caller, which makes it large.
    #[allow(clippy::result_large_err)]
    pub fn into_inner(mut self) -> Result<W, IntoInnerError<W>> {
        // First align and emit whatever is still buffered as cells.
        if let Err(err) = self.flush() {
            return Err(IntoInnerError(self, err));
        }

        // `BufWriter::into_inner` writes out any bytes it still holds, which
        // can fail with an ordinary I/O error. Recover the `BufWriter` from
        // that failure and report it instead of panicking.
        match self.w.into_inner() {
            Ok(inner) => Ok(inner),
            Err(buf_err) => {
                let (err, w) = buf_err.into_parts();
                self.w = w;
                Err(IntoInnerError(self, err))
            }
        }
    }

    /// Resets the state of the aligner. Once the aligner is reset, all future
    /// writes will start producing a new alignment.
    fn reset(&mut self) {
        // Reuse the existing allocations: a reset happens after every flush,
        // which can be as often as once per input line.
        self.buf.get_mut().clear();
        self.buf.set_position(0);
        self.lines.clear();
        self.lines.push(Vec::new());
        self.curcell = Cell::new(0);
    }

    /// Adds the bytes received into the buffer and updates the size of
    /// the current cell.
    fn add_bytes(&mut self, bytes: &[u8]) {
        self.curcell.size += bytes.len();
        let _ = self.buf.write_all(bytes); // cannot fail
    }

    /// Ends the current cell, updates the UTF8 width of the cell and starts
    /// a fresh cell.
    fn term_curcell(&mut self) {
        // The cursor position indexes an in-memory `Vec`, so it always fits
        // in `usize`.
        #[allow(clippy::cast_possible_truncation)]
        let next_start = self.buf.position() as usize;
        let mut finished = mem::replace(&mut self.curcell, Cell::new(next_start));

        if self.ansi {
            finished.update_width(self.buf.get_ref(), count_columns_ansi);
        } else {
            finished.update_width(self.buf.get_ref(), count_columns_noansi);
        }
        self.curline_mut().push(finished);
    }

    /// Return a view of the current line of cells.
    fn curline(&self) -> &[Cell] {
        self.lines.last().expect("`lines` always holds the current line")
    }

    /// Return a mutable view of the current line of cells.
    fn curline_mut(&mut self) -> &mut Vec<Cell> {
        self.lines.last_mut().expect("`lines` always holds the current line")
    }
}

impl Cell {
    /// Builds an empty cell whose text begins at byte offset `start`.
    const fn new(start: usize) -> Self {
        Self { width: 0, size: 0, start }
    }

    /// Recomputes `width` by applying `count_columns` to this cell's bytes,
    /// i.e. the `size` bytes of `buf` beginning at `start`.
    fn update_width(
        &mut self,
        buf: &[u8],
        count_columns: impl Fn(&[u8]) -> usize,
    ) {
        let span = self.start..self.start + self.size;
        self.width = count_columns(&buf[span]);
    }
}

impl<W: io::Write> TabWriter<W> {
    /// Aligns every buffered line, hands the result to the internal
    /// `BufWriter`, and resets the aligner.
    ///
    /// This deliberately does *not* flush the `BufWriter`: `write` calls it
    /// whenever a line breaks all column blocks, and flushing the underlying
    /// writer that often would defeat the buffering.
    fn emit_aligned(&mut self) -> io::Result<()> {
        if self.curcell.size > 0 {
            self.term_curcell();
        }
        let widths = cell_widths(&self.lines, self.minwidth);

        // This is a trick to avoid allocating padding for every cell.
        // Just allocate the most we'll ever need and borrow from it.
        let biggest_width =
            widths.iter().flatten().copied().max().unwrap_or(0);
        let spaces = " ".repeat(biggest_width + self.padding);

        // Generate comment line for LeftFwf alignment
        if self.alignment == Alignment::LeftFwf {
            if let Some(first_line) =
                self.lines.first().filter(|line| !line.is_empty())
            {
                let comment_line = generate_fwf_comment_line(
                    first_line,
                    &widths[0],
                    self.padding,
                );
                self.w.write_all(comment_line.as_bytes())?;
            }
        }

        // Resolve these once so the hot loop below only matches on three
        // cases and tests a plain bool.
        let main_alignment = match self.alignment {
            Alignment::Left | Alignment::LeftEndTab | Alignment::LeftFwf => {
                MainAlignment::Left
            }
            Alignment::Right => MainAlignment::Right,
            Alignment::Center => MainAlignment::Center,
        };
        let end_with_tab = self.alignment == Alignment::LeftEndTab;

        for (row, (line, line_widths)) in
            self.lines.iter().zip(&widths).enumerate()
        {
            // Lines are separated, not terminated, by newlines.
            if row > 0 {
                self.w.write_all(b"\n")?;
            }

            let mut use_tabs = self.tab_indent;
            for (i, cell) in line.iter().enumerate() {
                let bytes =
                    &self.buf.get_ref()[cell.start..cell.start + cell.size];

                let Some(&col_width) = line_widths.get(i) else {
                    // There is no width for the last column
                    assert_eq!(i, line.len() - 1);
                    self.w.write_all(bytes)?;
                    continue;
                };

                // Leading empty cells become tabs when `tab_indent` is on;
                // the first non-empty cell switches back to padding.
                if use_tabs && cell.size == 0 {
                    self.w.write_all(b"\t")?;
                    continue;
                }
                use_tabs = false;

                assert!(col_width >= cell.width);
                let extra_space = col_width - cell.width;
                let (left_spaces, right_spaces) = match main_alignment {
                    MainAlignment::Left => (0, extra_space),
                    MainAlignment::Right => (extra_space, 0),
                    MainAlignment::Center => {
                        (extra_space / 2, extra_space - extra_space / 2)
                    }
                };
                let right_spaces = right_spaces + self.padding;

                self.w.write_all(spaces[..left_spaces].as_bytes())?;
                self.w.write_all(bytes)?;

                if end_with_tab {
                    // use spaces for padding except the last character is a tab
                    if right_spaces > 1 {
                        self.w
                            .write_all(spaces[..right_spaces - 1].as_bytes())?;
                    }
                    if right_spaces > 0 {
                        self.w.write_all(b"\t")?;
                    }
                } else {
                    self.w.write_all(spaces[..right_spaces].as_bytes())?;
                }
            }
        }

        self.reset();
        Ok(())
    }
}

impl<W: io::Write> io::Write for TabWriter<W> {
    /// Buffers `buf`, splitting it into cells at tabs and newlines.
    ///
    /// Whenever a completed line consists of a single cell, every column
    /// block has been broken, so everything buffered so far is aligned and
    /// emitted. On success the whole of `buf` has been consumed.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let mut lastterm = 0usize;
        for (i, &c) in buf.iter().enumerate() {
            if c != b'\t' && c != b'\n' {
                continue;
            }
            self.add_bytes(&buf[lastterm..i]);
            self.term_curcell();
            lastterm = i + 1;
            if c == b'\n' {
                let ncells = self.curline().len();
                self.lines.push(vec![]);
                // Having a single cell means that *all* previous
                // columns have been broken, so we should just emit.
                if ncells == 1 {
                    self.emit_aligned()?;
                }
            }
        }
        // Whatever follows the last terminator stays in the current cell.
        self.add_bytes(&buf[lastterm..]);
        Ok(buf.len())
    }

    /// Aligns and emits all buffered text, then flushes the internal
    /// `BufWriter` so the output actually reaches the wrapped writer.
    fn flush(&mut self) -> io::Result<()> {
        self.emit_aligned()?;
        self.w.flush()
    }
}

/// An error returned by `into_inner`.
///
/// This combines the error that happened while flushing the buffer with the
/// `TabWriter` itself.
///
/// The first field is the `TabWriter` that could not be unwrapped and the
/// second is the `io::Error` that stopped it; they are exposed through
/// `into_inner` and `error` respectively.
pub struct IntoInnerError<W: io::Write>(TabWriter<W>, io::Error);

impl<W: io::Write> IntoInnerError<W> {
    /// Returns the error which caused the `into_error()` call to fail.
    pub const fn error(&self) -> &io::Error {
        &self.1
    }

    /// Returns the `TabWriter` instance which generated the error.
    pub fn into_inner(self) -> TabWriter<W> {
        self.0
    }
}

impl<W: io::Write> fmt::Debug for IntoInnerError<W> {
    /// Formats only the wrapped I/O error, using its `Debug` representation;
    /// the `TabWriter` is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.error(), f)
    }
}

impl<W: io::Write> fmt::Display for IntoInnerError<W> {
    /// Displays the message of the wrapped I/O error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.error(), f)
    }
}

impl<W: io::Write + ::std::any::Any> error::Error for IntoInnerError<W> {
    /// Returns the I/O error that prevented the `TabWriter` from being
    /// unwrapped.
    ///
    /// This implements `source` rather than the deprecated `cause`, so the
    /// I/O error is visible to code that walks the error chain. `cause`
    /// keeps working because its default implementation delegates to
    /// `source`.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        Some(self.error())
    }
}

/// Build the header comment emitted for the Fixed Width Format alignment.
///
/// The result is `#`, followed by the comma-separated, 1-indexed starting
/// position of every column, followed by a newline.
///
/// # Arguments
/// * `cells` - The cells on the first line of the table.
/// * `widths` - The widths of the aligned columns on that line.
/// * `padding` - The padding between columns.
///
/// # Returns
/// A string containing the comment line.
fn generate_fwf_comment_line(
    cells: &[Cell],
    widths: &[usize],
    padding: usize,
) -> String {
    // Each aligned column occupies its width plus the padding, so the next
    // column starts right after that.
    let mut next_start = 1usize;
    let mut starts: Vec<String> = widths
        .iter()
        .map(|&width| {
            let this_start = next_start;
            next_start += width + padding;
            this_start.to_string()
        })
        .collect();

    // A trailing cell without an aligned width still begins a column.
    if cells.len() > widths.len() {
        starts.push(next_start.to_string());
    }

    let mut line = String::from("#");
    line.push_str(&starts.join(","));
    line.push('\n');
    line
}

/// Computes, for every line, the width of each of its aligned columns.
///
/// The returned vector has one entry per input line. The last cell of a line
/// never gets a width, so a line with `k` cells yields at most `k - 1`
/// widths. A column's width is the widest cell among the run of consecutive
/// lines that all have a terminated cell in that column, but never less than
/// `minwidth`.
fn cell_widths(lines: &[Vec<Cell>], minwidth: usize) -> Vec<Vec<usize>> {
    // Every column of every contiguous block is measured exactly once: a
    // line that already received a width for a column (from a block started
    // on an earlier line) skips that column below.
    let mut ws: Vec<Vec<usize>> = vec![Vec::new(); lines.len()];
    for (i, iline) in lines.iter().enumerate() {
        if iline.is_empty() {
            continue;
        }
        for col in ws[i].len()..(iline.len() - 1) {
            // The block is the run of lines, starting here, in which `col`
            // is not the last column.
            let block_len = lines[i..]
                .iter()
                .take_while(|line| col + 1 < line.len())
                .count();
            let block = i..i + block_len;

            let width = lines[block.clone()]
                .iter()
                .map(|line| line[col].width)
                .fold(minwidth, cmp::max);

            for line_widths in &mut ws[block] {
                line_widths.push(width);
            }
        }
    }
    ws
}

fn count_columns_noansi(bytes: &[u8]) -> usize {
    use unicode_width::UnicodeWidthChar;

    // If we have a Unicode string, then attempt to guess the number of
    // *display* columns used.
    //
    str::from_utf8(bytes).map_or(bytes.len(), |s| {
        s.chars()
            .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
            .sum::<usize>()
    })
}

/// Estimates how many display columns `bytes` occupies, ignoring ANSI
/// escape sequences.
///
/// Valid UTF-8 has its escape sequences removed and is then measured
/// character by character with `unicode_width`, counting characters without
/// a reported width as zero. Anything that is not valid UTF-8 is counted as
/// one column per byte.
fn count_columns_ansi(bytes: &[u8]) -> usize {
    use unicode_width::UnicodeWidthChar;

    let Ok(text) = str::from_utf8(bytes) else {
        return bytes.len();
    };
    let visible = strip_formatting(text);
    visible.chars().map(|ch| ch.width().unwrap_or(0)).sum()
}

/// Returns `input` with every range reported by `find_ansi_escapes` removed.
///
/// When there is nothing to remove the input is returned borrowed, so no
/// allocation takes place.
fn strip_formatting(input: &str) -> std::borrow::Cow<'_, str> {
    // Allocated lazily, on the first escape that is found.
    let mut stripped: Option<String> = None;
    let mut cursor = 0;
    for escape in find_ansi_escapes(input) {
        let out = stripped
            .get_or_insert_with(|| String::with_capacity(input.len()));
        // Keep the text between the previous escape and this one.
        out.push_str(&input[cursor..escape.start]);
        cursor = escape.end;
    }

    match stripped {
        None => std::borrow::Cow::Borrowed(input),
        Some(mut out) => {
            out.push_str(&input[cursor..]);
            std::borrow::Cow::Owned(out)
        }
    }
}

/// Yields the byte range of every ANSI CSI escape sequence in `input`, in
/// order of appearance.
///
/// A sequence is `ESC [`, any number of parameter bytes (`0x30..=0x3F`), any
/// number of intermediate bytes (`0x20..=0x2F`) and one final byte
/// (`0x40..=0x7E`). This covers the usual colour/style codes such as
/// `ESC[1;31m`, and also keeps sequences with another final byte (for
/// example `ESC[2K`) from swallowing the visible text that follows them.
///
/// An `ESC [` that is not followed by a complete sequence is not reported;
/// scanning resumes right after it.
fn find_ansi_escapes(
    input: &str,
) -> impl Iterator<Item = std::ops::Range<usize>> + '_ {
    const ESCAPE_PREFIX: &str = "\x1B[";
    let bytes = input.as_bytes();
    let mut search_from = 0;
    std::iter::from_fn(move || loop {
        let start = search_from + input[search_from..].find(ESCAPE_PREFIX)?;
        let after_prefix = start + ESCAPE_PREFIX.len();

        // Every byte consumed below is ASCII, so all offsets stay on `char`
        // boundaries.
        let mut pos = after_prefix;
        while matches!(bytes.get(pos).copied(), Some(0x30..=0x3F)) {
            pos += 1;
        }
        while matches!(bytes.get(pos).copied(), Some(0x20..=0x2F)) {
            pos += 1;
        }

        if matches!(bytes.get(pos).copied(), Some(0x40..=0x7E)) {
            search_from = pos + 1;
            return Some(start..search_from);
        }

        // Truncated or malformed: treat the prefix as ordinary text and keep
        // looking for later sequences.
        search_from = after_prefix;
    })
}
