#!/usr/bin/perl

# Syntax highlight text using Vim
# simplified version to use it as a monolithic function within lesspipe
#
# This software is copyright (c) 2002-2006 by Geoff Richards.
#
# This software is copyright (c) 2011 by Randy Stauner.
#
# This software is copyright (c) 2021-2023 by Wolfgang Friebel.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
use warnings;
use strict;
use 5.9.3;
use IO::File;
use File::Copy;
use File::Temp;
use Term::ANSIColor qw(color colorvalid);
use IPC::Open3;
use Getopt::Std;

our ($opt_h, $opt_l);
&getopts('hl:') || usage();
usage() if $opt_h;

my ($file) = shift;
my $filetype = $opt_l;

# do a clean up if we get a CTRL-C
our ($tdir,$script_fh, $markup_fh);
$SIG{INT} = sub { if ($tdir) {
	close $script_fh if $script_fh;
	close $markup_fh if $markup_fh;
	unlink $tdir; print "\n"; exit 1 }};

$tdir = File::Temp->newdir('/tmp/vimcolorXXXX');

if (! $file or $file eq '-') {
	$file = "$tdir/inputfile";
	File::Copy::syscopy(\*STDIN, $file);
}

my %ANSI_COLORS = (
	Comment		=> 'blue',
	Constant	=> 'red',
	Identifier	=> 'cyan',
	Statement	=> 'yellow',
	PreProc		=> 'magenta',
	Type		=> 'green',
	Special		=> 'bright_magenta',
	Underlined	=> 'underline',
	Ignore		=> 'bright_white',
	Error		=> 'on_red',
	Todo		=> 'on_cyan',
);

# allow the environment to overwrite:

my $report_colors;
my %COLORS = %ANSI_COLORS;
if ($ENV{TEXT_VIMCOLOR_ANSI}) {
	my @ADD = split /\s*[=;]\s*/, $ENV{TEXT_VIMCOLOR_ANSI};
	if (@ADD % 2) {
		warn "### TEXT_VIMCOLOR_ANSI has wrong content: $ENV{TEXT_VIMCOLOR_ANSI}\n";
	} else {
		while (@ADD) {
			my ($k, $v) = splice @ADD, -2;
			$COLORS{$k} = $v, next if $v ne '?';
			$report_colors=1;
			$COLORS{$k} ||= '';
		}
	}
}

# These extra syntax group are available but linked to the groups above by
# default in vim. They can get their own highlighting (all_syntax_groups => 1).
# Gets activated by setting a color for at least one of these members.
my %SYNTAX_LINKS;

$SYNTAX_LINKS{$_} = 'Constant' for qw(String Character Number Boolean Float);
$SYNTAX_LINKS{$_} = 'Identifier' for qw(Function);
$SYNTAX_LINKS{$_} = 'Statement' for qw(Conditional Repeat Label Operator Keyword Exception);
$SYNTAX_LINKS{$_} = 'PreProc' for qw(Include Define Macro PreCondit);
$SYNTAX_LINKS{$_} = 'Type' for qw(StorageClass Structure Typedef);
$SYNTAX_LINKS{$_} = 'Special' for qw(Tag SpecialChar Delimiter SpecialComment Debug);

my $all_groups = 0;
$all_groups = 1 if grep {defined $COLORS{$_}} keys %SYNTAX_LINKS;
# Copy ansi color for main group to all subgroups.
$COLORS{$_} ||= $COLORS{$SYNTAX_LINKS{$_}} for keys %SYNTAX_LINKS;

my %colors;
my $reset = color('reset');
for (keys (%COLORS)) {
	if (colorvalid($COLORS{$_})) {
		$colors{$_} = color($COLORS{$_});
	} else {
		warn "### invalid color name '$COLORS{$_}' (Term::ANSIColor) ###\n";
		$colors{$_} = $reset;
	}
}
# Build a lookup table to determine if a syntax exists.
my %SYNTAX_TYPE = map {$_, 1} keys %COLORS;

my $defaults = set_defaults();
my $syntax = do_markup($defaults, $file, $filetype);

sub set_defaults {
	return {
		vim_command		=> 'vim',
		vim_options		=> [qw( -RXZ -i NONE -u NONE -N -n ), "+set nomodeline"],
		all_syntax_groups	=> $all_groups,
		vim_let			=> {perl_include_pod => 1, 'b:is_bash' => 1},
	};
}

# Actually run Vim and turn the script's output into a datastructure.
sub do_markup {
	my ($defaults, $file, $filetype) = @_;

	(my $filename = $file) =~s/"/\\"/g;

	# Create a temp file to put the output in.
	my $out_file = File::Temp->new(TEMPLATE => 'vimcXXXX', DIR => '/tmp');
	# Create a temp file for the 'script', which is given to vim
	# with the -s option.
	my $script_file = "$tdir/scriptfile";
	open $script_fh, ">$script_file" or die "$!\n";
	my $markup_file = "$tdir/markupfile";
	open $markup_fh, ">$markup_file" or die "$!\n";
	my $filetype_set = defined $filetype ? ":set filetype=$filetype" : '';
	my $vim_let = $defaults->{vim_let};

	# on linux '-s' is fast and '--cmd' adds the 2-second startup delay
	# are there situations where --cmd is necessary or useful?

	my $markscript= <<'EOF';
set report=1000000
if !strlen(&filetype)
	filetype detect
endif
syn on
new
set modifiable
set paste
set isprint+=9
wincmd p
let s:end = line("$")
let s:lnum = 1
while s:lnum <= s:end
	let s:line = getline(s:lnum)
	let s:len = strlen(s:line)
	let s:new = ""
	let s:col = 1
	while s:col <= s:len
		let s:startcol = s:col " The start column for processing text
		let s:id = synID(s:lnum, s:col, 1)
		let s:col = s:col + 1
		while s:col <= s:len && s:id == synID(s:lnum, s:col, 1) | let s:col = s:col + 1 | endwhile
		let s:id = synIDtrans(s:id)
		let s:name = synIDattr(s:id, 'name')
		let s:new = s:new . '>' . s:name . '>' . substitute(substitute(substitute(strpart(s:line, s:startcol - 1, s:col - s:startcol), '&', '\&a', 'g'), '<', '\&l', 'g'), '>', '\&g', 'g') . '<' . s:name . '<'
		if s:col > s:len
			break
		endif
	endwhile
	exe "normal \<C-W>pa" . strtrans(s:new) . "\n\e\<C-W>p"
	let s:lnum = s:lnum + 1
	+
endwhile
wincmd p
normal dd
EOF
	if ($defaults->{all_syntax_groups}) {
		my %a= (map {$_, 1} ('Normal', keys %ANSI_COLORS, keys %SYNTAX_LINKS));
		printf $markup_fh "hi %-16s ctermfg=7\n", $_ for keys %a;
	}
	print $markup_fh $markscript;
	close $markup_fh;
	my @script_lines = (
		map { "$_\n" }
			# do :edit before :let or the buffer variables may get reset
			":edit $filename",
			(
				map  { ":let $_=$vim_let->{$_}" }
				grep { defined	$vim_let->{$_} }
					keys %$vim_let
			),

			':filetype on',
			$filetype_set,
			":source $markup_file",
			":write! $out_file",
			':qall!',
	);

	print $script_fh @script_lines;
	close $script_fh;

	run(
		$defaults->{vim_command},
		$defaults->{vim_options},
		$filename, ('-s' => "$script_file")
	);

	my $data = do { local $/; <$out_file> };

	# Given an array ref ($syntax), we add a new syntax chunk to it, unescaping
	# the text & making sure that consecutive chunks of the same type are merged.
	my ($syntax, $content, %types);
	$data =~ s/\x0D\x0A?/\n/g;
	$data =~ s/<(.*?)<>\1>//g; # repeating blocks of the same type
	for (keys %colors) {
		$types{$_} = 1 if $data =~ s/>$_>(.*?)<$_</$colors{$_}$1/g; #coloring
	}
	# first line without the reset sequence
	$data =~ s/^>(.*?)>(.*?)<\1</$2/; #no color
	$data =~ s/>(.*?)>(.*?)<\1</$reset$2/g; #no color
	# Unescape ampersands and pointies, restore escape
	my %s=(l=>'<',g=>'>',a=>'&');
	$data =~s/&([lga])/$s{$1}/g;
	$data =~ s/\^\[/\e/g;
	# get start and end of file correct
	$data .= $reset if $data !~ /\n$/;
	print $data if $data;

	return if ! $report_colors;
	print "\n### Report color settings:\n";
	print "$_ ($colors{$_}$COLORS{$_}$reset)\n" for keys %types;
}

# This is a subroutine which runs a program.
# It takes a list of the program name and arguments.
sub run {
	my ($prog, @args) = @_;

	{
		my ($in, $out) = (Symbol::gensym(), Symbol::gensym());
		my $err_fh = Symbol::gensym();

		my $pid = IPC::Open3::open3($in, $out, $err_fh, $prog => @args);

		# close these to avoid any ambiguity that might cause this to block
		# (see also the paragraph about "select" in IPC::Open3)
		close($in);
		close($out);

		# read handle before waitpid to avoid hanging on older systems
		my $errout = do { local $/; <$err_fh> };

		my $gotpid = waitpid($pid, 0);
		die "couldn't run the program '$prog'" if $gotpid == -1;
		my $error = $? >> 8;
		if ($error) {
			$errout =~ s/\n+\z//;
			my $details = $errout eq '' ? '' :
										"\n$prog wrote this error output:\n$errout\n";
			die "$prog returned an error code of '$error'$details";
		}
	}
}

sub usage {
	print <<EOF;
usage: vimcolor [-h] [-l language] [filename]

This program works by running the Vim text editor and getting it to apply its
excellent syntax highlighting (aka 'font-locking') to an input file, and mark
pieces of text according to whether it thinks they are comments, keywords,
strings, etc. The Perl code then reads back this markup and converts it
to text marked with ANSI escape sequences (using Term::ANSIColor)
based on the Vim syntax coloring of the input file.

OPTIONS:
 -h print this help and exit
 -l specify the type of file Vim should expect, if not recognized correctly

ARGUMENT
 filename		name of the file to colorize. If not given or - then STDIN is used

You can alter the color scheme using the TEXT_VIMCOLOR_ANSI
environment variable in the format of "SynGroup=color;"
For example:

	TEXT_VIMCOLOR_ANSI='Comment=green;Statement = magenta; '

A question mark as colorname displays in addition the color settings for
the syntax groups found at the end of the program. By default only the
following SynGroup syntax groups are used:

Comment Constant Identifier Statement PreProc Type Special Underlined
Ignore Error Todo

If you want to benefit from finer-grained syntax highlighting you can
request in TEXT_VIMCOLOR_ANSI additional syntax groups from the list

Character Number Boolean Float Function Conditional Repeat Label Operator
Keyword Exception Include Define Macro PreCondit StorageClass Structure
Typedef Tag SpecialChar Delimiter SpecialComment Debug

and chose appropriate colors.
EOF
	exit;
}
