#!/usr/bin/perl

=pod

=head1 NAME

tv_merge - Merge (combine) two XMLTV files.

=head1 SYNOPSIS

tv_merge [-t] -i FILE -m FILE -o FILE

=head1 DESCRIPTION

Read XMLTV listings from two files and merge them together.

Unlike tv_cat (which just joins files) this will update (add/replace/delete)
the original XMLTV file with channels and programmes contained in the second
file.

It works with multiple channels, and will insert any new programmes and delete
any overlapping programmes.

B<IMPORTANT:> The input files must be pre-sorted into datetime within channel
order by using the "--by-channel" option to tv_sort:

    tv_sort --by-channel --output FILE FILE

All programmes must have start and stop times.

(Note: programmes in the merged-in file I<replace> any in the master file,
i.e. data are not updated I<within> programmes)

This program uses XML::TreePP which doesn't write DOCTYPE definitions in the
output file. If you need to add a suitable DOCTYPE tag then use the optional
-t parameter:

    tv_merge -t -i FILE -m FILE -o FILE

=head1 EXAMPLE

To merge all channels/programmes in F<newadditions.xml> into F<master.xml>,
and write the output to F<newmaster.xml> with a DOCTYPE tag:

    tv_merge -t -i master.xml -m newadditions.xml -o newmaster.xml

=head1 SEE ALSO

L<tv_sort(1)>, L<tv_cat(1)>

=head1 AUTHOR

Copyright Geoff Westcott, February 2013.

This code is distributed under the GNU General Public License v2 (GPLv2).

=cut

#
# Inspired by xmltvmerger.py ( http://niels.dybdahl.dk/xmltvdk/index.php/Xmltvmerger.py )
# with bug fixes and enhancements.
#
# IMPORTANT: the input files must be pre-sorted into datetime within channel order
#            by using the "--by-channel" option to tv_sort
#            e.g.  tv_sort --by-channel --output FILE FILE
#
# Help:  tv_merge -h
#

use warnings;
use strict;
use XML::TreePP;
use Date::Parse;
use POSIX qw(strftime);
use Getopt::Std;
$Getopt::Std::STANDARD_HELP_VERSION = 1;
use Data::Dumper;

# Process command line arguments
my %opts = ();    # hash to store input args

# getopts() returns false on an unknown switch or a missing switch argument;
# show the usage text instead of carrying on with a half-parsed command line.
getopts("dDqti:o:m:hv", \%opts) or do { HELP_MESSAGE(); exit 1; };

# File names are tested with defined() (not truthiness) so a file called "0" is kept.
my $input_file = defined $opts{'i'} ? $opts{'i'} : "";    # main file
my $merge_file = defined $opts{'m'} ? $opts{'m'} : "";    # file to merge in
my $outfile    = defined $opts{'o'} ? $opts{'o'} : "";
my $debug      = ($opts{'d'}) ? $opts{'d'} : "";
my $debugmore  = ($opts{'D'}) ? $opts{'D'} : "";
my $quiet_mode = ($opts{'q'}) ? $opts{'q'} : "";
my $doctype    = ($opts{'t'}) ? $opts{'t'} : "";

if ($opts{'h'}) {
    VERSION_MESSAGE();
    HELP_MESSAGE();
    exit;
}

if ($opts{'v'}) {
    VERSION_MESSAGE();
    exit;
}

if ( !%opts ) {
    HELP_MESSAGE();
    exit 1;
}

# Check input file
if (! defined $opts{'i'} || ! -r $opts{'i'}) {
    print "Error: Please provide a valid input file\n";
    HELP_MESSAGE();
    exit 1;
}

# Check merge file
if (! defined $opts{'m'} || ! -r $opts{'m'}) {
    print "Error: Please provide a valid merge file\n";
    HELP_MESSAGE();
    exit 1;
}

# Check output file
if (! defined $opts{'o'}) {
    print "Error: Please specify an output file\n";
    HELP_MESSAGE();
    exit 1;
}

# -D implies -d
if ($debugmore) { $debug++; }

# Parse the XMLTV files
my $tpp = XML::TreePP->new();
$tpp->set( force_array => [ 'channel', 'programme' ] );    # force array ref for some fields
$tpp->set( indent => 2 );
$tpp->set( first_out => [ 'channel', 'programme', 'title', 'sub-title', 'desc', 'credits',
                          'date', 'category', 'keyword', 'language', 'orig-language', 'length',
                          'icon', 'url', 'country', 'episode-num', 'video', 'audio',
                          'previously-shown', 'premiere', 'last-chance', 'new', 'subtitles',
                          'rating', 'star-rating', 'review', 'image' ] );

my $xmltv = $tpp->parsefile( $input_file );
if ($debugmore) { print Dumper($xmltv); }

my $xmltv_merge = $tpp->parsefile( $merge_file );
if ($debugmore) { print Dumper($xmltv_merge); }

my %xmltv_new_channels;       # ids of channels present only in the merge file
my @xmltv_merged_channels;    # channel elements to be written
my @xmltv_merged_progs;       # programme elements to be written

# Merge the channels (must run first: merge_programmes reads %xmltv_new_channels)
merge_channels();

# Merge the programmes
merge_programmes();

# format and output the data
write_newxml();

###############################################################################
###############################################################################

sub merge_programmes {
    # Process the XMLTV data structure
    #
    # Walks the master file's programmes in order, interleaving / substituting the
    # merge file's programmes, and appends the result to @xmltv_merged_progs.
    #
    # Assumes all data has start and stop times (note: this isn't a requirement of XMLTV DTD)
    #
    # If the files contain >1 channel then they must be pre-sorted into datetime within channel
    #   e.g.  tv_sort --by-channel --output FILE FILE
    #
    # Rules:
    #   1. always use the new programme details (todo: be a bit more clever with merging)
    #
    # $p2_stop doubles as a state flag:
    #    0  = do not skip the next old record
    #   -1  = the new file has moved on to a brand-new channel which still has to be inserted
    #   >0  = stop time of the most recently examined new programme
    #

    # A file without any <programme> elements yields undef here; fall back to an
    # empty list so the dereferences below don't die under 'strict refs'.
    my $prog1 = $xmltv->{tv}->{programme}       || [];
    my $prog2 = $xmltv_merge->{tv}->{programme} || [];

    if ($debugmore) { print Dumper("Incoming programmes :", $prog1 ); }
    if ($debugmore) { print Dumper("Merging programmes :", $prog2 ); }

    my ($p1_channel, $p1_start, $p1_stop) = ('', 0 , 0);
    my ($p2_channel, $p2_start, $p2_stop) = ('', 0 , 0);
    my $i = 0;                                   # index of the next unconsumed new programme
    my ($thischannel, $lastchannel) = ('', '');
    my $p2count = scalar (@{ $prog2 });

    for my $p (@{ $prog1 }) {

        $thischannel = $p->{-channel} if $thischannel eq '';

        $p1_channel = $p->{-channel};
        $p1_start   = to_timestamp( $p->{-start} );
        $p1_stop    = to_timestamp( $p->{-stop} );

        if ($p2_stop == -1) {
            # 'new' file channel has changed
            # copy any remaining 'old' records for this channel, then insert the new channel's programmes
            if ($p1_channel eq $lastchannel) {
                push @xmltv_merged_progs, $p;    # copy the old record
                next;
            }

            # old channel has changed = so we have finished copying old records and can now insert new channel
            my $insertchannel = $p2_channel;
            while ($p2_channel eq $insertchannel) {
                push @xmltv_merged_progs, $prog2->[$i];
                $i++;
                if ($i < $p2count) {
                    $p2_channel = $prog2->[$i]->{-channel};
                } else {
                    $p2_channel = '';    # no more new records, end the loop
                }
            }
            $p2_stop = 0;
        }

        if ($p2_stop != 0 && $p1_start < $p2_stop && $p1_channel eq $p2_channel) {
            # skip old recs until start time >= last new rec's stop time
            if ($debug) { print "skippy $p1_start $p2_stop\n"; }
            next;
        }

        if ($i < $p2count) {

            $p2_channel = $prog2->[$i]->{-channel};
            $p2_start   = to_timestamp( $prog2->[$i]->{-start} );
            $p2_stop    = to_timestamp( $prog2->[$i]->{-stop} );

            if ($debug) { print "$p1_start $p2_start || $p1_stop $p2_stop \n"; }

            if ($p1_channel eq $p2_channel) {

                $lastchannel = $thischannel;
                $thischannel = $p1_channel;    # remember the 'current' channel

                if ($p1_start == $p2_start) {
                    # either
                    #   (a) progs identical - replace old with new
                    #   (b) new prog is longer - replace old with new & delete all old progs until new stop time
                    #   (c) new prog is shorter - insert new prog
                    #
                    push @xmltv_merged_progs, $prog2->[$i];
                    $i++;
                    next;

                } elsif ($p1_start < $p2_start) {
                    # get here either when new schedule hasn't commenced yet, or when there is a gap in the new schedule
                    # keep current old prog
                    # unless this would cause an overlap
                    if ($p1_stop > $p2_start) {
                        # uh oh overlap - we could possibly set the stop time to equal new prog's start, but that
                        # is just making up schedules! On balance I think a hole is better than an overlap:
                        # skip this old prog
                        next;
                    }
                    $p2_stop = 0;    # ensure we *don't* skip the next old record

                } elsif ($p1_start > $p2_start) {
                    # get here when additional prog in new schedule that's not in the old
                    # insert new prog & keep current old prog
                    push @xmltv_merged_progs, $prog2->[$i];
                    $i++;
                    redo;    # re-examine the same old prog against the next new prog
                }

            } else {
                # channel has changed on one (or both) of the files

                if ($p1_channel ne $thischannel) {
                    # 'old' file channel has changed
                    # copy any remaining 'new' records for this channel
                    while ($p2_channel eq $thischannel) {
                        push @xmltv_merged_progs, $prog2->[$i];
                        $i++;
                        if ($i < $p2count) {
                            $p2_channel = $prog2->[$i]->{-channel};
                        } else {
                            $p2_channel = '';    # no more new records, end the loop
                        }
                    }
                    $lastchannel = $thischannel;
                    $thischannel = $p1_channel;    # remember the new 'current' channel
                    redo;

                } elsif ($p2_channel ne $thischannel) {
                    # 'new' file channel has changed
                    # copy any remaining 'old' records for this channel
                    # then, if this is a totally new channel, insert the new channel's programmes
                    if (defined $xmltv_new_channels{$p2_channel}) {
                        $p2_stop = -1;
                        redo;
                    } else {
                        $p2_stop = 0;
                    }
                }
            }
        }

        # copy the old record
        push @xmltv_merged_progs, $p;
    }

    # no more old progs
    # append any remaining new progs
    while ($i < $p2count) {
        push @xmltv_merged_progs, $prog2->[$i];
        $i++;
    }

    if ($debugmore) { print Dumper("Merged programmes :", @xmltv_merged_progs); }
}

sub merge_channels {
    # Merge the channels in the two input files
    #
    #  - always use the newer channel details
    #  - record in %xmltv_new_channels the ids which appear only in the merge file
    #  - output order is deterministic: master-file order first, then channels
    #    new to the merge file in the order they appear there
    #
    my $chan1 = $xmltv->{tv}->{channel}       || [];
    my $chan2 = $xmltv_merge->{tv}->{channel} || [];

    if ($debugmore) { print Dumper("Incoming channels :", $chan1 ); }
    if ($debugmore) { print Dumper("Merging channels :", $chan2 ); }

    my %channels;         # channel id => channel element (the newest one seen)
    my @channel_order;    # channel ids in first-seen order

    for my $c1 (@{ $chan1 }) {
        my $c1_id = $c1->{-id};
        push @channel_order, $c1_id if !defined $channels{$c1_id};
        $channels{$c1_id} = $c1;
    }

    for my $c2 (@{ $chan2 }) {
        my $c2_id = $c2->{-id};
        if (!defined $channels{$c2_id}) {
            $xmltv_new_channels{$c2_id} = 1;    # new channel not in current file
            push @channel_order, $c2_id;
        }
        $channels{$c2_id} = $c2;
    }

    # Iterating 'keys %channels' would give a different order on every run.
    push @xmltv_merged_channels, map { $channels{$_} } @channel_order;

    if ($debugmore) { print Dumper("Merged channels :", @xmltv_merged_channels); }
}

{
    my $strptime;    # parser shared by every call; built lazily on first use

    sub to_timestamp {
        # Convert an XMLTV timestamp ("20130320060000 +0000") to epoch seconds.
        #
        # Dies if the timestamp is missing or does not match the expected pattern.
        #
        my ($ts) = @_;
        use DateTime::Format::Strptime;

        die "Error: programme is missing a start or stop time\n" if !defined $ts;

        $strptime ||= DateTime::Format::Strptime->new(
            pattern   => '%Y%m%d%H%M%S %z',    # "20130320060000 +0000"
            time_zone => 'UTC',                # (better than 'local' - e.g. handles DST)
            on_error  => 'croak',
        );
        return ($strptime->parse_datetime($ts))->epoch();
    }
}

sub write_newxml {
    # Write the output xml file
    #
    # Builds a <tv> container from the master file's attributes plus the merged
    # channels and programmes, then writes it to $outfile (with a DOCTYPE when
    # the -t switch was given).
    #

    # create an xml container
    my %xml = ();
    for my $attr ('-generator-info-name', '-generator-info-url',
                  '-source-info-name',    '-source-info-url') {
        $xml{'tv'}{$attr} = $xmltv->{tv}->{$attr} if $xmltv->{tv}->{$attr};
    }

    # add the <channel> elements
    $xml{'tv'}{'channel'} = \@xmltv_merged_channels;

    # add the <programme> elements
    $xml{'tv'}{'programme'} = \@xmltv_merged_progs;

    #
    my $ccount = scalar @{ $xml{'tv'}{'channel'} };
    my $ccount_plural = $ccount == 1 ? "" : "s";
    my $pcount = scalar @{ $xml{'tv'}{'programme'} };
    my $pcount_plural = $pcount == 1 ? "" : "s";
    if (!$quiet_mode) { print "Writing : $ccount channel$ccount_plural $pcount programme$pcount_plural \n"; }

    # write the output xml file
    if ($debugmore) { print Dumper(\%xml); }

    if ($doctype) {
        # TreePP doesn't write a DOCTYPE - add one if the user requests it
        my $xmlout = $tpp->write( \%xml, 'UTF-8' );
        $xmlout =~ s/^(<\?xml.*>)/$1\n<!DOCTYPE tv SYSTEM "xmltv.dtd">\n/;

        open my $out, '>', $outfile or die "Failed to open $outfile for writing: $!";
        # print, not printf: the document is data, and any '%' in it must not be
        # interpreted as a format directive.
        print {$out} $xmlout;
        close $out or die "Failed to write $outfile: $!";
    } else {
        $tpp->writefile( $outfile, \%xml, 'UTF-8' );
    }
}

sub VERSION_MESSAGE {
    # print the XMLTV module version and this program's version
    use XMLTV;
    our $VERSION = $XMLTV::VERSION;
    print "XMLTV module version $XMLTV::VERSION\n";
    print "This program version $VERSION\n";
}

sub HELP_MESSAGE {
    # print usage message
    my $filename = (split(/\//,$0))[-1];
    print STDERR << "EOF";

Merge (combine) two XMLTV files

Files should be sorted into datetime within channel order
  e.g. tv_sort --by-channel --output FILE FILE
Assumes all data has start and stop times

Usage: $filename [-dDq] [-t] -i FILE -m FILE -o FILE

-i FILE         : input XMLTV file
-m FILE         : merge XMLTV file
-o FILE         : output XMLTV file
-t              : add DOCTYPE to output file
-d              : print debugging messages
-D              : print even more debugging messages
-q              : quiet mode (no STDOUT messages)
-h, --help      : print help message and exit
-v, --version   : print version and exit

Example: $filename -i master.xml -m newadditions.xml -o newmaster.xml

EOF
}