]> the.earth.li Git - htag.git/blobdiff - plugins/15merge
Import Upstream version 0.0.24
[htag.git] / plugins / 15merge
index 7b48711562969776490d7ff5d5e9807afdcbad09..53f3b82e2d5e33b72641538e7d6f4276c24a32f5 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 
-# Copyright (C) 2000-2001 Simon Huggins
+# Copyright (C) 2000-2008 Simon Huggins
 # merge merges the sig and the tag but also merges the sig and the new style
 # plugin things (i.e. all those silly files in $cfg{'tmpdir'}
 
 
 use strict;
 use Text::Wrap;
+use Encode;
 
 $Text::Wrap::columns=defined $cfg{'maxlinelen'} ? $cfg{'maxlinelen'} : 72;
 $cfg{'first'}  ||= "";
 $cfg{'leader'} ||= "";
 
+# Work out which character set the sig and tag files are in.  The charset
+# is the codeset part of the locale name ("de_DE.ISO-8859-15@euro" gives
+# "ISO-8859-15").  If the locale names no charset that Encode knows, we
+# assume UTF-8, which shouldn't hurt ASCII users; anyone else should have
+# their locale set correctly.
+my $fromcharset = 'UTF-8';
+my $tocharset = 'UTF-8';
+
+# LC_ALL overrides LC_CTYPE, which overrides LANG.  An empty variable counts
+# as unset, so it must not stop us from looking at the next one.
+my $locale;
+foreach my $var ('LC_ALL', 'LC_CTYPE', 'LANG') {
+       next unless defined $ENV{$var} and length $ENV{$var};
+       $locale = $ENV{$var};
+       last;
+}
+if ($locale) {
+       $locale =~ s/\@.*//s;   # drop any "@modifier" suffix
+       $locale =~ s/.*\.//;    # keep only what follows the last dot
+       # find_encoding resolves aliases and ignores case ("ISO8859-1",
+       # "utf8", ...) and returns undef for names it does not know.
+       my $encoding = length $locale ? find_encoding($locale) : undef;
+       if ($encoding) {
+               $tocharset = $fromcharset = $encoding->name;
+       }
+}
+
+
 my $anal_merge_debug=0;
 
 sub remove_space($) {
@@ -52,7 +79,7 @@ sub merge($$) {
        my ($plugin,$len,$align,$wascr);
        $wascr=0;
 
-       while ($sig =~ /@([A-Za-z]?)([1-9][0-9]*)([RC]?)@/) {
+       while ($sig =~ /@([A-Za-z]?)(\*|(?:[1-9][0-9]*))([RC]?)@/) {
                # Ick.
                if (defined $3) {
                        $plugin = $1;
@@ -77,53 +104,61 @@ print STDERR "plugin,len,type = #$plugin#,#$len#,#$align#\n" if $anal_merge_debu
                        $chunk = getplugin($plugin);
                        print STDERR "Got plugin $plugin and $chunk\n"
                                if $anal_merge_debug;
+                       $len = quotemeta $len; # escape * if it is *
                        $sig =~ s/\@$plugin$len[RC]?@/$chunk/;
                        print STDERR "Sig is now:\n$sig" if $anal_merge_debug;
                        $chunk = "";
                } else {
                        my $extra;
                        $notag=0;
-                       $chunk =  substr $tag, 0, $len;
+                       if ($len ne "*") {
+                               $chunk =  substr $tag, 0, $len;
 print STDERR "chunk,tag = #$chunk#,#$tag#".length($tag)." ".length($chunk)."\n"
        if $anal_merge_debug;
-                       if ($chunk =~ s/^([^\n]+)\n+(.*)$/$1/s) {
-                               $extra = $2;
-                               print STDERR "\$extra = [$extra]\n"
-                                       if $anal_merge_debug;
-                       }
-                       if (length($chunk) < $len) {
-                               print STDERR "length(chunk) < $len\n"
-                                       if $anal_merge_debug;
-                               $chunk=&chunksizealign($chunk,$len,$align);
-                               print STDERR "chunk = #$chunk#\n"
-                                       if $anal_merge_debug;
-                       }
-                       if (length($tag) < $len + 1) {
-                               $tag= $extra ? $extra : "";
+                               if ($chunk =~ s/^([^\n]+)\n+(.*)$/$1/s) {
+                                       $extra = $2;
+                                       print STDERR "\$extra = [$extra]\n"
+                                               if $anal_merge_debug;
+                               }
+                               if (length($chunk) < $len) {
+                                       print STDERR "length(chunk) < $len\n"
+                                               if $anal_merge_debug;
+                                       $chunk=&chunksizealign($chunk,$len,$align);
+                                       print STDERR "chunk = #$chunk#\n"
+                                               if $anal_merge_debug;
+                               }
+                               if (length($tag) < $len + 1) {
+                                       $tag= $extra ? $extra : "";
 print STDERR "length(tag) < $len + 1, tag now = #$tag#(extra = #$extra#)\n"
        if $anal_merge_debug;
-                       } elsif (substr $tag, 0,  $len + 1 eq ' ') {
-                               $tag=substr $tag, $len + 1;
-                               $tag=$extra . $tag if defined $extra;
+                               } elsif (substr $tag, 0,  $len + 1 eq ' ') {
+                                       $tag=substr $tag, $len + 1;
+                                       $tag=$extra . $tag if defined $extra;
 print STDERR "substr tag, 0, $len + 1 was a space.  tag now = #$tag#\n"
        if $anal_merge_debug;
-                       } else {
-                               $tag=substr $tag, $len;
-                               ### Back up a word in $chunk
-                               $tag=$extra . $tag if defined $extra;
+                               } else {
+                                       $tag=substr $tag, $len;
+                                       ### Back up a word in $chunk
+                                       $tag=$extra . $tag if defined $extra;
 print STDERR "didn't break at space.  Backing up word.  tag now = #$tag#\n"
        if $anal_merge_debug;
-                               if ($chunk =~ s/(.*) (.*)$/$1/) {
-                                       $tag=$2 . $tag;
-                                       $chunk=&chunksizealign($chunk,$len,$align);
-                               }
+                                       if ($chunk =~ s/(.*) (.*)$/$1/) {
+                                               $tag=$2 . $tag;
+                                               $chunk=&chunksizealign($chunk,$len,$align);
+                                       }
 print STDERR "If space in chunk then change chunk and add word to tag.".
 "Reformat chunk now = #$chunk# (tag = #$tag#)\n" if $anal_merge_debug;
+                               }
+                       } else {
+                               $chunk = $tag;
+                               $tag = "";
                        }
+                       $len = quotemeta $len; # escape * if it's *
                        $sig =~ s/\@$plugin$len[RC]?@/$chunk/;
                }
        }
        $sig =~ s/@([0-9]+)[RC]?@/" " x $1/eg;
+       $sig =~ s/@\*[RC]?@//g;
        $cfg{'notag'} = $notag;
        if ($tag and not $notag) {
                return undef;
@@ -155,15 +190,22 @@ sub getplugin($) {
 my ($tag,$sig,$newsig);
 open(SIG, "<$cfg{'tmpsigfile'}") or htagdie "$0: Could not open $cfg{'tmpsigfile'}: $!\n";
 while(<SIG>) {
-       $sig .= $_;
+       $sig .= decode($fromcharset, $_);
 }
 close(SIG);
-open(TAG, "<$cfg{'tmptagfile'}") or htagdie "$1: Could not open $cfg{'tmptagfile'}: $!\n";
-while(<TAG>) {
-       $tag .= $_;
+# $ret stays 0 unless the sig carries an @NOTAG@ marker, in which case it is
+# set to 26.  NOTE(review): the meaning of 26 is defined by the caller;
+# confirm against the plugin return-code convention.
+my $ret = 0;
+if (defined $sig and $sig =~ /\@NOTAG\@/) {
+       # No tagline wanted: skip the tag file and remove the marker
+       # (together with its newline) from the sig.
+       $tag="";
+       $ret=26;
+       $sig =~ s/\@NOTAG\@\n//;
+} else {
+       # Use $0 (this program's name) in the message, as the SIG open does;
+       # $1 is a regex capture and holds nothing meaningful here.
+       open(TAG, "<$cfg{'tmptagfile'}") or htagdie "$0: Could not open $cfg{'tmptagfile'}: $!\n";
+       while(<TAG>) {
+               # Turn the raw bytes into Perl characters.
+               $tag .= decode($fromcharset, $_);
+       }
+       close(TAG);
 }
-close(TAG);
-if (defined $sig and $sig =~ /@[A-Za-z]?[1-9][0-9]*[RC]?@/) {
+# Only hand the sig to merge() when it contains a placeholder: @N@ or @*@,
+# with an optional leading letter and an optional trailing R or C.  The
+# alternation is grouped so that "|" cannot split the whole pattern in two.
+if (defined $sig and $sig =~ /@[A-Za-z]?(?:\*|[1-9][0-9]*)[RC]?@/) {
        $sig =  merge($tag,$sig);
 } else {
        my $formatted_tag = Text::Wrap::wrap($cfg{'first'},$cfg{'leader'},$tag);
@@ -172,11 +214,12 @@ if (defined $sig and $sig =~ /@[A-Za-z]?[1-9][0-9]*[RC]?@/) {
        $cfg{'notag'} = 0;
 }
 if (defined $sig) {
+       $sig = encode($tocharset, $sig);
        open(SIG, ">$cfg{'tmpsigfile'}") or htagdie "$0: Could not open $cfg{'tmpsigfile'}: $!\n";
        print SIG "\n" while $cfg{'newline'}--;
        print SIG $sig;
        close(SIG);
-       return;
+       return $ret;
 } else {
        return(10);
 }