-
Notifications
You must be signed in to change notification settings - Fork 257
/
TTParser.pm
200 lines (181 loc) · 6.88 KB
/
TTParser.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Copyright Tamil 2011
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
package C4::TTParser;
use base qw(HTML::Parser);
use C4::TmplToken;
use strict;
use warnings;
#seems to be handled post tokenizer
##hash where key is tag we are interested in and the value is a hash of the attributes we want
#my %interesting_tags = (
# img => { alt => 1 },
#);
#tokens found so far (used like a stack)
my ( @tokens );
#shiftnext token or undef
sub next_token{
return shift @tokens;
}
#unshift token back on @tokens
sub unshift_token{
my $self = shift;
unshift @tokens, shift;
}
#have a peep at next token
sub peep_token{
return $tokens[0];
}
#wrapper for parse
#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
#signature build_tokens( self, filename)
sub build_tokens{
my ($self, $filename) = @_;
$self->{filename} = $filename;
$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, original text )
$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, original text, is_cdata )
$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, original text )
$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
$self->handler(process => "process", "self, line, text, is_cdata"); # processing statement <?...?>
# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
open(my $fh, "<:encoding(utf8)", $filename) || die "Cannot open $filename ($!)";
$self->parse_file($fh);
return $self;
}
#handle parsing of text
sub text{
my $self = shift;
my $line = shift;
my $work = shift; # original text
my $is_cdata = shift;
# If there is a split template toolkit tag
if ( $work =~ m/^(?:(?!\[%).)*?%\]/s ) {
my @strings = ($&);
my $token;
$work = $';
while ( $token = pop @tokens ) {
if ( $token->string =~ m/\[%.*?$/s ) {
push @tokens, C4::TmplToken->new( $`, $token->type, $token->line_number, $token->pathname );
push @strings, $&;
last;
} else {
push @strings, $token->string;
}
}
push @tokens,
C4::TmplToken->new(
join( '', reverse @strings ), C4::TmplTokenType::DIRECTIVE,
$token ? $token->line_number : $line, $self->{filename}
);
}
while($work){
# if there is a template_toolkit tag
if( $work =~ m/\[%.*?%\]/ ){
#everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
if( $` ){
my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
#the match itself is a DIRECTIVE $&
my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
push @tokens, $t;
# put work still to do back into work
$work = $' ? $' : 0;
} else {
# If there is some left over work, treat it as text token
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
last;
}
}
}
sub declaration {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
sub comment {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
sub process {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
sub default {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
#handle opening html tags
sub start{
my $self = shift;
my $line = shift;
my $tag = shift;
my $hash = shift; #hash of attr/value pairs
my $text = shift; #original text
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
my %attr;
# tags seem to be uses in an 'interesting' way elsewhere..
for my $key( %$hash ) {
next unless defined $hash->{$key};
if ($key eq "/"){
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
}
else {
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
}
}
$t->set_attributes( \%attr );
push @tokens, $t;
}
#handle closing html tags
sub end{
my $self = shift;
my $line = shift;
my $tag = shift;
my $hash = shift;
my $text = shift;
# what format should this be in?
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
my %attr;
# tags seem to be uses in an 'interesting' way elsewhere..
for my $key( %$hash ) {
next unless defined $hash->{$key};
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
}
$t->set_attributes( \%attr );
push @tokens, $t;
}
1;