Whitespace and indentation cleanup
This commit is contained in:
parent
1dfb66c576
commit
3ddc898553
@ -14,7 +14,7 @@ proc {xml init} {xml} {
|
||||
regsub -all {<!--.*?-->} $xml {} xml
|
||||
return [xml new [list xml [string trim $xml] loc 0]]
|
||||
}
|
||||
|
||||
|
||||
xml method next {{peek 0}} {
|
||||
set n [regexp -start $loc -indices {(.*?)\s*?<(/?)(.*?)(/?)>} \
|
||||
$xml all txt stok tok etok]
|
||||
@ -26,7 +26,7 @@ xml method next {{peek 0}} {
|
||||
lassign $stok stok0 stok1
|
||||
lassign $tok tok0 tok1
|
||||
lassign $etok etok0 etok1
|
||||
|
||||
|
||||
if {$txt1 >= $txt0} {
|
||||
set txt [string range $xml $txt0 $txt1]
|
||||
if {!$peek} {
|
||||
@ -37,7 +37,7 @@ xml method next {{peek 0}} {
|
||||
}
|
||||
return [list TXT $txt]
|
||||
}
|
||||
|
||||
|
||||
set token [string range $xml $tok0 $tok1]
|
||||
if {!$peek} {
|
||||
set loc [expr {$all1 + 1}]
|
||||
@ -45,7 +45,7 @@ xml method next {{peek 0}} {
|
||||
if {[regexp {^!\[CDATA\[(.*)\]\]} $token => txt]} {
|
||||
return [list TXT $txt]
|
||||
}
|
||||
|
||||
|
||||
# Check for Processing Instruction <?...?>
|
||||
set type XML
|
||||
if {[regexp {^\?(.*)\?$} $token => token]} {
|
||||
@ -79,20 +79,20 @@ xml method reset {} {
|
||||
# attributes: doesn't check anything: quotes, equals, unique, etc.
|
||||
# text stuff: references, entities, parameters, etc.
|
||||
# doctype internal stuff
|
||||
#
|
||||
#
|
||||
# Fetch the next token from the tokenizer for the well-formedness checker.
#
# peek - passed straight through to [$self next]; defaults to 0.
#
# Returns whatever [$self next] returned, except that a bare "EOF" result
# is wrapped with [list] so that callers can always unpack the result as a
# list (e.g. with foreach {type val attr etype} ...).
xml method _NextToken {{peek 0}} {
    set result [$self next $peek]
    if { $result eq "EOF" } {
        return [list $result]
    }
    return $result
}
|
||||
|
||||
|
||||
# Public entry point for the well-formedness check.
#
# Runs [$self _isWellFormed], which pulls tokens through _NextToken, and
# then calls the object's own reset method so the check does not leave the
# parser mid-document.
# NOTE(review): reset is defined outside this view; presumably it rewinds
# the read position (the previous revision did "set loc 0" here) - confirm.
#
# Returns the result of _isWellFormed unchanged.
xml method isWellFormed {} {
    set result [$self _isWellFormed]
    $self reset
    return $result
}
|
||||
|
||||
xml method _isWellFormed {} {
|
||||
array set emsg {
|
||||
@ -105,7 +105,7 @@ xml method _isWellFormed {} {
|
||||
BADNAME "Bad name '$val'"
|
||||
BADPI "No processing instruction starts with 'xml'"
|
||||
}
|
||||
|
||||
|
||||
# [1] document ::= prolog element Misc*
|
||||
# [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
|
||||
# [27] Misc ::= Comment | PI | S
|
||||
@ -122,7 +122,7 @@ xml method _isWellFormed {} {
|
||||
# XMLDecl
|
||||
if {$val eq "xml"} {
|
||||
if {$seen != 0} { return $emsg(XMLDECLFIRST) }
|
||||
# TODO: check version number exist and only encoding and
|
||||
# TODO: check version number exists and only encoding and
|
||||
# standalone attributes are allowed
|
||||
# Mark as seen XMLDecl
|
||||
incr seen
|
||||
@ -132,7 +132,7 @@ xml method _isWellFormed {} {
|
||||
# Mark as seen PI
|
||||
set seen [expr {$seen | 2}]
|
||||
continue
|
||||
} elseif {$type eq "XML" && $val eq "!DOCTYPE"} {
|
||||
} elseif {$type eq "XML" && $val eq "!DOCTYPE"} {
|
||||
# Doctype
|
||||
if {$seen & 4} { return $emsg(MULTIDOCTYPE) }
|
||||
set seen [expr {$seen | 4}]
|
||||
@ -140,14 +140,14 @@ xml method _isWellFormed {} {
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
|
||||
# [39] element ::= EmptyElemTag | STag content ETag
|
||||
# [40] STag ::= < Name (S Attribute)* S? >
|
||||
# [42] ETag ::= </ Name S? >
|
||||
# [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
||||
# [44] EmptyElemTag ::= < Name (S Attribute)* S? />
|
||||
#
|
||||
|
||||
#
|
||||
|
||||
set stack {}
|
||||
set first 1
|
||||
while {1} {
|
||||
@ -158,15 +158,15 @@ xml method _isWellFormed {} {
|
||||
if {$type ne "XML" && $type ne "EOF"} { return $emsg(INVALID) }
|
||||
set first 0
|
||||
}
|
||||
|
||||
|
||||
if {$type eq "EOF"} break
|
||||
# TODO: check attributes: quotes, equals and unique
|
||||
|
||||
|
||||
if {$type eq "TXT"} continue
|
||||
if {! [regexp {^[a-zA-Z_:][a-zA-Z0-9.-_:\xB7]+$} $val]} {
|
||||
return [subst $emsg(BADNAME)]
|
||||
}
|
||||
|
||||
|
||||
if {$type eq "PI"} {
|
||||
if {[string equal -nocase xml $val]} { return $emsg(BADPI) }
|
||||
continue
|
||||
@ -174,17 +174,17 @@ xml method _isWellFormed {} {
|
||||
if {$etype eq "START"} {
|
||||
# Starting tag
|
||||
lappend stack $val
|
||||
} elseif {$etype eq "END"} {
|
||||
} elseif {$etype eq "END"} {
|
||||
# </tag>
|
||||
if {$val ne [lindex $stack end]} { return [subst $emsg(MISMATCH)] }
|
||||
set stack [lrange $stack 0 end-1]
|
||||
# Empty stack
|
||||
if {[llength $stack] == 0} break
|
||||
} elseif {$etype eq "EMPTY"} {
|
||||
} elseif {$etype eq "EMPTY"} {
|
||||
# <tag/>
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# End-of-Document can only contain processing instructions
|
||||
while {1} {
|
||||
foreach {type val attr etype} [$self _NextToken] break
|
||||
@ -199,29 +199,29 @@ xml method _isWellFormed {} {
|
||||
}
|
||||
|
||||
|
||||
# Transform some characters that might be sent as SGML entities.
|
||||
# Transform some characters that might be sent as SGML entities.
|
||||
# Initially &<>, but add some others (sent by TheTVDB.com?).
|
||||
|
||||
proc {xml decodeCharEntities} {xmlText} {
|
||||
# Entity names are case-sensitive but HTML5 adds AMP, etc;
|
||||
# other syntaxes (eg &#xhhhh;) aren't. Here treat all entity
|
||||
# names as case-insensitive at the risk of transforming an
|
||||
# other syntaxes (eg &#xhhhh;) aren't. Here treat all entity
|
||||
# names as case-insensitive at the risk of transforming an
|
||||
# illegal &APOS;, eg.
|
||||
|
||||
|
||||
# XML "predefined entities"
|
||||
set mapping {
|
||||
& &
|
||||
< <
|
||||
> >
|
||||
' "'"
|
||||
" "\""
|
||||
& &
|
||||
< <
|
||||
> >
|
||||
' "'"
|
||||
" "\""
|
||||
}
|
||||
|
||||
# This parser <https://github.com/dbohdan/jimhttp/blob/master/json.tcl>
|
||||
# could be used to import the HTML5 entity list from the JSON file at
|
||||
# <https://html.spec.whatwg.org/entities.json> instead of the above. But
|
||||
# this is XML without any additional DTD.
|
||||
|
||||
|
||||
# This parser <https://github.com/dbohdan/jimhttp/blob/master/json.tcl>
|
||||
# could be used to import the HTML5 entity list from the JSON file at
|
||||
# <https://html.spec.whatwg.org/entities.json> instead of the above.
|
||||
# But this is XML without any additional DTD.
|
||||
|
||||
# add any numeric character entity in the input to the mapping
|
||||
foreach {e _e xnum num} \
|
||||
[regexp -all -inline \
|
||||
@ -242,9 +242,10 @@ proc {xml decodeCharEntities} {xmlText} {
|
||||
}
|
||||
# substitute &amp; last in case of eg "xxx&amp;quot;yyy"
|
||||
return [string map -nocase \
|
||||
[lrange $mapping 0 1] \
|
||||
[string map -nocase [lrange $mapping 2 end] $xmlText]]
|
||||
[lrange $mapping 0 1] \
|
||||
[string map -nocase [lrange $mapping 2 end] $xmlText]]
|
||||
}
|
||||
|
||||
# test parsing XML; default from tvdb.class with added char entity goodness
|
||||
proc {xml test} \
|
||||
{{testXml { <?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
@ -257,7 +258,7 @@ proc {xml test} \
|
||||
<FirstAired>2012-09-29</FirstAired>
|
||||
<Network>Nickelodeon</Network>
|
||||
<id>261451</id>
|
||||
</Series>
|
||||
</Series>
|
||||
}}} {
|
||||
set testXml [xml init $testXml]
|
||||
for {set type ""} {$type ne "EOF"} {} {
|
||||
@ -265,3 +266,4 @@ proc {xml test} \
|
||||
puts "looking at: $type '$val' '$attr' '$etype'"
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user