Skip to content

Commit 6569bb7

Browse files
committed
Introduced ParseSettings for tag and attribute name case options.
1 parent f6a1ef3 commit 6569bb7

26 files changed

Lines changed: 411 additions & 150 deletions

CHANGES

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
jsoup changelog
22

3+
*** Release 1.10.1 [PENDING]
4+
* New feature: added the option to preserve case for tags and/or attributes, with ParseSettings. By default, the HTML
5+
parser will continue to normalize tag names and attribute names to lower case, and the XML parser will now preserve
6+
case, according to the relevant spec. The CSS selectors for tags and attributes remain case insensitive, per the CSS
7+
spec.
8+
39
*** Release 1.9.2 [2016-May-17]
410
* Fixed an issue where tag names that contained non-ascii characters but started with an ascii character
511
would cause the parser to get stuck in an infinite loop.

src/main/java/org/jsoup/nodes/Attribute.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
2424

2525
/**
2626
* Create a new attribute from unencoded (raw) key and value.
27-
* @param key attribute key
27+
* @param key attribute key; case is preserved.
2828
* @param value attribute value
2929
* @see #createFromEncoded
3030
*/
3131
public Attribute(String key, String value) {
3232
Validate.notEmpty(key);
3333
Validate.notNull(value);
34-
this.key = key.trim().toLowerCase();
34+
this.key = key.trim();
3535
this.value = value;
3636
}
3737

@@ -44,12 +44,12 @@ public String getKey() {
4444
}
4545

4646
/**
47-
Set the attribute key. Gets normalised as per the constructor method.
47+
Set the attribute key; case is preserved.
4848
@param key the new key; must not be null
4949
*/
5050
public void setKey(String key) {
5151
Validate.notEmpty(key);
52-
this.key = key.trim().toLowerCase();
52+
this.key = key.trim();
5353
}
5454

5555
/**

src/main/java/org/jsoup/nodes/Attributes.java

Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,39 @@
44
import org.jsoup.helper.Validate;
55

66
import java.io.IOException;
7-
import java.util.*;
7+
import java.util.AbstractMap;
8+
import java.util.AbstractSet;
9+
import java.util.ArrayList;
10+
import java.util.Collections;
11+
import java.util.Iterator;
12+
import java.util.LinkedHashMap;
13+
import java.util.List;
14+
import java.util.Map;
15+
import java.util.Set;
816

917
/**
1018
* The attributes of an Element.
1119
* <p>
12-
* Attributes are treated as a map: there can be only one value associated with an attribute key.
20+
* Attributes are treated as a map: there can be only one value associated with an attribute key/name.
1321
* </p>
1422
* <p>
15-
* Attribute key and value comparisons are done case insensitively, and keys are normalised to
16-
* lower-case.
23+
* Attribute name and value comparisons are <b>case sensitive</b>. By default for HTML, attribute names are
24+
* normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
25+
* name.
1726
* </p>
18-
*
27+
*
1928
* @author Jonathan Hedley, [email protected]
2029
*/
2130
public class Attributes implements Iterable<Attribute>, Cloneable {
2231
protected static final String dataPrefix = "data-";
23-
32+
2433
private LinkedHashMap<String, Attribute> attributes = null;
2534
// linked hash map to preserve insertion order.
2635
// null by default as so many elements have no attributes -- saves a good chunk of memory
2736

2837
/**
2938
Get an attribute value by key.
30-
@param key the attribute key
39+
@param key the (case-sensitive) attribute key
3140
@return the attribute value if set; or empty string if not set.
3241
@see #hasKey(String)
3342
*/
@@ -37,10 +46,27 @@ public String get(String key) {
3746
if (attributes == null)
3847
return "";
3948

40-
Attribute attr = attributes.get(key.toLowerCase());
49+
Attribute attr = attributes.get(key);
4150
return attr != null ? attr.getValue() : "";
4251
}
4352

53+
/**
54+
* Get an attribute's value by case-insensitive key
55+
* @param key the attribute name
56+
* @return the first matching attribute value if set; or empty string if not set.
57+
*/
58+
public String getIgnoreCase(String key) {
59+
Validate.notEmpty(key);
60+
if (attributes == null)
61+
return "";
62+
63+
for (String attrKey : attributes.keySet()) {
64+
if (attrKey.equalsIgnoreCase(key))
65+
return attributes.get(attrKey).getValue();
66+
}
67+
return "";
68+
}
69+
4470
/**
4571
Set a new attribute, or replace an existing one by key.
4672
@param key attribute key
@@ -50,7 +76,7 @@ public void put(String key, String value) {
5076
Attribute attr = new Attribute(key, value);
5177
put(attr);
5278
}
53-
79+
5480
/**
5581
Set a new boolean attribute, remove attribute if value is false.
5682
@param key attribute key
@@ -75,23 +101,52 @@ public void put(Attribute attribute) {
75101
}
76102

77103
/**
78-
Remove an attribute by key.
104+
Remove an attribute by key. <b>Case sensitive.</b>
79105
@param key attribute key to remove
80106
*/
81107
public void remove(String key) {
82108
Validate.notEmpty(key);
83109
if (attributes == null)
84110
return;
85-
attributes.remove(key.toLowerCase());
111+
attributes.remove(key);
112+
}
113+
114+
/**
115+
Remove an attribute by key. <b>Case insensitive.</b>
116+
@param key attribute key to remove
117+
*/
118+
public void removeIgnoreCase(String key) {
119+
Validate.notEmpty(key);
120+
if (attributes == null)
121+
return;
122+
for (String attrKey : attributes.keySet()) {
123+
if (attrKey.equalsIgnoreCase(key))
124+
attributes.remove(attrKey);
125+
}
86126
}
87127

88128
/**
89129
Tests if these attributes contain an attribute with this key.
90-
@param key key to check for
130+
@param key case-sensitive key to check for
91131
@return true if key exists, false otherwise
92132
*/
93133
public boolean hasKey(String key) {
94-
return attributes != null && attributes.containsKey(key.toLowerCase());
134+
return attributes != null && attributes.containsKey(key);
135+
}
136+
137+
/**
138+
Tests if these attributes contain an attribute with this key.
139+
@param key key to check for
140+
@return true if key exists, false otherwise
141+
*/
142+
public boolean hasKeyIgnoreCase(String key) {
143+
if (attributes == null)
144+
return false;
145+
for (String attrKey : attributes.keySet()) {
146+
if (attrKey.equalsIgnoreCase(key))
147+
return true;
148+
}
149+
return false;
95150
}
96151

97152
/**
@@ -115,7 +170,7 @@ public void addAll(Attributes incoming) {
115170
attributes = new LinkedHashMap<String, Attribute>(incoming.size());
116171
attributes.putAll(incoming.attributes);
117172
}
118-
173+
119174
public Iterator<Attribute> iterator() {
120175
return asList().iterator();
121176
}
@@ -159,18 +214,18 @@ public String html() {
159214
}
160215
return accum.toString();
161216
}
162-
217+
163218
void html(Appendable accum, Document.OutputSettings out) throws IOException {
164219
if (attributes == null)
165220
return;
166-
221+
167222
for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
168223
Attribute attribute = entry.getValue();
169224
accum.append(" ");
170225
attribute.html(accum, out);
171226
}
172227
}
173-
228+
174229
@Override
175230
public String toString() {
176231
return html();
@@ -185,9 +240,9 @@ public String toString() {
185240
public boolean equals(Object o) {
186241
if (this == o) return true;
187242
if (!(o instanceof Attributes)) return false;
188-
243+
189244
Attributes that = (Attributes) o;
190-
245+
191246
return !(attributes != null ? !attributes.equals(that.attributes) : that.attributes != null);
192247
}
193248

src/main/java/org/jsoup/nodes/Document.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.jsoup.helper.StringUtil;
44
import org.jsoup.helper.Validate;
5+
import org.jsoup.parser.ParseSettings;
56
import org.jsoup.parser.Tag;
67
import org.jsoup.select.Elements;
78

@@ -27,7 +28,7 @@ public class Document extends Element {
2728
@see #createShell
2829
*/
2930
public Document(String baseUri) {
30-
super(Tag.valueOf("#root"), baseUri);
31+
super(Tag.valueOf("#root", ParseSettings.htmlDefault), baseUri);
3132
this.location = baseUri;
3233
}
3334

@@ -103,7 +104,7 @@ public void title(String title) {
103104
@return new element
104105
*/
105106
public Element createElement(String tagName) {
106-
return new Element(Tag.valueOf(tagName), this.baseUri());
107+
return new Element(Tag.valueOf(tagName, ParseSettings.preserveCase), this.baseUri());
107108
}
108109

109110
/**

src/main/java/org/jsoup/nodes/Element.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.jsoup.helper.StringUtil;
44
import org.jsoup.helper.Validate;
5+
import org.jsoup.parser.ParseSettings;
56
import org.jsoup.parser.Parser;
67
import org.jsoup.parser.Tag;
78
import org.jsoup.select.Collector;
@@ -58,7 +59,7 @@ public Element(Tag tag, String baseUri, Attributes attributes) {
5859
* @param tag element tag
5960
* @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
6061
* string, but not null.
61-
* @see Tag#valueOf(String)
62+
* @see Tag#valueOf(String, ParseSettings)
6263
*/
6364
public Element(Tag tag, String baseUri) {
6465
this(tag, baseUri, new Attributes());
@@ -87,7 +88,7 @@ public String tagName() {
8788
*/
8889
public Element tagName(String tagName) {
8990
Validate.notEmpty(tagName, "Tag name must not be empty.");
90-
tag = Tag.valueOf(tagName);
91+
tag = Tag.valueOf(tagName, ParseSettings.preserveCase); // preserve the requested tag case
9192
return this;
9293
}
9394

@@ -116,7 +117,7 @@ public boolean isBlock() {
116117
* @return The id attribute, if present, or an empty string if not.
117118
*/
118119
public String id() {
119-
return attributes.get("id");
120+
return attributes.getIgnoreCase("id");
120121
}
121122

122123
/**
@@ -668,7 +669,7 @@ public Elements getElementsByClass(String className) {
668669
*/
669670
public Elements getElementsByAttribute(String key) {
670671
Validate.notEmpty(key);
671-
key = key.trim().toLowerCase();
672+
key = key.trim();
672673

673674
return Collector.collect(new Evaluator.Attribute(key), this);
674675
}
@@ -681,7 +682,7 @@ public Elements getElementsByAttribute(String key) {
681682
*/
682683
public Elements getElementsByAttributeStarting(String keyPrefix) {
683684
Validate.notEmpty(keyPrefix);
684-
keyPrefix = keyPrefix.trim().toLowerCase();
685+
keyPrefix = keyPrefix.trim();
685686

686687
return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
687688
}

src/main/java/org/jsoup/nodes/Node.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Get the node name of this node. Use for debugging purposes and not logic switchi
5858
public abstract String nodeName();
5959

6060
/**
61-
* Get an attribute's value by its key.
61+
* Get an attribute's value by its key. <b>Case insensitive</b>
6262
* <p>
6363
* To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
6464
* which is a shortcut to the {@link #absUrl} method.
@@ -75,8 +75,9 @@ Get the node name of this node. Use for debugging purposes and not logic switchi
7575
public String attr(String attributeKey) {
7676
Validate.notNull(attributeKey);
7777

78-
if (attributes.hasKey(attributeKey))
79-
return attributes.get(attributeKey);
78+
String val = attributes.getIgnoreCase(attributeKey);
79+
if (val.length() > 0)
80+
return val;
8081
else if (attributeKey.toLowerCase().startsWith("abs:"))
8182
return absUrl(attributeKey.substring("abs:".length()));
8283
else return "";
@@ -102,7 +103,7 @@ public Node attr(String attributeKey, String attributeValue) {
102103
}
103104

104105
/**
105-
* Test if this element has an attribute.
106+
* Test if this element has an attribute. <b>Case insensitive</b>
106107
* @param attributeKey The attribute key to check.
107108
* @return true if the attribute exists, false if not.
108109
*/
@@ -111,10 +112,10 @@ public boolean hasAttr(String attributeKey) {
111112

112113
if (attributeKey.startsWith("abs:")) {
113114
String key = attributeKey.substring("abs:".length());
114-
if (attributes.hasKey(key) && !absUrl(key).equals(""))
115+
if (attributes.hasKeyIgnoreCase(key) && !absUrl(key).equals(""))
115116
return true;
116117
}
117-
return attributes.hasKey(attributeKey);
118+
return attributes.hasKeyIgnoreCase(attributeKey);
118119
}
119120

120121
/**
@@ -124,7 +125,7 @@ public boolean hasAttr(String attributeKey) {
124125
*/
125126
public Node removeAttr(String attributeKey) {
126127
Validate.notNull(attributeKey);
127-
attributes.remove(attributeKey);
128+
attributes.removeIgnoreCase(attributeKey);
128129
return this;
129130
}
130131

0 commit comments

Comments
 (0)