1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
|
23 | |
|
24 | |
|
25 | |
|
26 | |
|
27 | |
|
28 | |
|
29 | |
|
30 | |
package org.xembly; |
31 | |
|
32 | |
import lombok.EqualsAndHashCode; |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | 0 | @EqualsAndHashCode(of = "value") |
44 | |
final class Arg { |
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
private final transient String value; |
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | 616 | Arg(final String val) throws XmlContentException { |
57 | 3968 | for (final char chr : val.toCharArray()) { |
58 | 3357 | Arg.legal(chr); |
59 | |
} |
60 | 613 | this.value = val; |
61 | 613 | } |
62 | |
|
63 | |
@Override |
64 | |
public String toString() { |
65 | 45 | final String escaped = Arg.escape(this.value); |
66 | 90 | return new StringBuilder(this.value.length() + 2 + escaped.length()) |
67 | 45 | .append('"').append(escaped).append('"').toString(); |
68 | |
} |
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
|
74 | |
public String raw() { |
75 | 571 | return this.value; |
76 | |
} |
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | |
@SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops") |
85 | |
public static String unescape(final String text) |
86 | |
throws XmlContentException { |
87 | 130 | final char[] chars = text.toCharArray(); |
88 | 130 | if (chars.length < 2) { |
89 | 0 | throw new IllegalArgumentException( |
90 | |
"internal error, argument can't be shorter than 2 chars" |
91 | |
); |
92 | |
} |
93 | 130 | final int len = chars.length - 1; |
94 | 130 | final StringBuilder output = new StringBuilder(text.length()); |
95 | 844 | for (int idx = 1; idx < len; ++idx) { |
96 | 717 | if (chars[idx] == '&') { |
97 | 32 | final StringBuilder sbuf = new StringBuilder(0); |
98 | 166 | while (chars[idx] != ';') { |
99 | |
|
100 | 134 | ++idx; |
101 | 134 | if (idx == chars.length) { |
102 | 0 | throw new XmlContentException( |
103 | |
"reached EOF while parsing XML symbol" |
104 | |
); |
105 | |
} |
106 | 134 | sbuf.append(chars[idx]); |
107 | |
} |
108 | 32 | output.append(Arg.symbol(sbuf.substring(0, sbuf.length() - 1))); |
109 | 29 | } else { |
110 | 685 | output.append(chars[idx]); |
111 | |
} |
112 | |
} |
113 | 127 | return output.toString(); |
114 | |
} |
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
private static String escape(final String text) { |
122 | 45 | final StringBuilder output = new StringBuilder(text.length()); |
123 | 368 | for (final char chr : text.toCharArray()) { |
124 | 323 | if (chr < ' ') { |
125 | 7 | output.append("&#").append((int) chr).append(';'); |
126 | 316 | } else if (chr == '"') { |
127 | 3 | output.append("""); |
128 | 313 | } else if (chr == '&') { |
129 | 2 | output.append("&"); |
130 | 311 | } else if (chr == '\'') { |
131 | 3 | output.append("'"); |
132 | 308 | } else if (chr == '<') { |
133 | 3 | output.append("<"); |
134 | 305 | } else if (chr == '>') { |
135 | 3 | output.append(">"); |
136 | |
} else { |
137 | 302 | output.append(chr); |
138 | |
} |
139 | |
} |
140 | 45 | return output.toString(); |
141 | |
} |
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
private static char symbol(final String symbol) throws XmlContentException { |
150 | |
final char chr; |
151 | 32 | if ('#' == symbol.charAt(0)) { |
152 | 10 | final int num = Integer.parseInt(symbol.substring(1)); |
153 | 10 | chr = Arg.legal((char) num); |
154 | 7 | } else if ("apos".equalsIgnoreCase(symbol)) { |
155 | 5 | chr = '\''; |
156 | 17 | } else if ("quot".equalsIgnoreCase(symbol)) { |
157 | 7 | chr = '"'; |
158 | 10 | } else if ("lt".equalsIgnoreCase(symbol)) { |
159 | 3 | chr = '<'; |
160 | 7 | } else if ("gt".equalsIgnoreCase(symbol)) { |
161 | 3 | chr = '>'; |
162 | 4 | } else if ("amp".equalsIgnoreCase(symbol)) { |
163 | 4 | chr = '&'; |
164 | |
} else { |
165 | 0 | throw new XmlContentException( |
166 | 0 | String.format("unknown XML symbol &%s;", symbol) |
167 | |
); |
168 | |
} |
169 | 29 | return chr; |
170 | |
} |
171 | |
|
172 | |
|
173 | |
|
174 | |
|
175 | |
|
176 | |
|
177 | |
|
178 | |
private static char legal(final char chr) throws XmlContentException { |
179 | |
|
180 | 3378 | Arg.range(chr, 0x00, 0x08); |
181 | 3377 | Arg.range(chr, 0x0B, 0x0C); |
182 | 3373 | Arg.range(chr, 0x0E, 0x1F); |
183 | 3365 | Arg.range(chr, 0x7F, 0x84); |
184 | 3357 | Arg.range(chr, 0x86, 0x9F); |
185 | 3354 | return chr; |
186 | |
} |
187 | |
|
188 | |
|
189 | |
|
190 | |
|
191 | |
|
192 | |
|
193 | |
|
194 | |
|
195 | |
private static void range(final char chr, final int left, final int right) |
196 | |
throws XmlContentException { |
197 | 16665 | if (chr >= left && chr <= right) { |
198 | 5 | throw new XmlContentException( |
199 | 5 | String.format( |
200 | |
|
201 | |
"Character #%02X is in the restricted XML range #%02X-#%02X, see http://www.w3.org/TR/2004/REC-xml11-20040204/#charsets", |
202 | 5 | (int) chr, left, right |
203 | |
) |
204 | |
); |
205 | |
} |
206 | 16663 | } |
207 | |
|
208 | |
} |