package org.aliabdelaziz.arabic.util; import java.net.URLEncoder; import java.io.UnsupportedEncodingException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; import java.util.regex.Matcher; /** * Convenience methods for escaping special characters related to HTML, XML, * and regular expressions. * *
To keep you safe by default, WEB4J goes to some effort to escape * characters in your data when appropriate, such that you usually * don't need to think too much about escaping special characters. Thus, you * shouldn't need to directly use the services of this class very often. * *
For Model Objects containing free form user input, * it is highly recommended that you use {@link SafeText}, not String. * Free form user input is open to malicious use, such as * Cross Site Scripting * attacks. * Using SafeText will protect you from such attacks, by always escaping * special characters automatically in its toString() method. * *
The following WEB4J classes will automatically escape special characters * for you, when needed : *
This method exists as a defence against Cross Site Scripting (XSS) hacks. * The idea is to neutralize control characters commonly used by scripts, such that * they will not be executed by the browser. This is done by replacing the control * characters with their escaped equivalents. * See {@link hirondelle.web4j.security.SafeText} as well. * *
The following characters are replaced with corresponding * HTML character entities : *
| Character | Replacement |
|---|---|
| < | &lt; |
| > | &gt; |
| & | &amp; |
| " | &quot; |
| \t | &#009; |
| ! | &#033; |
| # | &#035; |
| $ | &#036; |
| % | &#037; |
| ' | &#039; |
| ( | &#040; |
| ) | &#041; |
| * | &#042; |
| + | &#043; |
| , | &#044; |
| - | &#045; |
| . | &#046; |
| / | &#047; |
| : | &#058; |
| ; | &#059; |
| = | &#061; |
| ? | &#063; |
| @ | &#064; |
| [ | &#091; |
| \ | &#092; |
| ] | &#093; |
| ^ | &#094; |
| _ | &#095; |
| ` | &#096; |
| { | &#123; |
| \| | &#124; |
| } | &#125; |
| ~ | &#126; |
Note that JSTL's {@code <c:out>} escapes only five of the above characters: < > & " '
 *
 * Replaces all '&' characters with '&amp;'.
*
* An ampersand character may appear in the query string of a URL.
* The ampersand character is indeed valid in a URL.
* However, URLs usually appear as an HREF attribute, and
* such attributes have the additional constraint that ampersands
* must be escaped.
*
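* The JSTL {@code <c:url>} tag URL-encodes query parameters, but it does not
* escape the ampersand character for use in an HREF attribute.
*
* Used to ensure that HTTP query strings are in proper form, by escaping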
* special characters such as spaces.
*
* It is important to note that if a query string appears in an HREF
* attribute, then there are two issues - ensuring the query string is valid HTTP
* (it is URL-encoded), and ensuring it is valid HTML (ensuring the
* ampersand is escaped).
*/
public static String forURL(String aURLFragment){
    // URL-encode the fragment using UTF-8 and hand the encoded text straight back.
    try {
        return URLEncoder.encode(aURLFragment, "UTF-8");
    }
    catch (UnsupportedEncodingException unsupported){
        // Rethrown unchecked, with the cause attached, so callers need no
        // checked-exception handling.
        throw new RuntimeException("UTF-8 not supported", unsupported);
    }
}
/**
* Escape characters for text appearing as XML data, between tags.
*
* The following characters are replaced with corresponding character entities :
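* Note that JSTL's {@code <c:out>} escapes this same set of XML characters
* (< > & " ').
*
* For JSON text, the following commonly used control characters are escaped :
* double quote, backslash, forward slash, backspace, form feed, line feed,
* carriage return and tab.
* See RFC 4627 for more information.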
*/
public static String forJSON(String aText){
    // Escapes aText for use inside a JSON string value and returns the result.
    // The quote, backslash, forward slash, backspace, form feed, line feed,
    // carriage return and tab get their two-character short escapes. Every other
    // control character below U+0020 is written as a six-character unicode
    // escape, because RFC 4627 does not allow raw control characters in a string.
    // A null aText results in a NullPointerException.
    //
    // The text is walked by index instead of with a CharacterIterator: the
    // iterator's DONE sentinel is the char U+FFFF, so input that really contains
    // that char would otherwise be cut short.
    final int length = aText.length();
    final StringBuffer result = new StringBuffer(length + 16);
    for (int idx = 0; idx < length; idx++) {
        final char character = aText.charAt(idx);
        switch (character) {
            case '"':
                result.append("\\\"");
                break;
            case '\\':
                result.append("\\\\");
                break;
            case '/':
                result.append("\\/");
                break;
            case '\b':
                result.append("\\b");
                break;
            case '\f':
                result.append("\\f");
                break;
            case '\n':
                result.append("\\n");
                break;
            case '\r':
                result.append("\\r");
                break;
            case '\t':
                result.append("\\t");
                break;
            default:
                if (character < 0x20) {
                    // remaining control characters: backslash, 'u', four hex digits
                    final String hex = Integer.toHexString(character);
                    result.append("\\u");
                    for (int pad = hex.length(); pad < 4; pad++) {
                        result.append('0');
                    }
                    result.append(hex);
                }
                else {
                    // not a special char: copy it through unchanged
                    result.append(character);
                }
                break;
        }
    }
    return result.toString();
}
/**
 Return aText with all '<' and '>' characters
 replaced by their escaped equivalents, {@code &lt;} and {@code &gt;}.
 All other characters are copied through unchanged.

 @param aText the text to process; a null value results in a NullPointerException.
 @return the text with every angle bracket replaced by its character entity.
*/
public static String toDisableTags(String aText){
    // Walk by index rather than with a CharacterIterator: the iterator's DONE
    // sentinel is the char U+FFFF, so text containing that char would be cut short.
    final int length = aText.length();
    final StringBuffer result = new StringBuffer(length + 16);
    for (int idx = 0; idx < length; idx++) {
        final char character = aText.charAt(idx);
        if (character == '<') {
            result.append("&lt;");
        }
        else if (character == '>') {
            result.append("&gt;");
        }
        else {
            // not a special char: add it to the result as is
            result.append(character);
        }
    }
    return result.toString();
}
/**
Replace characters having special meaning in regular expressions
with their escaped equivalents, preceded by a '\' character.
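The escaped characters include :
. \ ? * + & : { } [ ] ( ) ^ $

Synonym for Matcher.quoteReplacement(String).
The following methods use replacement strings which treat
'$' and '\' as special characters:
String.replaceAll(String, String), String.replaceFirst(String, String)
and Matcher.appendReplacement(StringBuffer, String).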
If replacement text can contain arbitrary characters, then you
will usually need to escape that text, to ensure special characters
are interpreted literally.
*/
public static String forReplacementString(String aInput){
    // Pure delegate: the escaping of '$' and '\' is done by this class's own
    // quoteReplacement(String).
    final String escaped = quoteReplacement(aInput);
    return escaped;
}
/**
 * Produces a version of the given string in which every backslash and every
 * dollar sign is preceded by a backslash, so that the text is taken literally
 * when used as a regular-expression replacement string.
 *
 * @param s
 *            the input string; a null value results in a NullPointerException.
 * @return the input string itself when it contains neither character,
 *         otherwise a new string with each backslash and dollar sign escaped.
 */
public static String quoteReplacement(String s) {
    // Fast path: nothing to quote, hand back the very same string.
    final boolean nothingToQuote = s.indexOf('\\') < 0 && s.indexOf('$') < 0;
    if (nothingToQuote) {
        return s;
    }

    final int length = s.length();
    // Worst case every char needs a leading backslash, hence twice the length.
    final StringBuffer quoted = new StringBuffer(length * 2);
    int pos = 0;
    while (pos < length) {
        final char current = s.charAt(pos);
        if (current == '$' || current == '\\') {
            quoted.append('\\');
        }
        quoted.append(current);
        pos++;
    }
    return quoted.toString();
}
/**
 * Disable all SCRIPT tags in aText.
 *
 * Every match of the SCRIPT pattern is replaced with the escaped text
 * {@code &lt;SCRIPT&gt;}, and every match of the SCRIPT_END pattern with
 * {@code &lt;/SCRIPT&gt;}, so that the browser shows the tag as text instead
 * of running it.
 *
 * Insensitive to case (NOTE(review): this depends on how the SCRIPT and
 * SCRIPT_END patterns are compiled; they are declared outside this method -
 * confirm).
 *
 * @param aText the text to process.
 * @return aText with opening and closing SCRIPT tags escaped.
 */
public static String forScriptTagsOnly(String aText){
    // Opening tags first, then closing tags on the intermediate result.
    // The replacement strings contain no '$' or '\', so they are taken literally.
    final String openingTagsEscaped = SCRIPT.matcher(aText).replaceAll("&lt;SCRIPT&gt;");
    final Matcher closingTags = SCRIPT_END.matcher(openingTagsEscaped);
    return closingTags.replaceAll("&lt;/SCRIPT&gt;");
}
/**
 * Appends the HTML numeric character reference for the passed character
 * code to aBuilder, for example {@code &#060;} for 60.
 *
 * The number is left-padded with zeros to at least three digits.
 *
 * @param aIdx the int value of the character.
 * @param aBuilder the buffer that receives the character reference.
 */
private static void addCharEntity(int aIdx, StringBuffer aBuilder){
    // A numeric character reference must start with "&#"; without that prefix
    // the output is just digits followed by a semicolon.
    aBuilder.append("&#");
    if( aIdx <= 9 ){
        aBuilder.append("00");
    }
    else if( aIdx <= 99 ){
        aBuilder.append("0");
    }
    // three or more digits: no padding needed
    aBuilder.append(aIdx);
    aBuilder.append(';');
}
/**
 * Builds the HTML numeric character reference for the passed character
 * code, for example {@code &#060;} for 60.
 *
 * The number is left-padded with zeros to at least three digits.
 *
 * @param aIdx the int value of the character.
 * @return the character reference: "&#", the padded number, then ";".
 */
private static String getCharEntity(int aIdx){
    final String padding;
    if( aIdx <= 9 ){
        padding = "00";
    }
    else if( aIdx <= 99 ){
        padding = "0";
    }
    else {
        // three or more digits: no padding needed
        padding = "";
    }
    // A numeric character reference must start with "&#"; without that prefix
    // the result is just digits followed by a semicolon.
    return "&#" + padding + aIdx + ";";
}
/**
 * Development helper: writes to standard output one
 * {@code CHARACTER_REPLACEMEN.put(...)} source line for each character
 * code from 0 to 99, pairing the character with its zero-padded code
 * followed by a semicolon.
 *
 * NOTE(review): the printed value carries no "&#" prefix - confirm that
 * this is what the generated map is meant to contain.
 */
private static void printEnglishAscii() {
    for (int code = 0; code < 100; code++) {
        // pad the decimal code with zeros up to four digits
        final String zeros;
        if (code > 999) {
            zeros = "";
        } else if (code > 99) {
            zeros = "0";
        } else if (code > 9) {
            zeros = "00";
        } else {
            zeros = "000";
        }
        final String value = zeros + code + ";";
        final char letter = (char) code;
        System.out.println("CHARACTER_REPLACEMEN.put(new Character('" + letter + "'), \"" + value + "\");");
    }
}
/**
* print MAP values for the arabic letters to be used as the initialization
* for CHARACTER_REPLACEMEN Map variable.
*/
private static void printArabicAscii() {
final char[] arabicLetters = {'Ã', 'Ç', 'È', 'Ê', 'Ë', 'Ì', 'Í', 'Î',
'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö',
'Ø', 'Ù', 'Ú', 'Û', 'Ý', 'Þ', 'ß', 'á',
'ã', 'ä', 'å', 'æ', 'ì', 'í'};
for(int i=0; i
*
*
*
* Character Encoding
* < <
* > >
* & &
* " "
* ' '
Character Escaped As " \" \ \\ / \/ back space \b form feed \f line feed \n carriage return \r tab \t
*/
public static String forRegex(String aRegexFragment){
    // Returns aRegexFragment with a backslash placed in front of every character
    // that is treated as special here, so the fragment matches itself literally
    // when compiled as a regular expression. The escaped characters are:
    //   . \ ? * + & : { } [ ] ( ) ^ $ |
    // The alternation bar is included: left unescaped, "a|b" would match "a" or
    // "b" rather than the literal text.
    // A null aRegexFragment results in a NullPointerException.
    //
    // The text is walked by index instead of with a CharacterIterator: the
    // iterator's DONE sentinel is the char U+FFFF, so input that really contains
    // that char would otherwise be cut short.
    final int length = aRegexFragment.length();
    final StringBuffer result = new StringBuffer(length * 2);
    for (int idx = 0; idx < length; idx++) {
        final char character = aRegexFragment.charAt(idx);
        switch (character) {
            case '.':
            case '\\':
            case '?':
            case '*':
            case '+':
            case '&':
            case ':':
            case '{':
            case '}':
            case '[':
            case ']':
            case '(':
            case ')':
            case '^':
            case '$':
            case '|':
                // special char: prefix it with a single backslash
                result.append('\\');
                break;
            default:
                // not a special char: nothing to prefix
                break;
        }
        result.append(character);
    }
    return result.toString();
}
/**
Escape '$' and '\' characters in replacement strings.