PROGRAMMING/JAVA

HTML Entity 제거

OJR 2012. 11. 8. 21:18

// entity ref 처리

 

Pattern Eentity = Pattern.compile("&[^;]+;");


mat = Eentity.matcher(str);
str = mat.replaceAll("");

 

 

/**
 * Removes HTML entity references from the given text.
 *
 * <p>Three forms are recognised and replaced with the empty string:
 * decimal numeric references ({@code &#39;}), hexadecimal numeric references
 * ({@code &#x27;} or {@code &#X27;}) and named references ({@code &amp;},
 * {@code &frac12;}). A bare {@code &} that does not start a well-formed
 * reference is left untouched. Entities are removed, not decoded.
 *
 * @param str the text to clean; may be {@code null}
 * @return {@code str} with every entity reference removed, or {@code str}
 *         itself when it is {@code null} or empty
 */
public static String removeHtmlEntities(String str) {
  // Nothing to strip; also avoids a NullPointerException on null input.
  if (str == null || str.isEmpty()) {
    return str;
  }

  // Named entities may contain digits after the first letter (frac12, sup2,
  // there4), so the name part is letter-then-alphanumerics, not letters only.
  String regex = "&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);";

  return str.replaceAll(regex, "");
}


// Usage example: the input carries entity-encoded quotes and angle brackets,
// which removeHtmlEntities deletes (it does not decode them).
String myString = "This is an example with &quot;quotes&quot; and &lt;tags&gt;.";
String result = removeHtmlEntities(myString);
System.out.println(result); // Output: This is an example with quotes and tags.