XSS 跨站脚本攻击预防(文件上传)

XSS 跨站脚本攻击预防(文件上传)

注意:可以根据需求自行定制,例如改造为拦截器或 AOP 等方式实现。

package com.aspire.sslca.cms.manage.util;

import cn.hutool.extra.spring.SpringUtil;
import com.aspire.webbas.common.lang.exception.ConditionNoPassException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.autoconfigure.web.servlet.MultipartProperties;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;


/**
 * Static helpers that validate an uploaded file before it is accepted.
 *
 * <p>{@link #isValidFile(MultipartFile, Long, String...)} runs four checks in order:
 * size limit, file extension, magic number (leading bytes) and a regex-based scan of the
 * file content for XSS payloads. Any failed check is reported by throwing
 * {@link ConditionNoPassException}.
 *
 * <p>NOTE(review): the XSS scan decodes the whole file as text and applies every pattern to it,
 * binary formats included; some patterns are very broad, so false positives on binary content
 * look possible - confirm this is acceptable for the supported file types.
 */
public class FileUploadCheckUtils {

    private static final Logger log = LoggerFactory.getLogger(FileUploadCheckUtils.class);

    /** Number of leading bytes inspected by the magic-number check (the longest registered magic is 4 bytes). */
    private static final int MAGIC_PREFIX_LENGTH = 4;

    // File type (extension) constants
    public static final String JPEG = "jpeg";
    public static final String JPG = "jpg";
    public static final String PNG = "png";
    public static final String GIF = "gif";
    public static final String PDF = "pdf";
    public static final String ZIP = "zip";
    public static final String RAR = "rar";
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String XLS = "xls";
    public static final String XLSX = "xlsx";
    public static final String PPT = "ppt";
    public static final String PPTX = "pptx";

    // Magic number constants: upper-case hex of the expected leading bytes.
    // DOCX/XLSX/PPTX share the ZIP magic; DOC/XLS/PPT share one magic as well.
    public static final String JPEG_MAGIC = "FFD8FF";
    public static final String JPG_MAGIC = "FFD8FF";
    public static final String PNG_MAGIC = "89504E47";
    public static final String GIF_MAGIC = "47494638";
    public static final String PDF_MAGIC = "25504446";
    public static final String ZIP_MAGIC = "504B0304";
    public static final String RAR_MAGIC = "52617221";
    public static final String DOC_MAGIC = "D0CF11E0";
    public static final String DOCX_MAGIC = "504B0304";
    public static final String XLS_MAGIC = "D0CF11E0";
    public static final String XLSX_MAGIC = "504B0304";
    public static final String PPT_MAGIC = "D0CF11E0";
    public static final String PPTX_MAGIC = "504B0304";

    // Supported file types.
    // key-value : lower-case file extension -> magic number
    private static final Map<String, String> FILE_TYPE_MAGIC_NUMBERS = new HashMap<>();

    static {
        FILE_TYPE_MAGIC_NUMBERS.put(JPEG, JPEG_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(JPG, JPG_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(PNG, PNG_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(GIF, GIF_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(PDF, PDF_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(ZIP, ZIP_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(RAR, RAR_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(DOC, DOC_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(DOCX, DOCX_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(XLS, XLS_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(XLSX, XLSX_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(PPT, PPT_MAGIC);
        FILE_TYPE_MAGIC_NUMBERS.put(PPTX, PPTX_MAGIC);
    }

    // XSS attack patterns; content matching ANY of them is treated as malicious.
    private static final Pattern[] XSS_PATTERNS = new Pattern[]{
            // script tags
            Pattern.compile("<script>(.*?)</script>", Pattern.CASE_INSENSITIVE),
            Pattern.compile("<script(.*?)>(.*?)</script>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("</script>", Pattern.CASE_INSENSITIVE),
            Pattern.compile("<script(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // img, iframe, embed and object tags carrying a quoted src/data attribute
            Pattern.compile("<img(.*?)src[\r\n]*=[\r\n]*\\'(.*?)\\'(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<img(.*?)src[\r\n]*=[\r\n]*\\\"(.*?)\\\"(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<iframe(.*?)src[\r\n]*=[\r\n]*\\'(.*?)\\'(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<iframe(.*?)src[\r\n]*=[\r\n]*\\\"(.*?)\\\"(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<embed(.*?)src[\r\n]*=[\r\n]*\\'(.*?)\\'(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<embed(.*?)src[\r\n]*=[\r\n]*\\\"(.*?)\\\"(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<object(.*?)data[\r\n]*=[\r\n]*\\'(.*?)\\'(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<object(.*?)data[\r\n]*=[\r\n]*\\\"(.*?)\\\"(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // JavaScript event handlers
            Pattern.compile("onload(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onerror(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onmouseover(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onclick(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onfocus(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onblur(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onchange(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onsubmit(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onreset(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onselect(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onunload(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onkeydown(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onkeyup(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("onkeypress(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // other dangerous JavaScript constructs
            Pattern.compile("eval\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("expression\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("javascript:", Pattern.CASE_INSENSITIVE),
            Pattern.compile("vbscript:", Pattern.CASE_INSENSITIVE),
            Pattern.compile("data:text/html", Pattern.CASE_INSENSITIVE),
            Pattern.compile("document.cookie", Pattern.CASE_INSENSITIVE),
            Pattern.compile("document.write", Pattern.CASE_INSENSITIVE),
            Pattern.compile("window.location", Pattern.CASE_INSENSITIVE),
            Pattern.compile("window.open", Pattern.CASE_INSENSITIVE),
            Pattern.compile("innerHTML", Pattern.CASE_INSENSITIVE),
            Pattern.compile("alert\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("prompt\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("confirm\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // slash-prefixed script keywords in various spellings
            Pattern.compile("/javascript", Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("/JS", Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("/JavaScript", Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("/jscript", Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("/vbscript", Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("/ecmascript", Pattern.MULTILINE | Pattern.DOTALL),

            // CSS expressions
            Pattern.compile("style=(.*?)/\\*<style>\\*/", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("style=(.*?)expression\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("style=(.*?)behaviour\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("style=(.*?)javascript:(.*?)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // JavaScript inside HTML attributes
            Pattern.compile("href[\r\n]*=[\r\n]*\\\"(javascript:(.*?))\\\"", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("href[\r\n]*=[\r\n]*\\'(javascript:(.*?))\\'", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("src[\r\n]*=[\r\n]*\\\"(javascript:(.*?))\\\"", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("src[\r\n]*=[\r\n]*\\'(javascript:(.*?))\\'", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // XSS of the form <a onload=evil() />
            Pattern.compile("<(.*?)on(load|error|mouseover|click|focus|blur|change|submit|reset|select|unload|keydown|keyup|keypress)=(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // base64 data URI
            Pattern.compile("data:text/html;base64,", Pattern.CASE_INSENSITIVE),

            // possible path-level XSS
            Pattern.compile("/[a-zA-Z0-9\\-_]*[jJ][aA][vV][aA][sS][cC][rR][iI][pP][tT]/"),
            Pattern.compile("/[a-zA-Z0-9\\-_]*[vV][bB][sS][cC][rR][iI][pP][tT]/"),
            Pattern.compile("/[a-zA-Z0-9\\-_]*[eE][cC][mM][aA][sS][cC][rR][iI][pP][tT]/"),

            // XSS keywords followed by "(" anywhere in the content
            Pattern.compile("[aA][lL][eE][rR][tT]\\("),
            Pattern.compile("[pP][rR][oO][mM][pP][tT]\\("),
            Pattern.compile("[cC][oO][nN][fF][iI][rR][mM]\\("),
            Pattern.compile("[eE][vV][aA][lL]\\("),
            Pattern.compile("[eE][xX][pP][rR][eE][sS][sS][iI][oO][nN]\\("),

            // JavaScript embedded next to an HTML value attribute
            Pattern.compile("value[\r\n]*=[\r\n]*\\\"(.*?)\\\"[\r\n]*onchange[\r\n]*=[\r\n]*\\\"(.*?)\\\"", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("value[\r\n]*=[\r\n]*\\'(.*?)\\'[\r\n]*onchange[\r\n]*=[\r\n]*\\'(.*?)\\'", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // hexadecimal character references
            Pattern.compile("&#x[0-9a-fA-F]+;"),

            // HTML-entity-encoded script tags
            Pattern.compile("&lt;script&gt;(.*?)&lt;/script&gt;", Pattern.CASE_INSENSITIVE),
            Pattern.compile("&lt;script(.*?)&gt;(.*?)&lt;/script&gt;", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // other possible encodings (\x22 / \x27 escaped quotes)
            Pattern.compile("src[\r\n]*=[\r\n]*\\\\x22(.*?)\\\\x22", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("src[\r\n]*=[\r\n]*\\\\x27(.*?)\\\\x27", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),

            // SVG-related injection
            Pattern.compile("<svg(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<animate(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
            Pattern.compile("<set(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL)
    };

    /**
     * Validates an uploaded file: size, extension, magic number and XSS content scan, in that order.
     *
     * @param file             the file to validate
     * @param allowFileMaxSize maximum allowed size in bytes; when {@code null} the limit is taken
     *                         from the {@link MultipartProperties} bean
     * @param fileTypes        allowed file extensions (compared case-insensitively); when omitted
     *                         or empty, every type registered in this class is allowed
     * @throws ConditionNoPassException if the file is {@code null}, fails any check, or cannot be read
     */
    public static void isValidFile(MultipartFile file, Long allowFileMaxSize, String... fileTypes) {
        if (file == null) {
            throw new ConditionNoPassException("上传文件不能为空");
        }

        // Size check. The effective limit is resolved first so the message never prints "null".
        long maxSize = resolveMaxFileSize(allowFileMaxSize);
        if (!isValidFileSize(file, maxSize)) {
            throw new ConditionNoPassException("上传文件大小超过限制: " + maxSize);
        }

        // Extension check.
        String fileExt = getFileExtension(file);
        if (!isValidFileType(fileExt, fileTypes)) {
            log.error("暂不支持文件类型: {}", fileExt);
            throw new ConditionNoPassException("暂不支持文件类型: " + fileExt);
        }

        try {
            // Read the content once so both checks below see the complete file.
            byte[] content = file.getBytes();

            // Magic-number check against the file's own extension.
            if (!isValidFileMagic(content, fileExt)) {
                throw new ConditionNoPassException("文件内容和文件类型不匹配");
            }

            // XSS check.
            if (matchesXssPattern(content)) {
                throw new ConditionNoPassException("文件包含非法字符");
            }
            log.info("上传文件校验成功");
        } catch (ConditionNoPassException e) {
            // Validation rejections are already in their final form; do not re-wrap them.
            throw e;
        } catch (Exception e) {
            // Keep the contract that callers only ever see ConditionNoPassException;
            // the stack trace is preserved in the log.
            log.error("上传文件失败: {}", e.getMessage(), e);
            throw new ConditionNoPassException(e.getMessage());
        }
    }

    /**
     * Checks whether the file extension is allowed (case-insensitive).
     *
     * @param fileSuffix file extension without the dot
     * @param fileTypes  allowed extensions; {@code null} or empty means "all registered types"
     * @return {@code true} if the extension is allowed
     */
    private static boolean isValidFileType(String fileSuffix, String[] fileTypes) {
        if (fileTypes == null || fileTypes.length == 0) {
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
            return FILE_TYPE_MAGIC_NUMBERS.containsKey(fileSuffix.toLowerCase(Locale.ROOT));
        }
        for (String fileType : fileTypes) {
            if (fileSuffix.equalsIgnoreCase(fileType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves the effective size limit.
     *
     * @param allowFileMaxSize caller-supplied limit in bytes, may be {@code null}
     * @return the caller-supplied limit, or the max file size of the {@link MultipartProperties} bean
     */
    private static long resolveMaxFileSize(Long allowFileMaxSize) {
        if (allowFileMaxSize != null) {
            return allowFileMaxSize;
        }
        // Looked up on demand instead of in a static initializer, so loading this class
        // does not require the bean to be available yet.
        return SpringUtil.getBean(MultipartProperties.class).getMaxFileSize().toBytes();
    }

    /**
     * Checks the file size against the limit.
     *
     * @param file             the file
     * @param allowFileMaxSize maximum allowed size in bytes
     * @return {@code true} if the file size does not exceed the limit
     */
    private static boolean isValidFileSize(MultipartFile file, long allowFileMaxSize) {
        log.info("上传文件大小为: {}", file.getSize());
        return file.getSize() <= allowFileMaxSize;
    }

    /**
     * Checks that the leading bytes of the content match the magic number registered for the
     * file's extension. Extensions without a registered magic number never match.
     *
     * @param content    complete file content
     * @param fileSuffix file extension without the dot
     * @return {@code true} if the content starts with the expected magic number
     */
    private static boolean isValidFileMagic(byte[] content, String fileSuffix) {
        int prefixLength = Math.min(content.length, MAGIC_PREFIX_LENGTH);
        StringBuilder hex = new StringBuilder(prefixLength * 2);
        for (int i = 0; i < prefixLength; i++) {
            hex.append(String.format("%02X", content[i] & 0xFF));
        }
        String magicNumber = hex.toString();
        log.info("上传文件的魔数为: {}", magicNumber);

        String expectedMagic = FILE_TYPE_MAGIC_NUMBERS.get(fileSuffix.toLowerCase(Locale.ROOT));
        return expectedMagic != null && magicNumber.startsWith(expectedMagic);
    }

    /**
     * Returns the file extension: the text after the last dot of the original filename.
     *
     * @param file the file
     * @return the extension, or an empty string when the filename is {@code null}, has no dot,
     *         or has its only dot at position 0
     */
    private static String getFileExtension(MultipartFile file) {
        String suffix = "";
        String originalFilename = file.getOriginalFilename();
        if (originalFilename != null) {
            int lastIndex = originalFilename.lastIndexOf('.');
            if (lastIndex > 0) {
                suffix = originalFilename.substring(lastIndex + 1);
            }
        }
        log.info("上传的文件后缀为: {}", suffix);
        return suffix;
    }

    /**
     * Scans the remaining content of the stream for XSS patterns. The stream is read to its end
     * but not closed.
     *
     * @param fis stream to scan
     * @return {@code true} if any XSS pattern matches, or if the stream cannot be read
     *         (unreadable content is treated as suspicious rather than reported as clean)
     */
    public static boolean containsXSS(InputStream fis) {
        try {
            return matchesXssPattern(fis.readAllBytes());
        } catch (Exception e) {
            log.error("上传文件-流读取操作异常: {}", e.getMessage(), e);
            // Fail closed: a security check must not pass content it was unable to inspect.
            return true;
        }
    }

    /**
     * Decodes the bytes as UTF-8 text and tests them against every XSS pattern.
     *
     * @param content raw content
     * @return {@code true} if at least one pattern is found
     */
    private static boolean matchesXssPattern(byte[] content) {
        // Explicit charset so the result does not depend on the platform default.
        String text = new String(content, StandardCharsets.UTF_8);
        for (Pattern pattern : XSS_PATTERNS) {
            if (pattern.matcher(text).find()) {
                return true;
            }
        }
        return false;
    }
}

相关推荐

  1. XSS 脚本攻击预防文件

    2024-06-07 23:22:02       31 阅读
  2. 脚本攻击xss

    2024-06-07 23:22:02       62 阅读
  3. xss脚本攻击

    2024-06-07 23:22:02       57 阅读
  4. 脚本攻击XSS

    2024-06-07 23:22:02       32 阅读
  5. XSS脚本攻击

    2024-06-07 23:22:02       27 阅读
  6. XSS脚本攻击)漏洞介绍

    2024-06-07 23:22:02       56 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-06-07 23:22:02       98 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-06-07 23:22:02       106 阅读
  3. 在Django里面运行非项目文件

    2024-06-07 23:22:02       87 阅读
  4. Python语言-面向对象

    2024-06-07 23:22:02       96 阅读

热门阅读

  1. cpprestsdk https双向认证小测

    2024-06-07 23:22:02       32 阅读
  2. Qt 中QList、QListIterator 、QMutableListIterator、QMap用法

    2024-06-07 23:22:02       28 阅读
  3. gitleb详细的搭建步骤

    2024-06-07 23:22:02       34 阅读
  4. Tomcat 启动闪退问题解决方法

    2024-06-07 23:22:02       28 阅读
  5. 负载均衡加权轮询算法

    2024-06-07 23:22:02       29 阅读
  6. Nginx 实战-03-nginx 负载均衡

    2024-06-07 23:22:02       30 阅读
  7. Spark大数据 Spark运行架构与原理

    2024-06-07 23:22:02       32 阅读
  8. cesium 之 flyTo、setView、lookat

    2024-06-07 23:22:02       29 阅读
  9. Python基础总结之functools.partial

    2024-06-07 23:22:02       24 阅读
  10. LeetCode hot100-64-Y

    2024-06-07 23:22:02       31 阅读