Chunked Upload of Large Files

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document</title>
</head>

<body>
    <input type="file" class="file">
</body>
<script src="./spark-md5.min.js"></script>
<script src="./spark-md5Test.js"></script>
<script type="module">

    const inpFile = document.querySelector('.file')
    inpFile.onchange = async (e) => {
        const file = e.target.files[0]
        console.time('cutFile')
        await cutFile(file)
        console.timeEnd('cutFile')
        console.log(file)
    }
    async function cutFile(file) {
        const CHUNK_SIZE = 1 * 1024 * 1024 // 1 MiB per chunk
        const chunkList = []
        const chunkCount = Math.ceil(file.size / CHUNK_SIZE)
        // Sequential version: await each chunk's read-and-hash before starting the next
        for (let i = 0; i < chunkCount; i++) {
            const c = await createChunk(file, i, CHUNK_SIZE)
            chunkList.push(c)
        }
        console.log(chunkList)
        console.log(chunkCount)
    }

</script>

</html>

spark-md5Test.js:

// Side-effect import: spark-md5.min.js is a UMD-style script that attaches
// SparkMD5 to the global object (see "Problems encountered" below)
import './spark-md5.min.js'

export function createChunk(file, index, chunkSize) {
    return new Promise(resolve => {
        const start = index * chunkSize
        const end = start + chunkSize
        // slice() is cheap: it only creates a view, no data is read yet
        const blob = file.slice(start, end)
        const spark = new SparkMD5.ArrayBuffer()
        const fileReader = new FileReader()
        fileReader.onload = (e) => {
            spark.append(e.target.result)
            resolve({
                start,
                end,
                index,
                hash: spark.end(), // MD5 of this chunk
                blob,
            })
        }
        fileReader.readAsArrayBuffer(blob)
    })
}
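
For reference, each resolved chunk object carries everything a later upload request needs. For a 3 MiB file the collected list looks roughly like this (the hash values are invented for illustration):

    [
        { start: 0,       end: 1048576, index: 0, hash: '0f6e…', blob: Blob },
        { start: 1048576, end: 2097152, index: 1, hash: 'a3c1…', blob: Blob },
        { start: 2097152, end: 3145728, index: 2, hash: '77d2…', blob: Blob }
    ]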

Chunking the file directly like this turned out to be slow:

[screenshot: console.time('cutFile') output, about 16 s]

So the chunking was changed to fire all the createChunk calls concurrently instead of awaiting them one by one:

    async function cutFile(file) {
        const CHUNK_SIZE = 1 * 1024 * 1024
        const chunkList = []
        const chunkCount = Math.ceil(file.size / CHUNK_SIZE)
        // Start every read-and-hash immediately; collect the promises
        for (let i = 0; i < chunkCount; i++) {
            chunkList.push(createChunk(file, i, CHUNK_SIZE))
        }
        // Wait for all of them, then log the resolved chunks (not the promises)
        const chunks = await Promise.all(chunkList)
        console.log(chunks)
        console.log(chunkCount)
    }

[screenshot: console.time('cutFile') output, about 11 s]

Hmm... 3 seconds faster. I suppose that still counts as an optimization.
So the bottleneck is not the slicing itself. Some digging revealed that SparkMD5's hashing is a CPU-bound task, and promise "concurrency" does nothing for CPU-bound work: all the hashing still runs on the single main thread.
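
To see why, here is a minimal standalone sketch (not part of the upload code) showing that two CPU-bound promises still run back to back:

    function busy(ms) {
        return new Promise(resolve => {
            const t = Date.now()
            while (Date.now() - t < ms) { /* burn CPU */ }
            resolve()
        })
    }
    console.time('both')
    await Promise.all([busy(500), busy(500)])
    console.timeEnd('both') // ~1000 ms, not ~500 ms: one thread does all the work

Promises only overlap while waiting (e.g. on the FileReader); the synchronous spark.append()/spark.end() work cannot. The fix is real multi-threading with Web Workers: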

    function cutFile(file) {
        return new Promise(resolve => {
            const CHUNK_SIZE = 1 * 1024 * 1024
            // One worker per logical CPU core, falling back to 4
            const THREAD_COUNT = navigator.hardwareConcurrency || 4
            const result = []
            let finished = 0
            const chunkCount = Math.ceil(file.size / CHUNK_SIZE)
            const threadChunkCount = Math.ceil(chunkCount / THREAD_COUNT)

            for (let i = 0; i < THREAD_COUNT; i++) {
                const worker = new Worker('./work.js', {
                    type: 'module'
                })
                // Each worker gets a contiguous range of chunk indices
                const start = i * threadChunkCount
                const end = Math.min((i + 1) * threadChunkCount, chunkCount)
                worker.postMessage({
                    file,
                    start,
                    end,
                    CHUNK_SIZE,
                })
                worker.onmessage = e => {
                    // Store by worker index so the chunks stay in file order,
                    // no matter which worker finishes first
                    result[i] = e.data
                    worker.terminate()
                    finished++
                    if (finished === THREAD_COUNT) {
                        resolve(result.flat())
                    }
                }
            }
        })
    }

work.js:

import { createChunk } from "./spark-md5Test.js"

onmessage = async (e) => {
    const { file, start, end, CHUNK_SIZE } = e.data
    const result = []
    // Kick off every chunk in this worker's range, then await them together
    for (let i = start; i < end; i++) {
        result.push(createChunk(file, i, CHUNK_SIZE))
    }
    const chunks = await Promise.all(result)
    postMessage(chunks)
}

The effect is dramatic:

[screenshot: console.time('cutFile') output, about 2 s]

From 16 seconds to 11 seconds to 2 seconds. Perfect!
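
Since the post is about uploading, here is a minimal sketch of what happens with the chunk list afterwards, assuming a hypothetical /upload/chunk endpoint (the endpoint and all field names are invented for illustration):

    async function uploadChunks(chunks) {
        for (const chunk of chunks) {
            const form = new FormData()
            form.append('index', chunk.index)
            form.append('hash', chunk.hash) // lets the server verify or deduplicate
            form.append('total', chunks.length)
            form.append('blob', chunk.blob)
            await fetch('/upload/chunk', { method: 'POST', body: form })
        }
    }

    inpFile.onchange = async (e) => {
        const chunks = await cutFile(e.target.files[0])
        await uploadChunks(chunks)
    }

A production version would add retries and a bounded number of parallel requests, but that is outside what was measured here.
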
Problems encountered:

1. I didn't fully understand how importing and exporting external js files works in plain HTML. To import one js file from another, the entry script tag in the HTML must have type="module". When importing spark-md5.min.js, VS Code's autocomplete suggested

    import SparkMD5 from 'spark-md5'

which has two problems: first, in the browser the specifier must be a path, not a bare package name; second, the file doesn't use ES module exports at all: internally it is an immediately invoked function expression (a UMD-style wrapper; see the sketch after this list). Searching online turned up nothing similar, so I mulled it over for a while and it suddenly hit me:

    import './spark-md5.min.js'

A side-effect import like this runs the file, letting it register the global SparkMD5, and that solved the problem.

2. I asked AI to search for the cause, and it kept feeding me wrong leads. So when you hit a problem, think it through yourself instead of just dumping everything on AI.
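
For reference, a UMD wrapper has roughly this shape (a simplified illustration, not the actual spark-md5 source):

    (function (factory) {
        if (typeof exports === 'object') {
            module.exports = factory()      // CommonJS (Node)
        } else if (typeof define === 'function' && define.amd) {
            define(factory)                 // AMD (RequireJS)
        } else {
            self.SparkMD5 = factory()       // plain browser/worker: global
        }
    })(function () {
        function SparkMD5() { /* ...hashing implementation... */ }
        return SparkMD5
    })

There is no export statement anywhere, which is why import SparkMD5 from './spark-md5.min.js' fails with "no default export"; the bare side-effect import works because simply running the file defines the global.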
