使用文件的一部分生成 JavaScript 文件哈希值

2021-12-27 00:00:00 hash md5 php javascript cryptojs

我正在使用 JavaScript 为唯一文件值生成文件哈希值.请检查以下代码以了解运行良好的哈希生成机制.

但是,我在为大文件生成哈希值时遇到了问题,因为在客户端浏览器崩溃了.

直到 30MB,HASHING 运行良好,但如果我尝试上传大于该值的文件,系统就会崩溃.

我的问题是:

<块引用>

  1. 我能否只为文件的一部分生成 HASH 值,而不是读取整个大文件并导致崩溃?如果可以,能否告诉我如何使用 'FileReader' 做到这一点?

  2. 我能否指定任意数量的字节(例如文件的前 2000 个字符)来生成 HASH 值,而不是对整个大文件进行哈希?

我希望上述两种解决方案适用于较大和较小的文件.还有其他选择吗?

我的 Fiddle 演示

解决方案

  1. 我能否只为文件的一部分生成 HASH 值,而不是读取整个大文件并导致崩溃?如果可以,能否告诉我如何使用 'FileReader' 做到这一点?

是的,你可以这样做,它被称为渐进式哈希.

// 渐进式(增量)哈希:创建一个哈希器对象,分块喂入数据,
// 最后调用 finalize() 得到整体摘要.
var md5 = CryptoJS.algo.MD5.create();
md5.update("文件第1部分");
md5.update("文件第2部分");
md5.update("文件第 3 部分");
var hash = md5.finalize();

<块引用>

  1. 我可以指定任意数量的字节(例如文件的 2000 个字符)来生成 HASH 值,然后为大文件生成.

有一篇 HTML5Rocks 文章介绍了如何使用 File.slice 将切片后的文件传给 FileReader:

// 只切出文件的 [startingByte, endindByte) 这一段(Blob 视图,不复制其余部分),
// 然后仅把这一段读成 ArrayBuffer.
var blob = file.slice(startingByte, endindByte);
reader.readAsArrayBuffer(blob);

完整解决方案

我把两者结合了起来.棘手的部分是同步文件读取,因为 FileReader.readAsArrayBuffer() 是异步的.我写了一个小的 series 函数,它模仿了 async.js 中 series 函数的功能.哈希必须一块接一块地进行,因为没有办法获取 CryptoJS 哈希函数的内部状态.

此外,CryptoJS 不了解 ArrayBuffer 是什么,因此必须将其转换为其原生数据表示形式,即所谓的 WordArray:

// Convert an ArrayBuffer into CryptoJS's native WordArray type: pack the
// bytes big-endian into 32-bit words, and pass the true byte length so a
// trailing partial word is not over-counted.
// FIX: machine translation had corrupted the `return` keyword into Chinese
// ("返回"), which made this snippet invalid JavaScript.
function arrayBufferToWordArray(ab) {
  var i8a = new Uint8Array(ab);
  var a = [];
  for (var i = 0; i < i8a.length; i += 4) {
    // Reads past the end yield undefined, which the shift/or operators
    // coerce to 0 — harmless padding for the final partial word.
    a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
  }
  return CryptoJS.lib.WordArray.create(a, i8a.length);
}

另一件事是散列是一种同步操作,其中没有 yield 可以在其他地方继续执行.因此,浏览器将冻结,因为 JavaScript 是单线程的.解决方案是使用 Web Workers 将哈希卸载到不同的线程,以便 UI 线程保持响应.
Web Worker 的构造函数需要一个脚本文件,因此我使用了 Rob W 的这个解决方案来实现内联脚本.

// FIX: machine translation had corrupted this script (keywords, identifiers
// and even string literals rendered in Chinese, e.g. 函数/如果/别的/窗口.),
// leaving it syntactically invalid. Restored working JavaScript below.

// Run async tasks strictly one after another (modeled on async.js `series`).
function series(tasks, done){
    if(!tasks || tasks.length === 0) {
        done();
    } else {
        tasks[0](function(){
            series(tasks.slice(1), done);
        });
    }
}

// Message handler for the hashing worker. It is serialized with toString()
// into the worker script below, so it must be fully self-contained — hence
// the nested copy of arrayBufferToWordArray.
function webWorkerOnMessage(e){
    if (e.data.type === "create") {
        md5 = CryptoJS.algo.MD5.create();
        postMessage({type: "create"});
    } else if (e.data.type === "update") {
        function arrayBufferToWordArray(ab) {
            var i8a = new Uint8Array(ab);
            var a = [];
            for (var i = 0; i < i8a.length; i += 4) {
                a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
            }
            return CryptoJS.lib.WordArray.create(a, i8a.length);
        }
        md5.update(arrayBufferToWordArray(e.data.chunk));
        postMessage({type: "update"});
    } else if (e.data.type === "finish") {
        postMessage({type: "finish", hash: ""+md5.finalize()});
    }
}

// URL.createObjectURL — alias for older WebKit browsers.
window.URL = window.URL || window.webkitURL;

// Worker source assembled as a string: load CryptoJS's MD5 rollup, declare
// the shared `md5` state, and install the stringified message handler.
var response =
    "importScripts('https://cdn.rawgit.com/CryptoStore/crypto-js/3.1.2/build/rollups/md5.js');"+
    "var md5;"+
    "self.onmessage = "+webWorkerOnMessage.toString();

// Wrap the script text in a Blob so it can be served to the Worker via an
// object URL (inline-worker technique).
var blob;
try {
    blob = new Blob([response], {type: 'application/javascript'});
} catch (e) { // Backwards-compatibility
    window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
    blob = new BlobBuilder();
    blob.append(response);
    blob = blob.getBlob();
}
var worker = new Worker(URL.createObjectURL(blob));

// NOTE(review): `evt` is not defined at this scope — this code presumably
// runs inside a file-input 'change' handler; confirm against the demo.
var files = evt.target.files; // FileList object
var chunksize = 1000000; // the chunk size doesn't make a difference
var i = 0,
    f = files[i],
    chunks = Math.ceil(f.size / chunksize),
    chunkTasks = [],
    startTime = (new Date()).getTime();
// The worker replies once per request, so onmessage is reassigned at each
// stage of the protocol: create -> update (per chunk) -> finish.
worker.onmessage = function(e) {
    // create callback
    for(var j = 0; j < chunks; j++){
        (function(j, f){ // IIFE captures j and f per iteration (var has no block scope)
            chunkTasks.push(function(next){
                var blob = f.slice(j * chunksize, Math.min((j+1) * chunksize, f.size));
                var reader = new FileReader();
                reader.onload = function(e) {
                    var chunk = e.target.result;
                    worker.onmessage = function(e) {
                        // update callback
                        document.getElementById('num').innerHTML = ""+(j+1)+"/"+chunks;
                        next();
                    };
                    worker.postMessage({type: "update", chunk: chunk});
                };
                reader.readAsArrayBuffer(blob);
            });
        })(j, f);
    }
    // Run the chunk tasks strictly in order, then ask for the digest.
    series(chunkTasks, function(){
        var elem = document.getElementById("hashValueSplit");
        var telem = document.getElementById("time");
        worker.onmessage = function(e) {
            // finish callback
            elem.value = e.data.hash;
            telem.innerHTML = "in " + Math.ceil(((new Date()).getTime() - startTime) / 1000) + " seconds";
        };
        worker.postMessage({type: "finish"});
    });

    // blocking way ahead...
    // Optional comparison path: hash the whole file at once on the UI thread
    // (the approach that freezes the browser on big files).
    if (document.getElementById("singleHash").checked) {
        var reader = new FileReader();
        // Closure to capture the file information.
        reader.onloadend = (function(theFile) {
            function arrayBufferToWordArray(ab) {
                var i8a = new Uint8Array(ab);
                var a = [];
                for (var i = 0; i < i8a.length; i += 4) {
                    a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
                }
                return CryptoJS.lib.WordArray.create(a, i8a.length);
            }
            return function(e) {
                var test = e.target.result;
                var hash = CryptoJS.MD5(arrayBufferToWordArray(test));
                //var hash = "none";
                var elem = document.getElementById("hashValue");
                elem.value = hash;
            };
        })(f);
        reader.readAsArrayBuffer(f);
    }
};
worker.postMessage({type: "create"});

DEMO 似乎适用于大文件,但需要相当多的时间.也许这可以使用更快的 MD5 实现来改进.散列一个 3 GB 的文件大约需要 23 分钟.

我的这个答案展示了一个不使用 Web Worker 的 SHA-256 示例.

I am working with JavaScript to generate File HASH VALUE for unique file values. Kindly check the below code for the Hash Generation Mechanism Which works good.

<script type="text/javascript">
// Reference: https://code.google.com/p/crypto-js/#MD5
// Compute an MD5 hash of the selected file(s) and show it in #hashValue.
// NOTE(review): this is the problem code from the question — it reads the
// ENTIRE file into memory at once, which is what crashes the browser for
// files larger than ~30 MB.
function handleFileSelect(evt) 
{   
    var files = evt.target.files; // FileList object
    // Hash every selected file; each iteration gets its own FileReader.
    // NOTE(review): all iterations write to the same #hashValue element,
    // so with multiple files only the last-finished hash is visible.
    for (var i = 0, f; f = files[i]; i++) 
    {
        var reader = new FileReader();
        // Closure to capture the file information.
        reader.onload = (function(theFile) 
        {
            return function(e) 
            {
                // NOTE(review): 'span' is created but never used or attached.
                var span = document.createElement('span');
                var test = e.target.result;                 
                //var hash = hex_md5(test);
                // NOTE(review): hashing a binary *string* makes CryptoJS
                // treat the data as text — verify this matches server-side
                // hashes for non-ASCII/binary files.
                var hash = CryptoJS.MD5(test);
                var elem = document.getElementById("hashValue");
                elem.value = hash;
            };
        })(f);
        // Read in the whole file as a binary string.
        // NOTE(review): readAsBinaryString is deprecated and buffers the
        // entire file in memory — the answer below slices the file and
        // uses readAsArrayBuffer instead.
        reader.readAsBinaryString(f);
    }
}
// Re-hash whenever the <input id="videoupload"> selection changes.
document.getElementById('videoupload').addEventListener('change', handleFileSelect, false);
</script>

However I am facing problem when generating HASH VALUE for large files as in client side the browser Crashed.

Up-till 30MB the HASHING works well but if i try to upload larger than that the system crashes.

My Question is:

  1. Can I generate a HASH value for part of a file rather than reading the LARGE file and getting crashes? If yes, can I know how to do that with 'FileReader'?

  2. Can I specify an amount of bytes, such as the first 2000 characters of a file, to generate the HASH value instead of hashing the whole large file?

I hope the above two solution will work for larger and small files. Is there any other options?

My Fiddle Demo

解决方案

  1. Can I generate a HASH value for part of a file rather than reading the LARGE file and getting crashes? If yes, can I know how to do that with 'FileReader'?

Yes, you can do that and it is called Progressive Hashing.

// Progressive (incremental) hashing: create one hasher object and feed it
// the file piece by piece instead of hashing everything in a single call.
var md5 = CryptoJS.algo.MD5.create();

// Each update() absorbs the next chunk; the order of chunks matters.
var parts = ["file part 1", "file part 2", "file part 3"];
for (var p = 0; p < parts.length; p++) {
    md5.update(parts[p]);
}

// finalize() closes the stream and returns the digest.
var hash = md5.finalize();

  1. Can I specify any amount of Byte such as 2000 Character of a file to generate HASH Value then generating for large files.

There's an HTML5Rocks article on how one can use File.slice to pass a sliced file to the FileReader:

// Cut out just [startingByte, endingByte) of the File — File.slice returns
// a lightweight Blob view without copying the rest — then read only that
// slice as an ArrayBuffer.
// FIX: corrected the misspelled placeholder name 'endindByte'.
var blob = file.slice(startingByte, endingByte);
reader.readAsArrayBuffer(blob);

Full solution

I have combined both. The tricky part was to synchronize the file reading, because FileReader.readAsArrayBuffer() is asynchronous. I've written a small series function which is modeled after the series function of async.js. It has to be done one after the other, because there is is no way to get to the internal state of the hashing function of CryptoJS.

Additionally, CryptoJS doesn't understand what an ArrayBuffer is, so it has to be converted to its native data representation, which is the so-called WordArray:

// Convert an ArrayBuffer into the WordArray type CryptoJS operates on:
// bytes are packed big-endian into 32-bit words, and the second argument
// to WordArray.create records the true byte length so a trailing partial
// word is not over-counted.
function arrayBufferToWordArray(ab) {
  var bytes = new Uint8Array(ab);
  var words = [];
  for (var offset = 0; offset < bytes.length; offset += 4) {
    var word = 0;
    for (var k = 0; k < 4; k++) {
      // Out-of-range reads yield undefined, which `| 0` coerces to 0 —
      // exactly the padding the final partial word needs.
      word = (word << 8) | (bytes[offset + k] | 0);
    }
    words.push(word);
  }
  return CryptoJS.lib.WordArray.create(words, bytes.length);
}

The other thing is that hashing is a synchronous operation where there is no yield to continue execution elsewhere. Because of this, the browser will freeze since JavaScript is single threaded. The solution is to use Web Workers to off-load the hashing to a different thread so that the UI thread keeps responsive.
Web workers expect the script file in their constructors, so I used this solution by Rob W to have an inline script.

// Run async-style tasks strictly one after another (like async.js `series`).
// Each task is a function taking a `next` callback; `done` fires after the
// last task completes, or immediately for an empty/missing task list.
function series(tasks, done){
    if (!tasks || tasks.length === 0) {
        done();
        return;
    }
    var head = tasks[0];
    var rest = tasks.slice(1);
    head(function(){
        series(rest, done);
    });
}

// Message handler for the hashing Web Worker. This function is serialized
// with toString() and embedded into the worker blob, so it must be fully
// self-contained — hence the nested copy of arrayBufferToWordArray.
// Protocol: "create" -> new hasher, "update" -> absorb one chunk,
// "finish" -> post the final digest. `md5` and `postMessage` resolve in
// the worker's global scope at run time.
function webWorkerOnMessage(e){
    if (e.data.type === "create") {
        // Fresh incremental MD5 state for a new file.
        md5 = CryptoJS.algo.MD5.create();
        postMessage({type: "create"});
    } else if (e.data.type === "update") {
        // Duplicated on purpose: the module-level copy of this helper is
        // not visible once this function is stringified into the worker.
        function arrayBufferToWordArray(ab) {
            var i8a = new Uint8Array(ab);
            var a = [];
            for (var i = 0; i < i8a.length; i += 4) {
                // Pack 4 bytes big-endian into one 32-bit word; reads past
                // the end coerce undefined to 0 (harmless padding).
                a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
            }
            return CryptoJS.lib.WordArray.create(a, i8a.length);
        }
        md5.update(arrayBufferToWordArray(e.data.chunk));
        postMessage({type: "update"});
    } else if (e.data.type === "finish") {
        // ""+ coerces the finalized WordArray digest to its string form.
        postMessage({type: "finish", hash: ""+md5.finalize()});
    }
}

// URL.createObjectURL — alias for older WebKit browsers.
window.URL = window.URL || window.webkitURL;

// "Server response", used in all examples
// Worker source assembled as a string: load CryptoJS's MD5 rollup, declare
// the shared `md5` state, and install the stringified message handler.
var response = 
    "importScripts('https://cdn.rawgit.com/CryptoStore/crypto-js/3.1.2/build/rollups/md5.js');"+
    "var md5;"+
    "self.onmessage = "+webWorkerOnMessage.toString();

// Wrap the script text in a Blob so it can be handed to the Worker via an
// object URL (inline-worker technique, no separate script file needed).
var blob;
try {
    blob = new Blob([response], {type: 'application/javascript'});
} catch (e) { // Backwards-compatibility
    window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
    blob = new BlobBuilder();
    blob.append(response);
    blob = blob.getBlob();
}
var worker = new Worker(URL.createObjectURL(blob));


// NOTE(review): `evt` is not defined at this scope — this code presumably
// runs inside a file-input 'change' handler; confirm against the demo.
var files = evt.target.files; // FileList object    
var chunksize = 1000000; // the chunk size doesn't make a difference
var i = 0, 
    f = files[i],
    chunks = Math.ceil(f.size / chunksize),
    chunkTasks = [],
    startTime = (new Date()).getTime();
// The worker replies once per request, so onmessage is reassigned at each
// stage of the protocol: create -> update (per chunk) -> finish.
worker.onmessage = function(e) {
    // create callback

    // Build one task per chunk; each task slices the file, reads the slice
    // asynchronously, posts it to the worker, and calls next() only after
    // the worker acknowledges the update.
    for(var j = 0; j < chunks; j++){
        // IIFE captures j and f per iteration (var has no block scope).
        (function(j, f){
            chunkTasks.push(function(next){
                var blob = f.slice(j * chunksize, Math.min((j+1) * chunksize, f.size));
                var reader = new FileReader();

                reader.onload = function(e) {
                    var chunk = e.target.result;
                    worker.onmessage = function(e) {
                        // update callback
                        document.getElementById('num').innerHTML = ""+(j+1)+"/"+chunks;
                        next();
                    };
                    worker.postMessage({type: "update", chunk: chunk});
                };
                reader.readAsArrayBuffer(blob);
            });
        })(j, f);
    }
    // Run the chunk tasks strictly in order, then ask for the digest.
    series(chunkTasks, function(){
        var elem = document.getElementById("hashValueSplit");
        var telem = document.getElementById("time");
        worker.onmessage = function(e) {
            // finish callback
            elem.value = e.data.hash;
            telem.innerHTML = "in " + Math.ceil(((new Date()).getTime() - startTime) / 1000) + " seconds";
        };
        worker.postMessage({type: "finish"});
    });

    // blocking way ahead...
    // Optional comparison path: hash the whole file at once on the UI thread
    // (the approach that freezes the browser on big files).
    if (document.getElementById("singleHash").checked) {
        var reader = new FileReader();

        // Closure to capture the file information.
        reader.onloadend = (function(theFile) {
            // Same ArrayBuffer -> WordArray conversion used in the worker.
            function arrayBufferToWordArray(ab) {
                var i8a = new Uint8Array(ab);
                var a = [];
                for (var i = 0; i < i8a.length; i += 4) {
                    a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
                }
                return CryptoJS.lib.WordArray.create(a, i8a.length);
            }
            return function(e) {
                var test = e.target.result;
                var hash = CryptoJS.MD5(arrayBufferToWordArray(test));
                //var hash = "none";
                var elem = document.getElementById("hashValue");
                elem.value = hash;
            };
        })(f);

        // NOTE(review): stale comment in the original ("as a data URL") —
        // this reads the file as an ArrayBuffer.
        reader.readAsArrayBuffer(f);
    }
};
// Kick off the protocol; the "create" reply triggers the handler above.
worker.postMessage({type: "create"});

DEMO seems to work for big files, but it takes quite a lot of time. Maybe this can be improved using a faster MD5 implementation. It took around 23 minutes to hash a 3 GB file.

This answer of mine shows an example without webworkers for SHA-256.

相关文章