Add dmp worker to leverage CPU intensive calculation to child process

This commit is contained in:
Wu Cheng-Han 2016-11-18 12:09:58 +08:00
parent 55ae64035b
commit c671d54d67
2 changed files with 221 additions and 102 deletions

View file

@ -5,13 +5,53 @@ var Sequelize = require("sequelize");
var LZString = require('lz-string');
var async = require('async');
var moment = require('moment');
var DiffMatchPatch = require('diff-match-patch');
var dmp = new DiffMatchPatch();
var childProcess = require('child_process');
var shortId = require('shortid');
// core
var config = require("../config.js");
var logger = require("../logger.js");
var dmpWorker = createDmpWorker();
var dmpCallbackCache = {};
function createDmpWorker() {
var worker = childProcess.fork("./lib/workers/dmpWorker.js", {
stdio: 'ignore'
});
if (config.debug) logger.info('dmp worker process started');
worker.on('message', function (data) {
if (!data || !data.msg || !data.cacheKey) {
return logger.error('dmp worker error: not enough data on message');
}
var cacheKey = data.cacheKey;
switch(data.msg) {
case 'error':
dmpCallbackCache[cacheKey](data.error, null);
break;
case 'check':
dmpCallbackCache[cacheKey](null, data.result);
break;
}
delete dmpCallbackCache[cacheKey];
});
worker.on('close', function (code) {
dmpWorker = null;
if (config.debug) logger.info('dmp worker process exited with code ' + code);
});
return worker;
}
function sendDmpWorker(data, callback) {
if (!dmpWorker) dmpWorker = createDmpWorker();
var cacheKey = Date.now() + '_' + shortId.generate();
dmpCallbackCache[cacheKey] = callback;
data = Object.assign(data, {
cacheKey: cacheKey
});
dmpWorker.send(data);
}
module.exports = function (sequelize, DataTypes) {
var Revision = sequelize.define("Revision", {
id: {
@ -43,19 +83,6 @@ module.exports = function (sequelize, DataTypes) {
constraints: false
});
},
createPatch: function (lastDoc, CurrDoc) {
var ms_start = (new Date()).getTime();
var diff = dmp.diff_main(lastDoc, CurrDoc);
dmp.diff_cleanupSemantic(diff);
var patch = dmp.patch_make(lastDoc, diff);
patch = dmp.patch_toText(patch);
var ms_end = (new Date()).getTime();
if (config.debug) {
logger.info(patch);
logger.info((ms_end - ms_start) + 'ms');
}
return patch;
},
getNoteRevisions: function (note, callback) {
Revision.findAll({
where: {
@ -96,67 +123,11 @@ module.exports = function (sequelize, DataTypes) {
order: '"createdAt" DESC'
}).then(function (count) {
if (count <= 0) return callback(null, null);
var ms_start = (new Date()).getTime();
var startContent = null;
var lastPatch = [];
var applyPatches = [];
var authorship = [];
if (count <= Math.round(revisions.length / 2)) {
// start from top to target
for (var i = 0; i < count; i++) {
var revision = revisions[i];
if (i == 0) {
startContent = LZString.decompressFromBase64(revision.content || revision.lastContent);
}
if (i != count - 1) {
var patch = dmp.patch_fromText(LZString.decompressFromBase64(revision.patch));
applyPatches = applyPatches.concat(patch);
}
lastPatch = revision.patch;
authorship = revision.authorship;
}
// swap DIFF_INSERT and DIFF_DELETE to achieve unpatching
for (var i = 0, l = applyPatches.length; i < l; i++) {
for (var j = 0, m = applyPatches[i].diffs.length; j < m; j++) {
var diff = applyPatches[i].diffs[j];
if (diff[0] == DiffMatchPatch.DIFF_INSERT)
diff[0] = DiffMatchPatch.DIFF_DELETE;
else if (diff[0] == DiffMatchPatch.DIFF_DELETE)
diff[0] = DiffMatchPatch.DIFF_INSERT;
}
}
} else {
// start from bottom to target
var l = revisions.length - 1;
for (var i = l; i >= count - 1; i--) {
var revision = revisions[i];
if (i == l) {
startContent = LZString.decompressFromBase64(revision.lastContent);
authorship = revision.authorship;
}
if (revision.patch) {
var patch = dmp.patch_fromText(LZString.decompressFromBase64(revision.patch));
applyPatches = applyPatches.concat(patch);
}
lastPatch = revision.patch;
authorship = revision.authorship;
}
}
try {
var finalContent = dmp.patch_apply(applyPatches, startContent)[0];
} catch (err) {
return callback(err, null);
}
var data = {
content: finalContent,
patch: dmp.patch_fromText(LZString.decompressFromBase64(lastPatch)),
authorship: authorship ? JSON.parse(LZString.decompressFromBase64(authorship)) : null
};
var ms_end = (new Date()).getTime();
if (config.debug) {
logger.info((ms_end - ms_start) + 'ms');
}
return callback(null, data);
sendDmpWorker({
msg: 'get revision',
revisions: revisions,
count: count
}, callback);
}).catch(function (err) {
return callback(err, null);
});
@ -254,37 +225,43 @@ module.exports = function (sequelize, DataTypes) {
var latestRevision = revisions[0];
var lastContent = LZString.decompressFromBase64(latestRevision.content || latestRevision.lastContent);
var content = LZString.decompressFromBase64(note.content);
var patch = Revision.createPatch(lastContent, content);
if (!patch) {
// if patch is empty (means no difference) then just update the latest revision updated time
latestRevision.changed('updatedAt', true);
latestRevision.update({
updatedAt: Date.now()
}).then(function (revision) {
Revision.finishSaveNoteRevision(note, revision, callback);
}).catch(function (err) {
return callback(err, null);
});
} else {
Revision.create({
noteId: note.id,
patch: LZString.compressToBase64(patch),
content: note.content,
length: LZString.decompressFromBase64(note.content).length,
authorship: note.authorship
}).then(function (revision) {
// clear last revision content to reduce db size
sendDmpWorker({
msg: 'create patch',
lastDoc: lastContent,
currDoc: content,
}, function (err, patch) {
if (err) logger.error('save note revision error', err);
if (!patch) {
// if patch is empty (means no difference) then just update the latest revision updated time
latestRevision.changed('updatedAt', true);
latestRevision.update({
content: null
}).then(function () {
updatedAt: Date.now()
}).then(function (revision) {
Revision.finishSaveNoteRevision(note, revision, callback);
}).catch(function (err) {
return callback(err, null);
});
}).catch(function (err) {
return callback(err, null);
});
}
} else {
Revision.create({
noteId: note.id,
patch: LZString.compressToBase64(patch),
content: note.content,
length: LZString.decompressFromBase64(note.content).length,
authorship: note.authorship
}).then(function (revision) {
// clear last revision content to reduce db size
latestRevision.update({
content: null
}).then(function () {
Revision.finishSaveNoteRevision(note, revision, callback);
}).catch(function (err) {
return callback(err, null);
});
}).catch(function (err) {
return callback(err, null);
});
}
});
}
}).catch(function (err) {
return callback(err, null);

142
lib/workers/dmpWorker.js Normal file
View file

@ -0,0 +1,142 @@
// external modules
var LZString = require('lz-string');
var DiffMatchPatch = require('diff-match-patch');
var dmp = new DiffMatchPatch();
// core
var config = require("../config.js");
var logger = require("../logger.js");
process.on('message', function(data) {
if (!data || !data.msg || !data.cacheKey) {
return logger.error('dmp worker error: not enough data');
}
switch (data.msg) {
case 'create patch':
if (!data.hasOwnProperty('lastDoc') || !data.hasOwnProperty('currDoc')) {
return logger.error('dmp worker error: not enough data on create patch');
}
try {
var patch = createPatch(data.lastDoc, data.currDoc);
process.send({
msg: 'check',
result: patch,
cacheKey: data.cacheKey
});
} catch (err) {
logger.error('dmp worker error', err);
process.send({
msg: 'error',
error: err,
cacheKey: data.cacheKey
});
}
break;
case 'get revision':
if (!data.hasOwnProperty('revisions') || !data.hasOwnProperty('count')) {
return logger.error('dmp worker error: not enough data on get revision');
}
try {
var result = getRevision(data.revisions, data.count);
process.send({
msg: 'check',
result: result,
cacheKey: data.cacheKey
});
} catch (err) {
logger.error('dmp worker error', err);
process.send({
msg: 'error',
error: err,
cacheKey: data.cacheKey
});
}
break;
}
});
function createPatch(lastDoc, currDoc) {
var ms_start = (new Date()).getTime();
var diff = dmp.diff_main(lastDoc, currDoc);
dmp.diff_cleanupSemantic(diff);
var patch = dmp.patch_make(lastDoc, diff);
patch = dmp.patch_toText(patch);
var ms_end = (new Date()).getTime();
if (config.debug) {
logger.info(patch);
logger.info((ms_end - ms_start) + 'ms');
}
return patch;
}
function getRevision(revisions, count) {
var ms_start = (new Date()).getTime();
var startContent = null;
var lastPatch = [];
var applyPatches = [];
var authorship = [];
if (count <= Math.round(revisions.length / 2)) {
// start from top to target
for (var i = 0; i < count; i++) {
var revision = revisions[i];
if (i == 0) {
startContent = LZString.decompressFromBase64(revision.content || revision.lastContent);
}
if (i != count - 1) {
var patch = dmp.patch_fromText(LZString.decompressFromBase64(revision.patch));
applyPatches = applyPatches.concat(patch);
}
lastPatch = revision.patch;
authorship = revision.authorship;
}
// swap DIFF_INSERT and DIFF_DELETE to achieve unpatching
for (var i = 0, l = applyPatches.length; i < l; i++) {
for (var j = 0, m = applyPatches[i].diffs.length; j < m; j++) {
var diff = applyPatches[i].diffs[j];
if (diff[0] == DiffMatchPatch.DIFF_INSERT)
diff[0] = DiffMatchPatch.DIFF_DELETE;
else if (diff[0] == DiffMatchPatch.DIFF_DELETE)
diff[0] = DiffMatchPatch.DIFF_INSERT;
}
}
} else {
// start from bottom to target
var l = revisions.length - 1;
for (var i = l; i >= count - 1; i--) {
var revision = revisions[i];
if (i == l) {
startContent = LZString.decompressFromBase64(revision.lastContent);
authorship = revision.authorship;
}
if (revision.patch) {
var patch = dmp.patch_fromText(LZString.decompressFromBase64(revision.patch));
applyPatches = applyPatches.concat(patch);
}
lastPatch = revision.patch;
authorship = revision.authorship;
}
}
try {
var finalContent = dmp.patch_apply(applyPatches, startContent)[0];
} catch (err) {
throw new Error(err);
}
var data = {
content: finalContent,
patch: dmp.patch_fromText(LZString.decompressFromBase64(lastPatch)),
authorship: authorship ? JSON.parse(LZString.decompressFromBase64(authorship)) : null
};
var ms_end = (new Date()).getTime();
if (config.debug) {
logger.info((ms_end - ms_start) + 'ms');
}
return data;
}
// log uncaught exception
process.on('uncaughtException', function (err) {
logger.error('An uncaught exception has occured.');
logger.error(err);
logger.error('Process will exit now.');
process.exit(1);
});