|
@@ -0,0 +1,119 @@
|
|
|
+const Saxophone = require('saxophone');
|
|
|
+const file = require('../../file');
|
|
|
+const skip = require('../../skip');
|
|
|
+
|
|
|
/**
 * Names of the XML elements treated as one record by the parser.
 * Author names are collected only while inside one of these elements.
 * Frozen so this shared lookup table cannot be modified at runtime.
 */
const TAGS = Object.freeze([
    'phdthesis',
    'mastersthesis',
    'incollection',
    'book',
    'inproceedings',
    'proceedings',
    'www',
    'article'
]);

/**
 * Lazily created singleton parser; stays `null` until the module's
 * `get()` accessor is called for the first time.
 */
let instance = null;
|
|
|
+
|
|
|
/**
 * Streaming XML parser that extracts, for every record element listed in
 * `TAGS`, the text of its `<author>` children and appends each group of
 * more than one author to the destination file as one JSON array per line.
 *
 * Usage: `parser.from(source).to(dest).parse(callback)`.
 *
 * The SAX handlers are bound to the instance they were registered on, so
 * the class does not depend on any module-level singleton variable.
 *
 * NOTE(review): errors are thrown as plain strings. This is kept on purpose
 * so existing callers comparing the thrown value keep working; migrating to
 * `Error` objects should be done together with the callers.
 */
const parser = class Parser {

    /**
     * Initialises the parsing state and wires the Saxophone events to the
     * handlers of this very instance.
     */
    constructor() {
        // Authors collected for the record currently being read.
        this._auth = [];
        // True while inside a record element (one of TAGS).
        this._inTag = false;
        // Name of the record element currently open.
        this._tag = '';
        // True while inside an <author> element.
        this._isAuth = false;
        this.callback = null;
        this.source = null;
        this.dest = null;
        this.sax = new Saxophone();
        // Arrow wrappers keep `this` pointing at the parser; passing the
        // methods directly would invoke them with the emitter as `this`.
        this.sax.on('error', (err) => this._error(err));
        this.sax.on('tagopen', (tag) => this._opentag(tag));
        this.sax.on('tagclose', (tag) => this._closetag(tag));
        this.sax.on('text', (text) => this._text(text));
        this.sax.on('finish', () => this._finish());
    }

    /**
     * Starts parsing `this.source` and writing the result to `this.dest`.
     * Any existing destination file is deleted first.
     *
     * @param {?function(string)} callback - invoked with the destination
     *   path when the SAX stream emits `finish`; `null` for no callback.
     * @throws {string} when source or destination is missing, or when the
     *   destination cannot be prepared.
     */
    parse(callback = null) {
        if (this.source === null) {
            throw 'No source file';
        }
        if (this.dest === null) {
            throw 'No destination file';
        }
        if (!file.makedir(this.dest, true)) {
            throw 'Unable to create destination file';
        }
        // Records are appended, so start from a clean destination file.
        if (file.exist(this.dest) && !file.delete(this.dest)) {
            throw 'Unable to delete existing destination file';
        }
        this.callback = callback;
        // The value returned by skip.begin() is used as the byte offset at
        // which reading starts (everything before '<!DOCTYPE' is skipped,
        // presumably -- confirm against the skip helper).
        const start = skip.begin(this.source, '<!DOCTYPE');
        file.fs.createReadStream(this.source, {start}).pipe(this.sax);
    }

    /**
     * Sets the XML file to read.
     *
     * @param {string} source - path of an existing file with `xml` extension.
     * @returns {Parser} this instance, for chaining.
     * @throws {string} when the file does not exist or is not an XML file.
     */
    from(source) {
        if (!file.exist(source)) {
            throw 'File not found';
        }
        if (file.getExtension(source) !== 'xml') {
            throw 'File is not an XML';
        }
        this.source = source;
        return this;
    }

    /**
     * Sets the file the author groups are written to.
     *
     * @param {string} dest - destination path.
     * @returns {Parser} this instance, for chaining.
     * @throws {string} when `file.makedir` reports a failure.
     */
    to(dest) {
        if (!file.makedir(dest, true)) {
            throw 'Unable to create destination folder';
        }
        this.dest = dest;
        return this;
    }

    /**
     * SAX `error` handler: rethrows the parser error unchanged.
     *
     * @param {*} err - error emitted by the SAX stream.
     */
    _error(err) {
        throw err;
    }

    /**
     * SAX `tagopen` handler: starts a new record when a TAGS element opens
     * outside of any record, otherwise tracks entry into `<author>`.
     *
     * @param {{name: string}} tag - opened tag.
     */
    _opentag(tag) {
        if (!this._inTag && TAGS.includes(tag.name)) {
            this._inTag = true;
            this._tag = tag.name;
            this._auth = [];
        } else if (!this._isAuth && tag.name === 'author') {
            this._isAuth = true;
        }
    }

    /**
     * SAX `tagclose` handler: flushes the collected authors when the current
     * record closes, otherwise tracks exit from `<author>`.
     *
     * @param {{name: string}} tag - closed tag.
     */
    _closetag(tag) {
        if (this._inTag && this._tag === tag.name) {
            this._inTag = false;
            // Only keep author groups (records with more than one author).
            if (this._auth.length > 1) {
                file.append(this.dest, JSON.stringify(this._auth) + '\n');
            }
        } else if (this._isAuth && tag.name === 'author') {
            this._isAuth = false;
        }
    }

    /**
     * SAX `text` handler: records the text node as an author name when it
     * appears inside an `<author>` element.
     *
     * @param {{contents: string}} text - text node.
     */
    _text(text) {
        if (this._isAuth) {
            this._auth.push(text.contents);
        }
    }

    /**
     * SAX `finish` handler: notifies the caller, if a callback was given,
     * passing the destination path.
     */
    _finish() {
        if (this.callback !== null) {
            this.callback(this.dest);
        }
    }

};
|
|
|
+
|
|
|
/**
 * Returns the module-wide parser, creating it on the first call.
 *
 * @returns {Parser} the shared parser instance.
 */
module.exports.get = function () {
    // Fast path: the singleton already exists.
    if (instance !== null) {
        return instance;
    }
    instance = new parser();
    return instance;
};
|