Browse Source

WIP Extract parser

Loquicom 5 years ago
parent
commit
115834c861
1 changed files with 119 additions and 0 deletions
  1. 119 0
      src/extract/parser.js

+ 119 - 0
src/extract/parser.js

@@ -0,0 +1,119 @@
+const Saxophone = require('saxophone');
+const file = require('../../file');
+const skip = require('../../skip');
+
+// Names of the XML elements that are treated as publication records: an
+// element with one of these names opens a record whose <author> children are
+// collected. NOTE(review): the names match the record types of the DBLP XML
+// dump, which is presumably the intended input - confirm.
+const TAGS = [
+    'phdthesis',
+    'mastersthesis',
+    'incollection',
+    'book',
+    'inproceedings',
+    'proceedings',
+    'www',
+    'article'
+];
+
+// Shared Parser instance handed out by module.exports.get(); it stays null
+// until the first call to get().
+let instance = null;
+
+const parser = class Parser {
+
+    constructor() {
+        this._auth = [];
+        this._inTag = false;
+        this._tag = '';
+        this._isAuth = false;
+        this.callback = null;
+        this.source = null;
+        this.dest = null;
+        this.sax = new Saxophone();
+        this.sax.on('error', this._error);
+        this.sax.on('tagopen', this._opentag);
+        this.sax.on('tagclose', this._closetag);
+        this.sax.on('text', this._text);
+        this.sax.on('finish', this._finish);
+    }
+
+    parse(callback = null) {
+        if (this.source === null) {
+            throw 'No source file';
+        }
+        if (this.dest === null) {
+            throw 'No destination file';
+        }
+        if (!file.makedir(this.dest, true)) {
+            throw 'Unable to create destination file';
+        }
+        if (file.exist(this.dest)) {
+            if (!file.delete(this.dest)) {
+                throw 'Unable to delete existing destination file';
+            }
+        }
+        this.callback = callback;
+        file.fs.createReadStream(this.source, {start: skip.begin(this.source, '<!DOCTYPE')}).pipe(this.sax);
+    }
+
+    from(source) {
+        if (!file.exist(source)) {
+            throw 'File not found';
+        }
+        if (file.getExtension(source) !== 'xml') {
+            throw 'File is not an XML';
+        }
+        this.source = source;
+        return this;
+    }
+
+    to(dest) {
+        if (!file.makedir(dest, true)) {
+            throw 'Unable to create destination folder';
+        }
+        this.dest = dest;
+        return this;
+    }
+
+    _error(err) {
+        throw err;
+    }
+
+    _opentag(tag) {
+        if (!instance._inTag && TAGS.indexOf(tag.name) !== -1) {
+            instance._inTag = true;
+            instance._tag = tag.name;
+            instance._auth = [];
+        } else if (!instance._isAuth && tag.name === 'author') {
+            instance._isAuth = true;
+        }
+    }
+
+    _closetag(tag) {
+        if (instance._inTag && instance._tag === tag.name) {
+            instance._inTag = false;
+            // On ne garde que les groupes d'auteurs
+            if (instance._auth.length > 1) {
+                file.append(instance.dest, JSON.stringify(instance._auth) + '\n');
+            }
+        } else if (instance._isAuth && tag.name === 'author') {
+            instance._isAuth = false;
+        }
+    }
+
+    _text(text) {
+        if (instance._isAuth) {
+            instance._auth.push(text.contents);
+        }
+    }
+
+    _finish() {
+        if (instance.callback !== null) {
+            instance.callback(instance.dest);
+        }
+    }
+
+};
+
+module.exports.get = function () {
+    if (instance === null) {
+        instance = new parser();
+    }
+    return instance;
+};