|
@@ -1,18 +1,28 @@
|
|
|
const file = require('../file');
|
|
|
|
|
|
// Name of the document's root element; its closing tag marks the end of parsing.
const ROOT_TAG = 'dblp';

// Element names that open a publication record whose authors are collected.
// Frozen because it is a shared, read-only lookup table.
const TAGS = Object.freeze([
    'phdthesis',
    'mastersthesis',
    'incollection',
    'book',
    'inproceedings',
    'proceedings',
    'www',
    'article'
]);

// Lazily created singleton parser; the sax event handlers read their state
// from this binding instead of `this`.
let instance = null;
|
|
|
|
|
|
const parser = class Parser {
|
|
|
|
|
|
- constructor(path) {
|
|
|
- this._group = [];
|
|
|
- this._data = [];
|
|
|
- this._tagOk = false;
|
|
|
+ constructor() {
|
|
|
+ this._auth = [];
|
|
|
+ this._inTag = false;
|
|
|
+ this._tag = '';
|
|
|
this._isAuth = false;
|
|
|
this.callback = null;
|
|
|
- this.result = {};
|
|
|
- this.from(path);
|
|
|
+ this.source = null;
|
|
|
this.dest = null;
|
|
|
this.sax = require('sax').createStream(true);
|
|
|
this.sax.on('error', this._error);
|
|
@@ -22,6 +32,20 @@ const parser = class Parser {
|
|
|
}
|
|
|
|
|
|
parse(callback = null) {
|
|
|
+ if (this.source === null) {
|
|
|
+ throw 'No source file';
|
|
|
+ }
|
|
|
+ if (this.dest === null) {
|
|
|
+ throw 'No destination file';
|
|
|
+ }
|
|
|
+ if (!file.makedir(this.dest, true)) {
|
|
|
+ throw 'Unable to create destination file';
|
|
|
+ }
|
|
|
+ if (file.exist(this.dest)) {
|
|
|
+ if (!file.delete(this.dest)) {
|
|
|
+ throw 'Unable to delete existing destination file';
|
|
|
+ }
|
|
|
+ }
|
|
|
this.callback = callback;
|
|
|
file.fs.createReadStream(this.source).pipe(this.sax);
|
|
|
}
|
|
@@ -36,6 +60,7 @@ const parser = class Parser {
|
|
|
throw 'File is not an XML';
|
|
|
}
|
|
|
this.source = source;
|
|
|
+ return this;
|
|
|
}
|
|
|
|
|
|
to(dest) {
|
|
@@ -46,49 +71,48 @@ const parser = class Parser {
|
|
|
return this;
|
|
|
}
|
|
|
|
|
|
- write() {
|
|
|
- if (this.dest === null) {
|
|
|
- throw 'No destination file';
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
_error(err) {
|
|
|
throw err;
|
|
|
}
|
|
|
|
|
|
_opentag(node) {
|
|
|
- if (!this._tagOk && TAGS.indexOf(node.name) !== -1) {
|
|
|
- this._tagOk = true;
|
|
|
- this._group = [];
|
|
|
- } else if (!this._isAuth && node.name === 'author') {
|
|
|
- this._isAuth = true;
|
|
|
- }
|
|
|
+ if (!instance._inTag && TAGS.indexOf(node.name) !== -1) {
|
|
|
+ instance._inTag = true;
|
|
|
+ instance._tag = node.name;
|
|
|
+ instance._auth = [];
|
|
|
+ } else if (!instance._isAuth && node.name === 'author') {
|
|
|
+ instance._isAuth = true;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
_closetag(tag) {
|
|
|
if (tag === ROOT_TAG && this.callback !== null) {
|
|
|
- this._finish();
|
|
|
- if (this.callback !== null) {
|
|
|
- this.callback();
|
|
|
+ if (instance.callback !== null) {
|
|
|
+ instance.callback();
|
|
|
}
|
|
|
- } else if (this._tagOk && TAGS.indexOf(tag) !== -1) {
|
|
|
- this._tagOk = false;
|
|
|
- this._data.push(this._group);
|
|
|
- } else if (this._isAuth && tag === 'author') {
|
|
|
- this._isAuth = false;
|
|
|
+ process.exit();
|
|
|
+ } else if (instance._inTag && instance._tag === tag) {
|
|
|
+ instance._inTag = false;
|
|
|
+ // On ne garde que les groupes d'auteurs
|
|
|
+ if (instance._auth.length > 1) {
|
|
|
+ file.append(instance.dest, JSON.stringify(instance._auth) + '\n');
|
|
|
+ }
|
|
|
+ } else if (instance._isAuth && tag === 'author') {
|
|
|
+ instance._isAuth = false;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
_text(text) {
|
|
|
- if (this._isAuth) {
|
|
|
- this._group.push(text);
|
|
|
+ if (instance._isAuth) {
|
|
|
+ instance._auth.push(text);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- _finish() {
|
|
|
- console.log(this._data);
|
|
|
- }
|
|
|
|
|
|
};
|
|
|
|
|
|
-module.exports = parser;
|
|
|
/**
 * Returns the shared parser, creating it on first use. Every call returns
 * the same object.
 *
 * @returns {Parser} the singleton parser instance.
 */
module.exports.get = function () {
    if (instance !== null) {
        return instance;
    }
    instance = new parser();
    return instance;
};
|