|
@@ -1,30 +1,28 @@
|
|
|
const Saxophone = require('saxophone');
|
|
|
-const file = require('../../file');
|
|
|
-const skip = require('../../skip');
|
|
|
-
|
|
|
-const TAGS = [
|
|
|
- 'phdthesis',
|
|
|
- 'mastersthesis',
|
|
|
- 'incollection',
|
|
|
- 'book',
|
|
|
- 'inproceedings',
|
|
|
- 'proceedings',
|
|
|
- 'www',
|
|
|
- 'article'
|
|
|
-];
|
|
|
+const file = require('../file');
|
|
|
+const skip = require('../skip');
|
|
|
+const attr = require('./attribute');
|
|
|
|
|
|
let instance = null;
|
|
|
|
|
|
const parser = class Parser {
|
|
|
|
|
|
constructor() {
|
|
|
- this._auth = [];
|
|
|
- this._inTag = false;
|
|
|
- this._tag = '';
|
|
|
- this._isAuth = false;
|
|
|
+ // Var de recolte de données
|
|
|
+ this._article = {};
|
|
|
+ this._proceed = {};
|
|
|
+ // Var de position dans le fichier
|
|
|
+ this._inArticle = false;
|
|
|
+ this._inProceed = false;
|
|
|
+ this._inAuth = false;
|
|
|
+ this._inTitle = false;
|
|
|
+ this._inYear = false;
|
|
|
+ this._key = '';
|
|
|
+ // Var gestion class
|
|
|
this.callback = null;
|
|
|
this.source = null;
|
|
|
- this.dest = null;
|
|
|
+ this.auth = null;
|
|
|
+ // Var parametrage sax
|
|
|
this.sax = new Saxophone();
|
|
|
this.sax.on('error', this._error);
|
|
|
this.sax.on('tagopen', this._opentag);
|
|
@@ -37,16 +35,8 @@ const parser = class Parser {
|
|
|
if (this.source === null) {
|
|
|
throw 'No source file';
|
|
|
}
|
|
|
- if (this.dest === null) {
|
|
|
- throw 'No destination file';
|
|
|
- }
|
|
|
- if (!file.makedir(this.dest, true)) {
|
|
|
- throw 'Unable to create destination file';
|
|
|
- }
|
|
|
- if (file.exist(this.dest)) {
|
|
|
- if (!file.delete(this.dest)) {
|
|
|
- throw 'Unable to delete existing destination file';
|
|
|
- }
|
|
|
+ if (this.auth === null) {
|
|
|
+ throw 'No author to search';
|
|
|
}
|
|
|
this.callback = callback;
|
|
|
file.fs.createReadStream(this.source, {start: skip.begin(this.source, '<!DOCTYPE')}).pipe(this.sax);
|
|
@@ -63,11 +53,8 @@ const parser = class Parser {
|
|
|
return this;
|
|
|
}
|
|
|
|
|
|
- to(dest) {
|
|
|
- if (!file.makedir(dest, true)) {
|
|
|
- throw 'Unable to create destination folder';
|
|
|
- }
|
|
|
- this.dest = dest;
|
|
|
+ search(auth) {
|
|
|
+ this.auth = auth;
|
|
|
return this;
|
|
|
}
|
|
|
|
|
@@ -76,36 +63,96 @@ const parser = class Parser {
|
|
|
}
|
|
|
|
|
|
_opentag(tag) {
|
|
|
- if (!instance._inTag && TAGS.indexOf(tag.name) !== -1) {
|
|
|
- instance._inTag = true;
|
|
|
- instance._tag = tag.name;
|
|
|
- instance._auth = [];
|
|
|
- } else if (!instance._isAuth && tag.name === 'author') {
|
|
|
- instance._isAuth = true;
|
|
|
+ if (!instance._inArticle && tag.name === 'article') {
|
|
|
+ const attribute = attr.parse(tag.attrs);
|
|
|
+ instance._inArticle = true;
|
|
|
+ instance._key = attribute.key;
|
|
|
+ instance._article[instance._key] = {key: instance._key, auth: []};
|
|
|
+ } else if (!instance._inProceed && tag.name === 'inproceedings') {
|
|
|
+ const attribute = attr.parse(tag.attrs);
|
|
|
+ instance._inProceed = true;
|
|
|
+ instance._key = attribute.key;
|
|
|
+ instance._proceed[instance._key] = {key: instance._key, auth: []};
|
|
|
+ } else if (!instance._inAuth && tag.name === 'author') {
|
|
|
+ instance._inAuth = true;
|
|
|
+ } else if (!instance._inTitle && tag.name === 'title') {
|
|
|
+ instance._inTitle = true;
|
|
|
+ } else if (!instance._inYear && tag.name === 'year') {
|
|
|
+ instance._inYear = true;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
_closetag(tag) {
|
|
|
- if (instance._inTag && instance._tag === tag.name) {
|
|
|
- instance._inTag = false;
|
|
|
- // On ne garde que les groupes d'auteurs
|
|
|
- if (instance._auth.length > 1) {
|
|
|
- file.append(instance.dest, JSON.stringify(instance._auth) + '\n');
|
|
|
+ if (instance._inArticle && tag.name === 'article') {
|
|
|
+ // Regarde si l'auteur recherché est dans la liste
|
|
|
+ if (instance._article[instance._key]['auth'].indexOf(instance.auth) === -1) {
|
|
|
+ delete instance._article[instance._key];
|
|
|
}
|
|
|
- } else if (instance._isAuth && tag.name === 'author') {
|
|
|
- instance._isAuth = false;
|
|
|
+ // Reset
|
|
|
+ instance._inArticle = false;
|
|
|
+ instance._key = '';
|
|
|
+ } else if (instance._inProceed && tag.name === 'inproceedings') {
|
|
|
+ // Regarde si l'auteur recherché est dans la liste
|
|
|
+ if (instance._proceed[instance._key]['auth'].indexOf(instance.auth) === -1) {
|
|
|
+ delete instance._proceed[instance._key];
|
|
|
+ }
|
|
|
+ // Reset
|
|
|
+ instance._inProceed = false;
|
|
|
+ instance._key = '';
|
|
|
+ } else if (instance._inAuth && tag.name === 'author') {
|
|
|
+ instance._inAuth = false;
|
|
|
+ } else if (instance._inTitle && tag.name === 'title') {
|
|
|
+ instance._inTitle = false;
|
|
|
+ } else if (instance._inYear && tag.name === 'year') {
|
|
|
+ instance._inYear = false;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
_text(text) {
|
|
|
- if (instance._isAuth) {
|
|
|
- instance._auth.push(text.contents);
|
|
|
+ if (instance._inArticle) {
|
|
|
+ if (instance._inAuth) {
|
|
|
+ instance._article[instance._key]['auth'].push(text.contents);
|
|
|
+ } else if (instance._inTitle) {
|
|
|
+ instance._article[instance._key]['title'] = text.contents;
|
|
|
+ } else if (instance._inYear) {
|
|
|
+ instance._article[instance._key]['year'] = text.contents;
|
|
|
+ }
|
|
|
+ } else if (instance._inProceed) {
|
|
|
+ if (instance._inAuth) {
|
|
|
+ instance._proceed[instance._key]['auth'].push(text.contents);
|
|
|
+ } else if (instance._inTitle) {
|
|
|
+ instance._proceed[instance._key]['title'] = text.contents;
|
|
|
+ } else if (instance._inYear) {
|
|
|
+ instance._proceed[instance._key]['year'] = text.contents;
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
_finish() {
|
|
|
+ // Recup resultat
|
|
|
+ const result = {
|
|
|
+ article: instance._article,
|
|
|
+ proceed: instance._proceed,
|
|
|
+ coauth: []
|
|
|
+ };
|
|
|
+ // Recup des co-autheurs
|
|
|
+ for (let key in result.article) {
|
|
|
+ result.article[key].auth.forEach(elt => {
|
|
|
+ if (elt !== instance.auth && result.coauth.indexOf(elt) === -1) {
|
|
|
+ result.coauth.push(elt);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+ for (let key in result.proceed) {
|
|
|
+ result.proceed[key].auth.forEach(elt => {
|
|
|
+ if (elt !== instance.auth && result.coauth.indexOf(elt) === -1) {
|
|
|
+ result.coauth.push(elt);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+ // Appel du callback
|
|
|
if (instance.callback !== null) {
|
|
|
- instance.callback(instance.dest);
|
|
|
+ instance.callback(result);
|
|
|
}
|
|
|
}
|
|
|
|