Search
Takes an NCBI database string and an optional search term and returns a stream of the objects found:
ncbi.search('sra', 'solenopsis').on('data', console.log)
=> { uid: '280116',
expxml: {"Summary":{"Title":"Single Solenopsis invicta male","Platform":{"_":"ILLUMINA", [...],
runs: {"Run":[{"acc":"SRR620577","total_spots":"23699662","total_bases":"4787331724", [...],
extlinks: ' ',
createdate: '2013/02/07',
updatedate: '2012/11/28' }
=> { uid: '280243',
expxml: {"Summary":{"Title":"Illumina small-insert paired end","Platform":{"_":"ILLUMINA", [...],
runs: {"Run":[{"acc":"SRR621118","total_spots":"343209818","total_bases":"34320981800", [...],
extlinks: ' ',
createdate: '2013/02/07',
updatedate: '2012/11/28' }
=> [...]
Arguments can be passed as an object instead:
ncbi.search({ db: 'sra', term: 'solenopsis' })
.on('data', console.log)
Advanced options can be passed using the previous syntax:
var options = {
db: 'assembly',
term: 'human',
limit: 500,
throughput: 100
}
The search term can also be passed with write:
var search = ncbi.search('sra').on('data', console.log)
search.write('solenopsis')
Or piped, for example, from a file:
var split = require('split')
fs.createReadStream('searchTerms.txt')
.pipe(split())
.pipe(search)
// Searches an NCBI database and streams the records found as objects.
//
// db   - Database name (string), or an options object carrying `db`, `term`
//        and optionally `limit` / `throughput`.
// term - Optional search term. A function given here is treated as the
//        callback, so both search(db, term, cb) and search(opts, cb) work.
// cb   - Optional callback. When present the pipeline is piped into
//        concat(cb) and nothing is returned (presumably cb then receives all
//        results at once; concat is defined outside this block).
//
// Returns the pipeline stream when no callback is given. Search terms can be
// written to it (or piped into it) when none was passed up front.
ncbi.search = function (db, term, cb) {
  insight.track('ncbi', 'search')

  // Detect the callback before building the options: otherwise
  // search('sra', cb) would send the callback itself down the pipeline as if
  // it were the search term.
  if (typeof term === 'function') {
    cb = term
    term = undefined
  }
  var opts = typeof db === 'string' ? { db: db, term: term } : db

  // Stages, in order: term -> esearch URL -> request -> one URL per result
  // page -> request -> esummary URLs for the returned ids -> fetch + parse.
  // Later stages read `obj.url` and `obj.body`, so requestStream is assumed
  // to emit objects of that shape (it is defined outside this block).
  var stream = pumpify.obj(
    createAPISearchUrl(opts.db, opts.term),
    requestStream(true),
    createAPIPaginateURL(opts),
    requestStream(true),
    createAPIDataUrl(),
    fetchByID(opts.db)
  )

  // A term given up front is the only input, so the writable side is closed
  // right after it.
  if (opts.term) { stream.write(opts.term); stream.end() }
  if (cb) { stream.pipe(concat(cb)) } else { return stream }
}
// First pipeline stage: an object stream that turns every search term written
// to it into an esearch.fcgi request URL for `db`.
//
// db   - Database name, inserted into the URL as-is.
// term - Unused here; terms arrive through the stream. Kept so that existing
//        callers passing two arguments keep working.
//
// Returns the stream created by through.obj.
function createAPISearchUrl (db, term) {
  return through.obj(transform)

  function transform (obj, enc, next) {
    // Single and double quotes are dropped from the term before it is sent.
    var cleanTerm = obj.toString().replace(/['"]+/g, '')
    var query = [
      APIROOT + 'esearch.fcgi?',
      DEFAULTS,
      'db=' + db,
      // encodeURIComponent rather than encodeURI: the term is a single query
      // value, so characters such as & = + # ? must be escaped or they would
      // split, alter or truncate the query string.
      'term=' + encodeURIComponent(cleanTerm),
      'usehistory=y'
    ].join('&')
    debug('esearch request', query)
    this.push(query)
    next()
  }
}
// Pipeline stage that receives the response of the initial esearch request
// and emits one esearch URL per page of results.
//
// opts.limit      - Optional maximum number of results. Values that are not
//                   positive numbers are ignored (no limit).
// opts.throughput - Optional page size; defaults to RETURNMAX and is lowered
//                   to the limit when the limit is smaller.
//
// Each incoming object must carry `url` (the request that was made) and
// `body.esearchresult` with `count` and `webenv`. Anything else is reported
// as an 'error' event on the stream.
//
// Returns the stream created by through.obj.
function createAPIPaginateURL (opts) {
  var limit = parseInt(opts.limit, 10)
  // NaN, 0 and negative limits all mean "no limit". A limit of 0 must not
  // become the page size, as that would make the number of pages infinite.
  var hasLimit = limit > 0
  var throughput = opts.throughput || RETURNMAX
  if (hasLimit && limit < throughput) { throughput = limit }

  return through.obj(transform)

  function transform (obj, enc, next) {
    // Guard the whole path so a response without a body takes the error
    // branch instead of throwing.
    var esearchRes = obj.body && obj.body.esearchresult
    if (!esearchRes ||
        esearchRes.webenv === undefined ||
        esearchRes.count === undefined) {
      var msg = 'NCBI returned invalid results, this could be a temporary' +
        ' issue with NCBI servers.\nRequest URL: ' + obj.url
      this.emit('error', new Error(msg))
      return next()
    }

    var total = parseInt(esearchRes.count, 10)
    // With a single hit there is nothing to paginate: the original request is
    // passed on unchanged.
    if (total === 1) {
      this.push(obj.url)
      return next()
    }

    // Never page past what the server actually has, nor past the limit.
    var count = hasLimit ? Math.min(limit, total) : total
    var urlQuery = URL.parse(obj.url, true).query
    var numRequests = Math.ceil(count / throughput)
    for (var i = 0; i < numRequests; i++) {
      var retstart = i * throughput
      var query = [
        APIROOT + 'esearch.fcgi?',
        DEFAULTS,
        'db=' + urlQuery.db,
        // URL.parse decoded the term, so it has to be escaped again.
        'term=' + encodeURIComponent(urlQuery.term),
        'query_key=1',
        'WebEnv=' + esearchRes.webenv,
        // The last page only asks for what is left, so the total number of
        // records requested never exceeds `count`.
        'retmax=' + Math.min(throughput, count - retstart),
        'retstart=' + retstart
      ].join('&')
      debug('paginate request', query)
      this.push(query)
    }
    next()
  }
}
// Pipeline stage that receives the response of a paginated esearch request
// and emits esummary.fcgi URLs for the ids it contains, at most 50 ids per
// URL.
//
// Each incoming object must carry `url` (its `db` query parameter is reused)
// and `body.esearchresult`. A response without an id list, or with an empty
// one, produces no output.
//
// Returns the stream created by through.obj.
function createAPIDataUrl () {
  return through.obj(transform)

  function transform (obj, enc, next) {
    var idsChunkLen = 50

    // A response without `esearchresult` is reported the same way the
    // pagination stage reports it, instead of throwing inside the transform.
    var esearchRes = obj.body && obj.body.esearchresult
    if (!esearchRes) {
      var msg = 'NCBI returned invalid results, this could be a temporary' +
        ' issue with NCBI servers.\nRequest URL: ' + obj.url
      this.emit('error', new Error(msg))
      return next()
    }

    var idlist = esearchRes.idlist
    if (!idlist || idlist.length === 0) { return next() }

    // The database does not change between chunks, so parse the URL once.
    var db = URL.parse(obj.url, true).query.db
    for (var i = 0; i < idlist.length; i += idsChunkLen) {
      var idsChunk = idlist.slice(i, i + idsChunkLen)
      var query = [
        APIROOT + 'esummary.fcgi?',
        DEFAULTS,
        'db=' + db,
        'id=' + idsChunk.join(','),
        'usehistory=y'
      ].join('&')
      debug('esummary request', query)
      this.push(query)
    }
    next()
  }
}
// Last pipeline stage: takes esummary URLs, requests them and post-processes
// the response through a chain of `tool` helpers, finishing with an optional
// database-specific stream.
//
// db - Database name, used to look up per-database settings in XMLPROPERTIES
//      and LASTSTREAM.
//
// Returns the combined stream built by pumpify.obj.
function fetchByID (db) {
  // NOTE(review): when the database has no entry, a fresh pass-through stream
  // (not a list of property names) is handed to XMLToJSProperties. Confirm
  // that helper tolerates it.
  var propertiesToParse = XMLPROPERTIES[db]
  if (!propertiesToParse) { propertiesToParse = through.obj() }

  // Factory for the final stage; a plain pass-through when the database
  // defines none.
  var makeFinalStage = LASTSTREAM[db]
  if (!makeFinalStage) { makeFinalStage = through.obj }

  // The `tool` helpers are defined outside this block; judging by their names
  // and arguments they pick `body.result`, drop its `uids` key, split what
  // remains into single records and convert the listed XML properties.
  var stages = [
    requestStream(true),
    tool.extractProperty('body.result'),
    tool.deleteProperty('uids'),
    tool.arraySplit(),
    tool.XMLToJSProperties(propertiesToParse),
    makeFinalStage()
  ]
  return pumpify.obj.apply(pumpify, stages)
}