@@ -191,12 +191,12 @@ def list(ids, raw=False, threads=None, data_dir=None, debug=False):
191191 ... subtaxa = [t for t in tree.traverse]
192192 ... print(f'Top level result: {taxon.name} ({taxon.taxid}); {len(subtaxa)} related taxa')
193193 ...
194- Top level result: Solenopsis (13685); 198 related taxa
195- Top level result: Bos (9903); 26 related taxa
194+ Top level result: Solenopsis (13685); 293 related taxa
195+ Top level result: Bos (9903); 27 related taxa
196196 >>> subtaxa[0]
197197 BasicTaxon(taxid=9904, rank='species', name='Bos gaurus')
198198 >>> pytaxonkit.list([9605], raw=True)
199- {'9605 [genus] Homo': {'9606 [species] Homo sapiens': {'63221 [subspecies] Homo sapiens neanderthalensis': {}, "741158 [subspecies] Homo sapiens subsp. 'Denisova'": {}, '2665952 [no rank] environmental samples': {'2665953 [species] Homo sapiens environmental sample': {}}}, '1425170 [species] Homo heidelbergensis': { }}}
199+ {'9605 [genus] Homo': {'9606 [species] Homo sapiens': {'63221 [subspecies] Homo sapiens neanderthalensis': {}, "741158 [subspecies] Homo sapiens subsp. 'Denisova'": {}} , '1425170 [species] Homo heidelbergensis': {}, ' 2665952 [no rank] environmental samples': {'2665953 [species] Homo sapiens environmental sample': {}}}}
200200 ''' # noqa: E501
201201 idlist = ',' .join (map (str , ids ))
202202 arglist = ['taxonkit' , 'list' , '--json' , '--show-name' , '--show-rank' , '--ids' , idlist ]
@@ -298,14 +298,18 @@ def lineage(ids, formatstr=None, threads=None, data_dir=None, debug=False):
298298
299299 Examples
300300 --------
301+ >>> import pandas
301302 >>> import pytaxonkit
302- >>> result = pytaxonkit.lineage([7399, 1973489 ])
303+ >>> result = pytaxonkit.lineage([1325911, 1649473, 1401311 ])
303304 >>> result.columns
304- Index(['TaxID', 'Code', 'Lineage', 'LineageTaxIDs', 'Rank', 'FullLineage', 'FullLineageTaxIDs'], dtype='object')
305+ Index(['TaxID', 'Code', 'Name', 'Lineage', 'LineageTaxIDs', 'Rank',
306+ 'FullLineage', 'FullLineageTaxIDs'],
307+ dtype='object')
305308 >>> result[['TaxID', 'Lineage', 'LineageTaxIDs']]
306- TaxID Lineage LineageTaxIDs
307- 0 7399 Eukaryota;Arthropoda;Insecta;Hymenoptera;;; 2759;6656;50557;7399;;;
308- 1 1973489 Bacteria;Firmicutes;Bacilli;Bacillales;Bacillaceae;Bacillus;Bacillus sp. ISSFR-25F 2;1239;91061;1385;186817;1386;1973489
309+ TaxID Lineage LineageTaxIDs
310+ 0 1325911 Eukaryota;Arthropoda;Insecta;Hymenoptera;Eucharitidae;Pogonocharis; 2759;6656;50557;7399;216140;1325911;
311+ 1 1649473 Bacteria;Bacteroidetes;Cytophagia;Cytophagales;Cytophagaceae;Nibrella; 2;976;768503;768507;89373;1649473;
312+ 2 1401311 Eukaryota;Arthropoda;Insecta;Coleoptera;Staphylinidae;Styngetus; 2759;6656;50557;7041;29026;1401311;
309313 >>> result = pytaxonkit.lineage(['1382510', '929505', '390333'], formatstr='{f};{g};{s};{S}')
310314 >>> result[['TaxID', 'Lineage', 'LineageTaxIDs']]
311315 TaxID Lineage LineageTaxIDs
@@ -314,13 +318,16 @@ def lineage(ids, formatstr=None, threads=None, data_dir=None, debug=False):
314318 2 390333 Lactobacillaceae;Lactobacillus;Lactobacillus delbrueckii;Lactobacillus delbrueckii subsp. bulgaricus 33958;1578;1584;1585
315319 ''' # noqa: E501
316320 idlist = '\n ' .join (map (str , ids ))
317- arglist = ['taxonkit' , 'lineage' , '--show-lineage-taxids' , '--show-rank' , '--show-status-code' ]
321+ arglist = [
322+ 'taxonkit' , 'lineage' , '--show-lineage-taxids' , '--show-rank' , '--show-status-code' ,
323+ '--show-name'
324+ ]
318325 if threads :
319326 arglist .extend (('--threads' , validate_threads (threads )))
320327 if data_dir :
321328 arglist .extend (('--data-dir' , validate_data_dir (data_dir ))) # pragma: no cover
322329 if debug :
323- log (* arglist ) # pragma: no cover
330+ log (* arglist )
324331 with NamedTemporaryFile (suffix = '-lineage.txt' ) as lineagefile :
325332 proc = Popen (arglist , stdin = PIPE , stdout = lineagefile , stderr = PIPE , universal_newlines = True )
326333 out , err = proc .communicate (input = idlist )
@@ -346,10 +353,12 @@ def lineage(ids, formatstr=None, threads=None, data_dir=None, debug=False):
346353 if proc .returncode != 0 :
347354 raise TaxonKitCLIError (err ) # pragma: no cover
348355 columnorderin = [
349- 'TaxID' , 'Code' , 'FullLineage' , 'FullLineageTaxIDs' , 'Rank' , 'Lineage' , 'LineageTaxIDs'
356+ 'TaxID' , 'Code' , 'FullLineage' , 'FullLineageTaxIDs' , 'Name' , 'Rank' , 'Lineage' ,
357+ 'LineageTaxIDs'
350358 ]
351359 columnorderout = [
352- 'TaxID' , 'Code' , 'Lineage' , 'LineageTaxIDs' , 'Rank' , 'FullLineage' , 'FullLineageTaxIDs'
360+ 'TaxID' , 'Code' , 'Name' , 'Lineage' , 'LineageTaxIDs' , 'Rank' , 'FullLineage' ,
361+ 'FullLineageTaxIDs'
353362 ]
354363 data = pandas .read_csv (
355364 StringIO (out ), sep = '\t ' , header = None , names = columnorderin , index_col = False
@@ -358,26 +367,69 @@ def lineage(ids, formatstr=None, threads=None, data_dir=None, debug=False):
358367 return data
359368
360369
def name(ids, data_dir=None, debug=False):
    '''rapid taxon name retrieval

    Uses the `--no-lineage` option in `taxonkit lineage` for rapid retrieval of taxon names.

    Parameters
    ----------
    ids : list or iterable
        A list of taxids (ints or strings are ok)
    data_dir : str, default None
        Specify the location of the NCBI taxonomy `.dmp` files; by default, taxonkit searches in
        `~/.taxonkit/`
    debug : bool, default False
        Print debugging output, e.g., system calls to `taxonkit`

    Returns
    -------
    DataFrame
        A two-dimensional data structure with TaxIDs and taxon names.

    Raises
    ------
    TaxonKitCLIError
        If the `taxonkit` process exits with a non-zero return code; the message is whatever the
        process wrote to its standard error stream.

    Examples
    --------
    >>> import pytaxonkit
    >>> pytaxonkit.name(['151837', '2216222', '517824'])
         TaxID                                Name
    0   151837                    Hiraea smilacina
    1  2216222         Paramyia sp. BIOUG21706-A10
    2   517824  soil bacterium Cipr-S1N-M1LLLSSL-1
    '''
    # taxonkit reads one taxid per line from standard input
    idlist = '\n'.join(map(str, ids))
    arglist = ['taxonkit', 'lineage', '--show-name', '--no-lineage']
    if data_dir:
        arglist.extend(('--data-dir', validate_data_dir(data_dir)))  # pragma: no cover
    if debug:
        log(*arglist)
    proc = Popen(arglist, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    out, err = proc.communicate(input=idlist)
    # Mirror `lineage`: surface CLI failures instead of parsing partial or empty output
    if proc.returncode != 0:
        raise TaxonKitCLIError(err)  # pragma: no cover
    data = pandas.read_csv(
        StringIO(out), sep='\t', header=None, names=['TaxID', 'Name'], index_col=False
    )
    return data
411+
412+
361413def test_lineage (capsys ):
362- result = lineage (['446045 ' , '265720' , '2507530' , '106649' ], debug = True )
363- assert result .TaxID .equals (pandas .Series ([446045 , 265720 , 2507530 , 106649 ]))
364- assert result .Code .equals (pandas .Series ([446045 , 265720 , 2507530 , 106649 ]))
414+ result = lineage (['1082657 ' , '265720' , '2507530' , '106649' ], debug = True )
415+ assert result .TaxID .equals (pandas .Series ([1082657 , 265720 , 2507530 , 106649 ]))
416+ assert result .Code .equals (pandas .Series ([1082657 , 265720 , 2507530 , 106649 ]))
365417 assert result .Lineage .equals (pandas .Series ([
366- 'Eukaryota;Arthropoda;Insecta;Diptera;Drosophilidae;Drosophila; ' ,
418+ 'Eukaryota;Discosea;;Longamoebia;Acanthamoebidae;Acanthamoeba;Acanthamoeba sp. TW95 ' ,
367419 'Bacteria;Bacteroidetes;Bacteroidia;Bacteroidales;Porphyromonadaceae;Porphyromonas;'
368420 'Porphyromonas genomosp. P3' ,
369421 'Eukaryota;Basidiomycota;Agaricomycetes;Russulales;Russulaceae;Russula;Russula species' ,
370422 'Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Moraxellaceae;Acinetobacter;'
371423 'Acinetobacter guillouiae' ,
372424 ]))
373425 assert result .LineageTaxIDs .equals (pandas .Series ([
374- '2759;6656;50557;7147;7214;7215; ' ,
426+ '2759;555280;;1485168;33677;5754;1082657 ' ,
375427 '2;976;200643;171549;171551;836;265720' ,
376428 '2759;5204;155619;452342;5401;5402;2507520' ,
377429 '2;1224;1236;72274;468;469;106649' ,
378430 ]))
379431 assert result .Rank .equals (pandas .Series ([
380- 'no rank ' , 'species' , 'subspecies' , 'species'
432+ 'species ' , 'species' , 'subspecies' , 'species'
381433 ]))
382434
383435 out , err = capsys .readouterr ()
@@ -393,6 +445,22 @@ def test_lineage_threads():
393445 )
394446
395447
def test_lineage_name():
    '''The `Name` column of a `lineage` result holds the taxon's scientific name.'''
    names = lineage(['526061']).Name
    assert names.iloc[0] == 'Henosepilachna sp. AGBA-2008'
451+
452+
def test_name_debug(capsys):
    '''`name` returns the expected names and, in debug mode, reports the taxonkit call.'''
    expected = pandas.Series([
        'Ahnfeltiopsis concinna',
        'Picobirnavirus turkey/USA-1512/2010',
        'Isopoda sp. NZAC 03013534',
    ])
    observed = name([207661, 1353792, 1597281], debug=True)
    assert observed.Name.equals(expected)
    # The debug log of the command line is expected on the captured stderr stream
    captured = capsys.readouterr()
    assert 'taxonkit lineage --show-name --no-lineage' in captured.err
462+
463+
396464# -------------------------------------------------------------------------------------------------
397465# taxonkit name2taxid
398466# -------------------------------------------------------------------------------------------------
@@ -473,4 +541,4 @@ def test_name2taxid(capsys):
473541
474542def test_name2taxid_threads ():
475543 result = name2taxid (['FCB group' ], threads = '1' )
476- assert str (result ) == ' Name TaxID Rank\n 0 FCB group 1783270 no rank '
544+ assert str (result ) == ' Name TaxID Rank\n 0 FCB group 1783270 clade '
0 commit comments