Skip to content

Commit c8e893b

Browse files
authored
Add stringbyte function (#48)
Add stringbyte function
1 parent f07b9d7 commit c8e893b

3 files changed

Lines changed: 53 additions & 4 deletions

File tree

Project.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
name = "BioSymbols"
22
uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
33
authors = ["Ben J. Ward <benjward@protonmail.com>"]
4-
version = "5.0.0"
5-
6-
[deps]
4+
version = "5.1.0"
75

86
[compat]
97
julia = "1"

src/BioSymbols.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ export
116116
compatbits,
117117
alphabet,
118118
encoded_data,
119-
encode
119+
encode,
120+
stringbyte
120121

121122

122123
"""
@@ -146,6 +147,42 @@ Base.broadcastable(x::BioSymbol) = (x,)
146147
include("nucleicacid.jl")
147148
include("aminoacid.jl")
148149

150+
# Generic function declaration. Methods for DNA, RNA and AminoAcid are generated from lookup tables below.
151+
"""
152+
stringbyte(::BioSymbol)::UInt8
153+
154+
For biosymbol types that can be represented as ASCII characters, `stringbyte(x)`
155+
returns the printable ASCII byte that represents the character in a string.
156+
157+
# Examples
158+
```julia
159+
julia> stringbyte(DNA_A) == UInt8('A')
160+
true
161+
162+
julia> stringbyte(AA_Gap) == UInt8('-')
163+
true
164+
```
165+
"""
166+
function stringbyte end
167+
168+
# Create a lookup table from biosymbol to the UInt8 for the character that would
169+
# represent it in a string, e.g. DNA_G -> UInt8('G')
170+
for alphabettype in ("DNA", "RNA", "AminoAcid")
171+
tablename = Symbol(uppercase(alphabettype), "_TO_BYTE")
172+
typ = Symbol(alphabettype)
173+
@eval begin
174+
const $(tablename) = let
175+
alph = alphabet($(typ))
176+
bytes = zeros(UInt8, length(alph))
177+
@inbounds for letter in alph
178+
bytes[reinterpret(UInt8, letter) + 1] = UInt8(Char(letter))
179+
end
180+
Tuple(bytes)
181+
end
182+
stringbyte(x::$(typ)) = @inbounds $(tablename)[reinterpret(UInt8, x) + 1]
183+
end
184+
end
185+
149186
"""
150187
isgap(symbol::BioSymbol)
151188

test/runtests.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ end
8989
@test encoded_data(RNA_B) === 0b1110
9090
@test encoded_data(RNA_N) === 0b1111
9191
end
92+
93+
@testset "stringbyte" begin
94+
for T in (DNA, RNA)
95+
@test all(alphabet(T)) do i
96+
UInt8(Char(i)) == stringbyte(i)
97+
end
98+
end
99+
end
92100
end
93101

94102
@testset "Char" begin
@@ -409,6 +417,12 @@ end
409417
@test_throws InexactError convert(AminoAcid, '亜')
410418
end
411419

420+
@testset "stringbyte" begin
421+
@test all(alphabet(AminoAcid)) do i
422+
UInt8(Char(i)) == stringbyte(i)
423+
end
424+
end
425+
412426
@testset "isvalid" begin
413427
for aa in alphabet(AminoAcid)
414428
@test isvalid(aa)

0 commit comments

Comments
 (0)