1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
read.abif <- function(filename, max.bytes.in.file = file.info(filename)$size,
pied.de.pilote = 1.2, verbose = FALSE){
#
# Suppress warnings when reading strings with internal nul character:
#
RTC <- function(x, ...) suppressWarnings(rawToChar(x, ...))
#
# Define some shortcuts:
#
SInt32 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 4, ...)
SInt16 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 2, ...)
SInt8 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 1, ...)
UInt32 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 4, ...)
UInt16 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 2, ...)
UInt8 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 1, ...)
f32 <- function(f, ...) readBin(f, what = "numeric", size = 4,endian="little", ...)
f64 <- function(f, ...) readBin(f, what = "numeric", size = 8, endian="little",...)
#
# Load raw data in memory:
#
fc <- file(filename, open = "rb")
rawdata <- readBin(fc, what = "raw", n = pied.de.pilote*max.bytes.in.file,endian="little")
if(verbose) print(paste("number of bytes in file", filename, "is", length(rawdata)))
close(fc)
#
# Make a list to store results:
#
res <- list(Header = NULL, Directory = NA, Data = NA)
#
# Header section is 128 bytes long, located at a fixed position at the
# beginning of the file. We essentially need the number of item and dataoffset
#
res$Header$abif <- RTC(rawdata[1:4])
if(res$Header$abif != "ABIF") stop("file not in ABIF format")
if(verbose) print("OK: File is in ABIF format")
res$Header$version <- SInt16(rawdata[5:6])
if(verbose) print(paste("File in ABIF version", res$Header$version/100))
res$Header$DirEntry.name <- rawdata[7:10]
if(verbose) print(paste("DirEntry name: ", RTC(res$Header$DirEntry.name)))
res$Header$DirEntry.number <- SInt32(rawdata[11:14])
if(verbose) print(paste("DirEntry number: ", res$Header$DirEntry.number))
res$Header$DirEntry.elementtype <- SInt16(rawdata[15:16])
if(verbose) print(paste("DirEntry elementtype: ", res$Header$DirEntry.elementtype))
res$Header$DirEntry.elementsize <- SInt16(rawdata[17:18])
if(verbose) print(paste("DirEntry elementsize: ", res$Header$DirEntry.elementsize))
# This one is important:
res$Header$numelements <- SInt32(rawdata[19:22])
if(verbose) print(paste("DirEntry numelements: ", res$Header$numelements))
# This one is important too:
res$Header$dataoffset <- SInt32(rawdata[27:30])
if(verbose) print(paste("DirEntry dataoffset: ", res$Header$dataoffset))
dataoffset <- res$Header$dataoffset + 1 # start position is 1 in R vectors
res$Header$datahandle <- SInt32(rawdata[31:34])
if(verbose) print(paste("DirEntry datahandle: ", res$Header$datahandle))
res$Header$unused <- SInt16(rawdata[35:128], n = 47)
# Should be ingnored and set to zero
res$Header$unused[1:length(res$Header$unused)] <- 0
if(verbose) print(paste("DirEntry unused: ", length(res$Header$unused), "2-byte integers"))
#
# The directory is located at the offset specified in the header,
# and consist of an array of directory entries.
# We scan the directory to put values in a data.frame:
#
dirdf <- data.frame(list(name = character(0)))
dirdf$name <- as.character(dirdf$name) # force to characters
for(i in seq_len(res$Header$numelements)){
deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
direntry <- rawdata[deb:(deb + res$Header$DirEntry.elementsize)]
dirdf[i, "name"] <- RTC(direntry[1:4])
dirdf[i, "tagnumber"] <- SInt32(direntry[5:8])
dirdf[i, "elementtype"] <- SInt16(direntry[9:10])
dirdf[i, "elementsize"] <- SInt16(direntry[11:12])
dirdf[i, "numelements"] <- SInt32(direntry[13:16])
dirdf[i, "datasize"] <- SInt32(direntry[17:20])
dirdf[i, "dataoffset"] <- SInt32(direntry[21:24])
}
if(verbose){
print("Element found:")
print(dirdf$name)
}
#
# Save Directory and make a list to store data:
#
res$Directory <- dirdf
res$Data <- vector("list", nrow(dirdf))
names(res$Data) <- paste(dirdf$name, dirdf$tagnumber, sep = ".")
#
# Data extraction:
#
for(i in seq_len(res$Header$numelements)){
deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
# Short data are stored in dataoffset directly:
if(dirdf[i, "datasize"] > 4){
debinraw <- dirdf[i, "dataoffset"] + 1
} else {
debinraw <- deb + 20
}
elementtype <- dirdf[i, "elementtype"]
numelements <- dirdf[i, "numelements"]
elementsize <- dirdf[i, "elementsize"]
data <- rawdata[debinraw:(debinraw + numelements*elementsize)]
# unsigned 8 bits integer:
if(elementtype == 1) res$Data[[i]] <- UInt8(data, n = numelements)
# char or signed 8 bits integer
if(elementtype == 2){
res$Data[[i]] <- tryCatch(RTC(data),finally=paste(rawToChar(data,multiple=TRUE),collapse=""),error=function(er){cat(paste("an error was detected with the following message:",er," but this error was fixed\n",sep=" "))})
}
# unsigned 16 bits integer:
if(elementtype == 3) res$Data[[i]] <- UInt16(data, n = numelements)
# short:
if(elementtype == 4) res$Data[[i]] <- SInt16(data, n = numelements)
# long:
if(elementtype == 5) res$Data[[i]] <- SInt32(data, n = numelements)
# float:
if(elementtype == 7) res$Data[[i]] <- f32(data, n = numelements)
# double:
if(elementtype == 8) res$Data[[i]] <- f64(data, n = numelements)
# date:
if(elementtype == 10)
res$Data[[i]] <- list(year = SInt16(data, n = 1),
month = UInt8(data[-(1:2)], n = 1), day = UInt8(data[-(1:3)], n = 1))
# time:
if(elementtype == 11)
res$Data[[i]] <- list(hour = UInt8(data, n = 1),
minute = UInt8(data[-1], n = 1), second = UInt8(data[-(1:2)], n = 1),
hsecond = UInt8(data[-(1:3)], n = 1))
# pString:
if(elementtype == 18){
n <- SInt8(rawdata[debinraw])
pString <- RTC(rawdata[(debinraw+1):(debinraw+n)])
res$Data[[i]] <- pString
}
# cString:
if(elementtype == 19) res$Data[[i]] <- RTC(data[1:(length(data) - 1) ])
# user:
if(elementtype >= 1024) res$Data[[i]] <- data
# legacy:
if(elementtype %in% c(12, 13))
warning("unimplemented legacy type found in file")
if(elementtype %in% c(6, 9, 14, 15, 16, 17, 20, 128, 256, 384))
warning("unsupported legacy type found in file")
}
return(res)
}
|