Stegano/exif/minimal_exif_reader.py

197 lines
7 KiB
Python

"""
This module offers one class, MinimalExifReader. Pass jpg filename
to the constructor. Will read minimal exif info from the file. Three
"public" functions available:
imageDescription()--returns Exif ImageDescription tag (0x010e) contents,
or '' if not found.
copyright()--returns Exif copyright tag (0x8298) contents, or '' if not
found.
dateTimeOriginal()--returns Exif DateTimeOriginal tag (0x9003) contents,
or '' if not found. If found, the trailing nul char
is stripped. This function also takes an optional
format string to apply time.strftime-style formatting
to the date time.
Brought to you by Megabyte Rodeo Software.
"""
# Written by Chris Stromberger, 10/2004. Public Domain.
# Much is owed to Thierry Bousch's exifdump.py:
# http://topo.math.u-psud.fr/~bousch/exifdump.py
#---------------------------------------------------------------------
class ExifFormatException(Exception):
pass
#---------------------------------------------------------------------
class MinimalExifReader:
IMAGE_DESCRIPTION_TAG = 0x010e
COPYRIGHT_TAG = 0x8298
EXIF_SUBIFD_TAG = 0x8769
DATE_TIME_ORIGINAL_TAG = 0x9003
#---------------------------------------
def __init__(self, filename):
"""Pass in jpg exif file name to process. Will attempt to find tags
of interest."""
self.tagsToFind = {self.IMAGE_DESCRIPTION_TAG:'',
self.COPYRIGHT_TAG:'',
self.DATE_TIME_ORIGINAL_TAG:''}
# Read first bit of file to see if exif file.
f = open(filename, 'rb')
firstTwoBytes = f.read(2)
if firstTwoBytes != '\xff\xd8':
f.close()
raise ExifFormatException("Missing SOI marker")
appMarker = f.read(2)
# See if there's an APP0 section, which sometimes appears.
if appMarker == '\xff\xe0':
#print "Skipping app0"
# Yes, we have app0. Skip over it.
app0DataLength = ord(f.read(1)) * 256 + ord(f.read(1))
app0 = f.read(app0DataLength - 2)
appMarker = f.read(2)
if appMarker != '\xff\xe1':
raise ExifFormatException("Can't find APP1 marker")
exifHeader = f.read(8)
#import binascii
#print binascii.hexlify(exifHeader)
if (exifHeader[2:6] != 'Exif' or
exifHeader[6:8] != '\x00\x00'):
f.close()
raise ExifFormatException("Malformed APP1")
app1DataLength = ord(exifHeader[0]) * 256 + ord(exifHeader[1])
#print app1DataLength
# Read exif info starting at the beginning of the self.tiff section.
# This is 8 bytes into the app1 section, so subtract 8 from
# app1 length.
self.tiff = f.read(app1DataLength - 8)
f.close()
self.endian = self.tiff[0]
if self.endian not in ('I', 'M'):
raise ExifFormatException("Invalid endianess found: %s" % self.endian)
# Now navigate to the items of interest and get them.
ifdStart = self.getValueAtLocation(4, 4)
self.ifdSearch(ifdStart)
#---------------------------------------
def imageDescription(self):
"""Return image description tag contents or '' if not found."""
return self.tagsToFind[self.IMAGE_DESCRIPTION_TAG].strip('\x20\x00')
#---------------------------------------
def copyright(self):
"""Return copyright tag contents or '' if not found."""
return self.tagsToFind[self.COPYRIGHT_TAG].strip('\x20\x00')
#---------------------------------------
def dateTimeOriginal(self, formatString = None):
"""Pass in optional format string to get time.strftime style formatting,
else get default exif format for date time string (without trailing nul).
Returns '' if tag not found."""
# The datetime should end in nul, get rid of it.
if formatString is None or not self.tagsToFind[self.DATE_TIME_ORIGINAL_TAG]:
return self.tagsToFind[self.DATE_TIME_ORIGINAL_TAG].strip('\x20\x00')
else:
# This will only work if the datetime string is in the standard exif format (i.e., hasn't been altered).
try:
import time
return time.strftime(formatString, time.strptime(self.tagsToFind[self.DATE_TIME_ORIGINAL_TAG].strip('\x20\x00'), '%Y:%m:%d %H:%M:%S'))
except:
return self.tagsToFind[self.DATE_TIME_ORIGINAL_TAG].strip('\x20\x00')
#---------------------------------------
def ifdSearch(self, ifdStart):
numIfdEntries = self.getValueAtLocation(ifdStart, 2)
tagsStart = ifdStart + 2
for entryNum in range(numIfdEntries):
# For my purposes, all files will have either no tags, or
# only our tags of interest, so no need to waste time trying to
# break out of the loop early.
thisTagStart = tagsStart + 12 * entryNum
tagId = self.getValueAtLocation(thisTagStart, 2)
if tagId == self.EXIF_SUBIFD_TAG:
# This is a special tag that points to another ifd. Our
# date time original tag is in the sub ifd.
self.ifdSearch(self.getTagValue(thisTagStart))
elif tagId in self.tagsToFind:
assert(not self.tagsToFind[tagId])
self.tagsToFind[tagId] = self.getTagValue(thisTagStart)
#---------------------------------------
def getValueAtLocation(self, offset, length):
slice = self.tiff[offset:offset + length]
if self.endian == 'I':
val = self.s2n_intel(slice)
else:
val = self.s2n_motorola(slice)
return val
#---------------------------------------
def s2n_motorola(self, str):
x = 0
for c in str:
x = (x << 8) | ord(c)
return x
#---------------------------------------
def s2n_intel(self, str):
x = 0
y = 0
for c in str:
x = x | (ord(c) << y)
y = y + 8
return x
#---------------------------------------
def getTagValue(self, thisTagStart):
datatype = self.getValueAtLocation(thisTagStart + 2, 2)
numBytes = [ 1, 1, 2, 4, 8, 1, 1, 2, 4, 8 ] [datatype-1] * self.getValueAtLocation(thisTagStart + 4, 4)
if numBytes > 4:
offsetToValue = self.getValueAtLocation(thisTagStart + 8, 4)
return self.tiff[offsetToValue:offsetToValue + numBytes]
else:
if datatype == 2 or datatype == 1 or datatype == 7:
return self.tiff[thisTagStart + 8:thisTagStart + 8 + numBytes]
else:
return self.getValueAtLocation(thisTagStart + 8, numBytes)
#---------------------------------------
def __str__(self):
return str(self.tagsToFind)
#---------------------------------------------------------------------
if __name__ == '__main__':
import sys
if len(sys.argv) == 1:
print "Pass jpgs to process."
sys.exit(1)
for filename in sys.argv[1:]:
try:
f = MinimalExifReader(filename)
print filename
print "description: '%s'" % f.imageDescription()
print "copyright: '%s'" % f.copyright()
print "dateTimeOriginal: '%s'" % f.dateTimeOriginal()
print "dateTimeOriginal: '%s'" % f.dateTimeOriginal('%B %d, %Y %I:%M:%S %p')
print
except ExifFormatException, ex:
sys.stderr.write("Exif format error: %s\n" % ex)
except:
sys.stderr.write("Unable to process %s\n" % filename)