2015-03-16

LZMA compressed SWF file vs 7z LZMA file

Lần này có việc phải decompress SWF file dùng LZMA, nên note lại cho nhớ.
Nếu sử dụng Python thì nên cài pylzma xong xài script tại đây
https://github.com/OpenGG/swfzip
Cần thiết thì chỉnh 1 chút xíu là dùng được.

Nếu muốn viết tool thay vì chạy script thì có thể dùng library của 7z. Tuy nhiên header của 2 loại file khác nhau chút đỉnh, nên library 7z sẽ không decompress trực tiếp được file ZWS nếu chưa chỉnh lại header.

Chi tiết nó có ghi ở đây nhưng khá là khó xem (Adobe Flash)

Issue


When you decompress an LZMA-compressed SWF file using the ByteArray.uncompress method, an exception is thrown.

Solution


The LZMA header format Flash authoring uses does not match the header 7z format uses. You can convert the SWF file in the LZMA format into the 7z LZMA format by updating the header.
https://helpx.adobe.com/flash-player/kb/exception-thrown-you-decompress-lzma-compressed.html

Mình sẽ note lại cho kỹ header 2 thằng này như sau, cần phải điều chỉnh lại thông tin trước khi thực hiện decompress với 7z.

//-----------------------------------------------------------------
// SWF file LZMA header
//-----------------------------------------------------------------
// bytes 0-3: ZWS+version 
// bytes 4-7: Uncompressed length, the uncompressed length of the SWF data 
// (includes ZWS+version (4 bytes) and uncompressed length (4 bytes)) 
// bytes 8-11: Compressed length 
//      Compressed length does not include header (4+4+4 bytes) or lzma props (5 bytes)
//      Compressed length does include LZMA end marker (6 bytes)
// bytes 12-16: LZMAproperties 
// bytes 17-n: Compressed data
//
// 17 bytes = 12 bytes header + 5 bytes LZMA properties
//-----------------------------------------------------------------

//-----------------------------------------------------------------
// 7z LZMA header
//-----------------------------------------------------------------
// bytes 0-4: LZMA properties 
// bytes 5-12: Uncompressed length (take the 
// swf lzma length - 8 (don't include ZWS+version + uncompressed length))
// bytes 13-n: Compressed data            
//
// 13 bytes  = 8 bytes uncompressed length + 5 bytes LZMA properties
//-----------------------------------------------------------------

// Decode the LZMA body of a ZWS (LZMA-compressed SWF) buffer held in swfBytes
// using the 7z SDK decoder. The result is the SWF body only (without the
// 8-byte signature/version/length header).
SevenZip.Compression.LZMA.Decoder coder = new SevenZip.Compression.LZMA.Decoder();

// Read the decoder properties (bytes 12-16)
byte[] properties = swfBytes.Skip(12).Take(5).ToArray();
// Calculate uncompressed length (bytes 4-7), subtract 8 (remove header bytes)
long len = Le2Int(swfBytes, 4) - 8;
coder.SetDecoderProperties(properties);

byte[] decompressedData;
// The compressed data starts at byte 17 (12 bytes header + 5 bytes LZMA properties).
using (Stream input = new MemoryStream(swfBytes.Skip(17).ToArray()))
using (MemoryStream output = new MemoryStream())
{
    coder.Code(input, output, input.Length, len, null);
    decompressedData = output.ToArray();
}




Đoạn code chuyển đổi qua lại giữa int và bytes (little endian): Le2Int đọc 4 bytes thành int, Int2Le ghi int thành 4 bytes.

// Reads the four bytes data[offset..offset+3] as a little-endian 32-bit
// integer: data[offset] is the least significant byte and data[offset + 3]
// the most significant one.
int Le2Int(byte[] data, int offset = 0)
{
    int value = 0;
    // Walk from the most significant byte down, shifting the accumulator
    // one byte to the left before merging in the next byte.
    for (int i = 3; i >= 0; i--)
    {
        value = (value << 8) | data[offset + i];
    }
    return value;
}

// Encodes a 32-bit integer as four bytes in little-endian order
// (least significant byte first).
byte[] Int2Le(int data)
{
    // Reinterpret the bits as unsigned so the right shifts below are logical.
    // `unchecked` keeps negative inputs working even when the project is
    // compiled with overflow checking enabled.
    uint bits = unchecked((uint)data);

    byte[] bytes = new byte[4];
    for (int i = 0; i < bytes.Length; i++)
    {
        // Mask every byte (including byte 0) so the narrowing cast can never
        // overflow in a checked context.
        bytes[i] = (byte)((bits >> (8 * i)) & 0xFF);
    }
    return bytes;
}
Với Python, script dưới đây unzip tất cả các file SWF trong một folder. Nếu chỉ cần unzip một file thì xem script trong swfzip.


#!/usr/bin/python
#
# This script is inspired by jspiro's swf2lzma repo: https://github.com/jspiro/swf2lzma
#
#
# SWF Formats:
## ZWS(LZMA)
## | 4 bytes       | 4 bytes    | 4 bytes       | 5 bytes    | n bytes    | 6 bytes         |
## | 'ZWS'+version | scriptLen  | compressedLen | LZMA props | LZMA data  | LZMA end marker |
##
## scriptLen is the uncompressed length of the SWF data. Includes 4 bytes SWF header and
## 4 bytes for scriptLen itself
##
## compressedLen does not include header (4+4+4 bytes) or lzma props (5 bytes)
## compressedLen does include LZMA end marker (6 bytes)
#
import os
import pylzma
import sys
import struct
import zlib
import shutil

def check(test, msg):
    """Abort the program with an error message unless *test* is truthy.

    Args:
        test: Condition that must hold for execution to continue.
        msg: Text appended to the "Error: " prefix of the exit message.

    Raises:
        SystemExit: When *test* is falsy; the exit argument is the message,
            so the interpreter prints it to stderr and exits with status 1.
    """
    if not test:
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive sessions and is not always available.
        sys.exit("Error: \n" + msg)
    
def debug(msg, level = "info"):
    """Print *msg* to stdout in the form ``<level> : <msg>``.

    Args:
        msg: Message text to print.
        level: Label printed in front of the message.
    """
    # Parenthesised single-argument form: prints the same line under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print('%s : %s' % (level, msg))

def confirm(prompt, resp = False):
    """Ask a yes/no question on the console and return the answer.

    The default answer is shown in upper case in the prompt: the user sees
    ``Create Directory? Y/n: `` when *resp* is True and
    ``Create Directory? N/y: `` when it is False. The question is repeated
    until the user types ``y``, ``Y``, ``n`` or ``N``, or just presses ENTER.

    Args:
        prompt: Question text shown to the user.
        resp: Value returned when the user presses ENTER without typing.

    Returns:
        True for a yes answer, False for a no answer, *resp* for an empty one.

    Raises:
        ValueError: If *prompt* is None.
    """
    if prompt is None:
        raise ValueError('Not valid prompt')

    # raw_input only exists on Python 2; on Python 3 the same function is
    # called input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    prompt = '%s %s: ' % (prompt, 'Y/n' if resp else 'N/y')

    while True:
        ans = read_line(prompt)
        print('')
        if not ans:
            # Bare ENTER selects the default.
            return resp
        if ans in ('y', 'Y'):
            return True
        if ans in ('n', 'N'):
            return False
        print('please enter y or n.')

def unzip(inData):
    """Return the uncompressed (``FWS``) image of a SWF file.

    The first byte of *inData* selects how the body is stored: ``C`` for
    zlib, ``Z`` for LZMA, ``F`` for an already uncompressed file. The body
    is decompressed, its size is verified against the length stored in
    bytes 4-7 of the header, and the original 8-byte header is re-emitted
    with its first byte replaced by ``F``.

    Args:
        inData: Entire contents of the input file as a byte string.

    Returns:
        The 8-byte header followed by the uncompressed body, or an empty
        list when *inData* is too short to hold the header or does not start
        with ``C``, ``Z`` or ``F``.

    Raises:
        SystemExit: Via ``check`` when the decompressed size does not match
            the size recorded in the header.
    """
    # Everything below reads the 8-byte header (signature, version, length);
    # anything shorter is handled like an unrecognised file instead of
    # failing inside struct/zlib.
    if len(inData) < 8:
        return []

    # Slice + bytes literal: equal to the one-character string test on
    # Python 2 and still correct when inData is a Python 3 bytes object.
    signature = inData[0:1]
    if signature == b'C':
        # zlib SWF: compressed data follows the 8-byte header
        debug('zlib compressed swf detected.')
        decompressData = zlib.decompress(inData[8:])
    elif signature == b'Z':
        # lzma SWF: skip the 8-byte header and the 4-byte compressed length;
        # the stream handed to pylzma starts at the LZMA properties
        debug('lzma compressed swf detected.')
        decompressData = pylzma.decompress(inData[12:])
    elif signature == b'F':
        # uncompressed SWF
        debug('Uncompressed swf detected.')
        decompressData = inData[8:]
    else:
        # not a SWF file
        return []

    sigSize = struct.unpack("<I", inData[4:8])[0]
    debug('Filesize in signature: %s' % sigSize)

    # The length stored in the header counts the 8 header bytes as well.
    decompressSize = len(decompressData) + 8
    debug('Filesize decompressed: %s' % decompressSize)

    check((sigSize == decompressSize), 'Length not correct, decompression failed')

    debug('Generating uncompressed data')
    # Same header as the input, with only the signature byte changed to 'F'.
    return b'F' + inData[1:8] + decompressData

def zip(inData, compression):
    """Return *inData* recompressed as an LZMA or zlib SWF.

    The input is first brought to its uncompressed form with ``unzip`` and
    the body (everything after the 8-byte header) is compressed again. The
    new header is copied from the input with the signature byte replaced
    (``Z`` for LZMA, ``C`` for zlib) and the version byte raised to at least
    13 (LZMA) or 6 (zlib). The LZMA header carries four extra bytes holding
    the compressed length, which excludes the 5 LZMA property bytes.

    Note: the function name shadows the built-in ``zip`` inside this module;
    it is kept because ``process`` calls it by this name.

    Args:
        inData: Entire contents of the input file as a byte string.
        compression: ``'lzma'`` selects LZMA; any other value selects zlib.

    Returns:
        The new header followed by the compressed body.

    Raises:
        SystemExit: Via ``check`` when the input already uses the requested
            compression or is not recognised as a SWF file.
    """
    use_lzma = compression == 'lzma'
    if use_lzma:
        check(inData[0:1] != b'Z', "already LZMA compressed")
    else:
        check(inData[0:1] != b'C', "already zlib compressed")

    rawSwf = unzip(inData)
    # unzip() signals an unrecognised file with an empty result; stop here
    # with a clear message instead of handing it to the compressor.
    check(len(rawSwf) > 0, "not a SWF file")
    body = rawSwf[8:]

    if use_lzma:
        debug('Compressing with lzma')
        compressData = pylzma.compress(body, eos=1)
        # 5 accounts for lzma props
        compressSize = len(compressData) - 5

        header = bytearray(inData[0:12])
        header[0] = ord('Z')
        header[3] = max(header[3], 13)
        # Bytes 8-11: compressed length, little-endian, truncated to 32 bits.
        header[8:12] = struct.pack("<I", compressSize & 0xFFFFFFFF)

        debug('Packing lzma header')
    else:
        debug('Compressing with zlib')
        compressData = zlib.compress(body)

        header = bytearray(inData[0:8])
        header[0] = ord('C')
        header[3] = max(header[3], 6)

        debug('Packing zlib header')

    debug('Generating compressed data')
    return bytes(header) + compressData

def process(infile, outfile, operation='unzip', compression='zlib'):
    """Decompress or recompress one SWF file and write the result.

    Files whose second and third bytes are not ``WS`` are skipped, as are
    files that ``unzip`` does not recognise; in both cases nothing is
    written.

    Args:
        infile: Path of the file to read.
        outfile: Path the converted data is written to.
        operation: ``'unzip'`` decompresses; any other value compresses.
        compression: Used when compressing; ``'lzma'`` selects LZMA and any
            other value selects zlib.
    """
    with open(infile, "rb") as fi:
        inData = fi.read()
    # Size of what was actually read; non-zero once the signature test
    # below has passed, so the percentage maths cannot divide by zero.
    infileSize = len(inData)

    signature = inData[0:3]
    if not isinstance(signature, str):
        # Python 3 bytes: decode so it can be joined into the log message
        # (latin-1 maps every byte value, so this cannot fail).
        signature = signature.decode('latin-1')
    debug('Reading ' + os.path.basename(infile) + ' ver ' + signature)

    # Slicing instead of indexing: files shorter than three bytes are simply
    # skipped rather than raising IndexError.
    if signature[1:3] != 'WS':
        return

    if operation == 'unzip':
        outData = unzip(inData)

        if len(outData) == 0:
            return

        increment = round(100.0 * len(outData) / infileSize) - 100
        print('File decompressed, size increased: %d%%' % increment)
    else:
        compression = 'lzma' if compression == 'lzma' else 'zlib'
        outData = zip(inData, compression)
        decrement = 100 - round(100.0 * len(outData) / infileSize)
        if decrement < 0:
            notice = '\n\nNotice: Recompressing may cause filesize increased'
        else:
            notice = ''
        print('File compressed with %s, size decreased: %d%% %s'
              % (compression, decrement, notice))

    with open(outfile, 'wb') as fo:
        fo.write(outData)

# Entry point: decompress every file of a directory (or one single file) into
# an "unzip" sub-directory next to the inputs.
#
# Usage: script.py [directory-or-file]   (defaults to the current directory)
if __name__ == "__main__":
    currentDir = os.getcwd()
    if len(sys.argv) > 1:
        currentDir = sys.argv[1]

    if os.path.isdir(currentDir):
        files = os.listdir(currentDir)
    else:
        # Argument is treated as a file path: process only that file and use
        # its parent directory as the working directory.
        files = [os.path.basename(currentDir)]
        currentDir = os.path.dirname(os.path.abspath(currentDir))

    outputDir = os.path.join(currentDir, "unzip")
    if not os.path.isdir(outputDir):
        os.mkdir(outputDir)

    for inputFile in files:
        inputFile = os.path.join(currentDir, os.path.basename(inputFile))
        # Test the joined path: the names returned by os.listdir() are
        # relative to currentDir, not to the process working directory, so
        # checking the bare name would miss sub-directories (including a
        # pre-existing "unzip" folder) and then fail when opening them.
        if os.path.isdir(inputFile):
            continue

        name, ext = os.path.splitext(os.path.basename(inputFile))
        outputFile = os.path.join(outputDir, name + "_unzip" + ext)
        process(inputFile, outputFile)

    sys.exit(0)