!pip install pycuda

 

import numpy as np

import pycuda.autoinit

import pycuda.driver as cuda

from pycuda.compiler import SourceModule

import time

 

# CUDA kernel for frequency counting

freq_kernel_code = """

__global__ void computeFrequencies(unsigned char *input, int *frequencies, int size) {

    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < size) {

        atomicAdd(&frequencies[input[idx]], 1);

    }

}

"""

 

# CUDA kernel for prefix sum (scan); defined for reference but not used below (the Huffman tree is built on the CPU)

scan_kernel_code = """

__global__ void prefixSum(int *input, int *output, int size) {

    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < size) {

        output[idx] = input[idx];

        for (int i = 1; i <= idx; i++) {

            output[idx] += input[i-1];

        }

    }

}

"""

 

class HuffmanNode:

    def __init__(self, char, freq):

        self.char = char

        self.freq = freq

        self.left = None

        self.right = None

 

def build_huffman_tree(frequencies):

    nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0]

    while len(nodes) > 1:

        nodes.sort(key=lambda x: x.freq)

        left = nodes.pop(0)

        right = nodes.pop(0)

        parent = HuffmanNode(None, left.freq + right.freq)

        parent.left = left

        parent.right = right

        nodes.append(parent)

    return nodes[0] if nodes else None

 

def generate_codes(root, current_code="", codes=None):

    if codes is None:

        codes = {}

    if root is None:

        return codes

    if root.char is not None:

        codes[root.char] = current_code or "0"

    generate_codes(root.left, current_code + "0", codes)

    generate_codes(root.right, current_code + "1", codes)

    return codes

 

def huffman_encode_gpu(input_string):

    # Convert input to numpy array

    input_array = np.frombuffer(input_string.encode(), dtype=np.uint8)

    input_size = len(input_array)

    # Allocate GPU memory

    input_gpu = cuda.mem_alloc(input_array.nbytes)

    frequencies_gpu = cuda.mem_alloc(256 * 4)  # 256 possible byte values x 4 bytes (int32)

    frequencies = np.zeros(256, dtype=np.int32)

    # Copy input to GPU

    cuda.memcpy_htod(input_gpu, input_array)

    cuda.memcpy_htod(frequencies_gpu, frequencies)

    # Compile and run frequency kernel

    mod = SourceModule(freq_kernel_code)

    compute_frequencies = mod.get_function("computeFrequencies")

    block_size = 256

    grid_size = (input_size + block_size - 1) // block_size

    compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),

                       block=(block_size, 1, 1), grid=(grid_size, 1))

    # Copy frequencies back to host

    cuda.memcpy_dtoh(frequencies, frequencies_gpu)

    # Build Huffman tree and generate codes

    root = build_huffman_tree(frequencies)

    codes = generate_codes(root)

    # Encode the input

    encoded = ""

    for char in input_string:

        encoded += codes[ord(char)]

    return encoded, codes

 

# Example usage

if __name__ == "__main__":

    input_string = "hello world this is a test string for huffman encoding"

    start_time = time.time()

    encoded, codes = huffman_encode_gpu(input_string)

    print(f"Original string: {input_string}")

    print(f"Encoded string: {encoded}")

    print(f"Codes: {codes}")

    print(f"Time taken: {time.time() - start_time:.4f} seconds")

 

Output:

Original string: hello world this is a test string for huffman encoding

Encoded string: 01010011011001101010111001011010011101101101111111000101100010111111000101111111010111110000111011110011110111100011110001001000011101001010011111101010010001000100000111101010011110011100100010101011011100010010000

Codes: {103: '0000', 99: '00010', 109: '00011', 117: '00100', 119: '00101', 101: '0011', 102: '0100', 104: '0101', 108: '0110', 114: '0111', 105: '1000', 110: '1001', 111: '1010', 115: '1011', 116: '1100', 97: '11010', 100: '11011', 32: '111'}

Time taken: 1.0239 seconds
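
As a quick sanity check (not part of the original program), the GPU histogram can be compared against a host-side count with NumPy. The snippet below recomputes the frequencies with np.bincount; if the frequencies array inside huffman_encode_gpu were exposed (for example, returned alongside the codes), the two histograms should match exactly:

# Host-side verification sketch: recompute the histogram with NumPy and compare
import numpy as np

input_string = "hello world this is a test string for huffman encoding"
input_array = np.frombuffer(input_string.encode(), dtype=np.uint8)
cpu_frequencies = np.bincount(input_array, minlength=256).astype(np.int32)

# With the GPU-side array in hand, the two histograms should be identical:
# assert np.array_equal(cpu_frequencies, frequencies)
print(cpu_frequencies[ord("l")])   # e.g. how many times 'l' occurs in the input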

 

Huffman mini-project: the same program as above, annotated with line-by-line comments.

# Install PyCUDA (only needed in Colab or initial setup)

!pip install pycuda


# Import necessary modules

import numpy as np                               # NumPy for numerical operations

import pycuda.autoinit                           # Automatically initializes CUDA driver

import pycuda.driver as cuda                     # PyCUDA driver interface

from pycuda.compiler import SourceModule        # Compiler to compile CUDA C kernels from strings

import time                                      # For measuring execution time


# CUDA kernel for frequency counting of characters in the input

freq_kernel_code = """

__global__ void computeFrequencies(unsigned char *input, int *frequencies, int size) {

    int idx = blockIdx.x * blockDim.x + threadIdx.x;  // Calculate thread index

    if (idx < size) {

        atomicAdd(&frequencies[input[idx]], 1);       // Atomically increment frequency

    }

}

"""


# CUDA kernel for an inclusive prefix sum (defined for reference; not used in main)

scan_kernel_code = """

__global__ void prefixSum(int *input, int *output, int size) {

    int idx = blockIdx.x * blockDim.x + threadIdx.x;  // Calculate thread index

    if (idx < size) {

        output[idx] = input[idx];                     // Copy input to output

        for (int i = 1; i <= idx; i++) {

            output[idx] += input[i-1];                // Perform prefix sum sequentially

        }

    }

}

"""


# Class to represent a node in Huffman Tree

class HuffmanNode:

    def __init__(self, char, freq):

        self.char = char                              # Character (None for internal nodes)

        self.freq = freq                              # Frequency of character

        self.left = None                              # Left child

        self.right = None                             # Right child


# Function to build the Huffman Tree from frequency array

def build_huffman_tree(frequencies):

    nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0]  # Create leaf nodes

    while len(nodes) > 1:

        nodes.sort(key=lambda x: x.freq)              # Sort by frequency

        left = nodes.pop(0)                           # Extract two lowest

        right = nodes.pop(0)

        parent = HuffmanNode(None, left.freq + right.freq)  # Create new parent node

        parent.left = left

        parent.right = right

        nodes.append(parent)                          # Add parent back to node list

    return nodes[0] if nodes else None                # Return root node


# Recursive function to generate Huffman codes from the tree

def generate_codes(root, current_code="", codes=None):

    if codes is None:

        codes = {}

    if root is None:

        return codes

    if root.char is not None:

        codes[root.char] = current_code or "0"        # Assign code to character

    generate_codes(root.left, current_code + "0", codes)   # Left = 0

    generate_codes(root.right, current_code + "1", codes)  # Right = 1

    return codes
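
# Decoding helper (an addition, not in the original mini-project): walk the tree bit by bit
# to recover the text from the encoded bit string. decode_with_tree is an illustrative name
# and is not called in the main block below.
def decode_with_tree(root, encoded):
    if root is None:
        return ""
    if root.char is not None:                          # Degenerate tree: one distinct symbol
        return chr(root.char) * len(encoded)
    decoded = []
    node = root
    for bit in encoded:
        node = node.left if bit == "0" else node.right # 0 = left, 1 = right
        if node.char is not None:                      # Reached a leaf: emit its character
            decoded.append(chr(node.char))
            node = root                                # Restart at the root for the next symbol
    return "".join(decoded)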


# Main function to encode input using GPU-accelerated frequency count

def huffman_encode_gpu(input_string):

    input_array = np.frombuffer(input_string.encode(), dtype=np.uint8)  # Convert input to uint8 array

    input_size = len(input_array)                                       # Size of input array


    input_gpu = cuda.mem_alloc(input_array.nbytes)                      # Allocate memory for input on GPU

    frequencies_gpu = cuda.mem_alloc(256 * 4)                           # Allocate memory for frequencies (256 x 4 bytes)

    frequencies = np.zeros(256, dtype=np.int32)                         # Host-side frequency array


    cuda.memcpy_htod(input_gpu, input_array)                            # Copy input array to GPU

    cuda.memcpy_htod(frequencies_gpu, frequencies)                      # Initialize GPU frequencies to zero


    mod = SourceModule(freq_kernel_code)                                # Compile CUDA kernel

    compute_frequencies = mod.get_function("computeFrequencies")       # Get function handle


    block_size = 256                                                    # CUDA block size

    grid_size = (input_size + block_size - 1) // block_size            # Compute number of blocks

    compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),

                       block=(block_size, 1, 1), grid=(grid_size, 1))  # Launch kernel


    cuda.memcpy_dtoh(frequencies, frequencies_gpu)                      # Copy frequencies back to CPU


    root = build_huffman_tree(frequencies)                              # Build Huffman tree

    codes = generate_codes(root)                                        # Generate codes from tree


    encoded = ""                                                        # Encode input string using codes

    for char in input_string:

        encoded += codes[ord(char)]


    return encoded, codes


# Example usage and timing

if __name__ == "__main__":

    input_string = "hello world this is a test string for huffman encoding"  # Input string

    start_time = time.time()                                                 # Start time


    encoded, codes = huffman_encode_gpu(input_string)                        # Encode using GPU


    print(f"Original string: {input_string}")

    print(f"Encoded string: {encoded}")

    print(f"Codes: {codes}")

    print(f"Time taken: {time.time() - start_time:.4f} seconds")             # Print total execution time
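
The encoded result is still a Python string of '0'/'1' characters, so it occupies more memory than the original text. To see the actual compression, the bits can be packed into bytes. A minimal sketch, assuming the encoded and input_string variables from the example above; pack_bits is an illustrative helper name:

def pack_bits(encoded):
    padding = (8 - len(encoded) % 8) % 8                  # Pad to a whole number of bytes
    padded = encoded + "0" * padding
    data = bytes(int(padded[i:i + 8], 2) for i in range(0, len(padded), 8))
    return data, padding

packed, padding = pack_bits(encoded)
print(f"Packed size: {len(packed)} bytes vs {len(input_string)} bytes uncompressed")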