5a | padhodosto

!pip install pycuda

import numpy as np

import pycuda.autoinit

import pycuda.driver as cuda

from pycuda.compiler import SourceModule

import time

# CUDA kernel for frequency counting

freq_kernel_code = """

__global__ void computeFrequencies(unsigned char *input, int *frequencies, int size) {

int idx = blockIdx.x * blockDim.x + threadIdx.x;

if (idx < size) {

atomicAdd(&frequencies[input[idx]], 1);

}

"""

# CUDA kernel for prefix sum (scan) to build Huffman tree

scan_kernel_code = """

__global__ void prefixSum(int *input, int *output, int size) {

int idx = blockIdx.x * blockDim.x + threadIdx.x;

if (idx < size) {

output[idx] = input[idx];

for (int i = 1; i <= idx; i++) {

output[idx] += input[i-1];

}

"""

class HuffmanNode:

def __init__(self, char, freq):

self.char = char

self.freq = freq

self.left = None

self.right = None

def build_huffman_tree(frequencies):

nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0]

while len(nodes) > 1:

nodes.sort(key=lambda x: x.freq)

left = nodes.pop(0)

right = nodes.pop(0)

parent = HuffmanNode(None, left.freq + right.freq)

parent.left = left

parent.right = right

nodes.append(parent)

return nodes[0] if nodes else None

def generate_codes(root, current_code="", codes=None):

if codes is None:

codes = {}

if root is None:

return codes

if root.char is not None:

codes[root.char] = current_code or "0"

generate_codes(root.left, current_code + "0", codes)

generate_codes(root.right, current_code + "1", codes)

return codes

def huffman_encode_gpu(input_string):

# Convert input to numpy array

input_array = np.frombuffer(input_string.encode(), dtype=np.uint8)

input_size = len(input_array)

# Allocate GPU memory

input_gpu = cuda.mem_alloc(input_array.nbytes)

frequencies_gpu = cuda.mem_alloc(256 * 4) # 256 possible ASCII chars

frequencies = np.zeros(256, dtype=np.int32)

# Copy input to GPU

cuda.memcpy_htod(input_gpu, input_array)

cuda.memcpy_htod(frequencies_gpu, frequencies)

# Compile and run frequency kernel

mod = SourceModule(freq_kernel_code)

compute_frequencies = mod.get_function("computeFrequencies")

block_size = 256

grid_size = (input_size + block_size - 1) // block_size

compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),

block=(block_size, 1, 1), grid=(grid_size, 1))

# Copy frequencies back to host

cuda.memcpy_dtoh(frequencies, frequencies_gpu)

# Build Huffman tree and generate codes

root = build_huffman_tree(frequencies)

codes = generate_codes(root)

# Encode the input

encoded = ""

for char in input_string:

encoded += codes[ord(char)]

return encoded, codes

# Example usage

if __name__ == "__main__":

input_string = "hello world this is a test string for huffman encoding"

start_time = time.time()

encoded, codes = huffman_encode_gpu(input_string)

print(f"Original string: {input_string}")

print(f"Encoded string: {encoded}")

print(f"Codes: {codes}")

print(f"Time taken: {time.time() - start_time:.4f} seconds")

Output:

Original string: hello world this is a test string for huffman encoding

Encoded string: 01010011011001101010111001011010011101101101111111000101100010111111000101111111010111110000111011110011110111100011110001001000011101001010011111101010010001000100000111101010011110011100100010101011011100010010000

Codes: {103: '0000', 99: '00010', 109: '00011', 117: '00100', 119: '00101', 101: '0011', 102: '0100', 104: '0101', 108: '0110', 114: '0111', 105: '1000', 110: '1001', 111: '1010', 115: '1011', 116: '1100', 97: '11010', 100: '11011', 32: '111'}

Time taken: 1.0239 seconds

Huffman mini-project:

# Install PyCUDA (only needed in Colab or initial setup)

!pip install pycuda

# Import necessary modules

import numpy as np # NumPy for numerical operations

import pycuda.autoinit # Automatically initializes CUDA driver

import pycuda.driver as cuda # PyCUDA driver interface

from pycuda.compiler import SourceModule # Compiler to compile CUDA C kernels from strings

import time # For measuring execution time

# CUDA kernel for frequency counting of characters in the input

freq_kernel_code = """

_global_ void computeFrequencies(unsigned char *input, int *frequencies, int size) {

int idx = blockIdx.x * blockDim.x + threadIdx.x; // Calculate thread index

if (idx < size) {

atomicAdd(&frequencies[input[idx]], 1); // Atomically increment frequency

}

"""

# CUDA kernel for prefix sum (used for building Huffman tree, not used in main)

scan_kernel_code = """

_global_ void prefixSum(int *input, int *output, int size) {

int idx = blockIdx.x * blockDim.x + threadIdx.x; // Calculate thread index

if (idx < size) {

output[idx] = input[idx]; // Copy input to output

for (int i = 1; i <= idx; i++) {

output[idx] += input[i-1]; // Perform prefix sum sequentially

}

"""

# Class to represent a node in Huffman Tree

class HuffmanNode:

def _init_(self, char, freq):

self.char = char # Character (None for internal nodes)

self.freq = freq # Frequency of character

self.left = None # Left child

self.right = None # Right child

# Function to build the Huffman Tree from frequency array

def build_huffman_tree(frequencies):

nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0] # Create leaf nodes

while len(nodes) > 1:

nodes.sort(key=lambda x: x.freq) # Sort by frequency

left = nodes.pop(0) # Extract two lowest

right = nodes.pop(0)

parent = HuffmanNode(None, left.freq + right.freq) # Create new parent node

parent.left = left

parent.right = right

nodes.append(parent) # Add parent back to node list

return nodes[0] if nodes else None # Return root node

# Recursive function to generate Huffman codes from the tree

def generate_codes(root, current_code="", codes=None):

if codes is None:

codes = {}

if root is None:

return codes

if root.char is not None:

codes[root.char] = current_code or "0" # Assign code to character

generate_codes(root.left, current_code + "0", codes) # Left = 0

generate_codes(root.right, current_code + "1", codes) # Right = 1

return codes

# Main function to encode input using GPU-accelerated frequency count

def huffman_encode_gpu(input_string):

input_array = np.frombuffer(input_string.encode(), dtype=np.uint8) # Convert input to uint8 array

input_size = len(input_array) # Size of input array

input_gpu = cuda.mem_alloc(input_array.nbytes) # Allocate memory for input on GPU

frequencies_gpu = cuda.mem_alloc(256 * 4) # Allocate memory for frequencies (256 x 4 bytes)

frequencies = np.zeros(256, dtype=np.int32) # Host-side frequency array

cuda.memcpy_htod(input_gpu, input_array) # Copy input array to GPU

cuda.memcpy_htod(frequencies_gpu, frequencies) # Initialize GPU frequencies to zero

mod = SourceModule(freq_kernel_code) # Compile CUDA kernel

compute_frequencies = mod.get_function("computeFrequencies") # Get function handle

block_size = 256 # CUDA block size

grid_size = (input_size + block_size - 1) // block_size # Compute number of blocks

compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),

block=(block_size, 1, 1), grid=(grid_size, 1)) # Launch kernel

cuda.memcpy_dtoh(frequencies, frequencies_gpu) # Copy frequencies back to CPU

root = build_huffman_tree(frequencies) # Build Huffman tree

codes = generate_codes(root) # Generate codes from tree

encoded = "" # Encode input string using codes

for char in input_string:

encoded += codes[ord(char)]

return encoded, codes

# Example usage and timing

if _name_ == "_main_":

input_string = "hello world this is a test string for huffman encoding" # Input string

start_time = time.time() # Start time

encoded, codes = huffman_encode_gpu(input_string) # Encode using GPU

print(f"Original string: {input_string}")

print(f"Encoded string: {encoded}")

print(f"Codes: {codes}")

print(f"Time taken: {time.time() - start_time:.4f} seconds") # Print total execution time