!pip install pycuda
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import time
# CUDA kernel for frequency counting
freq_kernel_code = """
__global__ void computeFrequencies(unsigned char *input, int *frequencies, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size) {
atomicAdd(&frequencies[input[idx]], 1);
}
}
"""
# CUDA kernel for prefix sum (scan) to build Huffman tree
scan_kernel_code = """
__global__ void prefixSum(int *input, int *output, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size) {
output[idx] = input[idx];
for (int i = 1; i <= idx; i++) {
output[idx] += input[i-1];
}
}
}
"""
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def build_huffman_tree(frequencies):
nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0]
while len(nodes) > 1:
nodes.sort(key=lambda x: x.freq)
left = nodes.pop(0)
right = nodes.pop(0)
parent = HuffmanNode(None, left.freq + right.freq)
parent.left = left
parent.right = right
nodes.append(parent)
return nodes[0] if nodes else None
def generate_codes(root, current_code="", codes=None):
if codes is None:
codes = {}
if root is None:
return codes
if root.char is not None:
codes[root.char] = current_code or "0"
generate_codes(root.left, current_code + "0", codes)
generate_codes(root.right, current_code + "1", codes)
return codes
def huffman_encode_gpu(input_string):
# Convert input to numpy array
input_array = np.frombuffer(input_string.encode(), dtype=np.uint8)
input_size = len(input_array)
# Allocate GPU memory
input_gpu = cuda.mem_alloc(input_array.nbytes)
frequencies_gpu = cuda.mem_alloc(256 * 4) # 256 possible ASCII chars
frequencies = np.zeros(256, dtype=np.int32)
# Copy input to GPU
cuda.memcpy_htod(input_gpu, input_array)
cuda.memcpy_htod(frequencies_gpu, frequencies)
# Compile and run frequency kernel
mod = SourceModule(freq_kernel_code)
compute_frequencies = mod.get_function("computeFrequencies")
block_size = 256
grid_size = (input_size + block_size - 1) // block_size
compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),
block=(block_size, 1, 1), grid=(grid_size, 1))
# Copy frequencies back to host
cuda.memcpy_dtoh(frequencies, frequencies_gpu)
# Build Huffman tree and generate codes
root = build_huffman_tree(frequencies)
codes = generate_codes(root)
# Encode the input
encoded = ""
for char in input_string:
encoded += codes[ord(char)]
return encoded, codes
# Example usage
if __name__ == "__main__":
input_string = "hello world this is a test string for huffman encoding"
start_time = time.time()
encoded, codes = huffman_encode_gpu(input_string)
print(f"Original string: {input_string}")
print(f"Encoded string: {encoded}")
print(f"Codes: {codes}")
print(f"Time taken: {time.time() - start_time:.4f} seconds")
Output:
Original string: hello world this is a test string for huffman encoding
Encoded string: 01010011011001101010111001011010011101101101111111000101100010111111000101111111010111110000111011110011110111100011110001001000011101001010011111101010010001000100000111101010011110011100100010101011011100010010000
Codes: {103: '0000', 99: '00010', 109: '00011', 117: '00100', 119: '00101', 101: '0011', 102: '0100', 104: '0101', 108: '0110', 114: '0111', 105: '1000', 110: '1001', 111: '1010', 115: '1011', 116: '1100', 97: '11010', 100: '11011', 32: '111'}
Time taken: 1.0239 seconds
Huffman mini-project:
# Install PyCUDA (only needed in Colab or initial setup)
!pip install pycuda
# Import necessary modules
import numpy as np # NumPy for numerical operations
import pycuda.autoinit # Automatically initializes CUDA driver
import pycuda.driver as cuda # PyCUDA driver interface
from pycuda.compiler import SourceModule # Compiler to compile CUDA C kernels from strings
import time # For measuring execution time
# CUDA kernel for frequency counting of characters in the input
freq_kernel_code = """
_global_ void computeFrequencies(unsigned char *input, int *frequencies, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // Calculate thread index
if (idx < size) {
atomicAdd(&frequencies[input[idx]], 1); // Atomically increment frequency
}
}
"""
# CUDA kernel for prefix sum (used for building Huffman tree, not used in main)
scan_kernel_code = """
_global_ void prefixSum(int *input, int *output, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // Calculate thread index
if (idx < size) {
output[idx] = input[idx]; // Copy input to output
for (int i = 1; i <= idx; i++) {
output[idx] += input[i-1]; // Perform prefix sum sequentially
}
}
}
"""
# Class to represent a node in Huffman Tree
class HuffmanNode:
def _init_(self, char, freq):
self.char = char # Character (None for internal nodes)
self.freq = freq # Frequency of character
self.left = None # Left child
self.right = None # Right child
# Function to build the Huffman Tree from frequency array
def build_huffman_tree(frequencies):
nodes = [HuffmanNode(i, freq) for i, freq in enumerate(frequencies) if freq > 0] # Create leaf nodes
while len(nodes) > 1:
nodes.sort(key=lambda x: x.freq) # Sort by frequency
left = nodes.pop(0) # Extract two lowest
right = nodes.pop(0)
parent = HuffmanNode(None, left.freq + right.freq) # Create new parent node
parent.left = left
parent.right = right
nodes.append(parent) # Add parent back to node list
return nodes[0] if nodes else None # Return root node
# Recursive function to generate Huffman codes from the tree
def generate_codes(root, current_code="", codes=None):
if codes is None:
codes = {}
if root is None:
return codes
if root.char is not None:
codes[root.char] = current_code or "0" # Assign code to character
generate_codes(root.left, current_code + "0", codes) # Left = 0
generate_codes(root.right, current_code + "1", codes) # Right = 1
return codes
# Main function to encode input using GPU-accelerated frequency count
def huffman_encode_gpu(input_string):
input_array = np.frombuffer(input_string.encode(), dtype=np.uint8) # Convert input to uint8 array
input_size = len(input_array) # Size of input array
input_gpu = cuda.mem_alloc(input_array.nbytes) # Allocate memory for input on GPU
frequencies_gpu = cuda.mem_alloc(256 * 4) # Allocate memory for frequencies (256 x 4 bytes)
frequencies = np.zeros(256, dtype=np.int32) # Host-side frequency array
cuda.memcpy_htod(input_gpu, input_array) # Copy input array to GPU
cuda.memcpy_htod(frequencies_gpu, frequencies) # Initialize GPU frequencies to zero
mod = SourceModule(freq_kernel_code) # Compile CUDA kernel
compute_frequencies = mod.get_function("computeFrequencies") # Get function handle
block_size = 256 # CUDA block size
grid_size = (input_size + block_size - 1) // block_size # Compute number of blocks
compute_frequencies(input_gpu, frequencies_gpu, np.int32(input_size),
block=(block_size, 1, 1), grid=(grid_size, 1)) # Launch kernel
cuda.memcpy_dtoh(frequencies, frequencies_gpu) # Copy frequencies back to CPU
root = build_huffman_tree(frequencies) # Build Huffman tree
codes = generate_codes(root) # Generate codes from tree
encoded = "" # Encode input string using codes
for char in input_string:
encoded += codes[ord(char)]
return encoded, codes
# Example usage and timing
if _name_ == "_main_":
input_string = "hello world this is a test string for huffman encoding" # Input string
start_time = time.time() # Start time
encoded, codes = huffman_encode_gpu(input_string) # Encode using GPU
print(f"Original string: {input_string}")
print(f"Encoded string: {encoded}")
print(f"Codes: {codes}")
print(f"Time taken: {time.time() - start_time:.4f} seconds") # Print total execution time