kannadaink/search_hyperparameter.py at main · Manojbhat09/kannadaink · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import cv2
import numpy as np

def evaluate_text_density(binary_image, top_fraction=0.2):
    """Return the share of non-zero pixels in the top band of a binary image.

    Args:
        binary_image: 2-D array indexed as (row, column); every non-zero
            element is counted as a text pixel.
        top_fraction: Fraction of the image height, measured from the top
            row, that makes up the band to inspect.

    Returns:
        The number of non-zero pixels divided by the total number of pixels
        in the band, or 0.0 when the band contains no pixels at all.
    """
    # Height of the region of interest; int() truncates, so this can be 0
    # for short images or small fractions.
    top_height = int(binary_image.shape[0] * top_fraction)
    top_image = binary_image[:top_height, :]

    total_pixel_count = top_image.size
    if total_pixel_count == 0:
        # An empty band has no text; avoid dividing by zero.
        return 0.0

    return np.count_nonzero(top_image) / total_pixel_count

def hyperparameter_search(image, block_sizes, C_values, top_fraction=0.2):
    """Grid-search adaptive-threshold parameters by top-region text density.

    The image is converted from BGR to grayscale and blurred with a 5x5
    Gaussian kernel once. Every (block size, C) combination is then used for
    an inverted Gaussian adaptive threshold, and the combination whose result
    has the highest density in the top region (as measured by
    ``evaluate_text_density``) is kept.

    Args:
        image: BGR image to analyse.
        block_sizes: Iterable of candidate block sizes; even values are
            raised by one so the size passed to the threshold is odd.
        C_values: Iterable of candidate C constants; it is iterated once per
            block size.
        top_fraction: Fraction of the image height forwarded to
            ``evaluate_text_density``.

    Returns:
        A ``(block_size, C)`` tuple for the best-scoring combination, or
        ``None`` when no combination yields a density greater than zero.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )

    winner = None
    top_score = 0

    for candidate_size in block_sizes:
        # The parity fix depends only on the block size, so it is done once
        # per outer iteration rather than once per C value.
        odd_size = candidate_size + 1 if candidate_size % 2 == 0 else candidate_size

        for offset in C_values:
            thresholded = cv2.adaptiveThreshold(
                smoothed,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY_INV,
                odd_size,
                offset,
            )
            score = evaluate_text_density(thresholded, top_fraction)

            # Strict comparison: on ties the earliest combination is kept.
            if score > top_score:
                top_score, winner = score, (odd_size, offset)

    return winner

# Define the ranges for the block sizes and C values
block_size_range = range(11, 40, 2)  # Grid search range for block size: odd values from 11 to 39
C_value_range = range(-5, 10)  # Grid search range for C: integers from -5 to 9 (range end is exclusive)

# Load the image
image_path = 'kannada.jpg'  # Replace with your actual image path
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Find the best hyperparameters
best_params = hyperparameter_search(image, block_size_range, C_value_range)
if best_params is None:
    # hyperparameter_search returns None when no combination produced a
    # density above zero; unpacking None would raise an opaque TypeError.
    raise RuntimeError("No threshold parameters produced text pixels in the top region")
best_block_size, best_C = best_params
print(f"Best block size: {best_block_size}, Best C: {best_C}")

# Apply the best parameters to preprocess the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
optimized_binary_image = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                               cv2.THRESH_BINARY_INV, best_block_size, best_C)
print("best params are ", best_block_size, best_C)

# Save the thresholded image
output_image_path = 'binary.jpg'  # Replace with your desired save path
if not cv2.imwrite(output_image_path, optimized_binary_image):
    # cv2.imwrite returns False instead of raising when the write fails.
    raise OSError(f"Could not write image: {output_image_path}")