|
| 1 | +import numpy as np |
| 2 | + |
def generate_binary_array_and_factors(N):
    """Create a random 0/1 list of length N plus N's non-trivial factor pairs.

    Args:
        N: Length of the binary array; also the number that is factored.

    Returns:
        A ``(bits, pairs)`` tuple. ``bits`` is a list of N random integers,
        each 0 or 1. ``pairs`` is a list of ``(divisor, N // divisor)``
        tuples for every divisor from 2 up to and including ``int(sqrt(N))``,
        in ascending divisor order. The trivial pair ``(1, N)`` is never
        included, so the list is empty when N is prime.
    """
    # Draw the random bits first and convert them to a plain Python list.
    bits = np.random.randint(2, size=N).tolist()

    # Only divisors up to sqrt(N) are tried; each one yields exactly one pair
    # whose first element is the smaller factor.
    upper_bound = int(np.sqrt(N)) + 1
    pairs = []
    for divisor in range(2, upper_bound):
        if N % divisor:
            continue
        pairs.append((divisor, N // divisor))

    return bits, pairs
| 10 | + |
def generate_one_sample_json_string(binary_array_str, dimensions_str, answer_str):
    """Serialize one evaluation sample as a single-line JSON object.

    The sample has an ``"input"`` list holding a fixed system message and a
    user message built from the arguments, and an ``"ideal"`` string holding
    the expected answer.

    Args:
        binary_array_str: Text of the binary array, e.g. ``"[1,0,1,1]"``.
        dimensions_str: Grid dimensions as ``"<rows>x<cols>"``, e.g. ``"2x2"``.
        answer_str: Text of the expected final row, e.g. ``"[1,1]"``.

    Returns:
        A JSON string with no trailing newline.
    """
    # Function-scope import so this block does not depend on the file header.
    import json

    system_prompt = (
        'Given the user-provided binary array, map the binary array onto a '
        'grid, wherein the dimensions of the grid are as provided by the '
        'user ([num rows]x[num elements per row]), and the mapping to the '
        'grid is done from left to right, top to bottom (provide a '
        'visualization of the mapped result). Then explain in a second '
        'visualization how the final row of the grid was mapped from the '
        'corresponding final binary numbers of the array. Lastly, provide '
        'the final row of the grid, in minified JSON format, like this: '
        '{"Final Row":[...]}'
    )
    user_prompt = (
        'Array: ' + binary_array_str + '\nGrid Dimensions: ' + dimensions_str
    )
    sample = {
        'input': [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_prompt},
        ],
        # The ideal answer is itself minified JSON embedded as a string.
        'ideal': '{"Final Row":' + answer_str + '}',
    }
    # json.dumps handles all escaping, so quotes, backslashes or control
    # characters in the arguments can no longer produce invalid JSON. Its
    # default separators match the layout of the previous hand-built string.
    return json.dumps(sample)
| 14 | + |
def write_lines_to_file(min_array_len, max_array_len, filename, max_lines=50):
    """Write grid-mapping samples to ``filename``, one JSON object per line.

    For each array length from ``min_array_len`` to ``max_array_len``
    (inclusive) one random binary array is generated, and one sample is
    written for each factor pair of that length. The first factor is used as
    the row count and the second as the row length; the expected answer is
    the last ``row length`` elements of the array.

    Args:
        min_array_len: Smallest array length to generate samples for.
        max_array_len: Largest array length to generate samples for.
        filename: Path of the output file; it is created or truncated.
        max_lines: Upper bound on the number of lines written. A value of
            zero or less leaves the file empty.

    Returns:
        None. Writing stops once ``max_lines`` lines have been written or all
        lengths have been processed, whichever comes first.
    """
    num_lines = 0
    # The output is plain ASCII JSON; the explicit encoding just makes the
    # result independent of the platform default.
    with open(filename, 'w', encoding='utf-8') as file:
        # Honour a non-positive limit instead of writing every sample.
        if max_lines <= 0:
            return
        for array_len in range(min_array_len, max_array_len + 1):
            arr, pairs = generate_binary_array_and_factors(array_len)
            # The array text is the same for every factor pair of this
            # length, so build it once. Spaces are stripped to minify it.
            arr_str = str(arr).replace(' ', '')
            for num_rows, row_len in pairs:
                dims = f'{num_rows}x{row_len}'
                # The final grid row is simply the last row_len elements.
                ans = str(arr[-row_len:]).replace(' ', '')
                line = generate_one_sample_json_string(arr_str, dims, ans)
                file.write(line + '\n')
                num_lines += 1
                if num_lines >= max_lines:
                    return
| 38 | + |
# Produce up to 1,000 samples (one JSON object per line) in samples.jsonl,
# covering array lengths from 40 through 500.
write_lines_to_file(
    min_array_len=40,
    max_array_len=500,
    filename='samples.jsonl',
    max_lines=1000,
)
0 commit comments