Skip to content

Commit 0106c79

Browse files
author
Dpbm
committed
added Images class
1 parent 616a21c commit 0106c79

4 files changed

Lines changed: 148 additions & 257 deletions

File tree

dataset.py

Lines changed: 0 additions & 232 deletions
Original file line numberDiff line numberDiff line change
@@ -144,201 +144,6 @@ def __str__(self) -> str:
144144
)
145145

146146

147-
class CircuitResult(TypedDict):
148-
"""Type for circuit results"""
149-
150-
index: int
151-
depth: int
152-
file: str
153-
measurements: str # JSON string
154-
result: str # JSON string
155-
hash: str
156-
157-
158-
def get_circuit_results(qc: QuantumCircuit, sampler: Sampler, shots: int) -> Dist:
159-
"""Execute circuit on sampler. Returns its quasi dist"""
160-
return sampler.run([qc], shots=shots).result().quasi_dists[0] # type: ignore
161-
162-
163-
def fix_dist_gaps(dist: Dist, states: States):
164-
"""Auxiliary function to fill the remaining bitstrings with 0"""
165-
for state in states:
166-
result_value = dist.get(state)
167-
if result_value is None:
168-
dist[state] = 0
169-
170-
171-
def generate_circuit_images(
172-
base_index: int,
173-
states: States,
174-
measurements: MeasurementsCombinations,
175-
base_image_path: FilePath,
176-
n_qubits: int,
177-
total_gates: int,
178-
shots: int,
179-
) -> List[CircuitResult]:
180-
"""
181-
Run an experiment, save its images and return its results for different combinations
182-
of measurements.
183-
"""
184-
185-
sim = AerSimulator()
186-
pm = generate_preset_pass_manager(backend=sim, optimization_level=0)
187-
sampler = Sampler()
188-
results: List[CircuitResult] = []
189-
190-
# non-interactive backend
191-
matplotlib.use("Agg")
192-
193-
qc = get_random_circuit(n_qubits, total_gates)
194-
195-
for index, measurement in enumerate(measurements):
196-
image_index = base_index + index
197-
image_path = os.path.join(base_image_path, "%d.png" % image_index)
198-
199-
qc_copy = qc.copy()
200-
total_measurements = len(measurement)
201-
classical_register = ClassicalRegister(total_measurements, name="c")
202-
qc_copy.add_register(classical_register)
203-
qc_copy.measure(measurement, list(range(total_measurements)))
204-
205-
drawing = qc_copy.draw("mpl", filename=image_path, fold=-1, scale=SCALE_CIRCUIT_SIZE)
206-
plt.close(drawing)
207-
del drawing
208-
209-
depth = qc_copy.depth()
210-
isa_qc = pm.run(qc_copy)
211-
del qc_copy
212-
213-
with open(image_path, "rb") as file:
214-
file_hash = hashlib.md5(file.read()).hexdigest()
215-
216-
outcomes = get_circuit_results(isa_qc, sampler, shots)
217-
fix_dist_gaps(outcomes, states)
218-
219-
del isa_qc
220-
gc.collect()
221-
222-
# once we have more than a few combinations, depending on how many threads we
223-
# start, it can use a lot of memory. It also depends on how many states are possible, growing
224-
# exponentially with the number of qubits (2^n).
225-
results.append(
226-
{
227-
"index": image_index,
228-
"depth": depth,
229-
"file": image_path,
230-
"result": json.dumps(list(outcomes.values())),
231-
"hash": file_hash,
232-
"measurements": json.dumps(measurement),
233-
}
234-
)
235-
236-
# clear data
237-
del sim
238-
del pm
239-
del sampler
240-
gc.collect()
241-
242-
return results
243-
244-
245-
def generate_images(
246-
target_folder: FilePath,
247-
n_qubits: int,
248-
total_gates: int,
249-
shots: int,
250-
amount_circuits: int,
251-
total_threads: int,
252-
checkpoint: Checkpoint,
253-
):
254-
"""
255-
Generate multiple images and save a dataframe with information about them.
256-
It runs in multiple threads (processes in this case) to speed things up.
257-
"""
258-
259-
dataset_file_path = dataset_file(target_folder)
260-
261-
bitstrings_to_int = [
262-
int("".join(comb), 2) for comb in product("01", repeat=n_qubits)
263-
]
264-
265-
# get all measurement combinations
266-
# may be expensive with a large number of qubits, but for 5,6,... it's good
267-
qubits_iter = list(range(n_qubits))
268-
measurement_combs: MeasurementsCombinations = [
269-
qubits_iter
270-
] # start with [[0,1,2,3,4,....,n-1]]
271-
for amount in range(1, n_qubits):
272-
measurement_combs = [
273-
*measurement_combs,
274-
*list(combinations(qubits_iter, amount)), # type: ignore
275-
] # type: ignore
276-
total_measurement_combs = len(measurement_combs)
277-
278-
base_dataset_path = dataset_path(target_folder)
279-
280-
index = checkpoint.index
281-
with tqdm(total=amount_circuits, initial=index) as progress:
282-
while index < amount_circuits:
283-
args = []
284-
285-
for _ in range(total_threads):
286-
base_index = index * total_measurement_combs
287-
args.append(
288-
(
289-
base_index,
290-
bitstrings_to_int,
291-
measurement_combs,
292-
base_dataset_path,
293-
n_qubits,
294-
total_gates,
295-
shots,
296-
)
297-
)
298-
index += 1
299-
300-
with ThreadPoolExecutor(max_workers=total_threads) as pool:
301-
threads = [pool.submit(generate_circuit_images, *arg) for arg in args] # type:ignore
302-
303-
# The best would be using the polars scan_csv and sink_csv to
304-
# write memory efficient queries easily.
305-
# However, it's an experimental feature, and for some reason they don't work
306-
# well together.
307-
# https://github.com/pola-rs/polars/issues/22845
308-
# https://github.com/pola-rs/polars/issues/20468
309-
# to solve that, we're going to use Python's built-in csv library
310-
# to append the new lines without loading the whole csv into memory.
311-
312-
# df = open_csv(dataset_file_path)
313-
314-
rows: Rows = []
315-
for future in as_completed(threads): # type: ignore
316-
rows = [
317-
*rows,
318-
*[list(result.values()) for result in future.result()],
319-
]
320-
321-
append_rows_to_df(dataset_file_path, rows)
322-
323-
del rows
324-
del threads
325-
del args
326-
gc.collect()
327-
328-
# save_df(df, dataset_file_path)
329-
330-
# remove df from memory to avoid excessive
331-
# memory usage
332-
# del df
333-
# gc.collect()
334-
335-
progress.update(total_threads)
336-
337-
checkpoint.index = index
338-
checkpoint.save()
339-
340-
341-
342147

343148
def shuffle_csv(target_folder:FilePath):
344149
"""
@@ -350,43 +155,6 @@ def shuffle_csv(target_folder:FilePath):
350155
df = shuffle_df(df)
351156
df.write_csv(file_path)
352157

353-
def transform_images(
354-
target_folder: FilePath, new_dim: Dimensions, checkpoint: Checkpoint
355-
):
356-
"""Normalize images and save them into a h5 file"""
357-
print("%sTransforming images%s" % (Colors.GREENBG, Colors.ENDC))
358-
359-
df = open_csv(dataset_file(target_folder))
360-
361-
current_index = checkpoint.index
362-
amount_of_rows_per_iteration = 500
363-
364-
max_width, max_height = new_dim
365-
366-
while True:
367-
collected_rows: List[FilePath] = (
368-
df.slice(offset=current_index, length=amount_of_rows_per_iteration)
369-
.collect()
370-
.get_column("file")
371-
.to_list()
372-
)
373-
374-
if len(collected_rows) <= 0:
375-
break
376-
377-
image_i = checkpoint.index
378-
with h5py.File(images_h5_file(target_folder), "a") as file:
379-
for image_path in tqdm(collected_rows):
380-
with Image.open(image_path) as img:
381-
tensor = transform_image(img, max_width, max_height)
382-
file.create_dataset(f"{image_i}", data=tensor)
383-
384-
image_i += 1
385-
checkpoint.index = image_i
386-
checkpoint.save()
387-
388-
current_index += amount_of_rows_per_iteration
389-
390158

391159

392160

generate/dataset/dataframe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def df_schema(self) -> Schema:
4545
"file": pl.String,
4646
"result": pl.String,
4747
"hash": pl.String,
48+
"total_meas": pl.UInt8,
4849
"measurements": pl.String,
4950
"img_width": pl.UInt16,
5051
"img_height": pl.UInt16,

0 commit comments

Comments
 (0)