Hi Magnesh,
I understand that you want to augment your point cloud data with 4 augmentation techniques and then use the resulting dataset for classification.
It looks like your code applies each augmentation technique separately to each of the 4 augmented datasets, which can multiply the number of generated datasets beyond what you intend (for example, 4 techniques applied independently to 4 copies could yield 16 augmented datasets per file instead of 4).
Please find the changes made to your code, along with the updated code snippet:
- Each augmentation technique is applied sequentially within the loop, so every augmented dataset undergoes all four augmentations exactly once per iteration.
- Augmented data is appended directly to "point_clouds" and "classifications" inside the loop, after all augmentations have been applied.
- "classification.copy()" is used so that each augmented dataset gets its own independent copy of the classification labels. Note that this assumes the augmentations preserve point count and ordering; see the comment in the snippet below.
import numpy as np

# Assumes get_point_cloud, classify_point, the random_* augmentation helpers,
# directory_path and PointCloudDataset are defined elsewhere, as in your original code.
def prepare_dataset(obj_files, augmentation=True):
    # Target size per dataset (1 original + 4 augmented)
    target_size_per_dataset = 5
    print(f"Length of obj files: {len(obj_files)}")  # Number of object files

    point_clouds = []
    classifications = []

    # Iterate over each object file
    for obj_file in obj_files:
        try:
            # Get point cloud points
            point_cloud = get_point_cloud(directory_path + "/" + obj_file)

            # Get centerline points
            centerline_file = directory_path + "/" + obj_file[:-4] + '_centerline' + '.dat'
            centerline_points = np.loadtxt(centerline_file, skiprows=1, usecols=(0, 1, 2))

            # Classify points (done only once for the original data)
            classification = classify_point(point_cloud, centerline_points)

            # Include the original data
            point_clouds.append(point_cloud)
            classifications.append(classification)

            # Apply data augmentation techniques
            if augmentation:
                for _ in range(target_size_per_dataset - 1):  # Generate 4 augmented datasets
                    augmented_pc, augmented_cl = point_cloud, centerline_points
                    augmented_pc, augmented_cl = random_rotation(augmented_pc, augmented_cl)
                    augmented_pc, augmented_cl = random_crop(augmented_pc, augmented_cl)
                    augmented_pc, augmented_cl = random_scale(augmented_pc, augmented_cl)
                    augmented_pc, augmented_cl = random_noise(augmented_pc, augmented_cl)

                    # Append the augmented data with its own copy of the labels.
                    # Note: this assumes the augmentations preserve point count and
                    # ordering; if random_crop drops points, re-run classify_point
                    # on augmented_pc instead of copying.
                    point_clouds.append(augmented_pc)
                    classifications.append(classification.copy())

            print(f"Total point clouds accumulated after {obj_file}: {len(point_clouds)}")
        except Exception as e:
            print(f"Skipping {obj_file}: {e}")

    # Convert to PointCloudDataset
    point_clouds = np.concatenate(point_clouds)
    classifications = np.concatenate(classifications)
    point_clouds = point_clouds.reshape((point_clouds.shape[0], point_clouds.shape[1], 1))
    dataset = PointCloudDataset(point_clouds.astype(np.float32), classifications.astype(np.int64))

    length_after_augmentation = len(point_clouds)  # Total number of points after augmentation
    print(f"Length of dataset after augmentation: {length_after_augmentation}")
    return dataset
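For completeness, here's a minimal sketch of what a random_rotation helper consistent with this pipeline could look like. Your actual implementation may differ; the key point is that the point cloud and the centerline must be transformed together so the per-point labels stay valid:

import numpy as np

def random_rotation(point_cloud, centerline_points):
    # Rotate the point cloud and its centerline by the same random angle
    # about the z-axis so their spatial relationship is preserved.
    theta = np.random.uniform(0.0, 2.0 * np.pi)
    c, s = np.cos(theta), np.sin(theta)
    rotation = np.array([[c,  -s,  0.0],
                         [s,   c,  0.0],
                         [0.0, 0.0, 1.0]])
    return point_cloud @ rotation.T, centerline_points @ rotation.T

And a quick usage sketch, assuming obj_files lists the .obj files in directory_path and that PointCloudDataset subclasses torch.utils.data.Dataset:

import os
from torch.utils.data import DataLoader

obj_files = [f for f in os.listdir(directory_path) if f.endswith('.obj')]
dataset = prepare_dataset(obj_files, augmentation=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)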
I hope this helps!