Open In Colab

Predicting_mask_for_tifimage#

我們將利用 Training_object_detection_model_workbook 所儲存的 model ,將 tif 檔轉換成特定物件的 shp 遮罩檔。過程中我們會有 5 個步驟, (1) 切分 tif (2) 預測 bounding box (3) 利用 SAM 製作 mask.jpg (4) 合併 jpg 檔 (5) 將 jpg 檔換成 shp 檔

請將 “sam_checkpoint_path”、”average_model.pth”、”source_tiffile” 文件備齊,並將 inputs 各項欄位填妥。以上各項資料均會影響 shp檔生成結果,請好好填妥

⚡ Before you start#

Let’s make sure that we have access to GPU. We can use nvidia-smi command to do that. In case of any problems navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

install package#

( be aware that super-gradients sometimes change their package ralease version, so please update super-gradients to the latest version to avoid some error about super-gradients)

%pip install segment-geospatial
!pip install super-gradients==3.2.0
!pip install -q supervision
!pip install aspose-words
import os
HOME = os.getcwd()
%cd {HOME}

import sys
!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git

如果要下載 segment-anything-model 的模型權重可以使用以下的程式碼

%cd {HOME}
!mkdir {HOME}/weights
%cd {HOME}/weights

!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

inputs and setting#

詳細可查看 shpPredictor jupyter book 的 程式碼重要資訊講解, 裏頭會細講參數的注意事項

  • inputs

import torch

inputs = {
      # 模型權重
      "object_detection_model_checkpoint_path": "/content/drive/MyDrive/project_NanShang/resources/average_modelv10_80epochs.pth",
      "sam_checkpoint_path": "/content/weights/sam_vit_h_4b8939.pth",

      # dataset class 數目 和 照片來源檔案
      "num_classes": 2,
      "source_tiffile": "/content/drive/MyDrive/project_NanShang/resources/Nanshan2.tif",

      # 物件偵測精準度控制
      "confidence_threshold": 0.65,
      "tile_size": 700,

      # 照片經緯度資訊
      "top_left_lat": 22.9749,           # tif 檔左上角的緯度
      "top_left_lon": 120.1977,          # tif 檔左上角的經度
      "below_right_lat": 22.9673,         # tif 檔右下角的緯度
      "below_right_lon": 120.2033,        # tif 檔右下角的經度

      "device": 'cuda' if torch.cuda.is_available() else "cpu",
      "model_arch": 'yolo_nas_l',
      "sam_encoder_version": "vit_h",

}
  • settings

import os

STOREHOUSE = "storehouse"
SPLITED_TIFS_DIR = os.path.join(STOREHOUSE, "splited_tifs")
MASK_DIR = os.path.join(STOREHOUSE, "masks")

complete_mask_filename = "map_mask"
COMPLETE_MASK_JPG = os.path.join(STOREHOUSE, complete_mask_filename + ".jpg")
COMPLETE_MASK_TIFF = os.path.join(STOREHOUSE, complete_mask_filename + ".tiff")
COMPLETE_MASK_GEOTIFF = os.path.join(STOREHOUSE, "geo" + complete_mask_filename + ".tiff")

OUTPUT = os.path.join(STOREHOUSE, "output.shp")
data = {}

initialize directory#

import os

%cd {HOME}

os.mkdir(STOREHOUSE)
os.mkdir(SPLITED_TIFS_DIR)
os.mkdir(MASK_DIR)

Step 1 : Cropping tif#

from samgeo.common import split_raster

split_raster(inputs["source_tiffile"], SPLITED_TIFS_DIR, tile_size=inputs["tile_size"])

Step2 : predict bounding box#

from super_gradients.training import models

def load_object_detection_(model_arch: str, num_classes: int, checkpoint_path: str, device: str):

    model = models.get(
        model_arch,
        num_classes=num_classes,
        checkpoint_path=checkpoint_path
    ).to(device)

    return model
import tifffile
import numpy as np
import supervision as sv

tifs_dir = os.listdir(SPLITED_TIFS_DIR)
box_predictor = load_object_detection_(inputs["model_arch"], inputs["num_classes"],
                         inputs["object_detection_model_checkpoint_path"], inputs["device"])
data["detections"] = {}

num_tombs = 0
for tif in tifs_dir:
    # read tif as numpy array
    image = tifffile.imread(os.path.join(SPLITED_TIFS_DIR, tif))
    img_array = np.array(image)

    # predict bounding box
    result = list(box_predictor.predict(img_array, conf=inputs["confidence_threshold"]))[0]
    boxes = result.prediction.bboxes_xyxy
    num_tombs += len(boxes)  # compute the number of tombs

    # make sv.Detections
    detection = sv.Detections(
             xyxy=boxes,
             confidence=result.prediction.confidence,
             class_id=result.prediction.labels.astype(int)
          )
    data["detections"][tif] = detection



# finally we get the total number of tombs
print(f"the number of tombs is {num_tombs}")
data["num_tombs"] = num_tombs

Step 3 : predict_mask#

from segment_anything import sam_model_registry, SamPredictor

def segment(sam_predictor: "SamPredictor", image: np.ndarray, xyxy: np.ndarray) -> np.ndarray:
    sam_predictor.set_image(image)
    result_masks = []
    for box in xyxy:
        masks, scores, logits = sam_predictor.predict(
            box=box,
            multimask_output=True
        )
        index = np.argmax(scores)
        result_masks.append(masks[index])
    return np.array(result_masks)

def load_segment_anything_model(sam_encoder_version, sam_checkpoint_path, device):
    sam = sam_model_registry[sam_encoder_version](checkpoint=sam_checkpoint_path).to(device=device)
    sam_predictor = SamPredictor(sam)
    return sam_predictor
from PIL import Image

tifs_dir = os.listdir(SPLITED_TIFS_DIR)
sam_predictor = load_segment_anything_model(inputs["sam_encoder_version"], inputs["sam_checkpoint_path"],
                                    inputs["device"])
mask_annotator = sv.MaskAnnotator()

for tif in tifs_dir:
    # read tif as numpy array
    image = tifffile.imread(os.path.join(SPLITED_TIFS_DIR, tif))
    img_array = np.array(image)

    # make detection's mask
    data["detections"][tif].mask = segment(
          sam_predictor=sam_predictor,
          image=img_array.copy(),
          xyxy=data["detections"][tif].xyxy
          )

    # produce mask and save as image
    blank = np.zeros_like(img_array)
    mask = mask_annotator.annotate(scene=blank, detections=data["detections"][tif])
    img = Image.fromarray(mask, "RGB")
    img.save(os.path.join(MASK_DIR, "mask" + tif[4:-4] + ".jpg"))

Step 4 : merge image#

from osgeo import gdal

def read_tif(tif_path):
    ds = gdal.Open(tif_path)
    row = ds.RasterXSize
    col = ds.RasterYSize
    return row, col
width, height = read_tif(inputs["source_tiffile"])
size = inputs["tile_size"]
img = Image.new("RGB", (width, height))
mask_dir = os.listdir(MASK_DIR)

# collage small image to a complete image
for mask_filename in mask_dir:
    im = Image.open(os.path.join(MASK_DIR, mask_filename))
    name_list = mask_filename[:-4].split("_")  # ex. turn mask_0_11.jpg into ['mask', '0', '11']
    i = int(name_list[1])      # the 1th value in name_list indicate the order in x ray
    j = int(name_list[2])      # the 2th value in name_list indicate the order in y ray (see samgeo.common.split_raster())
    img.paste(im, (i * size, j * size))

img.save(COMPLETE_MASK_JPG)

Step 5 - 1: image to geotiff#

執行這一步時很吃 RAM。如果 Colab 工作階段因異常原因而中止,那滿有可能是 RAM 不足的問題

import aspose.words as aw
import rasterio
from rasterio.transform import from_origin


COMPLETE_MASK_JPG = "/content/drive/MyDrive/project_NanShang/resources/map_mask_tomb2.jpg"

def jpg2tiff():
    doc = aw.Document()
    builder = aw.DocumentBuilder(doc)
    shape = builder.insert_image(COMPLETE_MASK_JPG)
    shape.image_data.save(COMPLETE_MASK_TIFF)

def tiff2goetiff(below_right_lat, top_left_lat, below_right_lon, top_left_lon):
    # Read the input TIFF
    with rasterio.open(COMPLETE_MASK_TIFF) as src:
        data = src.read()
        dtype = src.dtypes[0]
        count = src.count
        height, width = src.height, src.width

    # compute horizontal pixel size and vertical pixel size in degrees
    dis_lat = below_right_lat - top_left_lat
    dis_lon = below_right_lon - top_left_lon
    pixel_size_x = dis_lon / width  # Example: horizontal pixel size in degrees
    pixel_size_y = dis_lat / height  # Example: vertical pixel size in degrees

    # Define geospatial information
    crs = 'EPSG:4326'  # Example: WGS 84

    # Define the geotransform
    transform = from_origin(top_left_lon, top_left_lat, pixel_size_x, pixel_size_y)

    # Write the GeoTIFF file
    with rasterio.open(
            COMPLETE_MASK_GEOTIFF,
            'w',
            driver='GTiff',
            height=height,
            width=width,
            count=count,
            dtype=dtype,
            crs=crs,
            transform=transform,
    ) as dst:
        dst.write(data)
jpg2tiff()
tiff2goetiff(inputs["below_right_lat"], inputs["top_left_lat"],
                  inputs["below_right_lon"], inputs["top_left_lon"])

Step 5 - 2: geotiff to shp#

from samgeo.common import raster_to_shp

raster_to_shp(COMPLETE_MASK_GEOTIFF, OUTPUT)