-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.Rhistory
More file actions
227 lines (227 loc) · 8.54 KB
/
.Rhistory
File metadata and controls
227 lines (227 loc) · 8.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
::: {.column width="30%"}
library(iRamat)
#' Connect the CHIPS DB API and return an R object (dataframe, etc)
#'
#' @name db_api_connect
#' @description Connect the CHIPS DB API and return an R object (dataframe, etc.).
#' The default dataset is dataset_adisser17, accessed at
#' <http://157.136.252.188:3000/dataset_adisser17>.
#'
#' If \code{all_datasets = TRUE}, all datasets whose \code{description} is
#' "Dataset" in the GitHub-hosted table
#' \url{https://github.com/iramat/iramat-dev/blob/main/dbs/chips/urls_data.tsv}
#' are downloaded (using their \code{url_data} column), merged into a single
#' \code{data.frame}, and a new column \code{dataset_name} is added to indicate
#' the source dataset for each row.
#'
#' @param d A \code{hash} object in which to store the result. Anything that is
#'   not a hash (the default \code{NA}, or \code{NULL}) triggers creation of a
#'   new one. Ignored when \code{all_datasets = TRUE}.
#' @param api_url An URL landing to an API. Default:
#'   \code{"http://157.136.252.188:3000/dataset_adisser17"}.
#'   Ignored when \code{all_datasets = TRUE}.
#' @param output_format The selected output format. Default "dataframe"
#'   (currently not used).
#' @param verbose if TRUE (by default), verbose.
#' @param all_datasets Logical. If \code{TRUE}, download all datasets with
#'   \code{description == "Dataset"} listed in the GitHub table
#'   \url{https://github.com/iramat/iramat-dev/blob/main/dbs/chips/urls_data.tsv},
#'   merge them into a single \code{data.frame}, and add a column
#'   \code{dataset_name} with the name of the source dataset (last component
#'   of the API URL). Default \code{FALSE}.
#'
#' @return
#' * If \code{all_datasets = FALSE} (default): a hash where each key is the
#' dataset label (last part of \code{api_url}) and each value is a dataframe.
#' * If \code{all_datasets = TRUE}: a single merged \code{data.frame} with an
#' additional column \code{dataset_name}.
#'
#' @examples
#' # Default behaviour: one dataset stored in a hash
#' df_hash <- db_api_connect()
#' head(df_hash$dataset_adisser17)
#'
#' # All CHIPS datasets listed in urls_data.tsv, merged into a single dataframe
#' df_all <- db_api_connect(all_datasets = TRUE)
#' head(df_all)
#'
#' @export
db_api_connect <- function(d = NA,
                           api_url = "http://157.136.252.188:3000/dataset_adisser17",
                           output_format = "dataframe",
                           verbose = TRUE,
                           all_datasets = FALSE) {
  # Helper: GET one API endpoint and parse its JSON payload into a data.frame.
  # Stops (without the internal call in the message) on any non-200 status.
  fetch_single_api <- function(api_url, verbose = TRUE) {
    if (verbose) {
      message("Requesting: ", api_url)
    }
    response <- httr::GET(api_url)
    if (httr::status_code(response) != 200) {
      stop("Failed to retrieve data from ", api_url,
           ". Status code: ", httr::status_code(response),
           call. = FALSE)
    }
    content_json <- httr::content(response, as = "text", encoding = "UTF-8")
    as.data.frame(jsonlite::fromJSON(content_json, flatten = TRUE))
  }
  # ---------------------------------------------------------------------------
  # MODE 1: all_datasets = TRUE -> read GitHub TSV and merge all datasets
  # ---------------------------------------------------------------------------
  if (isTRUE(all_datasets)) {
    if (verbose) {
      message("all_datasets = TRUE: collecting all datasets from urls_data.tsv")
    }
    # Raw (not HTML-rendered) URL for the TSV file on GitHub
    urls_file <- "https://raw.githubusercontent.com/iramat/iramat-dev/main/dbs/chips/urls_data.tsv"
    urls_df <- utils::read.table(
      urls_file,
      header = TRUE,
      sep = "\t",
      stringsAsFactors = FALSE,
      check.names = FALSE
    )
    # Keep only rows flagged as datasets that carry a non-empty data URL
    urls_df <- urls_df[urls_df$description == "Dataset" &
                         !is.na(urls_df$url_data) &
                         nzchar(urls_df$url_data), ]
    if (nrow(urls_df) == 0) {
      stop("No dataset URLs found in urls_data.tsv with description == 'Dataset'.",
           call. = FALSE)
    }
    # Fetch each dataset and tag its rows with the source dataset name
    # (last component of the API URL)
    list_dfs <- lapply(seq_len(nrow(urls_df)), function(i) {
      api_i <- urls_df$url_data[i]
      df_i <- fetch_single_api(api_i, verbose = verbose)
      df_i$dataset_name <- basename(api_i)
      df_i
    })
    # Harmonise columns across data.frames (fill missing with NA) so that
    # do.call(rbind, ...) does not fail on heterogeneous schemas
    all_cols <- unique(unlist(lapply(list_dfs, names)))
    list_dfs <- lapply(list_dfs, function(x) {
      missing_cols <- setdiff(all_cols, names(x))
      if (length(missing_cols) > 0) {
        x[missing_cols] <- NA
      }
      x[, all_cols, drop = FALSE]
    })
    merged_df <- do.call(rbind, list_dfs)
    if (verbose) {
      message("Collected and merged ", nrow(urls_df),
              " datasets. Total rows: ", nrow(merged_df))
    }
    return(merged_df)
  }
  # ---------------------------------------------------------------------------
  # MODE 2: original behaviour -> single API URL stored in a hash
  # ---------------------------------------------------------------------------
  dataset_label <- basename(api_url)
  # inherits() (instead of is.na(d)[1]) avoids a warning when a hash is
  # passed and an error when d = NULL; the default d = NA still creates
  # a fresh hash.
  if (!inherits(d, "hash")) {
    if (verbose) {
      message("Will store the results in a new variable (hash).")
    }
    d <- hash::hash()
  }
  d[[dataset_label]] <- fetch_single_api(api_url, verbose = verbose)
  return(d)
}
# Merge every CHIPS dataset listed in urls_data.tsv into one data.frame
df_all <- db_api_connect(all_datasets = TRUE)
head(df_all)
# Default mode: a single dataset stored in a hash, keyed by dataset name
df <- db_api_connect()
head(df$dataset_adisser17, 1)
CHIPS est une base de données <img src='https://raw.githubusercontent.com/zoometh/thomashuet/master/img/app-bd-postgres.png' style='width: 50px;vertical-align: middle;'> destinée aux archéométallurgistes du fer, offrant des références chimiques normalisées pour analyser les techniques anciennes et les réseaux d’échanges. Les données, documentées spatialement, chronologiquement et typologiquement, sont rendues interopérables grâce à des référentiels standardisés. CHIPS propose déjà une interface cartographique et vise des outils visuels avancés.
# NOTE(review): console history transcript — repeated and exploratory calls
# are preserved verbatim (they reflect the actual session).
head(df$dataset_adisser17, 1)
# Transposed view: one row per variable for the first observation
head(t(df$dataset_adisser17), 1)
# install.packages("devtools")
# devtools::install_github("iramat/iRamat")
library(iRamat)
df <- db_api_connect()
names(df)
# [1] "dataset_adisser17"
head(df$dataset_adisser17, 1)
# Seriation / chronology packages
install.packages("lakhesis")
install.packages("eratosthenes")
library(lakhesis)
library(eratosthenes)
# First attempt loads from the working directory (file not there);
# retried below with an absolute path
load("ser20251001.rda")
library(lakhesis)
library(eratosthenes)
path <- "C:/Users/TH282424/Rprojects/iramat-dev/doc/projects/pci-archaeology/BIB 3993/"
load(paste0(path, "ser20251001.rda"))
# optimality criteria of the seriated matrices
cor_sq(ser_aleria)
library(lakhesis)
library(eratosthenes)
# optimality criteria of the seriated matrices
cor_sq(ser_aleria)
# Look up which package provides cor_sq
??cor_sq
# Trying seminr in case it provides cor_sq (it does not)
install.packages("seminr")
library(seminr)
# optimality criteria of the seriated matrices
cor_sq(ser_aleria)
library(seminr)
# optimality criteria of the seriated matrices
cor_sq(ser_aleria)
# Reinstall lakhesis from GitHub (development version) to get cor_sq;
# first install_github call fails because devtools is not yet attached
remove.packages("lakhesis")
install_github("scollinselliott/lakhesis", dependencies = TRUE, build_vignettes = TRUE)
library(devtools)
install_github("scollinselliott/lakhesis", dependencies = TRUE, build_vignettes = TRUE)
library(devtools)
install_github("scollinselliott/lakhesis", dependencies = TRUE, build_vignettes = TRUE)
# library(devtools)
# install_github("scollinselliott/lakhesis", dependencies = TRUE, build_vignettes = TRUE)
library(lakhesis)
library(eratosthenes)
# library(seminr)
path <- "C:/Users/TH282424/Rprojects/iramat-dev/doc/projects/pci-archaeology/BIB 3993/"
load(paste0(path, "ser20251001.rda"))
# optimality criteria of the seriated matrices
cor_sq(ser_aleria)
cor_sq(ser_lattara)
cor_sq(ser_shipwrecks)
conc_wrc(ser_aleria)
# show a seriated matrix using image()
image(ser_aleria)
# show a seriated matrix using pheatmap()
# (no clustering: row/column order is the seriation itself)
pheatmap::pheatmap(ser_aleria,
cluster_rows = FALSE,
cluster_cols = FALSE,
labels_col = "",
labels_row= "",
legend = FALSE,
color = colorRampPalette(c("white", "black"))(50))
load(paste0(path, "eda20251001.rda"))
# verifies sequences agree
seq_check(contexts_20251001)
str(contexts_20251001)
# Seed fixed so the Gibbs sampler run is reproducible
set.seed(42)
result <- gibbs_ad(contexts_20251001, tpq = tpq_20251001, taq = taq_20251001)
summary(result)
summary(result)
# Save the printed summary to a text file
out <- capture.output(summary(result))
writeLines(out, paste0(path, "summary_results.txt"))
# histogram
histsites <- c("El Sec", "Aleria I T. 67", "Aleria II T. 156", "Aleria I T. 32",
"Lattara US 35", "Byrsa II B 19.2", "Punta Scaletta", "Cavalière",
"Grand Congloué B", "Toulouse - Coteaux d’Estarac Puits 8",
"Madrague de Giens", "Lattara US 35168")
# NOTE(review): next call errors — xlim = c(-,50) is a typo (missing 400);
# corrected in the call that follows
histogram(result, events = histsites,
xlim = c(-,50), ylim = c(0, 0.15), opacity = 0.5)
histogram(result, events = histsites,
xlim = c(-400,50), ylim = c(0, 0.15), opacity = 0.5)
# NOTE(review): next line errors — typographic quotes instead of backticks;
# corrected on the line after
dat <- result$deposition$‘Madrague de Giens‘
# hpd of madrague de giens
dat <- result$deposition$`Madrague de Giens`
# Empirical highest-posterior-density intervals from the histogram of samples:
# rank bins by density, then take the span of the top 10% / 95% densest bins
perc <- seq(min(dat), max(dat), length.out = 101)
x <- hist(dat, perc)$mids
y <- hist(dat, perc)$density
hpd <- rev(x[order(y)])
hpd_10 <- c(min(hpd[1:10]), max(hpd[1:10]))
hpd_95 <- c(min(hpd[1:95]), max(hpd[1:95]))
hpd_95
hpd_10