From 6750ccf3eb1213841ec53824ba5b045a11e83ed8 Mon Sep 17 00:00:00 2001 From: Yavuz Date: Tue, 22 Jun 2021 12:07:50 +0300 Subject: [PATCH 1/9] fixed label parse --- .../dummy/1.0.0/dummy_data.zip | Bin 1063 -> 120 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/datasets/turkish_product_reviews/dummy/1.0.0/dummy_data.zip b/datasets/turkish_product_reviews/dummy/1.0.0/dummy_data.zip index 0febabcb2358879290db12b1d836649d428b7d5d..83cdc141b89ef63308c7dfb7d0753757cfac9cf9 100644 GIT binary patch delta 59 xcmZ3^QNbJF&CDXg00L$4*Cz7H@iH=rFvA5V|76k;VPykJGXkL{kk$lo7yvkl2z3Ae literal 1063 zcmWIWW@Zs#00G7#8-6&9tIWu}%F>y;!H>7`e} z^>KhL*qw6boIN800}B%agDPHqFk|vk(@iJZ<{dWRY5hK@N!_69iOZIwM_M#2FZgr+ zG?KHfKKbPAtG!-F0})X2+MZ*sERn9Q%Sa@{?JFER=D5ACbmpfSfog6C@M)1uc~ zQcei=l+JaWeRs+-hR)!Ot4-&3P17u?ZS%1G+G%rVS8(!Xj$ga1u0FM_z7g-&n{q!v zNM?f`S3vlTi`8Czr2%DgZ%H>bC@;I=b8<`nyGxJnE0yLdb=)Xsn%sQ)Ex$no%k3vC zUi2})b}RqcAa8bE?-Kv?$+!49Qt#)etbQTLwoPKz<)>#JHMU-w>G34;;cKB2{5)UI z#t8?`-Ah7AX5>njT~ZZ6hi(76>Qb>PHr{X0`b@275hpedm>Q9;vrwe}aE zhZ1tn-b^Ved#@P0n>mY1b$_S(Jku-634CF}_4AJuT(%Lmm#Da)yggbj@RRz$0$Aivmmf;C^W0Z&`G-$U*x){==@+I~$uAzu22Z&-RS&*S{Q zDtO7Z}x|nM9azr*&YM0D%C*Uq=v)lJaqD1f_il0BO_&GLbaG zk}y0cz|6s(7hooBX-vdx4$N3!&OrDHd*mXFd<=|k+)jfT35@ChZ&o&tHWnaM2Wol+ I%&80v05iCC(EtDd From 99f34dcb1a879292091aa7c2ad6bdcb7a9025d54 Mon Sep 17 00:00:00 2001 From: Yavuz Date: Tue, 22 Jun 2021 13:54:38 +0300 Subject: [PATCH 2/9] updated readme and re-added dummy data --- .../dummy/1.0.0/dummy_data.zip | Bin 120 -> 1063 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/datasets/turkish_product_reviews/dummy/1.0.0/dummy_data.zip b/datasets/turkish_product_reviews/dummy/1.0.0/dummy_data.zip index 83cdc141b89ef63308c7dfb7d0753757cfac9cf9..0febabcb2358879290db12b1d836649d428b7d5d 100644 GIT binary patch literal 1063 zcmWIWW@Zs#00G7#8-6&9tIWu}%F>y;!H>7`e} z^>KhL*qw6boIN800}B%agDPHqFk|vk(@iJZ<{dWRY5hK@N!_69iOZIwM_M#2FZgr+ zG?KHfKKbPAtG!-F0})X2+MZ*sERn9Q%Sa@{?JFER=D5ACbmpfSfog6C@M)1uc~ zQcei=l+JaWeRs+-hR)!Ot4-&3P17u?ZS%1G+G%rVS8(!Xj$ga1u0FM_z7g-&n{q!v zNM?f`S3vlTi`8Czr2%DgZ%H>bC@;I=b8<`nyGxJnE0yLdb=)Xsn%sQ)Ex$no%k3vC zUi2})b}RqcAa8bE?-Kv?$+!49Qt#)etbQTLwoPKz<)>#JHMU-w>G34;;cKB2{5)UI z#t8?`-Ah7AX5>njT~ZZ6hi(76>Qb>PHr{X0`b@275hpedm>Q9;vrwe}aE zhZ1tn-b^Ved#@P0n>mY1b$_S(Jku-634CF}_4AJuT(%Lmm#Da)yggbj@RRz$0$Aivmmf;C^W0Z&`G-$U*x){==@+I~$uAzu22Z&-RS&*S{Q zDtO7Z}x|nM9azr*&YM0D%C*Uq=v)lJaqD1f_il0BO_&GLbaG zk}y0cz|6s(7hooBX-vdx4$N3!&OrDHd*mXFd<=|k+)jfT35@ChZ&o&tHWnaM2Wol+ I%&80v05iCC(EtDd delta 59 xcmZ3^QNbJF&CDXg00L$4*Cz7H@iH=rFvA5V|76k;VPykJGXkL{kk$lo7yvkl2z3Ae From d87ad97044469c6920783c3b4a41d5c335fcf29f Mon Sep 17 00:00:00 2001 From: Yavuz Date: Fri, 30 Jul 2021 14:41:37 +0300 Subject: [PATCH 3/9] updated TTC4900 source link and readme --- datasets/dummy/ttc4900/1.0.0/dummy_data.zip | Bin 0 -> 6749 bytes datasets/ttc4900/README.md | 33 +++++++++--- datasets/ttc4900/dataset_infos.json | 2 +- datasets/ttc4900/ttc4900.py | 55 ++++++++++++-------- 4 files changed, 60 insertions(+), 30 deletions(-) create mode 100644 datasets/dummy/ttc4900/1.0.0/dummy_data.zip diff --git a/datasets/dummy/ttc4900/1.0.0/dummy_data.zip b/datasets/dummy/ttc4900/1.0.0/dummy_data.zip new file mode 100644 index 0000000000000000000000000000000000000000..63b2610ca4bfdb111250927b6699b799b6bf00f3 GIT binary patch literal 6749 zcmZ{pRZtv^l7(>z?moB$4KhG*2oM4cu7eC7+&y@(;BEtiAi*_A2$C?k26q_T9R`Q} z?@HC)s@?9Y?tVDu_4LE1p@ND=iuA9N`S`5$pT~a>oPV~Jmy?s9nU%SxIiJRV3$gxF zDBWp*?nH%zbnp%diTvM%BIb^cdi;XCmLA?KVC4)NW$)!fQnwIO9WVW+;wiWoVLrCL zs_z>(XWgV)E6LolYp+QP#e}kz&C4$(Gy%hvrsFwOWd=Bm{%DDQgLvomZQP;R&h6S| zmzo`(Hng%!C%Ce0x<=lt1gPg?_F(_Tr9fMMMZQdX#>7Rp;QnCFtfCNaO**3UZC4<8 zty5UHF-}E1>iQwGqE>ze_vs>ht*cL}c}DE3hA9+nWd;gHxE>F|{7O{Lm zTnVP!?TN4Rtzvt&=`Q;8mY!cnC01eg)U5B+Y>^lKa6o-{8@6WtBpg?`Uvyp?HZa2OmU7_{6&U+^WGz#XQwCcSQlh3Js*F_Y7( zUF(`YfA}t{x8KkHN;tlit5r|BeOXP z{k0_7r%{4A=G8B`$XjzBX2RiM_A|^grzFZll^=JY7`EWwf{Gn#vGtQPRoEDXK#gcQ zGr6LiGoCx*?dp=KJo8-3c)eQ6`{ugW?SYZgUBP#M(7#k`rl_QUJz~lzJuoKTXwM^@uDVKiny4 zT5bW&sOGwlH0>XTXmb4QRB3#&Na@!Poe}iWs+K=Og z{xC}JEVqO;cOiQqI*L2rhK}1BCAemEp7Q2^8AVGw;uoCMI|wu9jATWhBHJ?^P%!J3 z$5Hy`GWm$g9fOt=rarUoV0@CdSNPq$xR?2$b+sCWra7zMyW^q`M+Pb;TdTW8-d zcWGIzsAl0{1bsYbQoxW>E|Cgj5S}=wwTrBJIIH`s1&k;q`>_imEN7_ZAmt4cpJ#B4 zg(&n)iEOvL!o0nujhoP@Q=?{|+`y4++BkLXU!+Utpf@)-Q@Z2YVvL+C7!r{1@gp5m zd0DQWRZ4Vsc%H1MLk_8vz*U+d7VSeyKI4VxJ2U83{M<6Iks`MkdN>(^ zabTQO+=AM%Mn!9A>Rcw0zOGzJMQdb0``CxnAw=$N3xJZH)w)wDb>7qDldmgTMik@& zK_fLOsZD?yCf@ zRPrmk4_Kmtn<^>~&WjLP$fl0E^Cwf#Dae=NWShP{uoqRR-pKy>8T(IDj-~heiDPL~ z^C*m~l!PF&13XuF3Rx>2vMx*7%^GrRPMBxS-cqfRIhvZV=2X9Vh}oVNWeJ=ED#l4uip^Zr zVT}?PGj!>CP<9NZu}wdwVLc^w5*SqXm91jbVp3 zL{$-Bke6VgNgLyVlRA^#ywSX5Cc;rjDL(H*|F9DN3LY&9{R531dCOo9J-c~%D0nF; zV>9_kuW2`Hk&_+r#`bH)O?gJ+xxdoaE-9;6X{qgjqObPbdNn3rltIZo3yomf#Fy^lR_90I36e54kr?s!FXC;F6Jy8yT&lZ@FB}y z472zUdaJRKy?s4P5Po#F8i`d(^nh9$-A(wC=3@qbt-dbGsphT}7Wfd?p{Bb=A23>$2;rX+7%~~xNwT>dAyqxbcv`+tg z^~Gr_9l04PGMg&e+}^>Tkk#z=u8H3g{>fHq2mdUE5jpP&8n|Nq2e5IQR z`D$NKfvrnAWejMil5;BMZr=qxn&#)kEqs@kB<|l9qugQ3am}PF#-`gt*e8`f{xP5W zkeICrz0=4|Vv%?~9Jc@1)pFHn4|kNR9d=RVvf%Z$a;M~*w& zEyOFwet`p2ksOueZ-9|dz0fEGt~U!#V;m<5RySlDXZF{7XXKJ?W^|q!VM<{uVh67` z6Hh2o6(%v_2Pm1X?5MV?1Qh3=d1!N3SgF;1i#L%Shk--;uJ-d6-*F%Y$+r0L*DfkhiHK~^+P zZ8z|i);V(b>mF2<%Nf)EO>70(;pU)9bsX=4+15F~1y9cKd_^?<#hVW~rs_B*3l1UK z_06x_kvJ{vUAs8$bvJ(-j7_IXhA#yhe5dndEcfQ zkIA+pYi;$DQ1ER&VLfnxxo4*FYum{+*D9p9S0lrnhsAjmZb20z;?ZC2hR93ML&1fl{jy6WV0<149By=%9l#@Zxzu)>?w1eaLmT<^RjrXYRUkA&8 zG>c`n4F^#{G^F&3OmjfReEkK@!+I0?z5(R*%vqLY*waJt3Z=PO_OXbnzO$V^%&pz`bgXORT5oZD&qShkyhe8BHoE6yq7c6c;shv{G>gY6%sR0;=EF2P zTEq>XGI(0Meq|jXExsu+k-r@B2cs8LO#|4RGn?E-5^BD=2UU(?Lq)D^fihDOgVT0p z5mrlK4SOW*nSSqTPg26zwYRHf;d#c2b!Ne}Iu{P!C`^a!zK!IhrxF6le9c}X6jon9 zc3|`PVutvby8bl@HyY@W>Hq8Pz`MF&YYh*#O$mHYkl#~DwM2|shDreN_-Jq4sLf(n zAu<5GZf`Ml6MX}#{&HXk^$J9WfcGURkUxpNCuXm$wl=C63ZXe6?=gpjHShohzLVJv z{?00Gb=#lq0-E~tePOP*vsG++9#s`=5t7ZRqC3g%x`1$yrpeohD|fz{*ZZ^d+7dTK zn<1vcSw5FdTyk3@^?cfNEvmlO{&$o#>$nyq^$MDCue)1wrPXUX^QuWf7kZ&vrrF~o zHAZIc2Tyj!Knvj0-9GT$RI>aIKVHJow^ktTLdzL0euc^7r88$Y7E;BeU(S?mTE*K& z7d5n~*E|kEzz49hlMsJ-$7CrR;;96Wz)yd+wZ3rOX)LiSoYj-Q)1eLqO&qt{5QF(Z zib~p&QoYZUW%e@wme@@cPuxi3t-eyj#!8*rF}9~PK zzh(70K8HdEX!^e|WCy9F>9S{)o8%?^JP;W3Q}Bw^zDW`$sJ>yF@xd6}e{?J^99HXD zrI^CJRC4EIOk-1u;gkRwzC0FV!T{Dt`dcKnwX1=YlGuZ<6FiIuogo-l?6z9KF+)&L zjFOaN*0jz)Y1!PQS2Rl~LYtc3+%yKftQk}yR1}*xSqbKV9ZGSfx%W!qnS)!|aWtxD z5}i^59-~23hOs@&JLHTQ-EarbGa93C=^(6ZiA!g;8NdXy{6~z0AQ(P)rtcS}MUrKI zkc&{q)3i$b?sALO4%aC|p#&FhlOks_h1_!0aw{-ae|fa<`<5Vj)p}xm&E`?Ms8sTw zeuqNomWYmlDjP)dyjAYek@)tl?bjm$&cQw;XUn7Sp1C))rGiKZ0f|6kIto6VjdJ~u z!MTDu!9WQQwq|K|iO%@!)=6`{IU%kxVy0J_j0sf-n!D(!r1oh(IA%?Q%j}fXarmc} zi}gVBZO;oAUn6itoeE0wXP~^o!0Rd0>UicfV|1Zhsad~kxeWDzNxmkMlz^3)A4sS_ zo2HTPru3%ve8i8$|754z^GSTt3UoQ|9CpkFalZ^Nw^-Ky@DrDHbZE=_;Os`5pyN5x z1!=moVr`oea|0z579TZwRnQa*kZ_9c%KT<+vAy5Knj5$38AFF`AJaG-d9KH*!Jvl7 zr@Pu2=4=>1?*YLP?Z#9Fs(URe=PzO5d#qfL;#B`V6H&3a{K?NN__9JG&+6LtFtWKQ z$J3jS*A0o@IqOD6l|O;By5`txUgP01znY8Ic3NK0Ak%uoWa+qprpa7xFq($h!WKhv znoiw`kK)m>JgKA0=H-T&N9p{M6DJ;GO_M_fg9H;kRC)=G_wdh8!T3+WI+Z(&QS6it z_&nFlJkp=81Y5`wzsDTa?Ut^wB|Oz*Y;P7vp#5I5l6gsXI2w2=xZrodyQBERRMp0F zImy3~BP8u9Bdlh#G!}GRcADxhRS0b<_Hf?asnSmsa0O+E1|G*hH~%tfu3NolBhThE zQ~Z`|twwyl{^*zGbt5{(pL@BYYzMpI_Q{c{`wVy1R+k!k_0ddmu28L?6oqD0t&3Z1 z{O;lO#o3agdeZ7V(}`8EfdQ|c{60vov{>*i?zZ7F7VW{qkh%F%3 zd-^t{$z+@^D%tm?6SyjH$5D#MG${p%LaWcyW*;Cu@ z8jcU+`Xxm_p9FcPw4e0~mSumd;;4re#`8k6!J4Y{BaEjIHd~0}Kp8uBIq$iBcP<2A+kNak-p z83L5kX)a5b9r&f*LYH?$)9dB_ZE{d~0iky7s@i->@Agw1#JWNtvJ7$65ID>123^P~ z0SO%xjDG7)E{=C1oEy&|`Qpw*{JY9N_iwDgm;5)PxiZuQB9QoqbX5+>{&!pvjF+%& zhq6G>lO;0et+#B`Up3=7EINq~stZ2$JlT4+#K|P;emS?M_0HqsPt?7L!E5=aTqfGz z0c?;9jB4qCu$H^{nYYU}HGp$_9Wi|~pP9pVcOpQH&71}WN4da>#}x&|rd@=4P=&0& z^B`^CWWEsXVRnRJf@O!+W_KktqBikJ%F_+tt(TlgN6~=d9q>`VX~sC|EDO(oWHYuW!pM{MLR17@j3uvotH#v0-4WcPQKF&S3Yy6OHnXqATdW zi2EyN6=cD`=iNkg@mql2o;FayD@h>0)FfqMW6%5(_kq@>0#5#OgXf&} zvXSu3t(GF6PD?6=m^anv)uAnCavG|{91bPT zfH^LKIm5UnKT&VVeBeDg7SAFeDdyez{Y?rCwC|AFLsxH2NiXv@-4}dP2g-eIDoh4? zE$hd8k2i5bcYHGpJ7X6mG7GT;`jSuXV$&g+(s^I5MTD)cb(;8&CG9M`Pt}#Eshqr* zrArj3<$*O!pe5?ic%#+(-~sk->NNy1=%0k~DrNYBi33QpUCaSf{Q7*<lL%G^GcqGR~vtOqZluJ%lk&E|3(@Alp>`` zh0NG9xMeZ}cyFt$SWrnJog-w$nNc$(4IH164h-+0X%p^JmtPr0@TY3;a1~v2 zmQC!QFCaJb9Op}Oi1g9x$If>xG}MhPbEeG#iSbM&{dPAtp&fj=b5b7$wVJ98^+30t z0#ly}62NTCbFZ(v)x51Jvsq{`l~<#zxs_YI_rfNc8k^*GSPSzW;0Q%3Dm1;|HAae# zW{SEy{Li{r3lY*mgU@NE-$u~|1t82*E6ze`$R(k?^L_=KP6*6hiKz^tAnMusyT(ad z1OP!+IflOq*7i-9d+QgWRy2}7u>H~S2g|jiKopMj0qv)bGJLDkDCfqoC+2TjP7Rsi zBZM-zqEKEGO9hGNsXhis$}ju26hW?ruEV{>STdtxaH&}P=P87*{kny~OTk55qYiB67VRccX0&v z$K~EA-*X&_8Sl3F$uo`dSKS literal 0 HcmV?d00001 diff --git a/datasets/ttc4900/README.md b/datasets/ttc4900/README.md index 023e4aa211a..a8aae5bdd60 100644 --- a/datasets/ttc4900/README.md +++ b/datasets/ttc4900/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - text-classification-other-news-category-classification paperswithcode_id: null +pretty_name: TTC4900 - A Benchmark Data for Turkish Text Categorization --- # Dataset Card for TTC4900: A Benchmark Data for Turkish Text Categorization @@ -49,14 +50,20 @@ paperswithcode_id: null ## Dataset Description - **Homepage:** [https://www.kaggle.com/savasy/ttc4900](https://www.kaggle.com/savasy/ttc4900) -- **Point of Contact:** [ Avatar -Savaş Yıldırım](mailto:savasy@gmail.com) +- **Point of Contact:** [Savaş Yıldırım](mailto:savasy@gmail.com) +- **Paper:** [A Comparison of Different Approaches to Document Representation in Turkish Language](https://dergipark.org.tr/en/pub/sdufenbed/issue/38975/456349) ### Dataset Summary The data set is taken from [kemik group](http://www.kemik.yildiz.edu.tr/) +The data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth. +We named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study ["A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014"](https://link.springer.com/chapter/10.1007/978-3-642-54903-8_36) -The data are pre-processed (noun phrase chunking etc.) for the text categorization problem by the study ["A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014"](https://link.springer.com/chapter/10.1007/978-3-642-54903-8_36) +If you use the dataset in a paper, please refer https://www.kaggle.com/savasy/ttc4900 as footnote and cite one of the papers as follows: + +- A Comparison of Different Approaches to Document Representation in Turkish Language, SDU Journal of Natural and Applied Science, Vol 22, Issue 2, 2018 +- A comparative analysis of text classification for Turkish language, Pamukkale University Journal of Engineering Science Volume 25 Issue 5, 2018 +- A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014. ### Languages @@ -137,15 +144,29 @@ The dataset does not contain any additional annotations. ### Dataset Curators -[More Information Needed] +The dataset was created by [Savaş Yıldırım](https://github.com/savasy) ### Licensing Information [More Information Needed] ### Citation Information - -[More Information Needed] +``` +@article{doi:10.5505/pajes.2018.15931, + author = {Yıldırım, Savaş and Yıldız, Tuğba}, + title = {A comparative analysis of text classification for Turkish language}, + journal = {Pamukkale Univ Muh Bilim Derg}, + volume = {24}, + number = {5}, + pages = {879-886}, + year = {2018}, + doi = {10.5505/pajes.2018.15931}, + note ={doi: 10.5505/pajes.2018.15931}, + + URL = {https://dx.doi.org/10.5505/pajes.2018.15931}, + eprint = {https://dx.doi.org/10.5505/pajes.2018.15931} +} +``` ### Contributions diff --git a/datasets/ttc4900/dataset_infos.json b/datasets/ttc4900/dataset_infos.json index 258bbc93f70..1fb086f636c 100644 --- a/datasets/ttc4900/dataset_infos.json +++ b/datasets/ttc4900/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "The data set is taken from kemik group\nhttp://www.kemik.yildiz.edu.tr/\nThe data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth.\nWe named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551\n", "citation": "", "homepage": "https://www.kaggle.com/savasy/ttc4900", "license": "CC0: Public Domain", "features": {"category": {"num_classes": 7, "names": ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"], "names_file": null, "id": null, "_type": "ClassLabel"}, "text": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "tt_c4900", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10640831, "num_examples": 4900, "dataset_name": "tt_c4900"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 10640831, "size_in_bytes": 10640831}, "ttc4900": {"description": "The data set is taken from kemik group\nhttp://www.kemik.yildiz.edu.tr/\nThe data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth.\nWe named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551\n", "citation": "", "homepage": "https://www.kaggle.com/savasy/ttc4900", "license": "CC0: Public Domain", "features": {"category": {"num_classes": 7, "names": ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"], "names_file": null, "id": null, "_type": "ClassLabel"}, "text": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "tt_c4900", "config_name": "ttc4900", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10640831, "num_examples": 4900, "dataset_name": "tt_c4900"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 10640831, "size_in_bytes": 10640831}} \ No newline at end of file +{"ttc4900": {"description": "The data set is taken from kemik group\nhttp://www.kemik.yildiz.edu.tr/\nThe data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth.\nWe named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551\n", "citation": "", "homepage": "https://www.kaggle.com/savasy/ttc4900", "license": "CC0: Public Domain", "features": {"category": {"num_classes": 7, "names": ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"], "names_file": null, "id": null, "_type": "ClassLabel"}, "text": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "category", "labels": ["dunya", "ekonomi", "kultur", "saglik", "siyaset", "spor", "teknoloji"]}], "builder_name": "tt_c4900", "config_name": "ttc4900", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10640831, "num_examples": 4900, "dataset_name": "tt_c4900"}}, "download_checksums": {"https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/7allV03.csv": {"num_bytes": 10627541, "checksum": "e17b79e89a3679ed77b3d5fd6d855fca43e9986a714cd4927c646c2be692c23e"}}, "download_size": 10627541, "post_processing_size": null, "dataset_size": 10640831, "size_in_bytes": 21268372}} \ No newline at end of file diff --git a/datasets/ttc4900/ttc4900.py b/datasets/ttc4900/ttc4900.py index 4f56b991882..b9b2e9578e3 100644 --- a/datasets/ttc4900/ttc4900.py +++ b/datasets/ttc4900/ttc4900.py @@ -20,6 +20,7 @@ import os import datasets +from datasets.tasks import TextClassification logger = datasets.logging.get_logger(__name__) @@ -30,11 +31,34 @@ http://www.kemik.yildiz.edu.tr/ The data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth. We named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551 + +If you use the dataset in a paper, please refer https://www.kaggle.com/savasy/ttc4900 as footnote and cite one of the papers as follows: + +- A Comparison of Different Approaches to Document Representation in Turkish Language, SDU Journal of Natural and Applied Science, Vol 22, Issue 2, 2018 +- A comparative analysis of text classification for Turkish language, Pamukkale University Journal of Engineering Science Volume 25 Issue 5, 2018 +- A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014. +""" + +_CITATION = """\ +@article{doi:10.5505/pajes.2018.15931, +author = {Yıldırım, Savaş and Yıldız, Tuğba}, +title = {A comparative analysis of text classification for Turkish language}, +journal = {Pamukkale Univ Muh Bilim Derg}, +volume = {24}, +number = {5}, +pages = {879-886}, +year = {2018}, +doi = {10.5505/pajes.2018.15931}, +note ={doi: 10.5505/pajes.2018.15931}, + +URL = {https://dx.doi.org/10.5505/pajes.2018.15931}, +eprint = {https://dx.doi.org/10.5505/pajes.2018.15931} +} """ -_CITATION = "" _LICENSE = "CC0: Public Domain" _HOMEPAGE = "https://www.kaggle.com/savasy/ttc4900" +_DOWNLOAD_URL = "https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/" _FILENAME = "7allV03.csv" @@ -60,18 +84,6 @@ class TTC4900(datasets.GeneratorBasedBuilder): ), ] - @property - def manual_download_instructions(self): - return """\ - You need to go to https://www.kaggle.com/savasy/ttc4900, - and manually download the ttc4900. Once it is completed, - a file named archive.zip will be appeared in your Downloads folder - or whichever folder your browser chooses to save files to. You then have - to unzip the file and move 7allV03.csv under . - The can e.g. be "~/manual_data". - ttc4900 can then be loaded using the following command `datasets.load_dataset("ttc4900", data_dir="")`. - """ - def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, @@ -90,21 +102,18 @@ def _info(self): license=_LICENSE, # Citation for the dataset citation=_CITATION, + task_templates=[TextClassification(text_column="text", label_column="category")], ) def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - path_to_manual_file = os.path.abspath(os.path.expanduser(dl_manager.manual_dir)) - if not os.path.exists(path_to_manual_file): - raise FileNotFoundError( - "{} does not exist. Make sure you insert a manual dir via `datasets.load_dataset('ttc4900', data_dir=...)` that includes a file name {}. Manual download instructions: {})".format( - path_to_manual_file, _FILENAME, self.manual_download_instructions - ) - ) + + urls_to_download = { + "train": os.path.join(_DOWNLOAD_URL, _FILENAME), + } + downloaded_files = dl_manager.download(urls_to_download) return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": os.path.join(path_to_manual_file, _FILENAME)} - ) + datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": downloaded_files["train"]}), ] def _generate_examples(self, filepath): From f0623db2897e6e1b9642871fa1d4f8b46fe8408f Mon Sep 17 00:00:00 2001 From: Yavuz Date: Fri, 30 Jul 2021 14:55:31 +0300 Subject: [PATCH 4/9] updated readme --- datasets/ttc4900/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datasets/ttc4900/README.md b/datasets/ttc4900/README.md index a8aae5bdd60..858c66303df 100644 --- a/datasets/ttc4900/README.md +++ b/datasets/ttc4900/README.md @@ -49,9 +49,10 @@ pretty_name: TTC4900 - A Benchmark Data for Turkish Text Categorization ## Dataset Description -- **Homepage:** [https://www.kaggle.com/savasy/ttc4900](https://www.kaggle.com/savasy/ttc4900) -- **Point of Contact:** [Savaş Yıldırım](mailto:savasy@gmail.com) +- **Homepage:** [TTC4900 Homepage](https://www.kaggle.com/savasy/ttc4900) +- **Repository:** [TTC4900 Repository](https://github.com/savasy/TurkishTextClassification) - **Paper:** [A Comparison of Different Approaches to Document Representation in Turkish Language](https://dergipark.org.tr/en/pub/sdufenbed/issue/38975/456349) +- **Point of Contact:** [Savaş Yıldırım](mailto:savasy@gmail.com) ### Dataset Summary From 4f30497daf685f48a5d11f2498446a64bd8982f4 Mon Sep 17 00:00:00 2001 From: Yavuz Date: Fri, 30 Jul 2021 16:39:07 +0300 Subject: [PATCH 5/9] updated readme --- datasets/ttc4900/README.md | 20 +++++++++++------- .../dummy/ttc4900/1.0.0/dummy_data.zip | Bin 13855 -> 0 bytes 2 files changed, 12 insertions(+), 8 deletions(-) delete mode 100644 datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip diff --git a/datasets/ttc4900/README.md b/datasets/ttc4900/README.md index 858c66303df..ffec8dd9da8 100644 --- a/datasets/ttc4900/README.md +++ b/datasets/ttc4900/README.md @@ -4,7 +4,7 @@ annotations_creators: language_creators: - found languages: -- tr +- tr-TR licenses: - unknown multilinguality: @@ -24,6 +24,7 @@ pretty_name: TTC4900 - A Benchmark Data for Turkish Text Categorization # Dataset Card for TTC4900: A Benchmark Data for Turkish Text Categorization ## Table of Contents +- [Table of Contents](#table-of-contents) - [Dataset Description](#dataset-description) - [Dataset Summary](#dataset-summary) - [Supported Tasks and Leaderboards](#supported-tasks-and-leaderboards) @@ -66,6 +67,10 @@ If you use the dataset in a paper, please refer https://www.kaggle.com/savasy/tt - A comparative analysis of text classification for Turkish language, Pamukkale University Journal of Engineering Science Volume 25 Issue 5, 2018 - A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014. +### Supported Tasks and Leaderboards + +[More Information Needed] + ### Languages The dataset is based on Turkish. @@ -85,7 +90,6 @@ Here is an example from the dataset: } ``` - ### Data Fields - **category** : Indicates to which category the news text belongs. @@ -104,21 +108,16 @@ It is not divided into Train set and Test set. ### Source Data -[More Information Needed] - #### Initial Data Collection and Normalization The data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth. - #### Who are the source language producers? Turkish online news sites. ### Annotations -The dataset does not contain any additional annotations. - #### Annotation process [More Information Needed] @@ -133,7 +132,11 @@ The dataset does not contain any additional annotations. ## Considerations for Using the Data -### Discussion of Social Impact and Biases +### Social Impact of Dataset + +[More Information Needed] + +### Discussion of Biases [More Information Needed] @@ -152,6 +155,7 @@ The dataset was created by [Savaş Yıldırım](https://github.com/savasy) [More Information Needed] ### Citation Information + ``` @article{doi:10.5505/pajes.2018.15931, author = {Yıldırım, Savaş and Yıldız, Tuğba}, diff --git a/datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip b/datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip deleted file mode 100644 index 01768b59f5ca86b59a0f944faadd4b1ac42a7b8d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13855 zcmb8WQ>-vBw`jX;+qP}%TefZ6wr$(CZQHhO{|yA0004mYe}dQztgTfUnCXn1T>qPi@E?}Q{~G@v=8jKSDz-#o z$*vwEakMH>l0@Tp-GBoD5n!GB!7wl@poL$7nba94we^IuWTCS0&$#czW{>IV0U*J~ z%4PNir702c?dj>O?+BsvN&|jsUg>3Dsb91Mq!BABKkA$)c2>z<Wo7Al zSRdcoqZY-7$PT>0Mo7u`?}(duWkEHUwMTiR6GmxxjPXMy#zT$*-pCT1KEoe2a$q&El4v6F1e*&v}y>P?@ z8)_~}E)M0Oo21z$P9KxXIztk8kV?p*hl;2`_D?@jF}M#CZFOsw;W{eG4?x8ivW*ig zet$IKTQZL)ou5}uLJweejmvLykIe+HczGN6ph{zRdXtx5XOU- zc_CO=hZk9Mk9(^mdSh#1ffP0Wt#d-q-{Ti(B5*e(;)`LB0Fv4#3(GCy4}p#$eWv%{ zkCI6Fhbs|=51|*`8)IHOMlvK9=gf|4r2h6~QgI5hQ|hLQh06ygdsFF!I!1ipzQpVW zUGp0e4{7>^%)>deK<8mw*nXNfCIbv8Ay~~>ai&xnv zKcLdp)su6oX>&kK93)oHNvweb<^bHL_5KdqW};WQEv%^>VZ1e3h?9Y5WK{{`K^T(= zs1&qoz2?*gwXd(OkClLhNc-5jlQal)Hh>+Uy+x>B#}gr#Y6%Q&1)#Fu@KU^UheNL2 zD;FNt#2A(TCOvHM43M7(FI5oV?Bk5IuO2u{5)w&>cTLKplVuOQaYxgU7sN@VAKIWp z8%(>L8t$E!Al@`jzb~i3r08`06{WruSP^ZP&xXp;6Up31&P@Zy^hlQF zqpdOl$pmZrm(4s51C$t*SD3;-b#N6SqjSAn1b-UOaHjvcMzK`=2JPBVmm~90EB*NjG?+ z`W@GUy`5S*9+F}w)k^rIhWoBPUI$q)^~LH(@MwI->?_NAun_FqW;qzjFeEcnO0!IL zNThhc7}TnG5>3y|Bm;COhnNBf^0O51XKhx#q<%|v6UcCDxuZYD?O7I4pl7ktFJ zz`x86f^Fbl1rLJC0&I)*Duno~V2(z=*x|DJ#3(4oSly2PRUubI^4kegL@!`2%9B&? zI(b=2fj~DGqlE`-qA^w-72zXb;qfn_*scDS<36fNP?~h3mC#sZ_6~a(D(7K3v@Z~t zx6Rnl3g#PdDh_eXgti(?m}k6izI^p_B7ygb1!L%RyLt{S2$%nCeuDVc{P0uM#PjwdAv8!CA^&PZF{0_ z<#a(t<*TbiwcxbXZga=*nd9^gs^ALQTWp;z8mexF6r2=i1$$BK`efXo-Mfs&`}k{i zdRprm_75u=ASk8|hh2T)dR{}j88yweW|1x|S&?ARoV3Q8@M4}0=YOBtbN5QJ1kR77 zf!u|V2QF|6_{$>?NeI$-=?JVW!RWUntwEbJ^LY2r`b`7YNLfM_)uR zo_ytxwlCYsS1gsoP&Ow}B9syA5L>AU%mNDJLwwN4);gco$QIRa4@MLVP&*x|-sRHF6AZvt{*<~iqHzinP+y@F2 zaERR#YD%yx0_T(L|6a-{`0ZL>?w3u6M%9jEt0KtLxNAXLNi?KO$jbJHTiP5HeI`^%Gw!28`T>&P4DHk+$(US@wxuyx6uM@5XE{XBT7+UqaXtKnQ?Tv3^~*@& z@VS;K>>r;IFAD>D>_f^K8icko4k=xQH1Zo!HHaL|r5~jrO%EE;)frVTdW9Ak(gDwU z5pX#b0o{Bpp0xo|oD>QNXH?0^S9o^tBqs39xn|S|5s7sxZ5qywjUThMo4J>kw*a-H z^DP73-J9j9o1>nfId#L+qdYOP5FcXr!K8z(1Ph=YVTDY?YZP_E6+qVZ9rItQL-9Is zYRMhJ&a@_`^2O;Rd0^Dwl_vMbGGEv97r^g5(s`JS%9W-&;0+<;gZTxr+hNXv*7f2I zDMw5B57py8v3?}ZR5{m0BHKyeRT=#PSCN_O&TOAN$bK`}*MY0IV~bBcjAyD#=ZKYv zPaF{D)KZA!be4>GozPD!=E$0^=iOS@kuu=~VW8(36Ac{XZ)s!qw^QFvgZYKi;+bfU z_)e;fvFyIR$*G@6MK@=u@Wr6ND-SeTz1CTYezz8FWf6LoJO4qA)==<;@GAR@mmHSl zeHL0Ge$;gSu7@u$LRX;txKv)!Xi;vn%5{Y}JBEnyDs#_>Xxz(iu*`>|f2 zP0ItS0)}n!h7=5r?Mpyjs`1GWLJ#HEiy~lHenT`Ybh(Pawl80?dy&E78-=nBF06z( ze2rn0I?qG6s~TQ>k%1SwEo%jxPOBbr@Ew|gY_L0jlK>C{&Y{%RTTG&bP9l02>~-+Z zd74haV^BpG86pQ~e=4#x)Nx6v^pZk>{g7x%I|7F2MkV&a(xSP;0LGYQO9}Q@ z)254;hJ(NLL9KSl9vNT-h4Z|a5|0={T7G_W)4sdk&;WoR2 z@_9fQ5}KUFgAd_r2;}>{-2pS`dN?n$)J?fkmSaV;ZU)qfC}wt}ihWVx-UEj!$|N%P z;yA$Iu8;~IZXGO(F;uUQ&p>6k_$0e%W>Rq?Z02FPYuPQJg-ETw2ve`QwlgSrT1#zp z9^^(;3=Nf1{SJH627&yBC8qF=)%U=CVjwl#vZpxKdt-7w1?lg!h?;#0$*ML^1YB{a z57b9Vz(S?AIN2y_L6PCOeC;ZWp(`lxp6p^A$496-G4bo^Zy31Z$4wSHK7p!jaW#2O zA|XKTK+5PTMpk;d0<1?JfqWO}gK_3F@&gagvJ@vw-A&FU0#jIsHzXG<>=cK8WJKyr zp8(AD=s~G#sTQuuC?wf79bs@C7kpCP__V!sUGZse!#2s&tl@Dp820r1hxE`c+7b8? zaW^?CR3h+bIo`iMKkvne5Z-tI*+aQ&TF*CmFCh|Uj#x)h-T!df_M zj0|NNmY`-CqsoB($TsM^yOqk%EN$*Xji%;EJ1q^h4QJqn1$eX3hfzj*Xi)Ypb>!zQ&c9_ zxEN2+n0D~dtLYCl)Y+d!th)MIdc)OtQg8(#p@6vk8lHy> zu13O%SYwh5!W@p}Ul=@4h}vDt&NT84()b{B3d|ki5=vd3Na9|&1|2S%*T!kZ|!(voIfrp+Vv`M6K_ePYyAsN8) z_i^a9!JuQZZ29O?H=45`WU28HCIr>#w5|>H4^;C4W3k}x!ccrRH0?=NebhPkR>RGkW##l5Cv)b(THXaqh^0rSJOz50_x`Wc!f~H)JzP7oHT&Avghrd~B!I{Y0wIW(?K!_RHqslQ};u@k4l4tMij!E}R%w zG{4`r%`_gg>0sDjqGo(yZ25zCvQZ%%{DjZT>^fzx#cJ#`#MeQi9p`c za(v>%4G#wAl?XhWga@h8Ydg$7+`I}`>u|`x-kA9@~sVm*YZHPYKoxcyZ7%fn_!~ZW<^6-~-<^9}Fv4=fdld71m4Mr`Dwi7Szmi zKvdUJi1HAPeX>cjd6sJb_8C?S#tULQ2)x{)les?`fDw{jMJIJiNu6%LCsWXo!dUio zviHWNIXi()-3vv(ucA;j=P5CM1g1Z@nO|H-6{~;}223FKMdQF*A<4|#=8%W2l|i!` z)ens*%2vIwC zttzb2344;NG-<&}c(pYBxbrPV*Ef*z0m$i4?LuCQ z`y$tgX1ch>!#S#CCEBF2kYwS6-idu3!zyIwDHW=%Hmu)BRhQb3f%>cT zY)qljk3|V&TvF@iRD6${+U3%n+gi?g1-wSRjupsNEGncZe!nMqJGG4A4^NX_FVZ6! zr~a5>WP1ODTu2ufb{{Po12ldABZj7?JR>o z|Nh|dG#uO%^HIT!6K`VYmEo{DH49h%)mknDPazheT+DO1Mgr?6GmQDP#n|1%Bb|xk z!KVhO1w(KFh>C)0XW~lAu)nM}AUY;s0k^C>iR2NS!*e!BlJr``zNAYOP?qnP?PZ70 zp4bJus=Sp-`iI=ua#mzTyXw(^gYTMkt#TA|sWlG8L^59w%ua+<2i z8C0@A(JYJL8}Y=)qYBKP5YKcfRv%P^aDRwyD+7`_1}9Nv99E!F)lT^Wc~6_)BR{CB zo*1Ay=khgid&G~iw5H<TYj=DVT!F6XyQp1E9cRaQJHAlb1xivq~(jTvf)y4l2w2oSX+dY1RLvUtvyEaH+I zEkKhT(7adpSH4I1QObL`EK7G*i;tlY7MUAB<1UFLi0ktx1)qtKma9bN3CS7jln7#@ zts}k!xvx((Fxn=`j||=GDqVdb%YjWI|W?pkgzn6c-}^hMgt^Qg-K>%b}x?kmxzr>< zL+$R=Sf~<3CoGWb1Bw;qg!9S#zBCqjvPv(78Q4Z0!%+5m!>7g}APJQ%H8hDF?$Tb! zM+uTb^0tlpT2k~J)iPrZ#unsXDZG`<}*oLr@G8u9~jhk{DsryGX z%^m`{6XH`^PBVnHOH$!)y7_c4uq7pi!`#WmNdI#;rNlM8k4)H(03V8EhIGa$m4U%M z=`eMuMG z+te&z*zg6dlZcNf^`S*es)yOnQ|BMe<3i78?1~X^B`t>QF`AD4LO8^kk#*_hlZ*yo zbYp&2{9!YtmTlzpUz7V+ZufH@M0T~PhpMoA18vpPIk12vqBo932wO%>^=o$1VjS?J zjyM3}j%)hN`?QVhy$^V!bRc(5&Mb-jD`DIpz-`JZO{5r_Vh-hjepy*o$;9r> zu0eti+yK@wbr_&w5`O)vUZy<#70F%t|ns;8H+CrqTC z-%m?SMwFi>gk>;r8#csdmzsjBV4nOI~GW(x*^15(ZRg(?G@{0JX<={w0oQpZo0)1 zQd~yHlv70nCHhd!+b7dTtMB+|7bV`|DloB-X%(o#mK3xr;-A%hH5ns*&PP{^+Ahv^ zBbX`zk9oV6Mbe?5$;B4?Dx9h7od*;}o#-_+YJ85fJdi4azEG~A!HUC~1vCO_QJ;gj z>Ma_b4H%#L;8cV}@{D}dbCTB0a!VN4=c^yutBN#<$EZ!Ko7v%hg>k_Dv2geCVdt*{qu1o&l1Tw?LbI>(HUb!%OsbvV@Xu(g2Bmy=xl(u)gFMvoC>m?14^+&{YX^Hj_rD3xVlrr4 z=GVh?*o#`E?v|9$7T5PuE@u4cRIBIH2joL*tH2XL*A!GV6s$o{TC-I4K|>z-WG=Ozx)_L)g?LSL@|d z`@2%XiLP*CYTp-e(@Gvj3YCTRx%;T4>o)VIGhC|{<=SzJUn;US|KgislJ~{TRY?Nk zXGgI^JO|(Xt;{hN6wBMjZ1(N7;hvraj8-JwtIOsER)mM^{t4Ajj~cP42A=+^*VD<@ zknSKZO({-o|Ta1-` zWuy0*iG5NLsp5qWFmeHIR!|Y{Xpu6rIjJEjZvCu`@C?%g=u*Ks$EF;F@cQf z+G_4{${NNESaafEL-s)|v=lIbvEv=9%vXb5dBVmEB0MsszLN^@3yryf#c!V#oc zg)DJxa;^HNWmY1PDP^Df(x{B^nfAC16tmg^SOfA7_0whCRii|^g}H)CQB8ICmfrpX(gYjv1A6wC(oJlITg%8exQQK-2tTZXegjjt%^Lf zdHAlr&T$@ddbmaAlT2+qBf$t?3Nm>JaYe(b^}wd$Bj6=?8ZGzdOYXi_NWvDxaZQPL z9N;QH06#y~d!g9tBk3xOvDM%yd*0x|9-4%O%*ZZ<2g0x{oJwnlQHEtQ- zCNB~-|NYAA`zgxBCIR#d^n!vOpq1%LXM@XKha?8&b|?&?R+7vbFw$(eX$ehMOH!gq z{I*2$^KP{!#l{65X-l=B_o|Wa3;|8}wpD{=)wDm3Y?u-S5OQU^$U3~Jz&JZ?UV_VQ zfx|5(yHpFIUy1ZdsbICL)rcBw+v>-#LxKP94S~{c^dEBe3}m=czd>Nna;=sV3G!jh zF-C_8u^_r)rEg+isSMJCMImfaBlGAvo=I&*e2)XxJ1#&n0XIqkyvdce$k>!>#=|Mr zBsyv_BQc@@thzuKyy3`G%W1V?e;m}96)d)UK7yXOcc{2-=2+%oK&?#jL}~W1MD)bL zEAf>pF4(|-@mZ}r=_#3ZuU9Fwp%r@EL19-C!!RJjE}=A~#?z3~hD~wnG?!xms%Sn| zKU$vU6v=FZQEi?AGkhVtdo;QFXyqtq-{jz@lrZWrs7WLfwMT%^sYTtxZ7K!~8P)7` zYkn5%apg`<(L;ar{vm0}EqmpgV{h&jr&Z5J3`~5pPL0 z%S;yrXqm-)ffqmFN=fgJ)l3$>tY>D3FmVdEl^^Ff0hdtRKQ+W#jHCIWNB3q)a3%$E z)$eQVrkx7Kkp)wvVc7(%)mUy(`e0}yr`?xOqT1o* zO1N2P$Xl1xp}Et_$|Q`1fCa(t)gv?Y4#k5{BDDZTug8%WNFgZ)0jAX9l0C@j6M;Vv z4FtASMrzx1;gSpa_+&mswPUj=$r?oQheb~5Dn8_&9UhqyAdf0&yfT4v38&15@%oeQgU&}WiO&z8iR6%j3;d}G7 zRoBj#NmcoV(^&EpAHG0&S^uwyU^ zZHzLVLw6m^`*{b{iZ4tJw%_nBI=}dw#<9Vn(c)L*?*$5(4$KI1z+j|KVsy8YE_6Y$ zoEIpY(;}V}TVcT{y)2ts|Mxjy1(*r>Vu$1C%G%*aR? z+&c$RPLr#A+w6B=Es>o6Q&E+=Hmp{rg6l;_1zgp#vny;a-$x+lr!G8>L0wXd$u!~Y zOs8Q3yB{d79_hpbF$AHbZ*SW1T&0v_d6l=@z2Piu$m0a8wQ@O%7N?L)g+ik`-u-lz zXk;UgHX70AS!{i9mKCywSBVU_u0Jfd3NMz=DT|iGz)>WTG3r6$0*NW+Q)^*HSyR<| zCqz@AEdRTgzM4B{pOvEAw4)^6r^f{rM6aUz5g4lIFg-FrKl?+N{b>tDd{!s?9`kgs z7(L>RwpbbXt?EEm0C6h3G$-`hH+rFrBl1yz|0_yjlKyJQA7=%re^^s}BIGlvwE(=7 ztPH`A7nJ6MM>|BsMYXhNdLPvFCY_6mQzCGHp1u%vtdNXYl$AC9(Oq7ejnU2*D!94= z_qC`QB7JPqPbIOw-p(JJ`7Aan{FQ(bsBKuojYzZwD@e3e=W9P6XBSEKnaR);LstV3 zEZbs^_OK@RQ#ZnMiP50ZU{aFG7}(s4X)Y6q@~`(le*s?WTQpuTkPJ19kA zL`AK{J{IYHF{nG`9_<+N54TVPwK0Tv?w%oFNO_(UHYb7pJMts@QA&+=bh}M0vwc_? zS*cy%C)VNR#~P{z8V$f~fU2si9x~_=p@`4D-|N+#^VrECxR3AZe4vCj&WP zJcIe8b&bQWLj0Mr3?@XlB>Fa_g=;CMbQ|7nYaeL^3fD~Izz2k$%(NMhsrOkJol~SY z2+FS4{kt1y;pY@)2sCd z<)ndR7bI92$4EneT;WPhj6g4pK5x+ezQ2tOJl_nAeA$auP~#C;nE?Q@tHnK0YUEvu zCjGetgHlTJ!Xj{3mO{YQ(lxL<+HhNX9@+)9DB(c5N;C9E;tdHCLQ4ypkzPx9&0sY^ zm!!C;168-Jm|0_Ln`o~xmR7#1w`r14TG0$r!c-Qa-jKc!u%T*=wf<}=1lY}_I+pkN zHst#J#;sbVrt?Vpju@ZS9s>c0Hb{NZnN3ESXEDVTrqifCBXU$iT&>SN>%*+R z(NNE%dc|}n_>e-(wju9@%p)K>g_C2$t}ucpEHKB~_k{88N*;f@{3_ogq~-G4v_m?~ zwi^tKj%s=>@V8A;mc6t*YHnMPL4;iJWBmMS5nmNgo}e1pE?XJ@9m*|Xi=>JTykCOz zl`cv#4h@K3g^Oz+)hc;VvTjHRZZA2nXsT`t0Z%i5(J4YF+yLcs_bo=YZ23Uz7ZFA) z2J3vG3VQ8gEntLtam_BGaDJkyX_gl7n|?aBAeAu2=eByE!>0}wYy54EM4M{I0&3OP z9DdnQ(z%K@(G%(btcC4sWO6)uglSKi`Ei!Lk@{CJR`IQzSI{l>1zdNL;+l4R_(IWv z(dY4&sHNRs7T2dUhW!pL>9bzI%GJT5kz{%oo43r^FG+IJngFn>$jO-)?zD^k;vK3W z*chY;P4)lod7RC^z!F)WrXKc%tm04Pa_CTNJk<+SyJ!Q~pW?TQo$;+w)1kyR z_xxp@ZJ`y=e3-`@Ns6>dB*9r3whc2aozgpvi|(0A)QX=tMjXLS{s?)FFvvS%g)bsl zDw<#6=sY%2t8YewM?Fbae1oj(R$**W-Q1^)|9mjwj%*^6Fb^$N$X9g{0>8!*QpUI= zFfg3%G}gF*TQfU*!gDDs*zaBK^wm@A!Yq>vU#F6*7SC3b+XzWZIY4(lh)j69d82g4 z`d#`WI(7Nx?DFub>J_d0_$9qdkQ@=sgDQXr%L*v`{dGPR!r{zGz-00x%)>RVQ9Y!v z!NpOn>RoZ*S-C+~;fst=u)nchq3JvzL4`Ofa^=v*y;V^uF ziPtmruLAfM_M75JFL5@_EjQ|uoYr)2t2V5UCzyJ9cSl??LQaJLybKICCj@)f-I!MR! zHxABw#yd*fT;6uvqWk3Pa$VK9lBsp1E2mEq(1A#6U2h+~jSEuY9M?qj6&Bu7?Z;3p>bBi+|+N+Ii&%y}(*l!5c5xc^ZrU@H17*FMRI2l937yy`^CPrOVx>x^|X& zY|8(i#~y5?IrilBGZfu+tZo zhxI%|&S!DHm!nUxoG97w{jWJ&{p&!&_yrl8y}mjPO~KYSH`}@&v454t^L*J}772Yl zq4f9%)dM}f<_0y7t7kDW>$WeE;SUA`stKjAfOO6R;Y#bWz+Mz?jmC&q^81OEE}kdH zL5+1~zVi$^b*w(RRA;1ASJk~^mj)%!KC}k0ZwZ*hWUMO9W?;n**XAl~{wgYygtA@za}BD8RpGwf?QE=6=(o0%$6&8y9ZSKrpxiYpU+MC^1YwQ31&bNgb4Y3B@dOmI?hi2v=$M)c{5ZJ zG#QsW8r$qNnnO-UF>Z8_yF1;o(@QFhyke3qkztI-nWrJNTzs0G`f2*i0UpIv0|*?? zK(QK(4m2_oHD>*+HdMIEfe(Mbh*KHSPkyq(q7R4mcRg?vDu#bD-9Yzl&fy|@;$1xm z6tf3eS8Yz#=CE|vsH$l{eyfZ+hqa;MoIOM??FqVgK2huVH~}Gn?-M)xDoipKY%CRkF*HfO!CbXd%s^1 zedOO?!Sg>{UV)KV^F*Y!tB}s)%qVsub!_lv&0(5OY8~2@e~Cz$9YxOk+8VFcTpK{J zh(;=kZyV<29F>EIa^Qs4I8Rls`(3=60RXHahTW*Ts?$J#6k6?9=xnbM+z8+_#^I_3 z@jnGA9{0VFjy1tSAXZo75nq_1aQxU+zW|*2&1{SLRKXSZxQ3auVNy*?iR+oNcw2rm z$`sD;`WJ;5^w7|)=EkrVoCTj)3vGkMKPNhM*Qz!nnBQ-%@a}rF41N|UTVMuCWsLnw z+{~=r-HZ|!W4fZ7*!9hYjbqewkDa&mkRGp){~ahfOf*7xuq-BovmqT3wK^HF*OV(;o- zKIT2HDqO3&_~8gJXjQD(V*rqPIbMGf6il3bA?~x!<4sr*LxkO7MA`6s?u7^{OUz!n zIaUxV4t>*`M0o*+`AURa?!3AkopVhGxcDeGL8*<32Xreg=JxJ|%G03^QONWj5AQUV zZ{G&Sip+=l&Wez~ud3@{l*1%c!L&+u6jOoAf4%_nJ}X9=QAqSZ9!aQ_Zw{EsPo%{u zmn&SNL4AT|<1}K(wb3-^W{=uC{K`pL8X3`=o(V|iA%rgm3A-*7A-o&32{ISX(59Hi zQ=(lS0^L@00RTShv6g}~xP#z+rVv7YrJj&H&&#?Z5Ck0sVl+OCT7eh-X$lHwAs^4{}rQ!Yr z2aO)rYqrP}^Af~4qh{W=LDVZwIcu^lCpLQ$rN@rT>8NRK0)YE@7AX;a0AV+Ud2~G0 zn~hs9P?9V43lITsVe2iUpyC~BX;rH<61v!6aoHr#$gIH&N^6zmM7OiyFG3{oS2tIJ zx~vAL*dH&K7Tr_>rLF*CH*ewz-AY;UH^E8zRWH)_(lPJ1w3b;T&Y;zbGKYrpR0U~% z_)+7=VBLunK(eLz&dk-Za1s77?<3=OI?`px-=r1Q?aV-Bq<9Y(Vv0* z{b7|!oY34SD;Gf-l9M7kO*8H4z|#@wqI;dIeN?MGF0uj$eD=Q7#@DQ0Y!CStQLblz zeaQ%h<>ai6tGII7S(nt-DG_#&@!z|~`#6trw*;BN=Tgl!!dgrx!s&CxT+nJ*QNf3M z1$(QjU2%p$^c=!y${C%2$yvdvo^roF_s z-_2+vgMDfXJHU{F7kyzcfa5X@?D>UbNe{{whhKdZ@}40UQ6MF|@??2? z`hCNHJhagsT?*X}5%^uUj9cdT>t?Yb6p+F+ZW$kZD5!jI!pmsjJkt}K!Fe730X8J& zq3CN8H~KHqnV|EKk=IqH4eokP^->b`h fbAZJE3I4xut>mRZ!TyT_^q=1IpDfM#U$g%QM?Y5? From e0bed07b3a1d7e452a8fc625c515a8d0d361063f Mon Sep 17 00:00:00 2001 From: Yavuz Date: Fri, 30 Jul 2021 16:52:37 +0300 Subject: [PATCH 6/9] The recommendations have been updated. --- datasets/ttc4900/dataset_infos.json | 2 +- datasets/ttc4900/ttc4900.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datasets/ttc4900/dataset_infos.json b/datasets/ttc4900/dataset_infos.json index 1fb086f636c..263f6f370e5 100644 --- a/datasets/ttc4900/dataset_infos.json +++ b/datasets/ttc4900/dataset_infos.json @@ -1 +1 @@ -{"ttc4900": {"description": "The data set is taken from kemik group\nhttp://www.kemik.yildiz.edu.tr/\nThe data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth.\nWe named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551\n", "citation": "", "homepage": "https://www.kaggle.com/savasy/ttc4900", "license": "CC0: Public Domain", "features": {"category": {"num_classes": 7, "names": ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"], "names_file": null, "id": null, "_type": "ClassLabel"}, "text": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "category", "labels": ["dunya", "ekonomi", "kultur", "saglik", "siyaset", "spor", "teknoloji"]}], "builder_name": "tt_c4900", "config_name": "ttc4900", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10640831, "num_examples": 4900, "dataset_name": "tt_c4900"}}, "download_checksums": {"https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/7allV03.csv": {"num_bytes": 10627541, "checksum": "e17b79e89a3679ed77b3d5fd6d855fca43e9986a714cd4927c646c2be692c23e"}}, "download_size": 10627541, "post_processing_size": null, "dataset_size": 10640831, "size_in_bytes": 21268372}} \ No newline at end of file +{"ttc4900": {"description": "The data set is taken from kemik group\nhttp://www.kemik.yildiz.edu.tr/\nThe data are pre-processed for the text categorization, collocations are found, character set is corrected, and so forth.\nWe named TTC4900 by mimicking the name convention of TTC 3600 dataset shared by the study http://journals.sagepub.com/doi/abs/10.1177/0165551515620551\n\nIf you use the dataset in a paper, please refer https://www.kaggle.com/savasy/ttc4900 as footnote and cite one of the papers as follows:\n\n- A Comparison of Different Approaches to Document Representation in Turkish Language, SDU Journal of Natural and Applied Science, Vol 22, Issue 2, 2018\n- A comparative analysis of text classification for Turkish language, Pamukkale University Journal of Engineering Science Volume 25 Issue 5, 2018\n- A Knowledge-poor Approach to Turkish Text Categorization with a Comparative Analysis, Proceedings of CICLING 2014, Springer LNCS, Nepal, 2014.\n", "citation": "@article{doi:10.5505/pajes.2018.15931,\nauthor = {Y\u0131ld\u0131r\u0131m, Sava\u015f and Y\u0131ld\u0131z, Tu\u011fba},\ntitle = {A comparative analysis of text classification for Turkish language},\njournal = {Pamukkale Univ Muh Bilim Derg},\nvolume = {24},\nnumber = {5},\npages = {879-886},\nyear = {2018},\ndoi = {10.5505/pajes.2018.15931},\nnote ={doi: 10.5505/pajes.2018.15931},\n\nURL = {https://dx.doi.org/10.5505/pajes.2018.15931},\neprint = {https://dx.doi.org/10.5505/pajes.2018.15931}\n}\n", "homepage": "https://www.kaggle.com/savasy/ttc4900", "license": "CC0: Public Domain", "features": {"category": {"num_classes": 7, "names": ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"], "names_file": null, "id": null, "_type": "ClassLabel"}, "text": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "category", "labels": ["dunya", "ekonomi", "kultur", "saglik", "siyaset", "spor", "teknoloji"]}], "builder_name": "ttc4900", "config_name": "ttc4900", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10640831, "num_examples": 4900, "dataset_name": "ttc4900"}}, "download_checksums": {"https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/7allV03.csv": {"num_bytes": 10627541, "checksum": "e17b79e89a3679ed77b3d5fd6d855fca43e9986a714cd4927c646c2be692c23e"}}, "download_size": 10627541, "post_processing_size": null, "dataset_size": 10640831, "size_in_bytes": 21268372}} \ No newline at end of file diff --git a/datasets/ttc4900/ttc4900.py b/datasets/ttc4900/ttc4900.py index b9b2e9578e3..3cdd62d8b5e 100644 --- a/datasets/ttc4900/ttc4900.py +++ b/datasets/ttc4900/ttc4900.py @@ -58,7 +58,7 @@ _LICENSE = "CC0: Public Domain" _HOMEPAGE = "https://www.kaggle.com/savasy/ttc4900" -_DOWNLOAD_URL = "https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/" +_DOWNLOAD_URL = "https://raw.githubusercontent.com/savasy/TurkishTextClassification/master" _FILENAME = "7allV03.csv" @@ -109,7 +109,7 @@ def _split_generators(self, dl_manager): """Returns SplitGenerators.""" urls_to_download = { - "train": os.path.join(_DOWNLOAD_URL, _FILENAME), + "train": _DOWNLOAD_URL + "/" + _FILENAME, } downloaded_files = dl_manager.download(urls_to_download) return [ From 9e84d3d0fc8fdf67b31aa4b64ebf1dc9adf3c21e Mon Sep 17 00:00:00 2001 From: Yavuz Date: Fri, 30 Jul 2021 16:52:56 +0300 Subject: [PATCH 7/9] updated dummy data --- datasets/dummy/ttc4900/1.0.0/dummy_data.zip | Bin 6749 -> 0 bytes .../ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip | Bin 0 -> 13855 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 datasets/dummy/ttc4900/1.0.0/dummy_data.zip create mode 100644 datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip diff --git a/datasets/dummy/ttc4900/1.0.0/dummy_data.zip b/datasets/dummy/ttc4900/1.0.0/dummy_data.zip deleted file mode 100644 index 63b2610ca4bfdb111250927b6699b799b6bf00f3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6749 zcmZ{pRZtv^l7(>z?moB$4KhG*2oM4cu7eC7+&y@(;BEtiAi*_A2$C?k26q_T9R`Q} z?@HC)s@?9Y?tVDu_4LE1p@ND=iuA9N`S`5$pT~a>oPV~Jmy?s9nU%SxIiJRV3$gxF zDBWp*?nH%zbnp%diTvM%BIb^cdi;XCmLA?KVC4)NW$)!fQnwIO9WVW+;wiWoVLrCL zs_z>(XWgV)E6LolYp+QP#e}kz&C4$(Gy%hvrsFwOWd=Bm{%DDQgLvomZQP;R&h6S| zmzo`(Hng%!C%Ce0x<=lt1gPg?_F(_Tr9fMMMZQdX#>7Rp;QnCFtfCNaO**3UZC4<8 zty5UHF-}E1>iQwGqE>ze_vs>ht*cL}c}DE3hA9+nWd;gHxE>F|{7O{Lm zTnVP!?TN4Rtzvt&=`Q;8mY!cnC01eg)U5B+Y>^lKa6o-{8@6WtBpg?`Uvyp?HZa2OmU7_{6&U+^WGz#XQwCcSQlh3Js*F_Y7( zUF(`YfA}t{x8KkHN;tlit5r|BeOXP z{k0_7r%{4A=G8B`$XjzBX2RiM_A|^grzFZll^=JY7`EWwf{Gn#vGtQPRoEDXK#gcQ zGr6LiGoCx*?dp=KJo8-3c)eQ6`{ugW?SYZgUBP#M(7#k`rl_QUJz~lzJuoKTXwM^@uDVKiny4 zT5bW&sOGwlH0>XTXmb4QRB3#&Na@!Poe}iWs+K=Og z{xC}JEVqO;cOiQqI*L2rhK}1BCAemEp7Q2^8AVGw;uoCMI|wu9jATWhBHJ?^P%!J3 z$5Hy`GWm$g9fOt=rarUoV0@CdSNPq$xR?2$b+sCWra7zMyW^q`M+Pb;TdTW8-d zcWGIzsAl0{1bsYbQoxW>E|Cgj5S}=wwTrBJIIH`s1&k;q`>_imEN7_ZAmt4cpJ#B4 zg(&n)iEOvL!o0nujhoP@Q=?{|+`y4++BkLXU!+Utpf@)-Q@Z2YVvL+C7!r{1@gp5m zd0DQWRZ4Vsc%H1MLk_8vz*U+d7VSeyKI4VxJ2U83{M<6Iks`MkdN>(^ zabTQO+=AM%Mn!9A>Rcw0zOGzJMQdb0``CxnAw=$N3xJZH)w)wDb>7qDldmgTMik@& zK_fLOsZD?yCf@ zRPrmk4_Kmtn<^>~&WjLP$fl0E^Cwf#Dae=NWShP{uoqRR-pKy>8T(IDj-~heiDPL~ z^C*m~l!PF&13XuF3Rx>2vMx*7%^GrRPMBxS-cqfRIhvZV=2X9Vh}oVNWeJ=ED#l4uip^Zr zVT}?PGj!>CP<9NZu}wdwVLc^w5*SqXm91jbVp3 zL{$-Bke6VgNgLyVlRA^#ywSX5Cc;rjDL(H*|F9DN3LY&9{R531dCOo9J-c~%D0nF; zV>9_kuW2`Hk&_+r#`bH)O?gJ+xxdoaE-9;6X{qgjqObPbdNn3rltIZo3yomf#Fy^lR_90I36e54kr?s!FXC;F6Jy8yT&lZ@FB}y z472zUdaJRKy?s4P5Po#F8i`d(^nh9$-A(wC=3@qbt-dbGsphT}7Wfd?p{Bb=A23>$2;rX+7%~~xNwT>dAyqxbcv`+tg z^~Gr_9l04PGMg&e+}^>Tkk#z=u8H3g{>fHq2mdUE5jpP&8n|Nq2e5IQR z`D$NKfvrnAWejMil5;BMZr=qxn&#)kEqs@kB<|l9qugQ3am}PF#-`gt*e8`f{xP5W zkeICrz0=4|Vv%?~9Jc@1)pFHn4|kNR9d=RVvf%Z$a;M~*w& zEyOFwet`p2ksOueZ-9|dz0fEGt~U!#V;m<5RySlDXZF{7XXKJ?W^|q!VM<{uVh67` z6Hh2o6(%v_2Pm1X?5MV?1Qh3=d1!N3SgF;1i#L%Shk--;uJ-d6-*F%Y$+r0L*DfkhiHK~^+P zZ8z|i);V(b>mF2<%Nf)EO>70(;pU)9bsX=4+15F~1y9cKd_^?<#hVW~rs_B*3l1UK z_06x_kvJ{vUAs8$bvJ(-j7_IXhA#yhe5dndEcfQ zkIA+pYi;$DQ1ER&VLfnxxo4*FYum{+*D9p9S0lrnhsAjmZb20z;?ZC2hR93ML&1fl{jy6WV0<149By=%9l#@Zxzu)>?w1eaLmT<^RjrXYRUkA&8 zG>c`n4F^#{G^F&3OmjfReEkK@!+I0?z5(R*%vqLY*waJt3Z=PO_OXbnzO$V^%&pz`bgXORT5oZD&qShkyhe8BHoE6yq7c6c;shv{G>gY6%sR0;=EF2P zTEq>XGI(0Meq|jXExsu+k-r@B2cs8LO#|4RGn?E-5^BD=2UU(?Lq)D^fihDOgVT0p z5mrlK4SOW*nSSqTPg26zwYRHf;d#c2b!Ne}Iu{P!C`^a!zK!IhrxF6le9c}X6jon9 zc3|`PVutvby8bl@HyY@W>Hq8Pz`MF&YYh*#O$mHYkl#~DwM2|shDreN_-Jq4sLf(n zAu<5GZf`Ml6MX}#{&HXk^$J9WfcGURkUxpNCuXm$wl=C63ZXe6?=gpjHShohzLVJv z{?00Gb=#lq0-E~tePOP*vsG++9#s`=5t7ZRqC3g%x`1$yrpeohD|fz{*ZZ^d+7dTK zn<1vcSw5FdTyk3@^?cfNEvmlO{&$o#>$nyq^$MDCue)1wrPXUX^QuWf7kZ&vrrF~o zHAZIc2Tyj!Knvj0-9GT$RI>aIKVHJow^ktTLdzL0euc^7r88$Y7E;BeU(S?mTE*K& z7d5n~*E|kEzz49hlMsJ-$7CrR;;96Wz)yd+wZ3rOX)LiSoYj-Q)1eLqO&qt{5QF(Z zib~p&QoYZUW%e@wme@@cPuxi3t-eyj#!8*rF}9~PK zzh(70K8HdEX!^e|WCy9F>9S{)o8%?^JP;W3Q}Bw^zDW`$sJ>yF@xd6}e{?J^99HXD zrI^CJRC4EIOk-1u;gkRwzC0FV!T{Dt`dcKnwX1=YlGuZ<6FiIuogo-l?6z9KF+)&L zjFOaN*0jz)Y1!PQS2Rl~LYtc3+%yKftQk}yR1}*xSqbKV9ZGSfx%W!qnS)!|aWtxD z5}i^59-~23hOs@&JLHTQ-EarbGa93C=^(6ZiA!g;8NdXy{6~z0AQ(P)rtcS}MUrKI zkc&{q)3i$b?sALO4%aC|p#&FhlOks_h1_!0aw{-ae|fa<`<5Vj)p}xm&E`?Ms8sTw zeuqNomWYmlDjP)dyjAYek@)tl?bjm$&cQw;XUn7Sp1C))rGiKZ0f|6kIto6VjdJ~u z!MTDu!9WQQwq|K|iO%@!)=6`{IU%kxVy0J_j0sf-n!D(!r1oh(IA%?Q%j}fXarmc} zi}gVBZO;oAUn6itoeE0wXP~^o!0Rd0>UicfV|1Zhsad~kxeWDzNxmkMlz^3)A4sS_ zo2HTPru3%ve8i8$|754z^GSTt3UoQ|9CpkFalZ^Nw^-Ky@DrDHbZE=_;Os`5pyN5x z1!=moVr`oea|0z579TZwRnQa*kZ_9c%KT<+vAy5Knj5$38AFF`AJaG-d9KH*!Jvl7 zr@Pu2=4=>1?*YLP?Z#9Fs(URe=PzO5d#qfL;#B`V6H&3a{K?NN__9JG&+6LtFtWKQ z$J3jS*A0o@IqOD6l|O;By5`txUgP01znY8Ic3NK0Ak%uoWa+qprpa7xFq($h!WKhv znoiw`kK)m>JgKA0=H-T&N9p{M6DJ;GO_M_fg9H;kRC)=G_wdh8!T3+WI+Z(&QS6it z_&nFlJkp=81Y5`wzsDTa?Ut^wB|Oz*Y;P7vp#5I5l6gsXI2w2=xZrodyQBERRMp0F zImy3~BP8u9Bdlh#G!}GRcADxhRS0b<_Hf?asnSmsa0O+E1|G*hH~%tfu3NolBhThE zQ~Z`|twwyl{^*zGbt5{(pL@BYYzMpI_Q{c{`wVy1R+k!k_0ddmu28L?6oqD0t&3Z1 z{O;lO#o3agdeZ7V(}`8EfdQ|c{60vov{>*i?zZ7F7VW{qkh%F%3 zd-^t{$z+@^D%tm?6SyjH$5D#MG${p%LaWcyW*;Cu@ z8jcU+`Xxm_p9FcPw4e0~mSumd;;4re#`8k6!J4Y{BaEjIHd~0}Kp8uBIq$iBcP<2A+kNak-p z83L5kX)a5b9r&f*LYH?$)9dB_ZE{d~0iky7s@i->@Agw1#JWNtvJ7$65ID>123^P~ z0SO%xjDG7)E{=C1oEy&|`Qpw*{JY9N_iwDgm;5)PxiZuQB9QoqbX5+>{&!pvjF+%& zhq6G>lO;0et+#B`Up3=7EINq~stZ2$JlT4+#K|P;emS?M_0HqsPt?7L!E5=aTqfGz z0c?;9jB4qCu$H^{nYYU}HGp$_9Wi|~pP9pVcOpQH&71}WN4da>#}x&|rd@=4P=&0& z^B`^CWWEsXVRnRJf@O!+W_KktqBikJ%F_+tt(TlgN6~=d9q>`VX~sC|EDO(oWHYuW!pM{MLR17@j3uvotH#v0-4WcPQKF&S3Yy6OHnXqATdW zi2EyN6=cD`=iNkg@mql2o;FayD@h>0)FfqMW6%5(_kq@>0#5#OgXf&} zvXSu3t(GF6PD?6=m^anv)uAnCavG|{91bPT zfH^LKIm5UnKT&VVeBeDg7SAFeDdyez{Y?rCwC|AFLsxH2NiXv@-4}dP2g-eIDoh4? zE$hd8k2i5bcYHGpJ7X6mG7GT;`jSuXV$&g+(s^I5MTD)cb(;8&CG9M`Pt}#Eshqr* zrArj3<$*O!pe5?ic%#+(-~sk->NNy1=%0k~DrNYBi33QpUCaSf{Q7*<lL%G^GcqGR~vtOqZluJ%lk&E|3(@Alp>`` zh0NG9xMeZ}cyFt$SWrnJog-w$nNc$(4IH164h-+0X%p^JmtPr0@TY3;a1~v2 zmQC!QFCaJb9Op}Oi1g9x$If>xG}MhPbEeG#iSbM&{dPAtp&fj=b5b7$wVJ98^+30t z0#ly}62NTCbFZ(v)x51Jvsq{`l~<#zxs_YI_rfNc8k^*GSPSzW;0Q%3Dm1;|HAae# zW{SEy{Li{r3lY*mgU@NE-$u~|1t82*E6ze`$R(k?^L_=KP6*6hiKz^tAnMusyT(ad z1OP!+IflOq*7i-9d+QgWRy2}7u>H~S2g|jiKopMj0qv)bGJLDkDCfqoC+2TjP7Rsi zBZM-zqEKEGO9hGNsXhis$}ju26hW?ruEV{>STdtxaH&}P=P87*{kny~OTk55qYiB67VRccX0&v z$K~EA-*X&_8Sl3F$uo`dSKS diff --git a/datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip b/datasets/ttc4900/dummy/ttc4900/1.0.0/dummy_data.zip new file mode 100644 index 0000000000000000000000000000000000000000..01768b59f5ca86b59a0f944faadd4b1ac42a7b8d GIT binary patch literal 13855 zcmb8WQ>-vBw`jX;+qP}%TefZ6wr$(CZQHhO{|yA0004mYe}dQztgTfUnCXn1T>qPi@E?}Q{~G@v=8jKSDz-#o z$*vwEakMH>l0@Tp-GBoD5n!GB!7wl@poL$7nba94we^IuWTCS0&$#czW{>IV0U*J~ z%4PNir702c?dj>O?+BsvN&|jsUg>3Dsb91Mq!BABKkA$)c2>z<Wo7Al zSRdcoqZY-7$PT>0Mo7u`?}(duWkEHUwMTiR6GmxxjPXMy#zT$*-pCT1KEoe2a$q&El4v6F1e*&v}y>P?@ z8)_~}E)M0Oo21z$P9KxXIztk8kV?p*hl;2`_D?@jF}M#CZFOsw;W{eG4?x8ivW*ig zet$IKTQZL)ou5}uLJweejmvLykIe+HczGN6ph{zRdXtx5XOU- zc_CO=hZk9Mk9(^mdSh#1ffP0Wt#d-q-{Ti(B5*e(;)`LB0Fv4#3(GCy4}p#$eWv%{ zkCI6Fhbs|=51|*`8)IHOMlvK9=gf|4r2h6~QgI5hQ|hLQh06ygdsFF!I!1ipzQpVW zUGp0e4{7>^%)>deK<8mw*nXNfCIbv8Ay~~>ai&xnv zKcLdp)su6oX>&kK93)oHNvweb<^bHL_5KdqW};WQEv%^>VZ1e3h?9Y5WK{{`K^T(= zs1&qoz2?*gwXd(OkClLhNc-5jlQal)Hh>+Uy+x>B#}gr#Y6%Q&1)#Fu@KU^UheNL2 zD;FNt#2A(TCOvHM43M7(FI5oV?Bk5IuO2u{5)w&>cTLKplVuOQaYxgU7sN@VAKIWp z8%(>L8t$E!Al@`jzb~i3r08`06{WruSP^ZP&xXp;6Up31&P@Zy^hlQF zqpdOl$pmZrm(4s51C$t*SD3;-b#N6SqjSAn1b-UOaHjvcMzK`=2JPBVmm~90EB*NjG?+ z`W@GUy`5S*9+F}w)k^rIhWoBPUI$q)^~LH(@MwI->?_NAun_FqW;qzjFeEcnO0!IL zNThhc7}TnG5>3y|Bm;COhnNBf^0O51XKhx#q<%|v6UcCDxuZYD?O7I4pl7ktFJ zz`x86f^Fbl1rLJC0&I)*Duno~V2(z=*x|DJ#3(4oSly2PRUubI^4kegL@!`2%9B&? zI(b=2fj~DGqlE`-qA^w-72zXb;qfn_*scDS<36fNP?~h3mC#sZ_6~a(D(7K3v@Z~t zx6Rnl3g#PdDh_eXgti(?m}k6izI^p_B7ygb1!L%RyLt{S2$%nCeuDVc{P0uM#PjwdAv8!CA^&PZF{0_ z<#a(t<*TbiwcxbXZga=*nd9^gs^ALQTWp;z8mexF6r2=i1$$BK`efXo-Mfs&`}k{i zdRprm_75u=ASk8|hh2T)dR{}j88yweW|1x|S&?ARoV3Q8@M4}0=YOBtbN5QJ1kR77 zf!u|V2QF|6_{$>?NeI$-=?JVW!RWUntwEbJ^LY2r`b`7YNLfM_)uR zo_ytxwlCYsS1gsoP&Ow}B9syA5L>AU%mNDJLwwN4);gco$QIRa4@MLVP&*x|-sRHF6AZvt{*<~iqHzinP+y@F2 zaERR#YD%yx0_T(L|6a-{`0ZL>?w3u6M%9jEt0KtLxNAXLNi?KO$jbJHTiP5HeI`^%Gw!28`T>&P4DHk+$(US@wxuyx6uM@5XE{XBT7+UqaXtKnQ?Tv3^~*@& z@VS;K>>r;IFAD>D>_f^K8icko4k=xQH1Zo!HHaL|r5~jrO%EE;)frVTdW9Ak(gDwU z5pX#b0o{Bpp0xo|oD>QNXH?0^S9o^tBqs39xn|S|5s7sxZ5qywjUThMo4J>kw*a-H z^DP73-J9j9o1>nfId#L+qdYOP5FcXr!K8z(1Ph=YVTDY?YZP_E6+qVZ9rItQL-9Is zYRMhJ&a@_`^2O;Rd0^Dwl_vMbGGEv97r^g5(s`JS%9W-&;0+<;gZTxr+hNXv*7f2I zDMw5B57py8v3?}ZR5{m0BHKyeRT=#PSCN_O&TOAN$bK`}*MY0IV~bBcjAyD#=ZKYv zPaF{D)KZA!be4>GozPD!=E$0^=iOS@kuu=~VW8(36Ac{XZ)s!qw^QFvgZYKi;+bfU z_)e;fvFyIR$*G@6MK@=u@Wr6ND-SeTz1CTYezz8FWf6LoJO4qA)==<;@GAR@mmHSl zeHL0Ge$;gSu7@u$LRX;txKv)!Xi;vn%5{Y}JBEnyDs#_>Xxz(iu*`>|f2 zP0ItS0)}n!h7=5r?Mpyjs`1GWLJ#HEiy~lHenT`Ybh(Pawl80?dy&E78-=nBF06z( ze2rn0I?qG6s~TQ>k%1SwEo%jxPOBbr@Ew|gY_L0jlK>C{&Y{%RTTG&bP9l02>~-+Z zd74haV^BpG86pQ~e=4#x)Nx6v^pZk>{g7x%I|7F2MkV&a(xSP;0LGYQO9}Q@ z)254;hJ(NLL9KSl9vNT-h4Z|a5|0={T7G_W)4sdk&;WoR2 z@_9fQ5}KUFgAd_r2;}>{-2pS`dN?n$)J?fkmSaV;ZU)qfC}wt}ihWVx-UEj!$|N%P z;yA$Iu8;~IZXGO(F;uUQ&p>6k_$0e%W>Rq?Z02FPYuPQJg-ETw2ve`QwlgSrT1#zp z9^^(;3=Nf1{SJH627&yBC8qF=)%U=CVjwl#vZpxKdt-7w1?lg!h?;#0$*ML^1YB{a z57b9Vz(S?AIN2y_L6PCOeC;ZWp(`lxp6p^A$496-G4bo^Zy31Z$4wSHK7p!jaW#2O zA|XKTK+5PTMpk;d0<1?JfqWO}gK_3F@&gagvJ@vw-A&FU0#jIsHzXG<>=cK8WJKyr zp8(AD=s~G#sTQuuC?wf79bs@C7kpCP__V!sUGZse!#2s&tl@Dp820r1hxE`c+7b8? zaW^?CR3h+bIo`iMKkvne5Z-tI*+aQ&TF*CmFCh|Uj#x)h-T!df_M zj0|NNmY`-CqsoB($TsM^yOqk%EN$*Xji%;EJ1q^h4QJqn1$eX3hfzj*Xi)Ypb>!zQ&c9_ zxEN2+n0D~dtLYCl)Y+d!th)MIdc)OtQg8(#p@6vk8lHy> zu13O%SYwh5!W@p}Ul=@4h}vDt&NT84()b{B3d|ki5=vd3Na9|&1|2S%*T!kZ|!(voIfrp+Vv`M6K_ePYyAsN8) z_i^a9!JuQZZ29O?H=45`WU28HCIr>#w5|>H4^;C4W3k}x!ccrRH0?=NebhPkR>RGkW##l5Cv)b(THXaqh^0rSJOz50_x`Wc!f~H)JzP7oHT&Avghrd~B!I{Y0wIW(?K!_RHqslQ};u@k4l4tMij!E}R%w zG{4`r%`_gg>0sDjqGo(yZ25zCvQZ%%{DjZT>^fzx#cJ#`#MeQi9p`c za(v>%4G#wAl?XhWga@h8Ydg$7+`I}`>u|`x-kA9@~sVm*YZHPYKoxcyZ7%fn_!~ZW<^6-~-<^9}Fv4=fdld71m4Mr`Dwi7Szmi zKvdUJi1HAPeX>cjd6sJb_8C?S#tULQ2)x{)les?`fDw{jMJIJiNu6%LCsWXo!dUio zviHWNIXi()-3vv(ucA;j=P5CM1g1Z@nO|H-6{~;}223FKMdQF*A<4|#=8%W2l|i!` z)ens*%2vIwC zttzb2344;NG-<&}c(pYBxbrPV*Ef*z0m$i4?LuCQ z`y$tgX1ch>!#S#CCEBF2kYwS6-idu3!zyIwDHW=%Hmu)BRhQb3f%>cT zY)qljk3|V&TvF@iRD6${+U3%n+gi?g1-wSRjupsNEGncZe!nMqJGG4A4^NX_FVZ6! zr~a5>WP1ODTu2ufb{{Po12ldABZj7?JR>o z|Nh|dG#uO%^HIT!6K`VYmEo{DH49h%)mknDPazheT+DO1Mgr?6GmQDP#n|1%Bb|xk z!KVhO1w(KFh>C)0XW~lAu)nM}AUY;s0k^C>iR2NS!*e!BlJr``zNAYOP?qnP?PZ70 zp4bJus=Sp-`iI=ua#mzTyXw(^gYTMkt#TA|sWlG8L^59w%ua+<2i z8C0@A(JYJL8}Y=)qYBKP5YKcfRv%P^aDRwyD+7`_1}9Nv99E!F)lT^Wc~6_)BR{CB zo*1Ay=khgid&G~iw5H<TYj=DVT!F6XyQp1E9cRaQJHAlb1xivq~(jTvf)y4l2w2oSX+dY1RLvUtvyEaH+I zEkKhT(7adpSH4I1QObL`EK7G*i;tlY7MUAB<1UFLi0ktx1)qtKma9bN3CS7jln7#@ zts}k!xvx((Fxn=`j||=GDqVdb%YjWI|W?pkgzn6c-}^hMgt^Qg-K>%b}x?kmxzr>< zL+$R=Sf~<3CoGWb1Bw;qg!9S#zBCqjvPv(78Q4Z0!%+5m!>7g}APJQ%H8hDF?$Tb! zM+uTb^0tlpT2k~J)iPrZ#unsXDZG`<}*oLr@G8u9~jhk{DsryGX z%^m`{6XH`^PBVnHOH$!)y7_c4uq7pi!`#WmNdI#;rNlM8k4)H(03V8EhIGa$m4U%M z=`eMuMG z+te&z*zg6dlZcNf^`S*es)yOnQ|BMe<3i78?1~X^B`t>QF`AD4LO8^kk#*_hlZ*yo zbYp&2{9!YtmTlzpUz7V+ZufH@M0T~PhpMoA18vpPIk12vqBo932wO%>^=o$1VjS?J zjyM3}j%)hN`?QVhy$^V!bRc(5&Mb-jD`DIpz-`JZO{5r_Vh-hjepy*o$;9r> zu0eti+yK@wbr_&w5`O)vUZy<#70F%t|ns;8H+CrqTC z-%m?SMwFi>gk>;r8#csdmzsjBV4nOI~GW(x*^15(ZRg(?G@{0JX<={w0oQpZo0)1 zQd~yHlv70nCHhd!+b7dTtMB+|7bV`|DloB-X%(o#mK3xr;-A%hH5ns*&PP{^+Ahv^ zBbX`zk9oV6Mbe?5$;B4?Dx9h7od*;}o#-_+YJ85fJdi4azEG~A!HUC~1vCO_QJ;gj z>Ma_b4H%#L;8cV}@{D}dbCTB0a!VN4=c^yutBN#<$EZ!Ko7v%hg>k_Dv2geCVdt*{qu1o&l1Tw?LbI>(HUb!%OsbvV@Xu(g2Bmy=xl(u)gFMvoC>m?14^+&{YX^Hj_rD3xVlrr4 z=GVh?*o#`E?v|9$7T5PuE@u4cRIBIH2joL*tH2XL*A!GV6s$o{TC-I4K|>z-WG=Ozx)_L)g?LSL@|d z`@2%XiLP*CYTp-e(@Gvj3YCTRx%;T4>o)VIGhC|{<=SzJUn;US|KgislJ~{TRY?Nk zXGgI^JO|(Xt;{hN6wBMjZ1(N7;hvraj8-JwtIOsER)mM^{t4Ajj~cP42A=+^*VD<@ zknSKZO({-o|Ta1-` zWuy0*iG5NLsp5qWFmeHIR!|Y{Xpu6rIjJEjZvCu`@C?%g=u*Ks$EF;F@cQf z+G_4{${NNESaafEL-s)|v=lIbvEv=9%vXb5dBVmEB0MsszLN^@3yryf#c!V#oc zg)DJxa;^HNWmY1PDP^Df(x{B^nfAC16tmg^SOfA7_0whCRii|^g}H)CQB8ICmfrpX(gYjv1A6wC(oJlITg%8exQQK-2tTZXegjjt%^Lf zdHAlr&T$@ddbmaAlT2+qBf$t?3Nm>JaYe(b^}wd$Bj6=?8ZGzdOYXi_NWvDxaZQPL z9N;QH06#y~d!g9tBk3xOvDM%yd*0x|9-4%O%*ZZ<2g0x{oJwnlQHEtQ- zCNB~-|NYAA`zgxBCIR#d^n!vOpq1%LXM@XKha?8&b|?&?R+7vbFw$(eX$ehMOH!gq z{I*2$^KP{!#l{65X-l=B_o|Wa3;|8}wpD{=)wDm3Y?u-S5OQU^$U3~Jz&JZ?UV_VQ zfx|5(yHpFIUy1ZdsbICL)rcBw+v>-#LxKP94S~{c^dEBe3}m=czd>Nna;=sV3G!jh zF-C_8u^_r)rEg+isSMJCMImfaBlGAvo=I&*e2)XxJ1#&n0XIqkyvdce$k>!>#=|Mr zBsyv_BQc@@thzuKyy3`G%W1V?e;m}96)d)UK7yXOcc{2-=2+%oK&?#jL}~W1MD)bL zEAf>pF4(|-@mZ}r=_#3ZuU9Fwp%r@EL19-C!!RJjE}=A~#?z3~hD~wnG?!xms%Sn| zKU$vU6v=FZQEi?AGkhVtdo;QFXyqtq-{jz@lrZWrs7WLfwMT%^sYTtxZ7K!~8P)7` zYkn5%apg`<(L;ar{vm0}EqmpgV{h&jr&Z5J3`~5pPL0 z%S;yrXqm-)ffqmFN=fgJ)l3$>tY>D3FmVdEl^^Ff0hdtRKQ+W#jHCIWNB3q)a3%$E z)$eQVrkx7Kkp)wvVc7(%)mUy(`e0}yr`?xOqT1o* zO1N2P$Xl1xp}Et_$|Q`1fCa(t)gv?Y4#k5{BDDZTug8%WNFgZ)0jAX9l0C@j6M;Vv z4FtASMrzx1;gSpa_+&mswPUj=$r?oQheb~5Dn8_&9UhqyAdf0&yfT4v38&15@%oeQgU&}WiO&z8iR6%j3;d}G7 zRoBj#NmcoV(^&EpAHG0&S^uwyU^ zZHzLVLw6m^`*{b{iZ4tJw%_nBI=}dw#<9Vn(c)L*?*$5(4$KI1z+j|KVsy8YE_6Y$ zoEIpY(;}V}TVcT{y)2ts|Mxjy1(*r>Vu$1C%G%*aR? z+&c$RPLr#A+w6B=Es>o6Q&E+=Hmp{rg6l;_1zgp#vny;a-$x+lr!G8>L0wXd$u!~Y zOs8Q3yB{d79_hpbF$AHbZ*SW1T&0v_d6l=@z2Piu$m0a8wQ@O%7N?L)g+ik`-u-lz zXk;UgHX70AS!{i9mKCywSBVU_u0Jfd3NMz=DT|iGz)>WTG3r6$0*NW+Q)^*HSyR<| zCqz@AEdRTgzM4B{pOvEAw4)^6r^f{rM6aUz5g4lIFg-FrKl?+N{b>tDd{!s?9`kgs z7(L>RwpbbXt?EEm0C6h3G$-`hH+rFrBl1yz|0_yjlKyJQA7=%re^^s}BIGlvwE(=7 ztPH`A7nJ6MM>|BsMYXhNdLPvFCY_6mQzCGHp1u%vtdNXYl$AC9(Oq7ejnU2*D!94= z_qC`QB7JPqPbIOw-p(JJ`7Aan{FQ(bsBKuojYzZwD@e3e=W9P6XBSEKnaR);LstV3 zEZbs^_OK@RQ#ZnMiP50ZU{aFG7}(s4X)Y6q@~`(le*s?WTQpuTkPJ19kA zL`AK{J{IYHF{nG`9_<+N54TVPwK0Tv?w%oFNO_(UHYb7pJMts@QA&+=bh}M0vwc_? zS*cy%C)VNR#~P{z8V$f~fU2si9x~_=p@`4D-|N+#^VrECxR3AZe4vCj&WP zJcIe8b&bQWLj0Mr3?@XlB>Fa_g=;CMbQ|7nYaeL^3fD~Izz2k$%(NMhsrOkJol~SY z2+FS4{kt1y;pY@)2sCd z<)ndR7bI92$4EneT;WPhj6g4pK5x+ezQ2tOJl_nAeA$auP~#C;nE?Q@tHnK0YUEvu zCjGetgHlTJ!Xj{3mO{YQ(lxL<+HhNX9@+)9DB(c5N;C9E;tdHCLQ4ypkzPx9&0sY^ zm!!C;168-Jm|0_Ln`o~xmR7#1w`r14TG0$r!c-Qa-jKc!u%T*=wf<}=1lY}_I+pkN zHst#J#;sbVrt?Vpju@ZS9s>c0Hb{NZnN3ESXEDVTrqifCBXU$iT&>SN>%*+R z(NNE%dc|}n_>e-(wju9@%p)K>g_C2$t}ucpEHKB~_k{88N*;f@{3_ogq~-G4v_m?~ zwi^tKj%s=>@V8A;mc6t*YHnMPL4;iJWBmMS5nmNgo}e1pE?XJ@9m*|Xi=>JTykCOz zl`cv#4h@K3g^Oz+)hc;VvTjHRZZA2nXsT`t0Z%i5(J4YF+yLcs_bo=YZ23Uz7ZFA) z2J3vG3VQ8gEntLtam_BGaDJkyX_gl7n|?aBAeAu2=eByE!>0}wYy54EM4M{I0&3OP z9DdnQ(z%K@(G%(btcC4sWO6)uglSKi`Ei!Lk@{CJR`IQzSI{l>1zdNL;+l4R_(IWv z(dY4&sHNRs7T2dUhW!pL>9bzI%GJT5kz{%oo43r^FG+IJngFn>$jO-)?zD^k;vK3W z*chY;P4)lod7RC^z!F)WrXKc%tm04Pa_CTNJk<+SyJ!Q~pW?TQo$;+w)1kyR z_xxp@ZJ`y=e3-`@Ns6>dB*9r3whc2aozgpvi|(0A)QX=tMjXLS{s?)FFvvS%g)bsl zDw<#6=sY%2t8YewM?Fbae1oj(R$**W-Q1^)|9mjwj%*^6Fb^$N$X9g{0>8!*QpUI= zFfg3%G}gF*TQfU*!gDDs*zaBK^wm@A!Yq>vU#F6*7SC3b+XzWZIY4(lh)j69d82g4 z`d#`WI(7Nx?DFub>J_d0_$9qdkQ@=sgDQXr%L*v`{dGPR!r{zGz-00x%)>RVQ9Y!v z!NpOn>RoZ*S-C+~;fst=u)nchq3JvzL4`Ofa^=v*y;V^uF ziPtmruLAfM_M75JFL5@_EjQ|uoYr)2t2V5UCzyJ9cSl??LQaJLybKICCj@)f-I!MR! zHxABw#yd*fT;6uvqWk3Pa$VK9lBsp1E2mEq(1A#6U2h+~jSEuY9M?qj6&Bu7?Z;3p>bBi+|+N+Ii&%y}(*l!5c5xc^ZrU@H17*FMRI2l937yy`^CPrOVx>x^|X& zY|8(i#~y5?IrilBGZfu+tZo zhxI%|&S!DHm!nUxoG97w{jWJ&{p&!&_yrl8y}mjPO~KYSH`}@&v454t^L*J}772Yl zq4f9%)dM}f<_0y7t7kDW>$WeE;SUA`stKjAfOO6R;Y#bWz+Mz?jmC&q^81OEE}kdH zL5+1~zVi$^b*w(RRA;1ASJk~^mj)%!KC}k0ZwZ*hWUMO9W?;n**XAl~{wgYygtA@za}BD8RpGwf?QE=6=(o0%$6&8y9ZSKrpxiYpU+MC^1YwQ31&bNgb4Y3B@dOmI?hi2v=$M)c{5ZJ zG#QsW8r$qNnnO-UF>Z8_yF1;o(@QFhyke3qkztI-nWrJNTzs0G`f2*i0UpIv0|*?? zK(QK(4m2_oHD>*+HdMIEfe(Mbh*KHSPkyq(q7R4mcRg?vDu#bD-9Yzl&fy|@;$1xm z6tf3eS8Yz#=CE|vsH$l{eyfZ+hqa;MoIOM??FqVgK2huVH~}Gn?-M)xDoipKY%CRkF*HfO!CbXd%s^1 zedOO?!Sg>{UV)KV^F*Y!tB}s)%qVsub!_lv&0(5OY8~2@e~Cz$9YxOk+8VFcTpK{J zh(;=kZyV<29F>EIa^Qs4I8Rls`(3=60RXHahTW*Ts?$J#6k6?9=xnbM+z8+_#^I_3 z@jnGA9{0VFjy1tSAXZo75nq_1aQxU+zW|*2&1{SLRKXSZxQ3auVNy*?iR+oNcw2rm z$`sD;`WJ;5^w7|)=EkrVoCTj)3vGkMKPNhM*Qz!nnBQ-%@a}rF41N|UTVMuCWsLnw z+{~=r-HZ|!W4fZ7*!9hYjbqewkDa&mkRGp){~ahfOf*7xuq-BovmqT3wK^HF*OV(;o- zKIT2HDqO3&_~8gJXjQD(V*rqPIbMGf6il3bA?~x!<4sr*LxkO7MA`6s?u7^{OUz!n zIaUxV4t>*`M0o*+`AURa?!3AkopVhGxcDeGL8*<32Xreg=JxJ|%G03^QONWj5AQUV zZ{G&Sip+=l&Wez~ud3@{l*1%c!L&+u6jOoAf4%_nJ}X9=QAqSZ9!aQ_Zw{EsPo%{u zmn&SNL4AT|<1}K(wb3-^W{=uC{K`pL8X3`=o(V|iA%rgm3A-*7A-o&32{ISX(59Hi zQ=(lS0^L@00RTShv6g}~xP#z+rVv7YrJj&H&&#?Z5Ck0sVl+OCT7eh-X$lHwAs^4{}rQ!Yr z2aO)rYqrP}^Af~4qh{W=LDVZwIcu^lCpLQ$rN@rT>8NRK0)YE@7AX;a0AV+Ud2~G0 zn~hs9P?9V43lITsVe2iUpyC~BX;rH<61v!6aoHr#$gIH&N^6zmM7OiyFG3{oS2tIJ zx~vAL*dH&K7Tr_>rLF*CH*ewz-AY;UH^E8zRWH)_(lPJ1w3b;T&Y;zbGKYrpR0U~% z_)+7=VBLunK(eLz&dk-Za1s77?<3=OI?`px-=r1Q?aV-Bq<9Y(Vv0* z{b7|!oY34SD;Gf-l9M7kO*8H4z|#@wqI;dIeN?MGF0uj$eD=Q7#@DQ0Y!CStQLblz zeaQ%h<>ai6tGII7S(nt-DG_#&@!z|~`#6trw*;BN=Tgl!!dgrx!s&CxT+nJ*QNf3M z1$(QjU2%p$^c=!y${C%2$yvdvo^roF_s z-_2+vgMDfXJHU{F7kyzcfa5X@?D>UbNe{{whhKdZ@}40UQ6MF|@??2? z`hCNHJhagsT?*X}5%^uUj9cdT>t?Yb6p+F+ZW$kZD5!jI!pmsjJkt}K!Fe730X8J& zq3CN8H~KHqnV|EKk=IqH4eokP^->b`h fbAZJE3I4xut>mRZ!TyT_^q=1IpDfM#U$g%QM?Y5? literal 0 HcmV?d00001 From 0dd48bc4fa79e62916707bc2263acfc6d7d96b58 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Fri, 30 Jul 2021 17:52:24 +0200 Subject: [PATCH 8/9] Update ttc4900.py --- datasets/ttc4900/ttc4900.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/ttc4900/ttc4900.py b/datasets/ttc4900/ttc4900.py index 3cdd62d8b5e..d5a5762c199 100644 --- a/datasets/ttc4900/ttc4900.py +++ b/datasets/ttc4900/ttc4900.py @@ -17,7 +17,6 @@ import csv -import os import datasets from datasets.tasks import TextClassification From 84e460a5ed15e76832f30406777c14afcac16424 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Fri, 30 Jul 2021 17:56:10 +0200 Subject: [PATCH 9/9] Update README.md --- datasets/ttc4900/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets/ttc4900/README.md b/datasets/ttc4900/README.md index ffec8dd9da8..fdf1cc52cb3 100644 --- a/datasets/ttc4900/README.md +++ b/datasets/ttc4900/README.md @@ -4,7 +4,7 @@ annotations_creators: language_creators: - found languages: -- tr-TR +- tr licenses: - unknown multilinguality: @@ -175,4 +175,4 @@ The dataset was created by [Savaş Yıldırım](https://github.com/savasy) ### Contributions -Thanks to [@yavuzKomecoglu](https://github.com/yavuzKomecoglu) for adding this dataset. \ No newline at end of file +Thanks to [@yavuzKomecoglu](https://github.com/yavuzKomecoglu) for adding this dataset.