Content translation/analytics/queries
Pages created during the month in all languages (from the central DB)
edit- All CX-created pages in Spanish: http://quarry.wmflabs.org/query/3041
select count(*) as article_count
from cx_translations
where
(translation_status = 'published' or translation_target_url is not null) AND
translation_last_updated_timestamp between 20151000000000 and 20151099999999
order by article_count;
Pages created till the end of a month in all languages (from the central DB)
edit- All CX-created pages in Spanish: http://quarry.wmflabs.org/query/3041
- CX-created pages in Spanish 2015 January-March: http://quarry.wmflabs.org/query/3042
select count(*) as article_count
from cx_translations
where
(translation_status = 'published' or translation_target_url is not null) AND
translation_last_updated_timestamp < 20151100000000
order by article_count;
Number of new translators during a month
editselect translation_started_by, count(*) as article_count
from cx_translations
where (translation_status = 'published' or translation_target_url is not null) and
translation_start_timestamp < '20151100000000'
group by translation_started_by
order by article_count;
Number of translators who first published in a given month. The query gives result for end of a month. To get new translators in a given month, substract the number the query returns for the previous month.
Translator with the highest cumulative number of translations till the end of a month
editselect
translation_started_by,
count(translation_target_title)
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null) and
translation_last_updated_timestamp < 20151100000000
group by translation_started_by
order by count(translation_target_title) desc limit 2;
Language with the most translators
editselect
translation_target_language,
count(distinct(translation_started_by)) as translator_count
from
`cx_translations`
where
( translation_status = 'published' or translation_target_url is not null ) and
translation_last_updated_timestamp < 20151100000000
group by translation_target_language
order by translator_count desc limit 1;
Users having X translations - all time
editselect
translation_started_by,
count(translation_id) as article_count
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null)
group by translation_started_by having article_count = 1;
Users having a range of X translations
editselect
translation_started_by,
count(translation_id) as article_count
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null)
group by translation_started_by having article_count between 5 and 20;
Users having X translations - in a range of dates
editselect
translation_started_by,
count(translation_id) as article_count
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null) and
translation_start_timestamp like '201511%'
group by translation_started_by having article_count = 1;
Users having a range of X translations - in a range of dates
editselect
translation_started_by,
count(translation_id) as article_count
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null) and
translation_start_timestamp like '201511%'
group by translation_started_by having article_count between 5 and 20;
Total number of translators in a range of dates
editSELECT
translation_started_by,
COUNT(translation_id) AS article_count
FROM
`cx_translations`
WHERE
(translation_status = 'published' OR translation_target_url IS NOT NULL) AND
translation_last_updated_timestamp BETWEEN 20160100000000 AND 20160400000000
GROUP BY
translation_started_by having article_count > 0;
Language with the most translations
editselect
translation_target_language,
count(translation_target_title)
from
`cx_translations`
where
( translation_status = 'published' or translation_target_url is not null ) and
translation_last_updated_timestamp < 20151100000000
group by translation_target_language
order by count(translation_target_title) desc limit 1;
Pages created in the main namespace (from the wiki)
edit- All CX-created pages in Spanish: http://quarry.wmflabs.org/query/3041
- CX-created pages in Spanish 2015 January-March: http://quarry.wmflabs.org/query/3042
select
page_id,
page_namespace,
page_title,
max(ct_rev_id),
ct_params,
user_name,
rev_timestamp
from
`change_tag`,
`revision`,
`user`,
`page`
where
ct_tag = 'contenttranslation' and
page_namespace = 0 and
rev_id = ct_rev_id and
rev_page = page_id and
rev_user = user_id
GROUP BY page_id, rev_timestamp
ORDER BY null;
General page creations per day
editselect
substr(`timestamp`, 1, 8) as create_date, count(event_title)
from
log.PageCreation_7481635
where
webHost = 'ca.wikipedia.org'
and `timestamp` between 20140701000000 and 20140725000000
and event_namespace = 0
group by create_date
order by create_date;
Number of users who created pages
editselect articles.user_name, count(articles.user_name) as excitement from
( select
page_id,
page_namespace,
page_title,
max(ct_rev_id),
ct_params,
user_name,
rev_timestamp
from
`change_tag`,
`revision`,
`user`,
`page`
where
page_namespace = 0 and
ct_tag = 'contenttranslation' and
rev_id = ct_rev_id and
rev_page = page_id and
rev_user = user_id
GROUP BY page_id, rev_timestamp
ORDER BY null
) as articles
group by articles.user_name
order by excitement;
Number of users who created more than one page
editLive example: http://quarry.wmflabs.org/query/3033
select articles.user_name, count(articles.user_name) as excitement from
( select
page_id,
page_namespace,
page_title,
max(ct_rev_id),
ct_params,
user_name,
rev_timestamp
from
`change_tag`,
`revision`,
`user`,
`page`
where
page_namespace = 0 and
ct_tag = 'contenttranslation' and
rev_id = ct_rev_id and
rev_page = page_id and
rev_user = user_id
GROUP BY page_id, rev_timestamp
ORDER BY null
) as articles
group by articles.user_name
having excitement > 1
order by excitement;
Deleted pages
editSELECT
yearweek(ar_timestamp) month,
count(ar_page_id) deletions
FROM
`change_tag`,
`archive`
WHERE
ar_namespace = '0' AND
ct_tag = 'contenttranslation' AND
ar_rev_id = ct_rev_id
GROUP BY
yearweek(ar_timestamp);
Weekly deletion trend query: http://quarry.wmflabs.org/query/5023
The 5 most prolific translators to a language
editselect
translation_started_by,
count(translation_target_title)
from
`cx_translations`
where
(translation_status = 'published' or translation_target_url is not null) and
translation_target_language = 'he'
group by translation_started_by
order by count(translation_target_title) desc
limit 5;
The latest published translation by a given translator ID
editselect
translation_source_language,
translation_target_language,
translation_source_title,
translation_target_title
from cx_translations
where
(translation_status = 'published' or translation_target_url is not null) and
translation_started_by = 1156
order by translation_last_updated_timestamp desc
limit 1;
Language with the most translations
editselect
translation_target_language,
count(translation_target_title)
from
`cx_translations`
where
( translation_status = 'published' or translation_target_url is not null ) and
translation_last_updated_timestamp < 20150900000000
group by translation_target_language
order by count(translation_target_title) desc limit 1;
Number of translations in a language pair on a certain day
editAll:
select
count(*)
from
cx_translations
where
translation_source_language = 'en' and
translation_target_language = 'ru' and
translation_start_timestamp like '20151119%';
Published:
select
count(*)
from
cx_translations
where
(translation_status = 'published' or translation_target_url is not null) AND
translation_source_language = 'en' and
translation_target_language = 'ru' and
translation_start_timestamp like '20151119%';
Created pages (from the wiki)
editSELECT
page_title
FROM
`change_tag`,
`revision`,
`page`
WHERE
ct_tag = 'contenttranslation' AND
rev_id = ct_rev_id AND
page_id = rev_page
GROUP BY
page_title;
Count pages published in a range of dates
editSELECT
COUNT(*)
FROM
cx_translations
WHERE
( translation_last_updated_timestamp BETWEEN 20170313000000 AND 20170319999999 ) AND
( translation_status = 'published' OR translation_target_url IS NOT NULL );
EventLogging
editSaving failures
editSELECT
timestamp,
userAgent,
wiki,
event_sourceLanguage,
event_sourceTitle,
event_targetLanguage,
event_targetTitle,
event_token,
event_trace,
event_version
FROM
log.ContentTranslationError_11767097
WHERE
event_context = 'save-failure' and
timestamp like '20151221%'
GROUP BY
event_session
ORDER BY
timestamp;
Total
editSELECT
timestamp,
userAgent,
wiki,
event_sourceLanguage,
event_sourceTitle,
event_targetLanguage,
event_targetTitle,
event_token,
event_trace,
event_version
FROM
log.ContentTranslationError_11767097
WHERE
event_context = 'save-failure' and
timestamp like '20151221%'
ORDER BY
timestamp;
Restoring failures
editSELECT
timestamp,
userAgent,
wiki,
event_sourceLanguage,
event_sourceTitle,
event_targetLanguage,
event_targetTitle,
event_token,
event_trace,
event_version
FROM
log.ContentTranslationError_11767097
WHERE
event_context = 'restore-failure' and
timestamp like '20151221%'
GROUP BY
event_session
ORDER BY
timestamp;
Publishing failures
editSELECT
timestamp,
userAgent,
wiki,
event_sourceLanguage,
event_sourceTitle,
event_targetLanguage,
event_targetTitle,
event_token,
event_trace,
event_version
FROM
log.ContentTranslationError_11767097
WHERE
event_context = 'publish-failure' and
timestamp like '20151221%'
GROUP BY
event_session
ORDER BY
timestamp;
Suggestions
editAccepted suggestions
editSELECT
event_targetLanguage,
count(event_targetLanguage)
FROM
log.ContentTranslationCTA_11616099 cta
WHERE
cta.event_cta like 'suggestions%'
AND
cta.event_action = 'accept'
GROUP BY
event_targetLanguage;
Until a particular day
editSELECT
event_targetLanguage,
count(event_targetLanguage)
FROM
log.ContentTranslationCTA_11616099 cta
WHERE
cta.event_cta like 'suggestions%' AND
cta.timestamp < 20160425000000 -- write the appropriate timestamp here
AND
cta.event_action = 'accept'
GROUP BY
event_targetLanguage;
Daily
editSELECT
event_targetLanguage,
count(event_targetLanguage)
FROM
log.ContentTranslationCTA_11616099 cta
WHERE
cta.event_cta = 'suggestions'
AND
cta.event_action = 'accept'
GROUP BY
event_targetLanguage;
Weekly
editSELECT
event_targetLanguage, count(event_targetLanguage)
FROM
log.ContentTranslationCTA_11616099 cta
WHERE
cta.event_cta = 'suggestions'
AND
cta.event_action = 'accept' AND timestamp > '20151119000000'
AND
timestamp < '20151126000000'
GROUP BY
event_targetLanguage;
Campaign
editWhere CAMPAIGN value can be: newarticle, europeana2802016 etc
SELECT
count(uuid)
FROM
log.ContentTranslationCTA_11616099 cta
WHERE
timestamp > 20160201000000
AND
event_cta ='CAMPAIGN';